simdutf 7.3.5
Unicode at GB/s.
Loading...
Searching...
No Matches
implementation.h
1#ifndef SIMDUTF_IMPLEMENTATION_H
2#define SIMDUTF_IMPLEMENTATION_H
3#if !defined(SIMDUTF_NO_THREADS)
4 #include <atomic>
5#endif
6#include <string>
7#ifdef SIMDUTF_INTERNAL_TESTS
8 #include <vector>
9#endif
10#include "simdutf/common_defs.h"
11#include "simdutf/compiler_check.h"
12#include "simdutf/encoding_types.h"
13#include "simdutf/error.h"
14#include "simdutf/internal/isadetection.h"
15
16#if SIMDUTF_SPAN
17 #include <concepts>
18 #include <type_traits>
19 #include <span>
20 #include <tuple>
21#endif
22#if SIMDUTF_CPLUSPLUS17
23 #include <string_view>
24#endif
// The following defines are conditionally enabled/disabled during amalgamation.
// By default all features are enabled; regular code shouldn't check them. Only
// when user code really relies on a selected subset is it worth verifying these
// flags, like:
//
// #if !SIMDUTF_FEATURE_UTF16
// # error("Please amalgamate simdutf with UTF-16 support")
// #endif
//
34#define SIMDUTF_FEATURE_DETECT_ENCODING 1
35#define SIMDUTF_FEATURE_ASCII 1
36#define SIMDUTF_FEATURE_LATIN1 1
37#define SIMDUTF_FEATURE_UTF8 1
38#define SIMDUTF_FEATURE_UTF16 1
39#define SIMDUTF_FEATURE_UTF32 1
40#define SIMDUTF_FEATURE_BASE64 1
41
42namespace simdutf {
43
44#if SIMDUTF_SPAN
46namespace detail {
51template <typename T>
52concept byte_like = std::is_same_v<T, std::byte> || //
53 std::is_same_v<T, char> || //
54 std::is_same_v<T, signed char> || //
55 std::is_same_v<T, unsigned char>;
56
57template <typename T>
58concept is_byte_like = byte_like<std::remove_cvref_t<T>>;
59
60template <typename T>
61concept is_pointer = std::is_pointer_v<T>;
62
68template <typename T>
69concept input_span_of_byte_like = requires(const T &t) {
70 { t.size() } noexcept -> std::convertible_to<std::size_t>;
71 { t.data() } noexcept -> is_pointer;
72 { *t.data() } noexcept -> is_byte_like;
73};
74
75template <typename T>
76concept is_mutable = !std::is_const_v<std::remove_reference_t<T>>;
77
81template <typename T>
82concept output_span_of_byte_like = requires(T &t) {
83 { t.size() } noexcept -> std::convertible_to<std::size_t>;
84 { t.data() } noexcept -> is_pointer;
85 { *t.data() } noexcept -> is_byte_like;
86 { *t.data() } noexcept -> is_mutable;
87};
88} // namespace detail
89#endif
90
91#if SIMDUTF_FEATURE_DETECT_ENCODING
102simdutf_warn_unused simdutf::encoding_type
103autodetect_encoding(const char *input, size_t length) noexcept;
104simdutf_really_inline simdutf_warn_unused simdutf::encoding_type
105autodetect_encoding(const uint8_t *input, size_t length) noexcept {
106 return autodetect_encoding(reinterpret_cast<const char *>(input), length);
107}
108 #if SIMDUTF_SPAN
120simdutf_really_inline simdutf_warn_unused simdutf::encoding_type
121autodetect_encoding(
122 const detail::input_span_of_byte_like auto &input) noexcept {
123 return autodetect_encoding(reinterpret_cast<const char *>(input.data()),
124 input.size());
125}
126 #endif // SIMDUTF_SPAN
127
139simdutf_warn_unused int detect_encodings(const char *input,
140 size_t length) noexcept;
141simdutf_really_inline simdutf_warn_unused int
142detect_encodings(const uint8_t *input, size_t length) noexcept {
143 return detect_encodings(reinterpret_cast<const char *>(input), length);
144}
145 #if SIMDUTF_SPAN
146simdutf_really_inline simdutf_warn_unused int
147detect_encodings(const detail::input_span_of_byte_like auto &input) noexcept {
148 return detect_encodings(reinterpret_cast<const char *>(input.data()),
149 input.size());
150}
151 #endif // SIMDUTF_SPAN
152#endif // SIMDUTF_FEATURE_DETECT_ENCODING
153
154#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
166simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept;
167 #if SIMDUTF_SPAN
168simdutf_really_inline simdutf_warn_unused bool
169validate_utf8(const detail::input_span_of_byte_like auto &input) noexcept {
170 return validate_utf8(reinterpret_cast<const char *>(input.data()),
171 input.size());
172}
173 #endif // SIMDUTF_SPAN
174#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
175
176#if SIMDUTF_FEATURE_UTF8
189simdutf_warn_unused result validate_utf8_with_errors(const char *buf,
190 size_t len) noexcept;
191 #if SIMDUTF_SPAN
192simdutf_really_inline simdutf_warn_unused result validate_utf8_with_errors(
193 const detail::input_span_of_byte_like auto &input) noexcept {
194 return validate_utf8_with_errors(reinterpret_cast<const char *>(input.data()),
195 input.size());
196}
197 #endif // SIMDUTF_SPAN
198#endif // SIMDUTF_FEATURE_UTF8
199
200#if SIMDUTF_FEATURE_ASCII
210simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept;
211 #if SIMDUTF_SPAN
212simdutf_really_inline simdutf_warn_unused bool
213validate_ascii(const detail::input_span_of_byte_like auto &input) noexcept {
214 return validate_ascii(reinterpret_cast<const char *>(input.data()),
215 input.size());
216}
217 #endif // SIMDUTF_SPAN
218
232simdutf_warn_unused result validate_ascii_with_errors(const char *buf,
233 size_t len) noexcept;
234 #if SIMDUTF_SPAN
235simdutf_really_inline simdutf_warn_unused result validate_ascii_with_errors(
236 const detail::input_span_of_byte_like auto &input) noexcept {
237 return validate_ascii_with_errors(
238 reinterpret_cast<const char *>(input.data()), input.size());
239}
240 #endif // SIMDUTF_SPAN
241#endif // SIMDUTF_FEATURE_ASCII
242
243#if SIMDUTF_FEATURE_UTF16
258simdutf_warn_unused bool validate_utf16(const char16_t *buf,
259 size_t len) noexcept;
260 #if SIMDUTF_SPAN
261simdutf_really_inline simdutf_warn_unused bool
262validate_utf16(std::span<const char16_t> input) noexcept {
263 return validate_utf16(input.data(), input.size());
264}
265 #endif // SIMDUTF_SPAN
266#endif // SIMDUTF_FEATURE_UTF16
267
268#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
283simdutf_warn_unused bool validate_utf16le(const char16_t *buf,
284 size_t len) noexcept;
285 #if SIMDUTF_SPAN
286simdutf_really_inline simdutf_warn_unused bool
287validate_utf16le(std::span<const char16_t> input) noexcept {
288 return validate_utf16le(input.data(), input.size());
289}
290 #endif // SIMDUTF_SPAN
291#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
292
293#if SIMDUTF_FEATURE_UTF16
308simdutf_warn_unused bool validate_utf16be(const char16_t *buf,
309 size_t len) noexcept;
310 #if SIMDUTF_SPAN
311simdutf_really_inline simdutf_warn_unused bool
312validate_utf16be(std::span<const char16_t> input) noexcept {
313 return validate_utf16be(input.data(), input.size());
314}
315 #endif // SIMDUTF_SPAN
316
334simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf,
335 size_t len) noexcept;
336 #if SIMDUTF_SPAN
337simdutf_really_inline simdutf_warn_unused result
338validate_utf16_with_errors(std::span<const char16_t> input) noexcept {
339 return validate_utf16_with_errors(input.data(), input.size());
340}
341 #endif // SIMDUTF_SPAN
342
359simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf,
360 size_t len) noexcept;
361 #if SIMDUTF_SPAN
362simdutf_really_inline simdutf_warn_unused result
363validate_utf16le_with_errors(std::span<const char16_t> input) noexcept {
364 return validate_utf16le_with_errors(input.data(), input.size());
365}
366 #endif // SIMDUTF_SPAN
367
384simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf,
385 size_t len) noexcept;
386 #if SIMDUTF_SPAN
387simdutf_really_inline simdutf_warn_unused result
388validate_utf16be_with_errors(std::span<const char16_t> input) noexcept {
389 return validate_utf16be_with_errors(input.data(), input.size());
390}
391 #endif // SIMDUTF_SPAN
392
405void to_well_formed_utf16le(const char16_t *input, size_t len,
406 char16_t *output) noexcept;
407 #if SIMDUTF_SPAN
408simdutf_really_inline void
409to_well_formed_utf16le(std::span<const char16_t> input,
410 std::span<char16_t> output) noexcept {
411 to_well_formed_utf16le(input.data(), input.size(), output.data());
412}
413 #endif // SIMDUTF_SPAN
414
427void to_well_formed_utf16be(const char16_t *input, size_t len,
428 char16_t *output) noexcept;
429 #if SIMDUTF_SPAN
430simdutf_really_inline void
431to_well_formed_utf16be(std::span<const char16_t> input,
432 std::span<char16_t> output) noexcept {
433 to_well_formed_utf16be(input.data(), input.size(), output.data());
434}
435 #endif // SIMDUTF_SPAN
436
449void to_well_formed_utf16(const char16_t *input, size_t len,
450 char16_t *output) noexcept;
451 #if SIMDUTF_SPAN
452simdutf_really_inline void
453to_well_formed_utf16(std::span<const char16_t> input,
454 std::span<char16_t> output) noexcept {
455 to_well_formed_utf16(input.data(), input.size(), output.data());
456}
457 #endif // SIMDUTF_SPAN
458
459#endif // SIMDUTF_FEATURE_UTF16
460
461#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
476simdutf_warn_unused bool validate_utf32(const char32_t *buf,
477 size_t len) noexcept;
478 #if SIMDUTF_SPAN
479simdutf_really_inline simdutf_warn_unused bool
480validate_utf32(std::span<const char32_t> input) noexcept {
481 return validate_utf32(input.data(), input.size());
482}
483 #endif // SIMDUTF_SPAN
484#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
485
486#if SIMDUTF_FEATURE_UTF32
503simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf,
504 size_t len) noexcept;
505 #if SIMDUTF_SPAN
506simdutf_really_inline simdutf_warn_unused result
507validate_utf32_with_errors(std::span<const char32_t> input) noexcept {
508 return validate_utf32_with_errors(input.data(), input.size());
509}
510 #endif // SIMDUTF_SPAN
511#endif // SIMDUTF_FEATURE_UTF32
512
513#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
524simdutf_warn_unused size_t convert_latin1_to_utf8(const char *input,
525 size_t length,
526 char *utf8_output) noexcept;
527 #if SIMDUTF_SPAN
528simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf8(
529 const detail::input_span_of_byte_like auto &latin1_input,
530 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
531 return convert_latin1_to_utf8(
532 reinterpret_cast<const char *>(latin1_input.data()), latin1_input.size(),
533 utf8_output.data());
534}
535 #endif // SIMDUTF_SPAN
536
550simdutf_warn_unused size_t
551convert_latin1_to_utf8_safe(const char *input, size_t length, char *utf8_output,
552 size_t utf8_len) noexcept;
553 #if SIMDUTF_SPAN
554simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf8_safe(
555 const detail::input_span_of_byte_like auto &input,
556 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
557 // implementation note: outputspan is a forwarding ref to avoid copying and
558 // allow both lvalues and rvalues. std::span can be copied without problems,
559 // but std::vector should not, and this function should accept both. it will
560 // allow using an owning rvalue ref (example: passing a temporary std::string)
561 // as output, but the user will quickly find out that he has no way of getting
562 // the data out of the object in that case.
563 return convert_latin1_to_utf8_safe(
564 input.data(), input.size(), reinterpret_cast<char *>(utf8_output.data()),
565 utf8_output.size());
566}
567 #endif // SIMDUTF_SPAN
568#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
569
570#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
581simdutf_warn_unused size_t convert_latin1_to_utf16le(
582 const char *input, size_t length, char16_t *utf16_output) noexcept;
583 #if SIMDUTF_SPAN
584simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf16le(
585 const detail::input_span_of_byte_like auto &latin1_input,
586 std::span<char16_t> utf16_output) noexcept {
587 return convert_latin1_to_utf16le(
588 reinterpret_cast<const char *>(latin1_input.data()), latin1_input.size(),
589 utf16_output.data());
590}
591 #endif // SIMDUTF_SPAN
592
603simdutf_warn_unused size_t convert_latin1_to_utf16be(
604 const char *input, size_t length, char16_t *utf16_output) noexcept;
605 #if SIMDUTF_SPAN
606simdutf_really_inline simdutf_warn_unused size_t
607convert_latin1_to_utf16be(const detail::input_span_of_byte_like auto &input,
608 std::span<char16_t> output) noexcept {
609 return convert_latin1_to_utf16be(reinterpret_cast<const char *>(input.data()),
610 input.size(), output.data());
611}
612 #endif // SIMDUTF_SPAN
621simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept;
622
631simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) noexcept;
632#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
633
634#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
645simdutf_warn_unused size_t convert_latin1_to_utf32(
646 const char *input, size_t length, char32_t *utf32_buffer) noexcept;
647 #if SIMDUTF_SPAN
648simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf32(
649 const detail::input_span_of_byte_like auto &latin1_input,
650 std::span<char32_t> utf32_output) noexcept {
651 return convert_latin1_to_utf32(
652 reinterpret_cast<const char *>(latin1_input.data()), latin1_input.size(),
653 utf32_output.data());
654}
655 #endif // SIMDUTF_SPAN
656#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
657
658#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
671simdutf_warn_unused size_t convert_utf8_to_latin1(const char *input,
672 size_t length,
673 char *latin1_output) noexcept;
674 #if SIMDUTF_SPAN
675simdutf_really_inline simdutf_warn_unused size_t convert_utf8_to_latin1(
676 const detail::input_span_of_byte_like auto &input,
677 detail::output_span_of_byte_like auto &&output) noexcept {
678 return convert_utf8_to_latin1(reinterpret_cast<const char *>(input.data()),
679 input.size(),
680 reinterpret_cast<char *>(output.data()));
681}
682 #endif // SIMDUTF_SPAN
683#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
684
685#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
699simdutf_warn_unused size_t convert_utf8_to_utf16(
700 const char *input, size_t length, char16_t *utf16_output) noexcept;
701 #if SIMDUTF_SPAN
702simdutf_really_inline simdutf_warn_unused size_t
703convert_utf8_to_utf16(const detail::input_span_of_byte_like auto &input,
704 std::span<char16_t> output) noexcept {
705 return convert_utf8_to_utf16(reinterpret_cast<const char *>(input.data()),
706 input.size(), output.data());
707}
708 #endif // SIMDUTF_SPAN
709#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
710
711#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
720simdutf_warn_unused size_t convert_latin1_to_utf16(
721 const char *input, size_t length, char16_t *utf16_output) noexcept;
722 #if SIMDUTF_SPAN
723simdutf_really_inline simdutf_warn_unused size_t
724convert_latin1_to_utf16(const detail::input_span_of_byte_like auto &input,
725 std::span<char16_t> output) noexcept {
726 return convert_latin1_to_utf16(reinterpret_cast<const char *>(input.data()),
727 input.size(), output.data());
728}
729 #endif // SIMDUTF_SPAN
730#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
731
732#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
745simdutf_warn_unused size_t convert_utf8_to_utf16le(
746 const char *input, size_t length, char16_t *utf16_output) noexcept;
747 #if SIMDUTF_SPAN
748simdutf_really_inline simdutf_warn_unused size_t
749convert_utf8_to_utf16le(const detail::input_span_of_byte_like auto &utf8_input,
750 std::span<char16_t> utf16_output) noexcept {
751 return convert_utf8_to_utf16le(
752 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
753 utf16_output.data());
754}
755 #endif // SIMDUTF_SPAN
756
769simdutf_warn_unused size_t convert_utf8_to_utf16be(
770 const char *input, size_t length, char16_t *utf16_output) noexcept;
771 #if SIMDUTF_SPAN
772simdutf_really_inline simdutf_warn_unused size_t
773convert_utf8_to_utf16be(const detail::input_span_of_byte_like auto &utf8_input,
774 std::span<char16_t> utf16_output) noexcept {
775 return convert_utf8_to_utf16be(
776 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
777 utf16_output.data());
778}
779 #endif // SIMDUTF_SPAN
780#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
781
782#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
799simdutf_warn_unused result convert_utf8_to_latin1_with_errors(
800 const char *input, size_t length, char *latin1_output) noexcept;
801 #if SIMDUTF_SPAN
802simdutf_really_inline simdutf_warn_unused result
803convert_utf8_to_latin1_with_errors(
804 const detail::input_span_of_byte_like auto &utf8_input,
805 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
806 return convert_utf8_to_latin1_with_errors(
807 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
808 reinterpret_cast<char *>(latin1_output.data()));
809}
810 #endif // SIMDUTF_SPAN
811#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
812
813#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
829simdutf_warn_unused result convert_utf8_to_utf16_with_errors(
830 const char *input, size_t length, char16_t *utf16_output) noexcept;
831 #if SIMDUTF_SPAN
832simdutf_really_inline simdutf_warn_unused result
833convert_utf8_to_utf16_with_errors(
834 const detail::input_span_of_byte_like auto &utf8_input,
835 std::span<char16_t> utf16_output) noexcept {
836 return convert_utf8_to_utf16_with_errors(
837 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
838 utf16_output.data());
839}
840 #endif // SIMDUTF_SPAN
841
856simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(
857 const char *input, size_t length, char16_t *utf16_output) noexcept;
858 #if SIMDUTF_SPAN
859simdutf_really_inline simdutf_warn_unused result
860convert_utf8_to_utf16le_with_errors(
861 const detail::input_span_of_byte_like auto &utf8_input,
862 std::span<char16_t> utf16_output) noexcept {
863 return convert_utf8_to_utf16le_with_errors(
864 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
865 utf16_output.data());
866}
867 #endif // SIMDUTF_SPAN
868
883simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(
884 const char *input, size_t length, char16_t *utf16_output) noexcept;
885 #if SIMDUTF_SPAN
886simdutf_really_inline simdutf_warn_unused result
887convert_utf8_to_utf16be_with_errors(
888 const detail::input_span_of_byte_like auto &utf8_input,
889 std::span<char16_t> utf16_output) noexcept {
890 return convert_utf8_to_utf16be_with_errors(
891 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
892 utf16_output.data());
893}
894 #endif // SIMDUTF_SPAN
895#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
896
897#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
910simdutf_warn_unused size_t convert_utf8_to_utf32(
911 const char *input, size_t length, char32_t *utf32_output) noexcept;
912 #if SIMDUTF_SPAN
913simdutf_really_inline simdutf_warn_unused size_t
914convert_utf8_to_utf32(const detail::input_span_of_byte_like auto &utf8_input,
915 std::span<char32_t> utf32_output) noexcept {
916 return convert_utf8_to_utf32(
917 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
918 utf32_output.data());
919}
920 #endif // SIMDUTF_SPAN
921
936simdutf_warn_unused result convert_utf8_to_utf32_with_errors(
937 const char *input, size_t length, char32_t *utf32_output) noexcept;
938 #if SIMDUTF_SPAN
939simdutf_really_inline simdutf_warn_unused result
940convert_utf8_to_utf32_with_errors(
941 const detail::input_span_of_byte_like auto &utf8_input,
942 std::span<char32_t> utf32_output) noexcept {
943 return convert_utf8_to_utf32_with_errors(
944 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
945 utf32_output.data());
946}
947 #endif // SIMDUTF_SPAN
948#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
949
950#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
970simdutf_warn_unused size_t convert_valid_utf8_to_latin1(
971 const char *input, size_t length, char *latin1_output) noexcept;
972 #if SIMDUTF_SPAN
973simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_latin1(
974 const detail::input_span_of_byte_like auto &valid_utf8_input,
975 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
976 return convert_valid_utf8_to_latin1(
977 reinterpret_cast<const char *>(valid_utf8_input.data()),
978 valid_utf8_input.size(), latin1_output.data());
979}
980 #endif // SIMDUTF_SPAN
981#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
982
983#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
994simdutf_warn_unused size_t convert_valid_utf8_to_utf16(
995 const char *input, size_t length, char16_t *utf16_buffer) noexcept;
996 #if SIMDUTF_SPAN
997simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16(
998 const detail::input_span_of_byte_like auto &valid_utf8_input,
999 std::span<char16_t> utf16_output) noexcept {
1000 return convert_valid_utf8_to_utf16(
1001 reinterpret_cast<const char *>(valid_utf8_input.data()),
1002 valid_utf8_input.size(), utf16_output.data());
1003}
1004 #endif // SIMDUTF_SPAN
1005
1016simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(
1017 const char *input, size_t length, char16_t *utf16_buffer) noexcept;
1018 #if SIMDUTF_SPAN
1019simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(
1020 const detail::input_span_of_byte_like auto &valid_utf8_input,
1021 std::span<char16_t> utf16_output) noexcept {
1022 return convert_valid_utf8_to_utf16le(
1023 reinterpret_cast<const char *>(valid_utf8_input.data()),
1024 valid_utf8_input.size(), utf16_output.data());
1025}
1026 #endif // SIMDUTF_SPAN
1027
1038simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(
1039 const char *input, size_t length, char16_t *utf16_buffer) noexcept;
1040 #if SIMDUTF_SPAN
1041simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(
1042 const detail::input_span_of_byte_like auto &valid_utf8_input,
1043 std::span<char16_t> utf16_output) noexcept {
1044 return convert_valid_utf8_to_utf16be(
1045 reinterpret_cast<const char *>(valid_utf8_input.data()),
1046 valid_utf8_input.size(), utf16_output.data());
1047}
1048 #endif // SIMDUTF_SPAN
1049#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1050
1051#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1062simdutf_warn_unused size_t convert_valid_utf8_to_utf32(
1063 const char *input, size_t length, char32_t *utf32_buffer) noexcept;
1064 #if SIMDUTF_SPAN
1065simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf32(
1066 const detail::input_span_of_byte_like auto &valid_utf8_input,
1067 std::span<char32_t> utf32_output) noexcept {
1068 return convert_valid_utf8_to_utf32(
1069 reinterpret_cast<const char *>(valid_utf8_input.data()),
1070 valid_utf8_input.size(), utf32_output.data());
1071}
1072 #endif // SIMDUTF_SPAN
1073#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1074
1075#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1084simdutf_warn_unused size_t utf8_length_from_latin1(const char *input,
1085 size_t length) noexcept;
1086 #if SIMDUTF_SPAN
1087simdutf_really_inline simdutf_warn_unused size_t utf8_length_from_latin1(
1088 const detail::input_span_of_byte_like auto &latin1_input) noexcept {
1089 return utf8_length_from_latin1(
1090 reinterpret_cast<const char *>(latin1_input.data()), latin1_input.size());
1091}
1092 #endif // SIMDUTF_SPAN
1093
1107simdutf_warn_unused size_t latin1_length_from_utf8(const char *input,
1108 size_t length) noexcept;
1109 #if SIMDUTF_SPAN
1110simdutf_really_inline simdutf_warn_unused size_t latin1_length_from_utf8(
1111 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
1112 return latin1_length_from_utf8(
1113 reinterpret_cast<const char *>(valid_utf8_input.data()),
1114 valid_utf8_input.size());
1115}
1116 #endif // SIMDUTF_SPAN
1117#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1118
1119#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1134simdutf_warn_unused size_t utf16_length_from_utf8(const char *input,
1135 size_t length) noexcept;
1136 #if SIMDUTF_SPAN
1137simdutf_really_inline simdutf_warn_unused size_t utf16_length_from_utf8(
1138 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
1139 return utf16_length_from_utf8(
1140 reinterpret_cast<const char *>(valid_utf8_input.data()),
1141 valid_utf8_input.size());
1142}
1143 #endif // SIMDUTF_SPAN
1144#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1145
1146#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1163simdutf_warn_unused size_t utf32_length_from_utf8(const char *input,
1164 size_t length) noexcept;
1165 #if SIMDUTF_SPAN
1166simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf8(
1167 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
1168 return utf32_length_from_utf8(
1169 reinterpret_cast<const char *>(valid_utf8_input.data()),
1170 valid_utf8_input.size());
1171}
1172 #endif // SIMDUTF_SPAN
1173#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1174
1175#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1191simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t *input,
1192 size_t length,
1193 char *utf8_buffer) noexcept;
1194 #if SIMDUTF_SPAN
1195simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_utf8(
1196 std::span<const char16_t> utf16_input,
1197 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1198 return convert_utf16_to_utf8(utf16_input.data(), utf16_input.size(),
1199 reinterpret_cast<char *>(utf8_output.data()));
1200}
1201 #endif // SIMDUTF_SPAN
1202
1221simdutf_warn_unused size_t convert_utf16_to_utf8_safe(const char16_t *input,
1222 size_t length,
1223 char *utf8_output,
1224 size_t utf8_len) noexcept;
1225 #if SIMDUTF_SPAN
1226simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_utf8_safe(
1227 std::span<const char16_t> utf16_input,
1228 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1229 // implementation note: outputspan is a forwarding ref to avoid copying and
1230 // allow both lvalues and rvalues. std::span can be copied without problems,
1231 // but std::vector should not, and this function should accept both. it will
1232 // allow using an owning rvalue ref (example: passing a temporary std::string)
1233 // as output, but the user will quickly find out that he has no way of getting
1234 // the data out of the object in that case.
1235 return convert_utf16_to_utf8_safe(
1236 utf16_input.data(), utf16_input.size(),
1237 reinterpret_cast<char *>(utf8_output.data()), utf8_output.size());
1238}
1239 #endif // SIMDUTF_SPAN
1240#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1241
1242#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1258simdutf_warn_unused size_t convert_utf16_to_latin1(
1259 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1260 #if SIMDUTF_SPAN
1261simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_latin1(
1262 std::span<const char16_t> utf16_input,
1263 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1264 return convert_utf16_to_latin1(
1265 utf16_input.data(), utf16_input.size(),
1266 reinterpret_cast<char *>(latin1_output.data()));
1267}
1268 #endif // SIMDUTF_SPAN
1269
1286simdutf_warn_unused size_t convert_utf16le_to_latin1(
1287 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1288 #if SIMDUTF_SPAN
1289simdutf_really_inline simdutf_warn_unused size_t convert_utf16le_to_latin1(
1290 std::span<const char16_t> utf16_input,
1291 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1292 return convert_utf16le_to_latin1(
1293 utf16_input.data(), utf16_input.size(),
1294 reinterpret_cast<char *>(latin1_output.data()));
1295}
1296 #endif // SIMDUTF_SPAN
1297
1312simdutf_warn_unused size_t convert_utf16be_to_latin1(
1313 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1314 #if SIMDUTF_SPAN
1315simdutf_really_inline simdutf_warn_unused size_t convert_utf16be_to_latin1(
1316 std::span<const char16_t> utf16_input,
1317 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1318 return convert_utf16be_to_latin1(
1319 utf16_input.data(), utf16_input.size(),
1320 reinterpret_cast<char *>(latin1_output.data()));
1321}
1322 #endif // SIMDUTF_SPAN
1323#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1324
1325#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1340simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *input,
1341 size_t length,
1342 char *utf8_buffer) noexcept;
1343 #if SIMDUTF_SPAN
1344simdutf_really_inline simdutf_warn_unused size_t convert_utf16le_to_utf8(
1345 std::span<const char16_t> utf16_input,
1346 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1347 return convert_utf16le_to_utf8(utf16_input.data(), utf16_input.size(),
1348 reinterpret_cast<char *>(utf8_output.data()));
1349}
1350 #endif // SIMDUTF_SPAN
1351
1366simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input,
1367 size_t length,
1368 char *utf8_buffer) noexcept;
1369 #if SIMDUTF_SPAN
1370simdutf_really_inline simdutf_warn_unused size_t convert_utf16be_to_utf8(
1371 std::span<const char16_t> utf16_input,
1372 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1373 return convert_utf16be_to_utf8(utf16_input.data(), utf16_input.size(),
1374 reinterpret_cast<char *>(utf8_output.data()));
1375}
1376 #endif // SIMDUTF_SPAN
1377#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1378
1379#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1396simdutf_warn_unused result convert_utf16_to_latin1_with_errors(
1397 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1398 #if SIMDUTF_SPAN
1399simdutf_really_inline simdutf_warn_unused result
1400convert_utf16_to_latin1_with_errors(
1401 std::span<const char16_t> utf16_input,
1402 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1403 return convert_utf16_to_latin1_with_errors(
1404 utf16_input.data(), utf16_input.size(),
1405 reinterpret_cast<char *>(latin1_output.data()));
1406}
1407 #endif // SIMDUTF_SPAN
1408
1424simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(
1425 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1426 #if SIMDUTF_SPAN
1427simdutf_really_inline simdutf_warn_unused result
1428convert_utf16le_to_latin1_with_errors(
1429 std::span<const char16_t> utf16_input,
1430 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1431 return convert_utf16le_to_latin1_with_errors(
1432 utf16_input.data(), utf16_input.size(),
1433 reinterpret_cast<char *>(latin1_output.data()));
1434}
1435 #endif // SIMDUTF_SPAN
1436
1454simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(
1455 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1456 #if SIMDUTF_SPAN
1457simdutf_really_inline simdutf_warn_unused result
1458convert_utf16be_to_latin1_with_errors(
1459 std::span<const char16_t> utf16_input,
1460 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1461 return convert_utf16be_to_latin1_with_errors(
1462 utf16_input.data(), utf16_input.size(),
1463 reinterpret_cast<char *>(latin1_output.data()));
1464}
1465 #endif // SIMDUTF_SPAN
1466#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1467
1468#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1486simdutf_warn_unused result convert_utf16_to_utf8_with_errors(
1487 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
1488 #if SIMDUTF_SPAN
1489simdutf_really_inline simdutf_warn_unused result
1490convert_utf16_to_utf8_with_errors(
1491 std::span<const char16_t> utf16_input,
1492 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1493 return convert_utf16_to_utf8_with_errors(
1494 utf16_input.data(), utf16_input.size(),
1495 reinterpret_cast<char *>(utf8_output.data()));
1496}
1497 #endif // SIMDUTF_SPAN
1498
1515simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(
1516 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
1517 #if SIMDUTF_SPAN
1518simdutf_really_inline simdutf_warn_unused result
1519convert_utf16le_to_utf8_with_errors(
1520 std::span<const char16_t> utf16_input,
1521 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1522 return convert_utf16le_to_utf8_with_errors(
1523 utf16_input.data(), utf16_input.size(),
1524 reinterpret_cast<char *>(utf8_output.data()));
1525}
1526 #endif // SIMDUTF_SPAN
1527
1544simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(
1545 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
1546 #if SIMDUTF_SPAN
1547simdutf_really_inline simdutf_warn_unused result
1548convert_utf16be_to_utf8_with_errors(
1549 std::span<const char16_t> utf16_input,
1550 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1551 return convert_utf16be_to_utf8_with_errors(
1552 utf16_input.data(), utf16_input.size(),
1553 reinterpret_cast<char *>(utf8_output.data()));
1554}
1555 #endif // SIMDUTF_SPAN
1556
1570simdutf_warn_unused size_t convert_valid_utf16_to_utf8(
1571 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
1572 #if SIMDUTF_SPAN
1573simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_utf8(
1574 std::span<const char16_t> valid_utf16_input,
1575 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1576 return convert_valid_utf16_to_utf8(
1577 valid_utf16_input.data(), valid_utf16_input.size(),
1578 reinterpret_cast<char *>(utf8_output.data()));
1579}
1580 #endif // SIMDUTF_SPAN
1581#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1582
1583#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1603simdutf_warn_unused size_t convert_valid_utf16_to_latin1(
1604 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1605 #if SIMDUTF_SPAN
1606simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_latin1(
1607 std::span<const char16_t> valid_utf16_input,
1608 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1609 return convert_valid_utf16_to_latin1(
1610 valid_utf16_input.data(), valid_utf16_input.size(),
1611 reinterpret_cast<char *>(latin1_output.data()));
1612}
1613 #endif // SIMDUTF_SPAN
1614
1634simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(
1635 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1636 #if SIMDUTF_SPAN
1637simdutf_really_inline simdutf_warn_unused size_t
1638convert_valid_utf16le_to_latin1(
1639 std::span<const char16_t> valid_utf16_input,
1640 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1641 return convert_valid_utf16le_to_latin1(
1642 valid_utf16_input.data(), valid_utf16_input.size(),
1643 reinterpret_cast<char *>(latin1_output.data()));
1644}
1645 #endif // SIMDUTF_SPAN
1646
1666simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(
1667 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1668 #if SIMDUTF_SPAN
1669simdutf_really_inline simdutf_warn_unused size_t
1670convert_valid_utf16be_to_latin1(
1671 std::span<const char16_t> valid_utf16_input,
1672 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1673 return convert_valid_utf16be_to_latin1(
1674 valid_utf16_input.data(), valid_utf16_input.size(),
1675 reinterpret_cast<char *>(latin1_output.data()));
1676}
1677 #endif // SIMDUTF_SPAN
1678#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1679
1680#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1695simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(
1696 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
1697 #if SIMDUTF_SPAN
1698simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(
1699 std::span<const char16_t> valid_utf16_input,
1700 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1701 return convert_valid_utf16le_to_utf8(
1702 valid_utf16_input.data(), valid_utf16_input.size(),
1703 reinterpret_cast<char *>(utf8_output.data()));
1704}
1705 #endif // SIMDUTF_SPAN
1706
1720simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(
1721 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
1722 #if SIMDUTF_SPAN
1723simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(
1724 std::span<const char16_t> valid_utf16_input,
1725 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1726 return convert_valid_utf16be_to_utf8(
1727 valid_utf16_input.data(), valid_utf16_input.size(),
1728 reinterpret_cast<char *>(utf8_output.data()));
1729}
1730 #endif // SIMDUTF_SPAN
1731#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1732
1733#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1749simdutf_warn_unused size_t convert_utf16_to_utf32(
1750 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
1751 #if SIMDUTF_SPAN
1752simdutf_really_inline simdutf_warn_unused size_t
1753convert_utf16_to_utf32(std::span<const char16_t> utf16_input,
1754 std::span<char32_t> utf32_output) noexcept {
1755 return convert_utf16_to_utf32(utf16_input.data(), utf16_input.size(),
1756 utf32_output.data());
1757}
1758 #endif // SIMDUTF_SPAN
1759
1774simdutf_warn_unused size_t convert_utf16le_to_utf32(
1775 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
1776 #if SIMDUTF_SPAN
1777simdutf_really_inline simdutf_warn_unused size_t
1778convert_utf16le_to_utf32(std::span<const char16_t> utf16_input,
1779 std::span<char32_t> utf32_output) noexcept {
1780 return convert_utf16le_to_utf32(utf16_input.data(), utf16_input.size(),
1781 utf32_output.data());
1782}
1783 #endif // SIMDUTF_SPAN
1784
1799simdutf_warn_unused size_t convert_utf16be_to_utf32(
1800 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
1801 #if SIMDUTF_SPAN
1802simdutf_really_inline simdutf_warn_unused size_t
1803convert_utf16be_to_utf32(std::span<const char16_t> utf16_input,
1804 std::span<char32_t> utf32_output) noexcept {
1805 return convert_utf16be_to_utf32(utf16_input.data(), utf16_input.size(),
1806 utf32_output.data());
1807}
1808 #endif // SIMDUTF_SPAN
1809
1827simdutf_warn_unused result convert_utf16_to_utf32_with_errors(
1828 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
1829 #if SIMDUTF_SPAN
1830simdutf_really_inline simdutf_warn_unused result
1831convert_utf16_to_utf32_with_errors(std::span<const char16_t> utf16_input,
1832 std::span<char32_t> utf32_output) noexcept {
1833 return convert_utf16_to_utf32_with_errors(
1834 utf16_input.data(), utf16_input.size(), utf32_output.data());
1835}
1836 #endif // SIMDUTF_SPAN
1837
1854simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(
1855 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
1856 #if SIMDUTF_SPAN
1857simdutf_really_inline simdutf_warn_unused result
1858convert_utf16le_to_utf32_with_errors(
1859 std::span<const char16_t> utf16_input,
1860 std::span<char32_t> utf32_output) noexcept {
1861 return convert_utf16le_to_utf32_with_errors(
1862 utf16_input.data(), utf16_input.size(), utf32_output.data());
1863}
1864 #endif // SIMDUTF_SPAN
1865
1882simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(
1883 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
1884 #if SIMDUTF_SPAN
1885simdutf_really_inline simdutf_warn_unused result
1886convert_utf16be_to_utf32_with_errors(
1887 std::span<const char16_t> utf16_input,
1888 std::span<char32_t> utf32_output) noexcept {
1889 return convert_utf16be_to_utf32_with_errors(
1890 utf16_input.data(), utf16_input.size(), utf32_output.data());
1891}
1892 #endif // SIMDUTF_SPAN
1893
1908simdutf_warn_unused size_t convert_valid_utf16_to_utf32(
1909 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
1910 #if SIMDUTF_SPAN
1911simdutf_really_inline simdutf_warn_unused size_t
1912convert_valid_utf16_to_utf32(std::span<const char16_t> valid_utf16_input,
1913 std::span<char32_t> utf32_output) noexcept {
1914 return convert_valid_utf16_to_utf32(
1915 valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data());
1916}
1917 #endif // SIMDUTF_SPAN
1918
1932simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(
1933 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
1934 #if SIMDUTF_SPAN
1935simdutf_really_inline simdutf_warn_unused size_t
1936convert_valid_utf16le_to_utf32(std::span<const char16_t> valid_utf16_input,
1937 std::span<char32_t> utf32_output) noexcept {
1938 return convert_valid_utf16le_to_utf32(
1939 valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data());
1940}
1941 #endif // SIMDUTF_SPAN
1942
1956simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(
1957 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
1958 #if SIMDUTF_SPAN
1959simdutf_really_inline simdutf_warn_unused size_t
1960convert_valid_utf16be_to_utf32(std::span<const char16_t> valid_utf16_input,
1961 std::span<char32_t> utf32_output) noexcept {
1962 return convert_valid_utf16be_to_utf32(
1963 valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data());
1964}
1965 #endif // SIMDUTF_SPAN
1966#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1967
1968#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
/**
 * Declaration only; the definition lives elsewhere. Takes a length and returns
 * a size. Judging by the name, it maps a UTF-16 length to the size needed for
 * the Latin-1 form -- confirm units and semantics in the implementation.
 */
simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept;
1982
1994simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t *input,
1995 size_t length) noexcept;
1996 #if SIMDUTF_SPAN
1997simdutf_really_inline simdutf_warn_unused size_t
1998utf8_length_from_utf16(std::span<const char16_t> valid_utf16_input) noexcept {
1999 return utf8_length_from_utf16(valid_utf16_input.data(),
2000 valid_utf16_input.size());
2001}
2002 #endif // SIMDUTF_SPAN
2003#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
2004
2005#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2017simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input,
2018 size_t length) noexcept;
2019 #if SIMDUTF_SPAN
2020simdutf_really_inline simdutf_warn_unused size_t
2021utf8_length_from_utf16le(std::span<const char16_t> valid_utf16_input) noexcept {
2022 return utf8_length_from_utf16le(valid_utf16_input.data(),
2023 valid_utf16_input.size());
2024}
2025 #endif // SIMDUTF_SPAN
2026
2038simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input,
2039 size_t length) noexcept;
2040 #if SIMDUTF_SPAN
2041simdutf_really_inline simdutf_warn_unused size_t
2042utf8_length_from_utf16be(std::span<const char16_t> valid_utf16_input) noexcept {
2043 return utf8_length_from_utf16be(valid_utf16_input.data(),
2044 valid_utf16_input.size());
2045}
2046 #endif // SIMDUTF_SPAN
2047#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2048
2049#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
2063simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input,
2064 size_t length,
2065 char *utf8_buffer) noexcept;
2066 #if SIMDUTF_SPAN
2067simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_utf8(
2068 std::span<const char32_t> utf32_input,
2069 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2070 return convert_utf32_to_utf8(utf32_input.data(), utf32_input.size(),
2071 reinterpret_cast<char *>(utf8_output.data()));
2072}
2073 #endif // SIMDUTF_SPAN
2074
2091simdutf_warn_unused result convert_utf32_to_utf8_with_errors(
2092 const char32_t *input, size_t length, char *utf8_buffer) noexcept;
2093 #if SIMDUTF_SPAN
2094simdutf_really_inline simdutf_warn_unused result
2095convert_utf32_to_utf8_with_errors(
2096 std::span<const char32_t> utf32_input,
2097 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2098 return convert_utf32_to_utf8_with_errors(
2099 utf32_input.data(), utf32_input.size(),
2100 reinterpret_cast<char *>(utf8_output.data()));
2101}
2102 #endif // SIMDUTF_SPAN
2103
2117simdutf_warn_unused size_t convert_valid_utf32_to_utf8(
2118 const char32_t *input, size_t length, char *utf8_buffer) noexcept;
2119 #if SIMDUTF_SPAN
2120simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_utf8(
2121 std::span<const char32_t> valid_utf32_input,
2122 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2123 return convert_valid_utf32_to_utf8(
2124 valid_utf32_input.data(), valid_utf32_input.size(),
2125 reinterpret_cast<char *>(utf8_output.data()));
2126}
2127 #endif // SIMDUTF_SPAN
2128#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
2129
2130#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2145simdutf_warn_unused size_t convert_utf32_to_utf16(
2146 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
2147 #if SIMDUTF_SPAN
2148simdutf_really_inline simdutf_warn_unused size_t
2149convert_utf32_to_utf16(std::span<const char32_t> utf32_input,
2150 std::span<char16_t> utf16_output) noexcept {
2151 return convert_utf32_to_utf16(utf32_input.data(), utf32_input.size(),
2152 utf16_output.data());
2153}
2154 #endif // SIMDUTF_SPAN
2155
2169simdutf_warn_unused size_t convert_utf32_to_utf16le(
2170 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
2171 #if SIMDUTF_SPAN
2172simdutf_really_inline simdutf_warn_unused size_t
2173convert_utf32_to_utf16le(std::span<const char32_t> utf32_input,
2174 std::span<char16_t> utf16_output) noexcept {
2175 return convert_utf32_to_utf16le(utf32_input.data(), utf32_input.size(),
2176 utf16_output.data());
2177}
2178 #endif // SIMDUTF_SPAN
2179#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2180
2181#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
2196simdutf_warn_unused size_t convert_utf32_to_latin1(
2197 const char32_t *input, size_t length, char *latin1_buffer) noexcept;
2198 #if SIMDUTF_SPAN
2199simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_latin1(
2200 std::span<const char32_t> utf32_input,
2201 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2202 return convert_utf32_to_latin1(
2203 utf32_input.data(), utf32_input.size(),
2204 reinterpret_cast<char *>(latin1_output.data()));
2205}
2206 #endif // SIMDUTF_SPAN
2207
2225simdutf_warn_unused result convert_utf32_to_latin1_with_errors(
2226 const char32_t *input, size_t length, char *latin1_buffer) noexcept;
2227 #if SIMDUTF_SPAN
2228simdutf_really_inline simdutf_warn_unused result
2229convert_utf32_to_latin1_with_errors(
2230 std::span<const char32_t> utf32_input,
2231 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2232 return convert_utf32_to_latin1_with_errors(
2233 utf32_input.data(), utf32_input.size(),
2234 reinterpret_cast<char *>(latin1_output.data()));
2235}
2236 #endif // SIMDUTF_SPAN
2237
2258simdutf_warn_unused size_t convert_valid_utf32_to_latin1(
2259 const char32_t *input, size_t length, char *latin1_buffer) noexcept;
2260 #if SIMDUTF_SPAN
2261simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_latin1(
2262 std::span<const char32_t> valid_utf32_input,
2263 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2264 return convert_valid_utf32_to_latin1(
2265 valid_utf32_input.data(), valid_utf32_input.size(),
2266 reinterpret_cast<char *>(latin1_output.data()));
2267}
2268 #endif // SIMDUTF_SPAN
2269
/**
 * Declaration only; the definition lives elsewhere. Takes a length and returns
 * a size. Judging by the name, it maps a UTF-32 length to the size needed for
 * the Latin-1 form -- confirm units and semantics in the implementation.
 */
simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) noexcept;
2283
/**
 * Declaration only; the definition lives elsewhere. Takes a length and returns
 * a size. Judging by the name, it maps a Latin-1 length to the size needed for
 * the UTF-32 form -- confirm units and semantics in the implementation.
 */
simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) noexcept;
2293#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
2294
2295#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2309simdutf_warn_unused size_t convert_utf32_to_utf16be(
2310 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
2311 #if SIMDUTF_SPAN
2312simdutf_really_inline simdutf_warn_unused size_t
2313convert_utf32_to_utf16be(std::span<const char32_t> utf32_input,
2314 std::span<char16_t> utf16_output) noexcept {
2315 return convert_utf32_to_utf16be(utf32_input.data(), utf32_input.size(),
2316 utf16_output.data());
2317}
2318 #endif // SIMDUTF_SPAN
2319
2337simdutf_warn_unused result convert_utf32_to_utf16_with_errors(
2338 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
2339 #if SIMDUTF_SPAN
2340simdutf_really_inline simdutf_warn_unused result
2341convert_utf32_to_utf16_with_errors(std::span<const char32_t> utf32_input,
2342 std::span<char16_t> utf16_output) noexcept {
2343 return convert_utf32_to_utf16_with_errors(
2344 utf32_input.data(), utf32_input.size(), utf16_output.data());
2345}
2346 #endif // SIMDUTF_SPAN
2347
2364simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(
2365 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
2366 #if SIMDUTF_SPAN
2367simdutf_really_inline simdutf_warn_unused result
2368convert_utf32_to_utf16le_with_errors(
2369 std::span<const char32_t> utf32_input,
2370 std::span<char16_t> utf16_output) noexcept {
2371 return convert_utf32_to_utf16le_with_errors(
2372 utf32_input.data(), utf32_input.size(), utf16_output.data());
2373}
2374 #endif // SIMDUTF_SPAN
2375
2392simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(
2393 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
2394 #if SIMDUTF_SPAN
2395simdutf_really_inline simdutf_warn_unused result
2396convert_utf32_to_utf16be_with_errors(
2397 std::span<const char32_t> utf32_input,
2398 std::span<char16_t> utf16_output) noexcept {
2399 return convert_utf32_to_utf16be_with_errors(
2400 utf32_input.data(), utf32_input.size(), utf16_output.data());
2401}
2402 #endif // SIMDUTF_SPAN
2403
2417simdutf_warn_unused size_t convert_valid_utf32_to_utf16(
2418 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
2419 #if SIMDUTF_SPAN
2420simdutf_really_inline simdutf_warn_unused size_t
2421convert_valid_utf32_to_utf16(std::span<const char32_t> valid_utf32_input,
2422 std::span<char16_t> utf16_output) noexcept {
2423 return convert_valid_utf32_to_utf16(
2424 valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data());
2425}
2426 #endif // SIMDUTF_SPAN
2427
2441simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(
2442 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
2443 #if SIMDUTF_SPAN
2444simdutf_really_inline simdutf_warn_unused size_t
2445convert_valid_utf32_to_utf16le(std::span<const char32_t> valid_utf32_input,
2446 std::span<char16_t> utf16_output) noexcept {
2447 return convert_valid_utf32_to_utf16le(
2448 valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data());
2449}
2450 #endif // SIMDUTF_SPAN
2451
2465simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(
2466 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
2467 #if SIMDUTF_SPAN
2468simdutf_really_inline simdutf_warn_unused size_t
2469convert_valid_utf32_to_utf16be(std::span<const char32_t> valid_utf32_input,
2470 std::span<char16_t> utf16_output) noexcept {
2471 return convert_valid_utf32_to_utf16be(
2472 valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data());
2473}
2474 #endif // SIMDUTF_SPAN
2475#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2476
2477#if SIMDUTF_FEATURE_UTF16
/**
 * Pointer-based overload; only declared here, the definition lives elsewhere.
 * Judging by the name it writes a byte-swapped copy of `length` UTF-16 code
 * units from `input` into `output` (confirm in the implementation).
 */
void change_endianness_utf16(const char16_t *input, size_t length,
                             char16_t *output) noexcept;
  #if SIMDUTF_SPAN
/**
 * Span convenience overload. Unpacks `utf16_input` into pointer + length and
 * passes `utf16_output.data()` as the destination. The output span's size is
 * not checked by this wrapper.
 */
simdutf_really_inline void
change_endianness_utf16(std::span<const char16_t> utf16_input,
                        std::span<char16_t> utf16_output) noexcept {
  const char16_t *const src = utf16_input.data();
  const size_t src_len = utf16_input.size();
  char16_t *const dst = utf16_output.data();
  change_endianness_utf16(src, src_len, dst);
}
  #endif // SIMDUTF_SPAN
2501#endif // SIMDUTF_FEATURE_UTF16
2502
2503#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
2515simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input,
2516 size_t length) noexcept;
2517 #if SIMDUTF_SPAN
2518simdutf_really_inline simdutf_warn_unused size_t
2519utf8_length_from_utf32(std::span<const char32_t> valid_utf32_input) noexcept {
2520 return utf8_length_from_utf32(valid_utf32_input.data(),
2521 valid_utf32_input.size());
2522}
2523 #endif // SIMDUTF_SPAN
2524#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
2525
2526#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2538simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input,
2539 size_t length) noexcept;
2540 #if SIMDUTF_SPAN
2541simdutf_really_inline simdutf_warn_unused size_t
2542utf16_length_from_utf32(std::span<const char32_t> valid_utf32_input) noexcept {
2543 return utf16_length_from_utf32(valid_utf32_input.data(),
2544 valid_utf32_input.size());
2545}
2546 #endif // SIMDUTF_SPAN
2547
2563simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t *input,
2564 size_t length) noexcept;
2565 #if SIMDUTF_SPAN
2566simdutf_really_inline simdutf_warn_unused size_t
2567utf32_length_from_utf16(std::span<const char16_t> valid_utf16_input) noexcept {
2568 return utf32_length_from_utf16(valid_utf16_input.data(),
2569 valid_utf16_input.size());
2570}
2571 #endif // SIMDUTF_SPAN
2572
2588simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input,
2589 size_t length) noexcept;
2590 #if SIMDUTF_SPAN
2591simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf16le(
2592 std::span<const char16_t> valid_utf16_input) noexcept {
2593 return utf32_length_from_utf16le(valid_utf16_input.data(),
2594 valid_utf16_input.size());
2595}
2596 #endif // SIMDUTF_SPAN
2597
2613simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input,
2614 size_t length) noexcept;
2615 #if SIMDUTF_SPAN
2616simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf16be(
2617 std::span<const char16_t> valid_utf16_input) noexcept {
2618 return utf32_length_from_utf16be(valid_utf16_input.data(),
2619 valid_utf16_input.size());
2620}
2621 #endif // SIMDUTF_SPAN
2622#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2623
2624#if SIMDUTF_FEATURE_UTF16
2639simdutf_warn_unused size_t count_utf16(const char16_t *input,
2640 size_t length) noexcept;
2641 #if SIMDUTF_SPAN
2642simdutf_really_inline simdutf_warn_unused size_t
2643count_utf16(std::span<const char16_t> valid_utf16_input) noexcept {
2644 return count_utf16(valid_utf16_input.data(), valid_utf16_input.size());
2645}
2646 #endif // SIMDUTF_SPAN
2647
2662simdutf_warn_unused size_t count_utf16le(const char16_t *input,
2663 size_t length) noexcept;
2664 #if SIMDUTF_SPAN
2665simdutf_really_inline simdutf_warn_unused size_t
2666count_utf16le(std::span<const char16_t> valid_utf16_input) noexcept {
2667 return count_utf16le(valid_utf16_input.data(), valid_utf16_input.size());
2668}
2669 #endif // SIMDUTF_SPAN
2670
2685simdutf_warn_unused size_t count_utf16be(const char16_t *input,
2686 size_t length) noexcept;
2687 #if SIMDUTF_SPAN
2688simdutf_really_inline simdutf_warn_unused size_t
2689count_utf16be(std::span<const char16_t> valid_utf16_input) noexcept {
2690 return count_utf16be(valid_utf16_input.data(), valid_utf16_input.size());
2691}
2692 #endif // SIMDUTF_SPAN
2693#endif // SIMDUTF_FEATURE_UTF16
2694
2695#if SIMDUTF_FEATURE_UTF8
2708simdutf_warn_unused size_t count_utf8(const char *input,
2709 size_t length) noexcept;
2710 #if SIMDUTF_SPAN
2711simdutf_really_inline simdutf_warn_unused size_t count_utf8(
2712 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
2713 return count_utf8(reinterpret_cast<const char *>(valid_utf8_input.data()),
2714 valid_utf8_input.size());
2715}
2716 #endif // SIMDUTF_SPAN
2717
2732simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length);
2733 #if SIMDUTF_SPAN
2734simdutf_really_inline simdutf_warn_unused size_t trim_partial_utf8(
2735 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
2736 return trim_partial_utf8(
2737 reinterpret_cast<const char *>(valid_utf8_input.data()),
2738 valid_utf8_input.size());
2739}
2740 #endif // SIMDUTF_SPAN
2741#endif // SIMDUTF_FEATURE_UTF8
2742
2743#if SIMDUTF_FEATURE_UTF16
2758simdutf_warn_unused size_t trim_partial_utf16be(const char16_t *input,
2759 size_t length);
2760 #if SIMDUTF_SPAN
2761simdutf_really_inline simdutf_warn_unused size_t
2762trim_partial_utf16be(std::span<const char16_t> valid_utf16_input) noexcept {
2763 return trim_partial_utf16be(valid_utf16_input.data(),
2764 valid_utf16_input.size());
2765}
2766 #endif // SIMDUTF_SPAN
2767
2782simdutf_warn_unused size_t trim_partial_utf16le(const char16_t *input,
2783 size_t length);
2784 #if SIMDUTF_SPAN
2785simdutf_really_inline simdutf_warn_unused size_t
2786trim_partial_utf16le(std::span<const char16_t> valid_utf16_input) noexcept {
2787 return trim_partial_utf16le(valid_utf16_input.data(),
2788 valid_utf16_input.size());
2789}
2790 #endif // SIMDUTF_SPAN
2791
2806simdutf_warn_unused size_t trim_partial_utf16(const char16_t *input,
2807 size_t length);
2808 #if SIMDUTF_SPAN
2809simdutf_really_inline simdutf_warn_unused size_t
2810trim_partial_utf16(std::span<const char16_t> valid_utf16_input) noexcept {
2811 return trim_partial_utf16(valid_utf16_input.data(), valid_utf16_input.size());
2812}
2813 #endif // SIMDUTF_SPAN
2814#endif // SIMDUTF_FEATURE_UTF16
2815
2816#if SIMDUTF_FEATURE_BASE64
  // Default SIMDUTF_NEED_TRAILING_ZEROES to 1 unless the build already
  // defined it before this point.
  #ifndef SIMDUTF_NEED_TRAILING_ZEROES
    #define SIMDUTF_NEED_TRAILING_ZEROES 1
  #endif
// base64_options are used to specify the base64 encoding options.
// ASCII spaces are ' ', '\t', '\n', '\r', '\f'.
// Garbage characters are characters that are neither part of the base64
// alphabet nor ASCII spaces.
//
// base64_reverse_padding is a modifier bit, not an enumerator: OR-ing it into
// base64_default or base64_url flips that variant's padding behaviour.
constexpr uint64_t base64_reverse_padding =
    2; /* modifier for base64_default and base64_url */
enum base64_options : uint64_t {
  base64_default = 0, /* standard base64 format (with padding) */
  base64_url = 1,     /* base64url format (no padding) */
  base64_default_no_padding =
      base64_default |
      base64_reverse_padding, /* standard base64 format without padding */
  base64_url_with_padding =
      base64_url | base64_reverse_padding, /* base64url with padding */
  base64_default_accept_garbage =
      4, /* standard base64 format accepting garbage characters, the input stops
            with the first '=' if any */
  base64_url_accept_garbage =
      5, /* base64url format accepting garbage characters, the input stops with
            the first '=' if any */
  base64_default_or_url =
      8, /* standard/base64url hybrid format (only meaningful for decoding!) */
  base64_default_or_url_accept_garbage =
      12, /* standard/base64url hybrid format accepting garbage characters
             (only meaningful for decoding!), the input stops with the first '='
             if any */
};

  #if SIMDUTF_CPLUSPLUS17
/**
 * Returns the enumerator name matching `options`, or "<unknown>" when the
 * value is not one of the declared enumerators.
 *
 * The value 2 is reported as "base64_default_no_padding": that is the
 * enumerator carrying this value, whereas `base64_reverse_padding` is only the
 * modifier constant used to build it.
 *
 * @param options the option value to name
 * @return a view over a string literal (valid for the program's lifetime)
 */
inline std::string_view to_string(base64_options options) {
  switch (options) {
  case base64_default:
    return "base64_default";
  case base64_url:
    return "base64_url";
  case base64_default_no_padding:
    return "base64_default_no_padding";
  case base64_url_with_padding:
    return "base64_url_with_padding";
  case base64_default_accept_garbage:
    return "base64_default_accept_garbage";
  case base64_url_accept_garbage:
    return "base64_url_accept_garbage";
  case base64_default_or_url:
    return "base64_default_or_url";
  case base64_default_or_url_accept_garbage:
    return "base64_default_or_url_accept_garbage";
  }
  return "<unknown>";
}
  #endif // SIMDUTF_CPLUSPLUS17
2871
// last_chunk_handling_options select how the final chunk is treated when
// decoding base64.
// https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64
enum last_chunk_handling_options : uint64_t {
  loose = 0,  /* standard base64 format, decode partial final chunk */
  strict = 1, /* error when the last chunk is partial, 2 or 3 chars, and
                 unpadded, or non-zero bit padding */
  stop_before_partial =
      2, /* if the last chunk is partial, ignore it (no error) */
  only_full_chunks =
      3 /* only decode full blocks (4 base64 characters, no padding) */
};

/**
 * Tells whether `options` is `stop_before_partial` or `only_full_chunks`.
 * Every other value, including values outside the enumerator list, yields
 * false.
 */
inline bool is_partial(last_chunk_handling_options options) {
  switch (options) {
  case stop_before_partial:
  case only_full_chunks:
    return true;
  default:
    return false;
  }
}
2888
2889 #if SIMDUTF_CPLUSPLUS17
2890inline std::string_view to_string(last_chunk_handling_options options) {
2891 switch (options) {
2892 case loose:
2893 return "loose";
2894 case strict:
2895 return "strict";
2896 case stop_before_partial:
2897 return "stop_before_partial";
2898 case only_full_chunks:
2899 return "only_full_chunks";
2900 }
2901 return "<unknown>";
2902}
2903 #endif
2904
2914simdutf_warn_unused size_t
2915maximal_binary_length_from_base64(const char *input, size_t length) noexcept;
2916 #if SIMDUTF_SPAN
2917simdutf_really_inline simdutf_warn_unused size_t
2918maximal_binary_length_from_base64(
2919 const detail::input_span_of_byte_like auto &input) noexcept {
2920 return maximal_binary_length_from_base64(
2921 reinterpret_cast<const char *>(input.data()), input.size());
2922}
2923 #endif // SIMDUTF_SPAN
2924
2935simdutf_warn_unused size_t maximal_binary_length_from_base64(
2936 const char16_t *input, size_t length) noexcept;
2937 #if SIMDUTF_SPAN
2938simdutf_really_inline simdutf_warn_unused size_t
2939maximal_binary_length_from_base64(std::span<const char16_t> input) noexcept {
2940 return maximal_binary_length_from_base64(input.data(), input.size());
2941}
2942 #endif // SIMDUTF_SPAN
2943
2998simdutf_warn_unused result base64_to_binary(
2999 const char *input, size_t length, char *output,
3000 base64_options options = base64_default,
3001 last_chunk_handling_options last_chunk_options = loose) noexcept;
3002 #if SIMDUTF_SPAN
3003simdutf_really_inline simdutf_warn_unused result base64_to_binary(
3004 const detail::input_span_of_byte_like auto &input,
3005 detail::output_span_of_byte_like auto &&binary_output,
3006 base64_options options = base64_default,
3007 last_chunk_handling_options last_chunk_options = loose) noexcept {
3008 return base64_to_binary(reinterpret_cast<const char *>(input.data()),
3009 input.size(),
3010 reinterpret_cast<char *>(binary_output.data()),
3011 options, last_chunk_options);
3012}
3013 #endif // SIMDUTF_SPAN
3014
/**
 * Declaration only; the definition lives elsewhere. Takes a length and a
 * base64 option (defaulting to `base64_default`) and returns a size. Judging
 * by the name, it gives the base64 length for `length` bytes of binary input
 * -- confirm in the implementation.
 */
simdutf_warn_unused size_t base64_length_from_binary(
    size_t length, base64_options options = base64_default) noexcept;
3023
3045size_t binary_to_base64(const char *input, size_t length, char *output,
3046 base64_options options = base64_default) noexcept;
3047 #if SIMDUTF_SPAN
3048simdutf_really_inline simdutf_warn_unused size_t
3049binary_to_base64(const detail::input_span_of_byte_like auto &input,
3050 detail::output_span_of_byte_like auto &&binary_output,
3051 base64_options options = base64_default) noexcept {
3052 return binary_to_base64(
3053 reinterpret_cast<const char *>(input.data()), input.size(),
3054 reinterpret_cast<char *>(binary_output.data()), options);
3055}
3056 #endif // SIMDUTF_SPAN
3057
3058 #if SIMDUTF_ATOMIC_REF
3100size_t
3101atomic_binary_to_base64(const char *input, size_t length, char *output,
3102 base64_options options = base64_default) noexcept;
3103 #if SIMDUTF_SPAN
3104simdutf_really_inline simdutf_warn_unused size_t
3105atomic_binary_to_base64(const detail::input_span_of_byte_like auto &input,
3106 detail::output_span_of_byte_like auto &&binary_output,
3107 base64_options options = base64_default) noexcept {
3108 return atomic_binary_to_base64(
3109 reinterpret_cast<const char *>(input.data()), input.size(),
3110 reinterpret_cast<char *>(binary_output.data()), options);
3111}
3112 #endif // SIMDUTF_SPAN
3113 #endif // SIMDUTF_ATOMIC_REF
3114
3171simdutf_warn_unused result
3172base64_to_binary(const char16_t *input, size_t length, char *output,
3173 base64_options options = base64_default,
3174 last_chunk_handling_options last_chunk_options =
3175 last_chunk_handling_options::loose) noexcept;
3176 #if SIMDUTF_SPAN
3177simdutf_really_inline simdutf_warn_unused result base64_to_binary(
3178 std::span<const char16_t> input,
3179 detail::output_span_of_byte_like auto &&binary_output,
3180 base64_options options = base64_default,
3181 last_chunk_handling_options last_chunk_options = loose) noexcept {
3182 return base64_to_binary(input.data(), input.size(),
3183 reinterpret_cast<char *>(binary_output.data()),
3184 options, last_chunk_options);
3185}
3186 #endif // SIMDUTF_SPAN
3187
3198simdutf_warn_unused bool
3199base64_ignorable(char input, base64_options options = base64_default) noexcept;
3200simdutf_warn_unused bool
3201base64_ignorable(char16_t input,
3202 base64_options options = base64_default) noexcept;
3203
3215simdutf_warn_unused bool
3216base64_valid(char input, base64_options options = base64_default) noexcept;
3217simdutf_warn_unused bool
3218base64_valid(char16_t input, base64_options options = base64_default) noexcept;
3219
3229simdutf_warn_unused bool
3230base64_valid_or_padding(char input,
3231 base64_options options = base64_default) noexcept;
3232simdutf_warn_unused bool
3233base64_valid_or_padding(char16_t input,
3234 base64_options options = base64_default) noexcept;
3235
3303simdutf_warn_unused result
3304base64_to_binary_safe(const char *input, size_t length, char *output,
3305 size_t &outlen, base64_options options = base64_default,
3306 last_chunk_handling_options last_chunk_options =
3307 last_chunk_handling_options::loose,
3308 bool decode_up_to_bad_char = false) noexcept;
3309 #if SIMDUTF_SPAN
3314simdutf_really_inline simdutf_warn_unused std::tuple<result, std::size_t>
3315base64_to_binary_safe(const detail::input_span_of_byte_like auto &input,
3316 detail::output_span_of_byte_like auto &&binary_output,
3317 base64_options options = base64_default,
3318 last_chunk_handling_options last_chunk_options = loose,
3319 bool decode_up_to_bad_char = false) noexcept {
3320 size_t outlen = binary_output.size();
3321 auto r = base64_to_binary_safe(
3322 reinterpret_cast<const char *>(input.data()), input.size(),
3323 reinterpret_cast<char *>(binary_output.data()), outlen, options,
3324 last_chunk_options, decode_up_to_bad_char);
3325 return {r, outlen};
3326}
3327 #endif // SIMDUTF_SPAN
3328
3329simdutf_warn_unused result
3330base64_to_binary_safe(const char16_t *input, size_t length, char *output,
3331 size_t &outlen, base64_options options = base64_default,
3332 last_chunk_handling_options last_chunk_options =
3333 last_chunk_handling_options::loose,
3334 bool decode_up_to_bad_char = false) noexcept;
3335 #if SIMDUTF_SPAN
3340simdutf_really_inline simdutf_warn_unused std::tuple<result, std::size_t>
3341base64_to_binary_safe(std::span<const char16_t> input,
3342 detail::output_span_of_byte_like auto &&binary_output,
3343 base64_options options = base64_default,
3344 last_chunk_handling_options last_chunk_options = loose,
3345 bool decode_up_to_bad_char = false) noexcept {
3346 size_t outlen = binary_output.size();
3347 auto r = base64_to_binary_safe(input.data(), input.size(),
3348 reinterpret_cast<char *>(binary_output.data()),
3349 outlen, options, last_chunk_options,
3350 decode_up_to_bad_char);
3351 return {r, outlen};
3352}
3353 #endif // SIMDUTF_SPAN
3354
3355 #if SIMDUTF_ATOMIC_REF
3395simdutf_warn_unused result atomic_base64_to_binary_safe(
3396 const char *input, size_t length, char *output, size_t &outlen,
3397 base64_options options = base64_default,
3398 last_chunk_handling_options last_chunk_options =
3399 last_chunk_handling_options::loose,
3400 bool decode_up_to_bad_char = false) noexcept;
3401simdutf_warn_unused result atomic_base64_to_binary_safe(
3402 const char16_t *input, size_t length, char *output, size_t &outlen,
3403 base64_options options = base64_default,
3404 last_chunk_handling_options last_chunk_options = loose,
3405 bool decode_up_to_bad_char = false) noexcept;
3406 #if SIMDUTF_SPAN
3411simdutf_really_inline simdutf_warn_unused std::tuple<result, std::size_t>
3412atomic_base64_to_binary_safe(
3413 const detail::input_span_of_byte_like auto &binary_input,
3414 detail::output_span_of_byte_like auto &&output,
3415 base64_options options = base64_default,
3416 last_chunk_handling_options last_chunk_options =
3417 last_chunk_handling_options::loose,
3418 bool decode_up_to_bad_char = false) noexcept {
3419 size_t outlen = output.size();
3420 auto ret = atomic_base64_to_binary_safe(
3421 reinterpret_cast<const char *>(binary_input.data()), binary_input.size(),
3422 reinterpret_cast<char *>(output.data()), outlen, options,
3423 last_chunk_options, decode_up_to_bad_char);
3424 return {ret, outlen};
3425}
3430simdutf_warn_unused std::tuple<result, std::size_t>
3431atomic_base64_to_binary_safe(
3432 std::span<const char16_t> base64_input,
3433 detail::output_span_of_byte_like auto &&binary_output,
3434 base64_options options = base64_default,
3435 last_chunk_handling_options last_chunk_options = loose,
3436 bool decode_up_to_bad_char = false) noexcept {
3437 size_t outlen = binary_output.size();
3438 auto ret = atomic_base64_to_binary_safe(
3439 base64_input.data(), base64_input.size(),
3440 reinterpret_cast<char *>(binary_output.data()), outlen, options,
3441 last_chunk_options, decode_up_to_bad_char);
3442 return {ret, outlen};
3443}
3444 #endif // SIMDUTF_SPAN
3445 #endif // SIMDUTF_ATOMIC_REF
3446
3457simdutf_warn_unused const char *find(const char *start, const char *end,
3458 char character) noexcept;
3459simdutf_warn_unused const char16_t *
3460find(const char16_t *start, const char16_t *end, char16_t character) noexcept;
3461#endif // SIMDUTF_FEATURE_BASE64
3462
3471public:
3481 virtual std::string name() const { return std::string(_name); }
3482
3492 virtual std::string description() const { return std::string(_description); }
3493
3504
3505#if SIMDUTF_FEATURE_DETECT_ENCODING
3512 virtual encoding_type autodetect_encoding(const char *input,
3513 size_t length) const noexcept;
3514
3521 virtual int detect_encodings(const char *input,
3522 size_t length) const noexcept = 0;
3523#endif // SIMDUTF_FEATURE_DETECT_ENCODING
3524
3532 virtual uint32_t required_instruction_sets() const {
3533 return _required_instruction_sets;
3534 }
3535
#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
  /**
   * Validate the UTF-8 string.
   *
   * @param buf the string to validate
   * @param len length of `buf` in bytes
   */
  simdutf_warn_unused virtual bool
  validate_utf8(const char *buf, size_t len) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING

#if SIMDUTF_FEATURE_UTF8
  /**
   * Validate the UTF-8 string and stop on errors. Reports through a `result`
   * rather than a bool.
   */
  simdutf_warn_unused virtual result
  validate_utf8_with_errors(const char *buf, size_t len) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF8

#if SIMDUTF_FEATURE_ASCII
  /**
   * ASCII counterpart of validate_utf8: takes a byte buffer and its length,
   * returns a bool.
   */
  simdutf_warn_unused virtual bool
  validate_ascii(const char *buf, size_t len) const noexcept = 0;

  /**
   * ASCII counterpart of validate_utf8_with_errors: same inputs as
   * validate_ascii, reports through a `result`.
   */
  simdutf_warn_unused virtual result
  validate_ascii_with_errors(const char *buf, size_t len) const noexcept = 0;
#endif // SIMDUTF_FEATURE_ASCII
3595
#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
  /**
   * Validate the UTF-16LE string.
   *
   * @param buf the string to validate
   * @param len length of `buf` in char16_t units
   */
  simdutf_warn_unused virtual bool
  validate_utf16le(const char16_t *buf, size_t len) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING

#if SIMDUTF_FEATURE_UTF16
  /** Validate the UTF-16BE string. */
  simdutf_warn_unused virtual bool
  validate_utf16be(const char16_t *buf, size_t len) const noexcept = 0;

  /** Validate the UTF-16LE string and stop on error. */
  simdutf_warn_unused virtual result
  validate_utf16le_with_errors(const char16_t *buf,
                               size_t len) const noexcept = 0;

  /** Validate the UTF-16BE string and stop on error. */
  simdutf_warn_unused virtual result
  validate_utf16be_with_errors(const char16_t *buf,
                               size_t len) const noexcept = 0;

  /**
   * Copies the UTF-16LE string while replacing mismatched surrogates with
   * the Unicode replacement character.
   *
   * @param input  source string
   * @param len    length of `input` in char16_t units
   * @param output destination buffer
   */
  virtual void to_well_formed_utf16le(const char16_t *input, size_t len,
                                      char16_t *output) const noexcept = 0;

  /** UTF-16BE counterpart of to_well_formed_utf16le. */
  virtual void to_well_formed_utf16be(const char16_t *input, size_t len,
                                      char16_t *output) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF16
3701
#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
  /**
   * Validate the UTF-32 string.
   *
   * @param buf the string to validate
   * @param len length of `buf` in char32_t units
   */
  simdutf_warn_unused virtual bool
  validate_utf32(const char32_t *buf, size_t len) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING

#if SIMDUTF_FEATURE_UTF32
  /** Validate the UTF-32 string and stop on error. */
  simdutf_warn_unused virtual result
  validate_utf32_with_errors(const char32_t *buf,
                             size_t len) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF32
3739
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
  /**
   * Convert Latin1 string into UTF-8 string.
   *
   * @param input       the Latin1 string
   * @param length      length of `input` in bytes
   * @param utf8_output destination buffer
   */
  simdutf_warn_unused virtual size_t
  convert_latin1_to_utf8(const char *input, size_t length,
                         char *utf8_output) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1

#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
  /** Convert Latin1 string into UTF-16LE string. */
  simdutf_warn_unused virtual size_t
  convert_latin1_to_utf16le(const char *input, size_t length,
                            char16_t *utf16_output) const noexcept = 0;

  /** Convert Latin1 string into UTF-16BE string. */
  simdutf_warn_unused virtual size_t
  convert_latin1_to_utf16be(const char *input, size_t length,
                            char16_t *utf16_output) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1

#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
  /** Convert Latin1 string into UTF-32 string. */
  simdutf_warn_unused virtual size_t
  convert_latin1_to_utf32(const char *input, size_t length,
                          char32_t *utf32_buffer) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
3801
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
  /**
   * Convert possibly broken UTF-8 string into Latin1 string.
   *
   * @param input         the UTF-8 string
   * @param length        length of `input` in bytes
   * @param latin1_output destination buffer
   */
  simdutf_warn_unused virtual size_t
  convert_utf8_to_latin1(const char *input, size_t length,
                         char *latin1_output) const noexcept = 0;

  /**
   * Convert possibly broken UTF-8 string into Latin1 string and stop on
   * error. Reports through a `result`.
   */
  simdutf_warn_unused virtual result
  convert_utf8_to_latin1_with_errors(const char *input, size_t length,
                                     char *latin1_output) const noexcept = 0;

  /** Convert valid UTF-8 string into Latin1 string. */
  simdutf_warn_unused virtual size_t
  convert_valid_utf8_to_latin1(const char *input, size_t length,
                               char *latin1_output) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
3862
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
  /**
   * Convert possibly broken UTF-8 string into UTF-16LE string.
   *
   * @param input        the UTF-8 string
   * @param length       length of `input` in bytes
   * @param utf16_output destination buffer
   */
  simdutf_warn_unused virtual size_t
  convert_utf8_to_utf16le(const char *input, size_t length,
                          char16_t *utf16_output) const noexcept = 0;

  /** Convert possibly broken UTF-8 string into UTF-16BE string. */
  simdutf_warn_unused virtual size_t
  convert_utf8_to_utf16be(const char *input, size_t length,
                          char16_t *utf16_output) const noexcept = 0;

  /**
   * Convert possibly broken UTF-8 string into UTF-16LE string and stop on
   * error.
   */
  simdutf_warn_unused virtual result
  convert_utf8_to_utf16le_with_errors(const char *input, size_t length,
                                      char16_t *utf16_output) const noexcept = 0;

  /**
   * Convert possibly broken UTF-8 string into UTF-16BE string and stop on
   * error.
   */
  simdutf_warn_unused virtual result
  convert_utf8_to_utf16be_with_errors(const char *input, size_t length,
                                      char16_t *utf16_output) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16

#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
  /** Convert possibly broken UTF-8 string into UTF-32 string. */
  simdutf_warn_unused virtual size_t
  convert_utf8_to_utf32(const char *input, size_t length,
                        char32_t *utf32_output) const noexcept = 0;

  /**
   * Convert possibly broken UTF-8 string into UTF-32 string and stop on
   * error.
   */
  simdutf_warn_unused virtual result
  convert_utf8_to_utf32_with_errors(const char *input, size_t length,
                                    char32_t *utf32_output) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
3970
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
  /**
   * Convert valid UTF-8 string into UTF-16LE string.
   *
   * @param input        the UTF-8 string
   * @param length       length of `input` in bytes
   * @param utf16_buffer destination buffer
   */
  simdutf_warn_unused virtual size_t
  convert_valid_utf8_to_utf16le(const char *input, size_t length,
                                char16_t *utf16_buffer) const noexcept = 0;

  /** Convert valid UTF-8 string into UTF-16BE string. */
  simdutf_warn_unused virtual size_t
  convert_valid_utf8_to_utf16be(const char *input, size_t length,
                                char16_t *utf16_buffer) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16

#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
  /** Convert valid UTF-8 string into UTF-32 string. */
  simdutf_warn_unused virtual size_t
  convert_valid_utf8_to_utf32(const char *input, size_t length,
                              char32_t *utf32_buffer) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32

#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
  /**
   * UTF-16 length query for a UTF-8 string: takes the string and its length
   * in bytes, returns a size_t.
   */
  simdutf_warn_unused virtual size_t
  utf16_length_from_utf8(const char *input, size_t length) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16

#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
  /**
   * Compute the number of 4-byte code units that this UTF-8 string would
   * require in UTF-32 format.
   */
  simdutf_warn_unused virtual size_t
  utf32_length_from_utf8(const char *input, size_t length) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
4052
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
  /**
   * Convert possibly broken UTF-16LE string into Latin1 string.
   *
   * @param input         the UTF-16LE string
   * @param length        length of `input` in char16_t units
   * @param latin1_buffer destination buffer
   */
  simdutf_warn_unused virtual size_t
  convert_utf16le_to_latin1(const char16_t *input, size_t length,
                            char *latin1_buffer) const noexcept = 0;

  /** Convert possibly broken UTF-16BE string into Latin1 string. */
  simdutf_warn_unused virtual size_t
  convert_utf16be_to_latin1(const char16_t *input, size_t length,
                            char *latin1_buffer) const noexcept = 0;

  /**
   * Convert possibly broken UTF-16LE string into Latin1 string; reports
   * through a `result`.
   */
  simdutf_warn_unused virtual result convert_utf16le_to_latin1_with_errors(
      const char16_t *input, size_t length,
      char *latin1_buffer) const noexcept = 0;

  /**
   * Convert possibly broken UTF-16BE string into Latin1 string; reports
   * through a `result`.
   */
  simdutf_warn_unused virtual result convert_utf16be_to_latin1_with_errors(
      const char16_t *input, size_t length,
      char *latin1_buffer) const noexcept = 0;

  /** Convert valid UTF-16LE string into Latin1 string. */
  simdutf_warn_unused virtual size_t
  convert_valid_utf16le_to_latin1(const char16_t *input, size_t length,
                                  char *latin1_buffer) const noexcept = 0;

  /** Convert valid UTF-16BE string into Latin1 string. */
  simdutf_warn_unused virtual size_t
  convert_valid_utf16be_to_latin1(const char16_t *input, size_t length,
                                  char *latin1_buffer) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
4188
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
  /**
   * Convert possibly broken UTF-16LE string into UTF-8 string.
   *
   * @param input       the UTF-16LE string
   * @param length      length of `input` in char16_t units
   * @param utf8_buffer destination buffer
   */
  simdutf_warn_unused virtual size_t
  convert_utf16le_to_utf8(const char16_t *input, size_t length,
                          char *utf8_buffer) const noexcept = 0;

  /** Convert possibly broken UTF-16BE string into UTF-8 string. */
  simdutf_warn_unused virtual size_t
  convert_utf16be_to_utf8(const char16_t *input, size_t length,
                          char *utf8_buffer) const noexcept = 0;

  /**
   * Convert possibly broken UTF-16LE string into UTF-8 string and stop on
   * error.
   */
  simdutf_warn_unused virtual result convert_utf16le_to_utf8_with_errors(
      const char16_t *input, size_t length,
      char *utf8_buffer) const noexcept = 0;

  /**
   * Convert possibly broken UTF-16BE string into UTF-8 string and stop on
   * error.
   */
  simdutf_warn_unused virtual result convert_utf16be_to_utf8_with_errors(
      const char16_t *input, size_t length,
      char *utf8_buffer) const noexcept = 0;

  /** Convert valid UTF-16LE string into UTF-8 string. */
  simdutf_warn_unused virtual size_t
  convert_valid_utf16le_to_utf8(const char16_t *input, size_t length,
                                char *utf8_buffer) const noexcept = 0;

  /** Convert valid UTF-16BE string into UTF-8 string. */
  simdutf_warn_unused virtual size_t
  convert_valid_utf16be_to_utf8(const char16_t *input, size_t length,
                                char *utf8_buffer) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
4308
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
  /**
   * Convert possibly broken UTF-16LE string into UTF-32 string.
   *
   * @param input        the UTF-16LE string
   * @param length       length of `input` in char16_t units
   * @param utf32_buffer destination buffer
   */
  simdutf_warn_unused virtual size_t
  convert_utf16le_to_utf32(const char16_t *input, size_t length,
                           char32_t *utf32_buffer) const noexcept = 0;

  /** Convert possibly broken UTF-16BE string into UTF-32 string. */
  simdutf_warn_unused virtual size_t
  convert_utf16be_to_utf32(const char16_t *input, size_t length,
                           char32_t *utf32_buffer) const noexcept = 0;

  /**
   * Convert possibly broken UTF-16LE string into UTF-32 string and stop on
   * error.
   */
  simdutf_warn_unused virtual result convert_utf16le_to_utf32_with_errors(
      const char16_t *input, size_t length,
      char32_t *utf32_buffer) const noexcept = 0;

  /**
   * Convert possibly broken UTF-16BE string into UTF-32 string and stop on
   * error.
   */
  simdutf_warn_unused virtual result convert_utf16be_to_utf32_with_errors(
      const char16_t *input, size_t length,
      char32_t *utf32_buffer) const noexcept = 0;

  /** Convert valid UTF-16LE string into UTF-32 string. */
  simdutf_warn_unused virtual size_t
  convert_valid_utf16le_to_utf32(const char16_t *input, size_t length,
                                 char32_t *utf32_buffer) const noexcept = 0;

  /** Convert valid UTF-16BE string into UTF-32 string. */
  simdutf_warn_unused virtual size_t
  convert_valid_utf16be_to_utf32(const char16_t *input, size_t length,
                                 char32_t *utf32_buffer) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
4428
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
  /**
   * Compute the number of bytes that this UTF-16LE string would require in
   * UTF-8 format.
   *
   * @param input  the UTF-16LE string
   * @param length length of `input` in char16_t units
   */
  simdutf_warn_unused virtual size_t
  utf8_length_from_utf16le(const char16_t *input,
                           size_t length) const noexcept = 0;

  /** UTF-16BE counterpart of utf8_length_from_utf16le. */
  simdutf_warn_unused virtual size_t
  utf8_length_from_utf16be(const char16_t *input,
                           size_t length) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
4466
#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
  /**
   * Convert possibly broken UTF-32 string into Latin1 string.
   *
   * @param input         the UTF-32 string
   * @param length        length of `input` in char32_t units
   * @param latin1_buffer destination buffer
   */
  simdutf_warn_unused virtual size_t
  convert_utf32_to_latin1(const char32_t *input, size_t length,
                          char *latin1_buffer) const noexcept = 0;

  /**
   * Convert possibly broken UTF-32 string into Latin1 string and stop on
   * error.
   */
  simdutf_warn_unused virtual result convert_utf32_to_latin1_with_errors(
      const char32_t *input, size_t length,
      char *latin1_buffer) const noexcept = 0;

  /** Convert valid UTF-32 string into Latin1 string. */
  simdutf_warn_unused virtual size_t
  convert_valid_utf32_to_latin1(const char32_t *input, size_t length,
                                char *latin1_buffer) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
4537
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
  /**
   * Convert possibly broken UTF-32 string into UTF-8 string.
   *
   * @param input       the UTF-32 string
   * @param length      length of `input` in char32_t units
   * @param utf8_buffer destination buffer
   */
  simdutf_warn_unused virtual size_t
  convert_utf32_to_utf8(const char32_t *input, size_t length,
                        char *utf8_buffer) const noexcept = 0;

  /**
   * Convert possibly broken UTF-32 string into UTF-8 string and stop on
   * error.
   */
  simdutf_warn_unused virtual result convert_utf32_to_utf8_with_errors(
      const char32_t *input, size_t length,
      char *utf8_buffer) const noexcept = 0;

  /** Convert valid UTF-32 string into UTF-8 string. */
  simdutf_warn_unused virtual size_t
  convert_valid_utf32_to_utf8(const char32_t *input, size_t length,
                              char *utf8_buffer) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
4597
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
  /**
   * UTF-16 length for a Latin1 string of `length` bytes. The default body
   * returns `length` unchanged; overridable because it is virtual.
   */
  simdutf_warn_unused virtual size_t
  utf16_length_from_latin1(size_t length) const noexcept {
    return length;
  }
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
4614
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
  /**
   * Convert possibly broken UTF-32 string into UTF-16LE string.
   *
   * @param input        the UTF-32 string
   * @param length       length of `input` in char32_t units
   * @param utf16_buffer destination buffer
   */
  simdutf_warn_unused virtual size_t
  convert_utf32_to_utf16le(const char32_t *input, size_t length,
                           char16_t *utf16_buffer) const noexcept = 0;

  /** Convert possibly broken UTF-32 string into UTF-16BE string. */
  simdutf_warn_unused virtual size_t
  convert_utf32_to_utf16be(const char32_t *input, size_t length,
                           char16_t *utf16_buffer) const noexcept = 0;

  /**
   * Convert possibly broken UTF-32 string into UTF-16LE string and stop on
   * error.
   */
  simdutf_warn_unused virtual result convert_utf32_to_utf16le_with_errors(
      const char32_t *input, size_t length,
      char16_t *utf16_buffer) const noexcept = 0;

  /**
   * Convert possibly broken UTF-32 string into UTF-16BE string and stop on
   * error.
   */
  simdutf_warn_unused virtual result convert_utf32_to_utf16be_with_errors(
      const char32_t *input, size_t length,
      char16_t *utf16_buffer) const noexcept = 0;

  /** Convert valid UTF-32 string into UTF-16LE string. */
  simdutf_warn_unused virtual size_t
  convert_valid_utf32_to_utf16le(const char32_t *input, size_t length,
                                 char16_t *utf16_buffer) const noexcept = 0;

  /** Convert valid UTF-32 string into UTF-16BE string. */
  simdutf_warn_unused virtual size_t
  convert_valid_utf32_to_utf16be(const char32_t *input, size_t length,
                                 char16_t *utf16_buffer) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
4734
#if SIMDUTF_FEATURE_UTF16
  /**
   * UTF-16 endianness change, per the name. Reads `length` char16_t units
   * from `input` and writes to `output`; returns nothing.
   * NOTE(review): whether `input` and `output` may alias is not visible
   * here -- confirm against the implementations.
   */
  virtual void change_endianness_utf16(const char16_t *input, size_t length,
                                       char16_t *output) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF16
4753
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
  /**
   * UTF-8 length query for a Latin1 string: takes the string and its length
   * in bytes, returns a size_t.
   */
  simdutf_warn_unused virtual size_t
  utf8_length_from_latin1(const char *input, size_t length) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1

#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
  /**
   * Compute the number of bytes that this UTF-32 string would require in
   * UTF-8 format.
   */
  simdutf_warn_unused virtual size_t
  utf8_length_from_utf32(const char32_t *input,
                         size_t length) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32

#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
  /**
   * Compute the number of bytes that this UTF-32 string would require in
   * Latin1 format. The default body returns `length` unchanged.
   */
  simdutf_warn_unused virtual size_t
  latin1_length_from_utf32(size_t length) const noexcept {
    return length;
  }
#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1

#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
  /**
   * Compute the number of bytes that this UTF-8 string would require in
   * Latin1 format.
   */
  simdutf_warn_unused virtual size_t
  latin1_length_from_utf8(const char *input, size_t length) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
4818
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
  /**
   * Compute the number of bytes that this UTF-16LE/BE string would require
   * in Latin1 format. The default body returns `length` unchanged.
   */
  simdutf_warn_unused virtual size_t
  latin1_length_from_utf16(size_t length) const noexcept {
    return length;
  }
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1

#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
  /**
   * Compute the number of two-byte code units that this UTF-32 string would
   * require in UTF-16 format.
   */
  simdutf_warn_unused virtual size_t
  utf16_length_from_utf32(const char32_t *input,
                          size_t length) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32

#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
  /**
   * UTF-32 length for a Latin1 string of `length` bytes. The default body
   * returns `length` unchanged.
   */
  simdutf_warn_unused virtual size_t
  utf32_length_from_latin1(size_t length) const noexcept {
    return length;
  }
#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
4873
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
  /**
   * UTF-32 length query for a UTF-16LE string.
   *
   * @param input  the UTF-16LE string
   * @param length length of `input` in char16_t units
   */
  simdutf_warn_unused virtual size_t
  utf32_length_from_utf16le(const char16_t *input,
                            size_t length) const noexcept = 0;

  /** UTF-16BE counterpart of utf32_length_from_utf16le. */
  simdutf_warn_unused virtual size_t
  utf32_length_from_utf16be(const char16_t *input,
                            size_t length) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32

#if SIMDUTF_FEATURE_UTF16
  /**
   * Count the number of code points (characters) in the UTF-16LE string,
   * assuming that it is valid.
   */
  simdutf_warn_unused virtual size_t
  count_utf16le(const char16_t *input, size_t length) const noexcept = 0;

  /** UTF-16BE counterpart of count_utf16le. */
  simdutf_warn_unused virtual size_t
  count_utf16be(const char16_t *input, size_t length) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF16

#if SIMDUTF_FEATURE_UTF8
  /**
   * Count the number of code points (characters) in the UTF-8 string,
   * assuming that it is valid.
   */
  simdutf_warn_unused virtual size_t
  count_utf8(const char *input, size_t length) const noexcept = 0;
#endif // SIMDUTF_FEATURE_UTF8
4972
#if SIMDUTF_FEATURE_BASE64
  /**
   * Non-virtual; defined outside this header section. Takes a base64 input
   * (8-bit) and its length, returns a size_t.
   * NOTE(review): per the name, an upper bound on the decoded size --
   * confirm against the definition.
   */
  simdutf_warn_unused size_t maximal_binary_length_from_base64(
      const char *input, size_t length) const noexcept;

  /** 16-bit input variant of maximal_binary_length_from_base64. */
  simdutf_warn_unused size_t maximal_binary_length_from_base64(
      const char16_t *input, size_t length) const noexcept;

  /**
   * Convert a base64 input to a binary output.
   *
   * @param input              the base64 text (8-bit)
   * @param length             length of `input` in bytes
   * @param output             destination buffer for the decoded bytes
   * @param options            base64 variant selector
   * @param last_chunk_options handling of the final chunk (default: loose)
   */
  simdutf_warn_unused virtual result
  base64_to_binary(const char *input, size_t length, char *output,
                   base64_options options = base64_default,
                   last_chunk_handling_options last_chunk_options =
                       last_chunk_handling_options::loose) const noexcept = 0;

  /**
   * Convert a base64 input to a binary output while returning more details
   * than base64_to_binary (a `full_result` instead of a `result`).
   */
  simdutf_warn_unused virtual full_result
  base64_to_binary_details(const char *input, size_t length, char *output,
                           base64_options options = base64_default,
                           last_chunk_handling_options last_chunk_options =
                               last_chunk_handling_options::loose)
      const noexcept = 0;

  /** 16-bit input variant of base64_to_binary. */
  simdutf_warn_unused virtual result
  base64_to_binary(const char16_t *input, size_t length, char *output,
                   base64_options options = base64_default,
                   last_chunk_handling_options last_chunk_options =
                       last_chunk_handling_options::loose) const noexcept = 0;

  /** 16-bit input variant of base64_to_binary_details. */
  simdutf_warn_unused virtual full_result
  base64_to_binary_details(const char16_t *input, size_t length, char *output,
                           base64_options options = base64_default,
                           last_chunk_handling_options last_chunk_options =
                               last_chunk_handling_options::loose)
      const noexcept = 0;

  /**
   * Non-virtual; defined outside this header section. Maps a binary length
   * and a base64 variant to a size_t.
   */
  simdutf_warn_unused size_t base64_length_from_binary(
      size_t length, base64_options options = base64_default) const noexcept;

  /**
   * Convert a binary input to a base64 output.
   *
   * Unlike the decoding members, this one is not marked simdutf_warn_unused.
   */
  virtual size_t
  binary_to_base64(const char *input, size_t length, char *output,
                   base64_options options = base64_default) const noexcept = 0;

  /**
   * Find the first occurrence of a character in a string given as the
   * pointer pair [start, end).
   */
  virtual const char *find(const char *start, const char *end,
                           char character) const noexcept = 0;

  /** 16-bit variant of find. */
  virtual const char16_t *find(const char16_t *start, const char16_t *end,
                               char16_t character) const noexcept = 0;
#endif // SIMDUTF_FEATURE_BASE64
5200
#ifdef SIMDUTF_INTERNAL_TESTS
  // Developer-mode hook: lets an implementation expose some of its internal
  // test procedures so they can be driven by the standard test framework.
  //
  // Regular users should not rely on it; the tests of the public API are
  // enough.

  struct TestProcedure {
    // Name shown for the test.
    std::string name;

    // Test entry point, invoked with the implementation under test. The
    // function type returns void, so the outcome is not reported through a
    // return value.
    void (*procedure)(const implementation &);
  };

  // Virtual, non-pure: a default definition exists outside this header.
  virtual std::vector<TestProcedure> internal_tests() const;
#endif // SIMDUTF_INTERNAL_TESTS
5220
5221protected:
5224 simdutf_really_inline implementation(const char *name,
5225 const char *description,
5226 uint32_t required_instruction_sets)
5227 : _name(name), _description(description),
5228 _required_instruction_sets(required_instruction_sets) {}
5229
5230protected:
5231 ~implementation() = default;
5232
5233private:
5237 const char *_name;
5238
5242 const char *_description;
5243
5247 const uint32_t _required_instruction_sets;
5248};
5249
5251namespace internal {
5252
5256class available_implementation_list {
5257public:
5259 simdutf_really_inline available_implementation_list() {}
5261 size_t size() const noexcept;
5263 const implementation *const *begin() const noexcept;
5265 const implementation *const *end() const noexcept;
5266
5280 const implementation *operator[](const std::string &name) const noexcept {
5281 for (const implementation *impl : *this) {
5282 if (impl->name() == name) {
5283 return impl;
5284 }
5285 }
5286 return nullptr;
5287 }
5288
5302 const implementation *detect_best_supported() const noexcept;
5303};
5304
/**
 * Thin pointer wrapper.
 *
 * Stores a plain `T *` when SIMDUTF_NO_THREADS is defined and a
 * `std::atomic<T *>` otherwise. Every accessor reads the stored pointer
 * through the private get() helper; assignment stores a new pointer.
 */
template <typename T> class atomic_ptr {
public:
  /** Wrap `_ptr` (implicit conversion from a raw pointer is allowed). */
  atomic_ptr(T *_ptr) : ptr{_ptr} {}

  // Read-only access.
  operator const T *() const { return get(); }
  const T &operator*() const { return *get(); }
  const T *operator->() const { return get(); }

  // Mutable access.
  operator T *() { return get(); }
  T &operator*() { return *get(); }
  T *operator->() { return get(); }

  /** Replace the stored pointer with `_ptr`. */
  atomic_ptr &operator=(T *_ptr) {
    ptr = _ptr;
    return *this;
  }

private:
  /** Current value of the stored pointer. */
  T *get() const {
#if defined(SIMDUTF_NO_THREADS)
    return ptr;
#else
    return ptr.load();
#endif
  }

#if defined(SIMDUTF_NO_THREADS)
  T *ptr;
#else
  std::atomic<T *> ptr;
#endif
};
5344
5345class detect_best_supported_implementation_on_first_use;
5346
5347} // namespace internal
5348
5352extern SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list &
5353get_available_implementations();
5354
5361extern SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation> &
5362get_active_implementation();
5363
5364} // namespace simdutf
5365
5366#endif // SIMDUTF_IMPLEMENTATION_H
An implementation of simdutf for a particular CPU architecture.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-32 string into Latin1 string.
virtual const char * find(const char *start, const char *end, char character) const noexcept=0
Find the first occurrence of a character in a string.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-16LE string.
virtual simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept
Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
virtual simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16BE string would require in UTF-32 format.
virtual simdutf_warn_unused size_t convert_utf8_to_utf16be(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16BE string.
virtual simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-16BE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-16BE string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_latin1_to_utf16le(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert Latin1 string into UTF-16LE string.
virtual simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16BE string.
virtual simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into Latin1 string.
virtual simdutf_warn_unused full_result base64_to_binary_details(const char *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output while returning more details than base64_to_binary.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-16LE string into UTF-32 string.
virtual simdutf_warn_unused size_t latin1_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-8 string would require in Latin1 format.
virtual simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-32 string into Latin1 string and stop on error.
virtual simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
virtual simdutf_warn_unused size_t convert_latin1_to_utf16be(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert Latin1 string into UTF-16BE string.
virtual simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept
Compute the number of bytes that this UTF-32 string would require in Latin1 format.
virtual simdutf_warn_unused full_result base64_to_binary_details(const char16_t *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output while returning more details than base64_to_binary.
virtual simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16LE string.
virtual simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept=0
Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
virtual simdutf_warn_unused size_t count_utf8(const char *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-16BE string.
virtual simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of 4-byte code units that this UTF-8 string would require in UTF-32 format.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-16LE string into UTF-8 string.
virtual void to_well_formed_utf16le(const char16_t *input, size_t len, char16_t *output) const noexcept=0
Copies the UTF-16LE string while replacing mismatched surrogates with the Unicode replacement charact...
virtual simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16BE string and stop on error.
virtual simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept=0
Validate the UTF-8 string and stop on errors.
virtual simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
virtual simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf8_to_utf16le(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16LE string.
virtual simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16LE string and stop on error.
virtual simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-8 string and stop on error.
virtual size_t binary_to_base64(const char *input, size_t length, char *output, base64_options options=base64_default) const noexcept=0
Convert a binary input to a base64 output.
virtual simdutf_warn_unused size_t convert_latin1_to_utf32(const char *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert Latin1 string into UTF-32 string.
virtual simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char *input, size_t length, char32_t *utf32_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-8 string.
virtual simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept
Return the number of 4-byte code units that this Latin1 string would require in UTF-32 format.
virtual simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16LE string. This function may be best when you expect the input to be almost always ...
virtual simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16LE string and stop on error.
virtual simdutf_warn_unused size_t count_utf16le(const char16_t *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-32 string into Latin1 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-16BE string into Latin1 string.
bool supported_by_runtime_system() const
Whether the instruction sets this implementation is compiled against match the current CPU.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16BE string and stop on error.
virtual simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-8 string and stop on error.
virtual simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-32 string and stop on error.
virtual simdutf_warn_unused result base64_to_binary(const char *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output.
virtual simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16BE string.
virtual simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept=0
Validate the UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16LE string would require in UTF-32 format.
virtual void change_endianness_utf16(const char16_t *input, size_t length, char16_t *output) const noexcept=0
Change the endianness of the input.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-16BE string.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert valid UTF-8 string into Latin1 string.
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char *input, size_t length) const noexcept
Provide the maximal binary length in bytes given the base64 input.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-16LE string.
virtual std::string name() const
The name of this implementation.
virtual simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept=0
Validate the ASCII string and stop on error.
virtual simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept=0
Validate the UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf8_to_utf32(const char *input, size_t length, char32_t *utf32_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-32 string.
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char16_t *input, size_t length) const noexcept
Provide the maximal binary length in bytes given the base64 input.
virtual simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept=0
Validate the UTF-32 string.
virtual simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-8 string.
virtual simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
virtual simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into Latin1 string.
virtual simdutf_warn_unused size_t convert_utf8_to_latin1(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert possibly broken UTF-8 string into Latin1 string.
virtual simdutf_warn_unused result base64_to_binary(const char16_t *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output.
simdutf_warn_unused size_t base64_length_from_binary(size_t length, base64_options options=base64_default) const noexcept
Provide the base64 length in bytes given the length of a binary input.
virtual int detect_encodings(const char *input, size_t length) const noexcept=0
This function will try to detect the possible encodings in one pass.
virtual simdutf_warn_unused size_t count_utf16be(const char16_t *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-8 string and stop on error.
virtual void to_well_formed_utf16be(const char16_t *input, size_t len, char16_t *output) const noexcept=0
Copies the UTF-16BE string while replacing mismatched surrogates with the Unicode replacement charact...
virtual simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept=0
Validate the ASCII string.
virtual simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
virtual simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16BE string and stop on error.
virtual simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert possibly broken UTF-8 string into Latin1 string and stop on error.
virtual std::string description() const
The description of this implementation.
virtual simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept
Return the number of 2-byte code units (char16_t) that this Latin1 string would require in UTF-16 format.
virtual simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16LE string and stop on error.
virtual simdutf_warn_unused size_t convert_latin1_to_utf8(const char *input, size_t length, char *utf8_output) const noexcept=0
Convert Latin1 string into UTF-8 string.
virtual encoding_type autodetect_encoding(const char *input, size_t length) const noexcept
This function will try to detect the encoding.
virtual simdutf_warn_unused size_t utf8_length_from_latin1(const char *input, size_t length) const noexcept=0
Return the number of bytes that this Latin1 string would require in UTF-8 format.