simdutf 9.0.0
Unicode at GB/s.
Loading...
Searching...
No Matches
implementation.h
1#ifndef SIMDUTF_IMPLEMENTATION_H
2#define SIMDUTF_IMPLEMENTATION_H
3#if !defined(SIMDUTF_NO_THREADS)
4 #include <atomic>
5#endif
6#ifdef SIMDUTF_INTERNAL_TESTS
7 #include <vector>
8#endif
9#include "simdutf/common_defs.h"
10#include "simdutf/compiler_check.h"
11#include "simdutf/encoding_types.h"
12#include "simdutf/error.h"
13#include "simdutf/internal/isadetection.h"
14
15#include <string_view>
16#if SIMDUTF_SPAN
17 #include <concepts>
18 #include <type_traits>
19 #include <span>
20 #include <tuple>
21 #include <utility> // for std::unreachable
22#endif
23// The following defines are conditionally enabled/disabled during amalgamation.
24// By default all features are enabled, regular code shouldn't check them. Only
25// when user code really relies on a selected subset, it's good to verify these
26// flags, like:
27//
28// #if !SIMDUTF_FEATURE_UTF16
29// # error("Please amalgamate simdutf with UTF-16 support")
30// #endif
31//
32#ifndef SIMDUTF_FEATURE_DETECT_ENCODING
33 #define SIMDUTF_FEATURE_DETECT_ENCODING 1
34#endif
35#ifndef SIMDUTF_FEATURE_ASCII
36 #define SIMDUTF_FEATURE_ASCII 1
37#endif
38#ifndef SIMDUTF_FEATURE_LATIN1
39 #define SIMDUTF_FEATURE_LATIN1 1
40#endif
41#ifndef SIMDUTF_FEATURE_UTF8
42 #define SIMDUTF_FEATURE_UTF8 1
43#endif
44#ifndef SIMDUTF_FEATURE_UTF16
45 #define SIMDUTF_FEATURE_UTF16 1
46#endif
47#ifndef SIMDUTF_FEATURE_UTF32
48 #define SIMDUTF_FEATURE_UTF32 1
49#endif
50#ifndef SIMDUTF_FEATURE_BASE64
51 #define SIMDUTF_FEATURE_BASE64 1
52#endif
53
54#if SIMDUTF_CPLUSPLUS23
55 #include <simdutf/constexpr_ptr.h>
56#endif
57
58#if SIMDUTF_SPAN
60namespace simdutf {
61namespace detail {
66template <typename T>
67concept byte_like = std::is_same_v<T, std::byte> || //
68 std::is_same_v<T, char> || //
69 std::is_same_v<T, signed char> || //
70 std::is_same_v<T, unsigned char> || //
71 std::is_same_v<T, char8_t>;
72
73template <typename T>
74concept is_byte_like = byte_like<std::remove_cvref_t<T>>;
75
76template <typename T>
77concept is_pointer = std::is_pointer_v<T>;
78
84template <typename T>
85concept input_span_of_byte_like = requires(const T &t) {
86 { t.size() } noexcept -> std::convertible_to<std::size_t>;
87 { t.data() } noexcept -> is_pointer;
88 { *t.data() } noexcept -> is_byte_like;
89};
90
91template <typename T>
92concept is_mutable = !std::is_const_v<std::remove_reference_t<T>>;
93
97template <typename T>
98concept output_span_of_byte_like = requires(T &t) {
99 { t.size() } noexcept -> std::convertible_to<std::size_t>;
100 { t.data() } noexcept -> is_pointer;
101 { *t.data() } noexcept -> is_byte_like;
102 { *t.data() } noexcept -> is_mutable;
103};
104
110template <class InputPtr>
111concept indexes_into_byte_like = requires(InputPtr p) {
112 { std::decay_t<decltype(p[0])>{} } -> simdutf::detail::byte_like;
113};
114template <class InputPtr>
115concept indexes_into_utf16 = requires(InputPtr p) {
116 { std::decay_t<decltype(p[0])>{} } -> std::same_as<char16_t>;
117};
118template <class InputPtr>
119concept indexes_into_utf32 = requires(InputPtr p) {
120 { std::decay_t<decltype(p[0])>{} } -> std::same_as<char32_t>;
121};
122
123template <class InputPtr>
124concept index_assignable_from_char = requires(InputPtr p, char s) {
125 { p[0] = s };
126};
127
132template <class InputPtr>
133concept indexes_into_uint32 = requires(InputPtr p) {
134 { std::decay_t<decltype(p[0])>{} } -> std::same_as<std::uint32_t>;
135};
136} // namespace detail
137} // namespace simdutf
138#endif // SIMDUTF_SPAN
139
140// these includes are needed for constexpr support. they are
141// not part of the public api.
142#include <simdutf/scalar/swap_bytes.h>
143#include <simdutf/scalar/ascii.h>
144#include <simdutf/scalar/atomic_util.h>
145#include <simdutf/scalar/latin1.h>
146#include <simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h>
147#include <simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h>
148#include <simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h>
149#include <simdutf/scalar/utf16.h>
150#include <simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h>
151#include <simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h>
152#include <simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h>
153#include <simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h>
154#include <simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h>
155#include <simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h>
156#include <simdutf/scalar/utf32.h>
157#include <simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h>
158#include <simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h>
159#include <simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h>
160#include <simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h>
161#include <simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h>
162#include <simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h>
163#include <simdutf/scalar/utf8.h>
164#include <simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h>
165#include <simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h>
166#include <simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h>
167#include <simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h>
168#include <simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h>
169#include <simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h>
170
171namespace simdutf {
172
// Default line length. NOTE(review): presumably the base64 line-wrap width
// (76 matches the MIME limit) -- confirm against the code that consumes it.
constexpr size_t default_line_length = 76;
175
176#if SIMDUTF_FEATURE_DETECT_ENCODING
187simdutf_warn_unused simdutf::encoding_type
188autodetect_encoding(const char *input, size_t length) noexcept;
189simdutf_really_inline simdutf_warn_unused simdutf::encoding_type
190autodetect_encoding(const uint8_t *input, size_t length) noexcept {
191 return autodetect_encoding(reinterpret_cast<const char *>(input), length);
192}
193 #if SIMDUTF_SPAN
205simdutf_really_inline simdutf_warn_unused simdutf::encoding_type
206autodetect_encoding(
207 const detail::input_span_of_byte_like auto &input) noexcept {
208 return autodetect_encoding(reinterpret_cast<const char *>(input.data()),
209 input.size());
210}
211 #endif // SIMDUTF_SPAN
212
224simdutf_warn_unused int detect_encodings(const char *input,
225 size_t length) noexcept;
226simdutf_really_inline simdutf_warn_unused int
227detect_encodings(const uint8_t *input, size_t length) noexcept {
228 return detect_encodings(reinterpret_cast<const char *>(input), length);
229}
230 #if SIMDUTF_SPAN
231simdutf_really_inline simdutf_warn_unused int
232detect_encodings(const detail::input_span_of_byte_like auto &input) noexcept {
233 return detect_encodings(reinterpret_cast<const char *>(input.data()),
234 input.size());
235}
236 #endif // SIMDUTF_SPAN
237#endif // SIMDUTF_FEATURE_DETECT_ENCODING
238
239#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
251simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept;
252 #if SIMDUTF_SPAN
253simdutf_constexpr23 simdutf_really_inline simdutf_warn_unused bool
254validate_utf8(const detail::input_span_of_byte_like auto &input) noexcept {
255 #if SIMDUTF_CPLUSPLUS23
256 if consteval {
257 return scalar::utf8::validate(
258 detail::constexpr_cast_ptr<uint8_t>(input.data()), input.size());
259 } else
260 #endif
261 {
262 return validate_utf8(reinterpret_cast<const char *>(input.data()),
263 input.size());
264 }
265}
266 #endif // SIMDUTF_SPAN
267#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
268
269#if SIMDUTF_FEATURE_UTF8
282simdutf_warn_unused result validate_utf8_with_errors(const char *buf,
283 size_t len) noexcept;
284 #if SIMDUTF_SPAN
285simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused result
286validate_utf8_with_errors(
287 const detail::input_span_of_byte_like auto &input) noexcept {
288 #if SIMDUTF_CPLUSPLUS23
289 if consteval {
290 return scalar::utf8::validate_with_errors(
291 detail::constexpr_cast_ptr<uint8_t>(input.data()), input.size());
292 } else
293 #endif
294 {
295 return validate_utf8_with_errors(
296 reinterpret_cast<const char *>(input.data()), input.size());
297 }
298}
299 #endif // SIMDUTF_SPAN
300#endif // SIMDUTF_FEATURE_UTF8
301
302#if SIMDUTF_FEATURE_ASCII
312simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept;
313 #if SIMDUTF_SPAN
314simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
315validate_ascii(const detail::input_span_of_byte_like auto &input) noexcept {
316 #if SIMDUTF_CPLUSPLUS23
317 if consteval {
318 return scalar::ascii::validate(
319 detail::constexpr_cast_ptr<std::uint8_t>(input.data()), input.size());
320 } else
321 #endif
322 {
323 return validate_ascii(reinterpret_cast<const char *>(input.data()),
324 input.size());
325 }
326}
327 #endif // SIMDUTF_SPAN
328
342simdutf_warn_unused result validate_ascii_with_errors(const char *buf,
343 size_t len) noexcept;
344 #if SIMDUTF_SPAN
345simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
346validate_ascii_with_errors(
347 const detail::input_span_of_byte_like auto &input) noexcept {
348 #if SIMDUTF_CPLUSPLUS23
349 if consteval {
350 return scalar::ascii::validate_with_errors(
351 detail::constexpr_cast_ptr<std::uint8_t>(input.data()), input.size());
352 } else
353 #endif
354 {
355 return validate_ascii_with_errors(
356 reinterpret_cast<const char *>(input.data()), input.size());
357 }
358}
359 #endif // SIMDUTF_SPAN
360#endif // SIMDUTF_FEATURE_ASCII
361
362#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
374simdutf_warn_unused bool validate_utf16_as_ascii(const char16_t *buf,
375 size_t len) noexcept;
376 #if SIMDUTF_SPAN
377simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
378validate_utf16_as_ascii(std::span<const char16_t> input) noexcept {
379 #if SIMDUTF_CPLUSPLUS23
380 if consteval {
381 return scalar::utf16::validate_as_ascii<endianness::NATIVE>(input.data(),
382 input.size());
383 } else
384 #endif
385 {
386 return validate_utf16_as_ascii(input.data(), input.size());
387 }
388}
389 #endif // SIMDUTF_SPAN
390
402simdutf_warn_unused bool validate_utf16be_as_ascii(const char16_t *buf,
403 size_t len) noexcept;
404 #if SIMDUTF_SPAN
405simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
406validate_utf16be_as_ascii(std::span<const char16_t> input) noexcept {
407 #if SIMDUTF_CPLUSPLUS23
408 if consteval {
409 return scalar::utf16::validate_as_ascii<endianness::BIG>(input.data(),
410 input.size());
411 } else
412 #endif
413 {
414 return validate_utf16be_as_ascii(input.data(), input.size());
415 }
416}
417 #endif // SIMDUTF_SPAN
418
430simdutf_warn_unused bool validate_utf16le_as_ascii(const char16_t *buf,
431 size_t len) noexcept;
432 #if SIMDUTF_SPAN
433simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
434validate_utf16le_as_ascii(std::span<const char16_t> input) noexcept {
435 #if SIMDUTF_CPLUSPLUS23
436 if consteval {
437 return scalar::utf16::validate_as_ascii<endianness::LITTLE>(input.data(),
438 input.size());
439 } else
440 #endif
441 {
442 return validate_utf16le_as_ascii(input.data(), input.size());
443 }
444}
445 #endif // SIMDUTF_SPAN
446#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
447
448#if SIMDUTF_FEATURE_UTF16
463simdutf_warn_unused bool validate_utf16(const char16_t *buf,
464 size_t len) noexcept;
465 #if SIMDUTF_SPAN
466simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
467validate_utf16(std::span<const char16_t> input) noexcept {
468 #if SIMDUTF_CPLUSPLUS23
469 if consteval {
470 return scalar::utf16::validate<endianness::NATIVE>(input.data(),
471 input.size());
472 } else
473 #endif
474 {
475 return validate_utf16(input.data(), input.size());
476 }
477}
478 #endif // SIMDUTF_SPAN
479#endif // SIMDUTF_FEATURE_UTF16
480
481#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
496simdutf_warn_unused bool validate_utf16le(const char16_t *buf,
497 size_t len) noexcept;
498 #if SIMDUTF_SPAN
499simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused bool
500validate_utf16le(std::span<const char16_t> input) noexcept {
501 #if SIMDUTF_CPLUSPLUS23
502 if consteval {
503 return scalar::utf16::validate<endianness::LITTLE>(input.data(),
504 input.size());
505 } else
506 #endif
507 {
508 return validate_utf16le(input.data(), input.size());
509 }
510}
511 #endif // SIMDUTF_SPAN
512#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
513
514#if SIMDUTF_FEATURE_UTF16
529simdutf_warn_unused bool validate_utf16be(const char16_t *buf,
530 size_t len) noexcept;
531 #if SIMDUTF_SPAN
532simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
533validate_utf16be(std::span<const char16_t> input) noexcept {
534 #if SIMDUTF_CPLUSPLUS23
535 if consteval {
536 return scalar::utf16::validate<endianness::BIG>(input.data(), input.size());
537 } else
538 #endif
539 {
540 return validate_utf16be(input.data(), input.size());
541 }
542}
543 #endif // SIMDUTF_SPAN
544
562simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf,
563 size_t len) noexcept;
564 #if SIMDUTF_SPAN
565simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
566validate_utf16_with_errors(std::span<const char16_t> input) noexcept {
567 #if SIMDUTF_CPLUSPLUS23
568 if consteval {
569 return scalar::utf16::validate_with_errors<endianness::NATIVE>(
570 input.data(), input.size());
571 } else
572 #endif
573 {
574 return validate_utf16_with_errors(input.data(), input.size());
575 }
576}
577 #endif // SIMDUTF_SPAN
578
595simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf,
596 size_t len) noexcept;
597 #if SIMDUTF_SPAN
598simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
599validate_utf16le_with_errors(std::span<const char16_t> input) noexcept {
600 #if SIMDUTF_CPLUSPLUS23
601 if consteval {
602 return scalar::utf16::validate_with_errors<endianness::LITTLE>(
603 input.data(), input.size());
604 } else
605 #endif
606 {
607 return validate_utf16le_with_errors(input.data(), input.size());
608 }
609}
610 #endif // SIMDUTF_SPAN
611
628simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf,
629 size_t len) noexcept;
630 #if SIMDUTF_SPAN
631simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
632validate_utf16be_with_errors(std::span<const char16_t> input) noexcept {
633 #if SIMDUTF_CPLUSPLUS23
634 if consteval {
635 return scalar::utf16::validate_with_errors<endianness::BIG>(input.data(),
636 input.size());
637 } else
638 #endif
639 {
640 return validate_utf16be_with_errors(input.data(), input.size());
641 }
642}
643 #endif // SIMDUTF_SPAN
644
/**
 * Pointer form: reads `len` code units from `input` and writes to `output`.
 * Declaration only (defined outside this section).
 */
void to_well_formed_utf16le(const char16_t *input, size_t len,
                            char16_t *output) noexcept;
  #if SIMDUTF_SPAN
/**
 * Span overload. Only input.size() is forwarded; output.size() is never
 * consulted, so the caller must size `output` appropriately. Under C++23
 * constant evaluation the scalar routine with endianness::LITTLE is used.
 */
simdutf_really_inline simdutf_constexpr23 void
to_well_formed_utf16le(std::span<const char16_t> input,
                       std::span<char16_t> output) noexcept {
    #if SIMDUTF_CPLUSPLUS23
  if consteval {
    scalar::utf16::to_well_formed_utf16<endianness::LITTLE>(
        input.data(), input.size(), output.data());
    return;
  }
    #endif
  to_well_formed_utf16le(input.data(), input.size(), output.data());
}
  #endif // SIMDUTF_SPAN
674
/**
 * Pointer form: reads `len` code units from `input` and writes to `output`.
 * Declaration only (defined outside this section).
 */
void to_well_formed_utf16be(const char16_t *input, size_t len,
                            char16_t *output) noexcept;
  #if SIMDUTF_SPAN
/**
 * Span overload. Only input.size() is forwarded; output.size() is never
 * consulted, so the caller must size `output` appropriately. Under C++23
 * constant evaluation the scalar routine with endianness::BIG is used.
 */
simdutf_really_inline simdutf_constexpr23 void
to_well_formed_utf16be(std::span<const char16_t> input,
                       std::span<char16_t> output) noexcept {
    #if SIMDUTF_CPLUSPLUS23
  if consteval {
    scalar::utf16::to_well_formed_utf16<endianness::BIG>(
        input.data(), input.size(), output.data());
    return;
  }
    #endif
  to_well_formed_utf16be(input.data(), input.size(), output.data());
}
  #endif // SIMDUTF_SPAN
704
/**
 * Pointer form: reads `len` code units from `input` and writes to `output`.
 * Declaration only (defined outside this section).
 */
void to_well_formed_utf16(const char16_t *input, size_t len,
                          char16_t *output) noexcept;
  #if SIMDUTF_SPAN
/**
 * Span overload. Only input.size() is forwarded; output.size() is never
 * consulted, so the caller must size `output` appropriately. Under C++23
 * constant evaluation the scalar routine with endianness::NATIVE is used.
 */
simdutf_really_inline simdutf_constexpr23 void
to_well_formed_utf16(std::span<const char16_t> input,
                     std::span<char16_t> output) noexcept {
    #if SIMDUTF_CPLUSPLUS23
  if consteval {
    scalar::utf16::to_well_formed_utf16<endianness::NATIVE>(
        input.data(), input.size(), output.data());
    return;
  }
    #endif
  to_well_formed_utf16(input.data(), input.size(), output.data());
}
  #endif // SIMDUTF_SPAN
734
735#endif // SIMDUTF_FEATURE_UTF16
736
737#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
752simdutf_warn_unused bool validate_utf32(const char32_t *buf,
753 size_t len) noexcept;
754 #if SIMDUTF_SPAN
755simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
756validate_utf32(std::span<const char32_t> input) noexcept {
757 #if SIMDUTF_CPLUSPLUS23
758 if consteval {
759 return scalar::utf32::validate(
760 detail::constexpr_cast_ptr<std::uint32_t>(input.data()), input.size());
761 } else
762 #endif
763 {
764 return validate_utf32(input.data(), input.size());
765 }
766}
767 #endif // SIMDUTF_SPAN
768#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
769
770#if SIMDUTF_FEATURE_UTF32
787simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf,
788 size_t len) noexcept;
789 #if SIMDUTF_SPAN
790simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
791validate_utf32_with_errors(std::span<const char32_t> input) noexcept {
792 #if SIMDUTF_CPLUSPLUS23
793 if consteval {
794 return scalar::utf32::validate_with_errors(
795 detail::constexpr_cast_ptr<std::uint32_t>(input.data()), input.size());
796 } else
797 #endif
798 {
799 return validate_utf32_with_errors(input.data(), input.size());
800 }
801}
802 #endif // SIMDUTF_SPAN
803#endif // SIMDUTF_FEATURE_UTF32
804
805#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
816simdutf_warn_unused size_t convert_latin1_to_utf8(const char *input,
817 size_t length,
818 char *utf8_output) noexcept;
819 #if SIMDUTF_SPAN
820simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
821convert_latin1_to_utf8(
822 const detail::input_span_of_byte_like auto &latin1_input,
823 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
824 #if SIMDUTF_CPLUSPLUS23
825 if consteval {
826 return scalar::latin1_to_utf8::convert(
827 detail::constexpr_cast_ptr<char>(latin1_input.data()),
828 latin1_input.size(),
829 detail::constexpr_cast_writeptr<char>(utf8_output.data()));
830 } else
831 #endif
832 {
833 return convert_latin1_to_utf8(
834 reinterpret_cast<const char *>(latin1_input.data()),
835 latin1_input.size(), reinterpret_cast<char *>(utf8_output.data()));
836 }
837}
838 #endif // SIMDUTF_SPAN
839
853simdutf_warn_unused size_t
854convert_latin1_to_utf8_safe(const char *input, size_t length, char *utf8_output,
855 size_t utf8_len) noexcept;
856 #if SIMDUTF_SPAN
857simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
858convert_latin1_to_utf8_safe(
859 const detail::input_span_of_byte_like auto &input,
860 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
861 // implementation note: outputspan is a forwarding ref to avoid copying
862 // and allow both lvalues and rvalues. std::span can be copied without
863 // problems, but std::vector should not, and this function should accept
864 // both. it will allow using an owning rvalue ref (example: passing a
865 // temporary std::string) as output, but the user will quickly find out
866 // that he has no way of getting the data out of the object in that case.
867 #if SIMDUTF_CPLUSPLUS23
868 if consteval {
869 return scalar::latin1_to_utf8::convert_safe_constexpr(
870 input.data(), input.size(), utf8_output.data(), utf8_output.size());
871 } else
872 #endif
873 {
874 return convert_latin1_to_utf8_safe(
875 reinterpret_cast<const char *>(input.data()), input.size(),
876 reinterpret_cast<char *>(utf8_output.data()), utf8_output.size());
877 }
878}
879 #endif // SIMDUTF_SPAN
880#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
881
882#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
893simdutf_warn_unused size_t convert_latin1_to_utf16le(
894 const char *input, size_t length, char16_t *utf16_output) noexcept;
895 #if SIMDUTF_SPAN
896simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
897convert_latin1_to_utf16le(
898 const detail::input_span_of_byte_like auto &latin1_input,
899 std::span<char16_t> utf16_output) noexcept {
900 #if SIMDUTF_CPLUSPLUS23
901 if consteval {
902 return scalar::latin1_to_utf16::convert<endianness::LITTLE>(
903 latin1_input.data(), latin1_input.size(), utf16_output.data());
904 } else
905 #endif
906 {
907 return convert_latin1_to_utf16le(
908 reinterpret_cast<const char *>(latin1_input.data()),
909 latin1_input.size(), utf16_output.data());
910 }
911}
912 #endif // SIMDUTF_SPAN
913
924simdutf_warn_unused size_t convert_latin1_to_utf16be(
925 const char *input, size_t length, char16_t *utf16_output) noexcept;
926 #if SIMDUTF_SPAN
927simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
928convert_latin1_to_utf16be(const detail::input_span_of_byte_like auto &input,
929 std::span<char16_t> output) noexcept {
930 #if SIMDUTF_CPLUSPLUS23
931 if consteval {
932 return scalar::latin1_to_utf16::convert<endianness::BIG>(
933 input.data(), input.size(), output.data());
934 } else
935 #endif
936 {
937 return convert_latin1_to_utf16be(
938 reinterpret_cast<const char *>(input.data()), input.size(),
939 output.data());
940 }
941}
942 #endif // SIMDUTF_SPAN
951simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
952latin1_length_from_utf16(size_t length) noexcept {
953 return length;
954}
955
964simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
965utf16_length_from_latin1(size_t length) noexcept {
966 return length;
967}
968#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
969
970#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
981simdutf_warn_unused size_t convert_latin1_to_utf32(
982 const char *input, size_t length, char32_t *utf32_buffer) noexcept;
983 #if SIMDUTF_SPAN
984simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
985convert_latin1_to_utf32(
986 const detail::input_span_of_byte_like auto &latin1_input,
987 std::span<char32_t> utf32_output) noexcept {
988 #if SIMDUTF_CPLUSPLUS23
989 if consteval {
990 return scalar::latin1_to_utf32::convert(
991 latin1_input.data(), latin1_input.size(), utf32_output.data());
992 } else
993 #endif
994 {
995 return convert_latin1_to_utf32(
996 reinterpret_cast<const char *>(latin1_input.data()),
997 latin1_input.size(), utf32_output.data());
998 }
999}
1000 #endif // SIMDUTF_SPAN
1001#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
1002
1003#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1016simdutf_warn_unused size_t convert_utf8_to_latin1(const char *input,
1017 size_t length,
1018 char *latin1_output) noexcept;
1019 #if SIMDUTF_SPAN
1020simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1021convert_utf8_to_latin1(
1022 const detail::input_span_of_byte_like auto &input,
1023 detail::output_span_of_byte_like auto &&output) noexcept {
1024 #if SIMDUTF_CPLUSPLUS23
1025 if consteval {
1026 return scalar::utf8_to_latin1::convert(input.data(), input.size(),
1027 output.data());
1028 } else
1029 #endif
1030 {
1031 return convert_utf8_to_latin1(reinterpret_cast<const char *>(input.data()),
1032 input.size(),
1033 reinterpret_cast<char *>(output.data()));
1034 }
1035}
1036 #endif // SIMDUTF_SPAN
1037#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1038
1039#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1053simdutf_warn_unused size_t convert_utf8_to_utf16(
1054 const char *input, size_t length, char16_t *utf16_output) noexcept;
1055 #if SIMDUTF_SPAN
1056simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1057convert_utf8_to_utf16(const detail::input_span_of_byte_like auto &input,
1058 std::span<char16_t> output) noexcept {
1059 #if SIMDUTF_CPLUSPLUS23
1060 if consteval {
1061 return scalar::utf8_to_utf16::convert<endianness::NATIVE>(
1062 input.data(), input.size(), output.data());
1063 } else
1064 #endif
1065 {
1066 return convert_utf8_to_utf16(reinterpret_cast<const char *>(input.data()),
1067 input.size(), output.data());
1068 }
1069}
1070 #endif // SIMDUTF_SPAN
1071
1089simdutf_warn_unused result utf8_length_from_utf16le_with_replacement(
1090 const char16_t *input, size_t length) noexcept;
1091 #if SIMDUTF_SPAN
1092simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused result
1093utf8_length_from_utf16le_with_replacement(
1094 std::span<const char16_t> valid_utf16_input) noexcept {
1095 #if SIMDUTF_CPLUSPLUS23
1096 if consteval {
1097 return scalar::utf16::utf8_length_from_utf16_with_replacement<
1098 endianness::LITTLE>(valid_utf16_input.data(), valid_utf16_input.size());
1099 } else
1100 #endif
1101 {
1102 return utf8_length_from_utf16le_with_replacement(valid_utf16_input.data(),
1103 valid_utf16_input.size());
1104 }
1105}
1106 #endif // SIMDUTF_SPAN
1107
1125simdutf_warn_unused result utf8_length_from_utf16be_with_replacement(
1126 const char16_t *input, size_t length) noexcept;
1127 #if SIMDUTF_SPAN
1128simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1129utf8_length_from_utf16be_with_replacement(
1130 std::span<const char16_t> valid_utf16_input) noexcept {
1131 #if SIMDUTF_CPLUSPLUS23
1132 if consteval {
1133 return scalar::utf16::utf8_length_from_utf16_with_replacement<
1134 endianness::BIG>(valid_utf16_input.data(), valid_utf16_input.size());
1135 } else
1136 #endif
1137 {
1138 return utf8_length_from_utf16be_with_replacement(valid_utf16_input.data(),
1139 valid_utf16_input.size());
1140 }
1141}
1142 #endif // SIMDUTF_SPAN
1143
1144#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1145
1146#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1155simdutf_warn_unused size_t convert_latin1_to_utf16(
1156 const char *input, size_t length, char16_t *utf16_output) noexcept;
1157 #if SIMDUTF_SPAN
1158simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1159convert_latin1_to_utf16(const detail::input_span_of_byte_like auto &input,
1160 std::span<char16_t> output) noexcept {
1161 #if SIMDUTF_CPLUSPLUS23
1162 if consteval {
1163 return scalar::latin1_to_utf16::convert<endianness::NATIVE>(
1164 input.data(), input.size(), output.data());
1165 } else
1166 #endif
1167 {
1168 return convert_latin1_to_utf16(reinterpret_cast<const char *>(input.data()),
1169 input.size(), output.data());
1170 }
1171}
1172 #endif // SIMDUTF_SPAN
1173#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1174
1175#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1188simdutf_warn_unused size_t convert_utf8_to_utf16le(
1189 const char *input, size_t length, char16_t *utf16_output) noexcept;
1190 #if SIMDUTF_SPAN
1191simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1192convert_utf8_to_utf16le(const detail::input_span_of_byte_like auto &utf8_input,
1193 std::span<char16_t> utf16_output) noexcept {
1194 #if SIMDUTF_CPLUSPLUS23
1195 if consteval {
1196 return scalar::utf8_to_utf16::convert<endianness::LITTLE>(
1197 utf8_input.data(), utf8_input.size(), utf16_output.data());
1198 } else
1199 #endif
1200 {
1201 return convert_utf8_to_utf16le(
1202 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1203 utf16_output.data());
1204 }
1205}
1206 #endif // SIMDUTF_SPAN
1207
1220simdutf_warn_unused size_t convert_utf8_to_utf16be(
1221 const char *input, size_t length, char16_t *utf16_output) noexcept;
1222 #if SIMDUTF_SPAN
1223simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1224convert_utf8_to_utf16be(const detail::input_span_of_byte_like auto &utf8_input,
1225 std::span<char16_t> utf16_output) noexcept {
1226
1227 #if SIMDUTF_CPLUSPLUS23
1228 if consteval {
1229 return scalar::utf8_to_utf16::convert<endianness::BIG>(
1230 utf8_input.data(), utf8_input.size(), utf16_output.data());
1231 } else
1232 #endif
1233 {
1234 return convert_utf8_to_utf16be(
1235 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1236 utf16_output.data());
1237 }
1238}
1239 #endif // SIMDUTF_SPAN
1240#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1241
1242#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1259simdutf_warn_unused result convert_utf8_to_latin1_with_errors(
1260 const char *input, size_t length, char *latin1_output) noexcept;
1261 #if SIMDUTF_SPAN
1262simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1263convert_utf8_to_latin1_with_errors(
1264 const detail::input_span_of_byte_like auto &utf8_input,
1265 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1266 #if SIMDUTF_CPLUSPLUS23
1267 if consteval {
1268 return scalar::utf8_to_latin1::convert_with_errors(
1269 utf8_input.data(), utf8_input.size(), latin1_output.data());
1270 } else
1271 #endif
1272 {
1273 return convert_utf8_to_latin1_with_errors(
1274 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1275 reinterpret_cast<char *>(latin1_output.data()));
1276 }
1277}
1278 #endif // SIMDUTF_SPAN
1279#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1280
1281#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1297simdutf_warn_unused result convert_utf8_to_utf16_with_errors(
1298 const char *input, size_t length, char16_t *utf16_output) noexcept;
1299 #if SIMDUTF_SPAN
1300simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1301convert_utf8_to_utf16_with_errors(
1302 const detail::input_span_of_byte_like auto &utf8_input,
1303 std::span<char16_t> utf16_output) noexcept {
1304 #if SIMDUTF_CPLUSPLUS23
1305 if consteval {
1306 return scalar::utf8_to_utf16::convert_with_errors<endianness::NATIVE>(
1307 utf8_input.data(), utf8_input.size(), utf16_output.data());
1308 } else
1309 #endif
1310 {
1311 return convert_utf8_to_utf16_with_errors(
1312 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1313 utf16_output.data());
1314 }
1315}
1316 #endif // SIMDUTF_SPAN
1317
1332simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(
1333 const char *input, size_t length, char16_t *utf16_output) noexcept;
1334 #if SIMDUTF_SPAN
1335simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1336convert_utf8_to_utf16le_with_errors(
1337 const detail::input_span_of_byte_like auto &utf8_input,
1338 std::span<char16_t> utf16_output) noexcept {
1339 #if SIMDUTF_CPLUSPLUS23
1340 if consteval {
1341 return scalar::utf8_to_utf16::convert_with_errors<endianness::LITTLE>(
1342 utf8_input.data(), utf8_input.size(), utf16_output.data());
1343 } else
1344 #endif
1345 {
1346 return convert_utf8_to_utf16le_with_errors(
1347 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1348 utf16_output.data());
1349 }
1350}
1351 #endif // SIMDUTF_SPAN
1352
1367simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(
1368 const char *input, size_t length, char16_t *utf16_output) noexcept;
1369 #if SIMDUTF_SPAN
1370simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1371convert_utf8_to_utf16be_with_errors(
1372 const detail::input_span_of_byte_like auto &utf8_input,
1373 std::span<char16_t> utf16_output) noexcept {
1374 #if SIMDUTF_CPLUSPLUS23
1375 if consteval {
1376 return scalar::utf8_to_utf16::convert_with_errors<endianness::BIG>(
1377 utf8_input.data(), utf8_input.size(), utf16_output.data());
1378 } else
1379 #endif
1380 {
1381 return convert_utf8_to_utf16be_with_errors(
1382 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1383 utf16_output.data());
1384 }
1385}
1386 #endif // SIMDUTF_SPAN
1387#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1388
1389#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1402simdutf_warn_unused size_t convert_utf8_to_utf32(
1403 const char *input, size_t length, char32_t *utf32_output) noexcept;
1404 #if SIMDUTF_SPAN
1405simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1406convert_utf8_to_utf32(const detail::input_span_of_byte_like auto &utf8_input,
1407 std::span<char32_t> utf32_output) noexcept {
1408 #if SIMDUTF_CPLUSPLUS23
1409 if consteval {
1410 return scalar::utf8_to_utf32::convert(utf8_input.data(), utf8_input.size(),
1411 utf32_output.data());
1412 } else
1413 #endif
1414 {
1415 return convert_utf8_to_utf32(
1416 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1417 utf32_output.data());
1418 }
1419}
1420 #endif // SIMDUTF_SPAN
1421
1436simdutf_warn_unused result convert_utf8_to_utf32_with_errors(
1437 const char *input, size_t length, char32_t *utf32_output) noexcept;
1438 #if SIMDUTF_SPAN
1439simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1440convert_utf8_to_utf32_with_errors(
1441 const detail::input_span_of_byte_like auto &utf8_input,
1442 std::span<char32_t> utf32_output) noexcept {
1443 #if SIMDUTF_CPLUSPLUS23
1444 if consteval {
1445 return scalar::utf8_to_utf32::convert_with_errors(
1446 utf8_input.data(), utf8_input.size(), utf32_output.data());
1447 } else
1448 #endif
1449 {
1450 return convert_utf8_to_utf32_with_errors(
1451 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1452 utf32_output.data());
1453 }
1454}
1455 #endif // SIMDUTF_SPAN
1456#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1457
1458#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1478simdutf_warn_unused size_t convert_valid_utf8_to_latin1(
1479 const char *input, size_t length, char *latin1_output) noexcept;
1480 #if SIMDUTF_SPAN
1481simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1482convert_valid_utf8_to_latin1(
1483 const detail::input_span_of_byte_like auto &valid_utf8_input,
1484 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1485 #if SIMDUTF_CPLUSPLUS23
1486 if consteval {
1487 return scalar::utf8_to_latin1::convert_valid(
1488 valid_utf8_input.data(), valid_utf8_input.size(), latin1_output.data());
1489 } else
1490 #endif
1491 {
1492 return convert_valid_utf8_to_latin1(
1493 reinterpret_cast<const char *>(valid_utf8_input.data()),
1494 valid_utf8_input.size(), latin1_output.data());
1495 }
1496}
1497 #endif // SIMDUTF_SPAN
1498#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1499
1500#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1511simdutf_warn_unused size_t convert_valid_utf8_to_utf16(
1512 const char *input, size_t length, char16_t *utf16_buffer) noexcept;
1513 #if SIMDUTF_SPAN
1514simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1515convert_valid_utf8_to_utf16(
1516 const detail::input_span_of_byte_like auto &valid_utf8_input,
1517 std::span<char16_t> utf16_output) noexcept {
1518 #if SIMDUTF_CPLUSPLUS23
1519 if consteval {
1520 return scalar::utf8_to_utf16::convert_valid<endianness::NATIVE>(
1521 valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data());
1522 } else
1523 #endif
1524 {
1525 return convert_valid_utf8_to_utf16(
1526 reinterpret_cast<const char *>(valid_utf8_input.data()),
1527 valid_utf8_input.size(), utf16_output.data());
1528 }
1529}
1530 #endif // SIMDUTF_SPAN
1531
1542simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(
1543 const char *input, size_t length, char16_t *utf16_buffer) noexcept;
1544 #if SIMDUTF_SPAN
1545simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1546convert_valid_utf8_to_utf16le(
1547 const detail::input_span_of_byte_like auto &valid_utf8_input,
1548 std::span<char16_t> utf16_output) noexcept {
1549
1550 #if SIMDUTF_CPLUSPLUS23
1551 if consteval {
1552 return scalar::utf8_to_utf16::convert_valid<endianness::LITTLE>(
1553 valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data());
1554 } else
1555 #endif
1556 {
1557 return convert_valid_utf8_to_utf16le(
1558 reinterpret_cast<const char *>(valid_utf8_input.data()),
1559 valid_utf8_input.size(), utf16_output.data());
1560 }
1561}
1562 #endif // SIMDUTF_SPAN
1563
1574simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(
1575 const char *input, size_t length, char16_t *utf16_buffer) noexcept;
1576 #if SIMDUTF_SPAN
1577simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1578convert_valid_utf8_to_utf16be(
1579 const detail::input_span_of_byte_like auto &valid_utf8_input,
1580 std::span<char16_t> utf16_output) noexcept {
1581 #if SIMDUTF_CPLUSPLUS23
1582 if consteval {
1583 return scalar::utf8_to_utf16::convert_valid<endianness::BIG>(
1584 valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data());
1585 } else
1586 #endif
1587 {
1588 return convert_valid_utf8_to_utf16be(
1589 reinterpret_cast<const char *>(valid_utf8_input.data()),
1590 valid_utf8_input.size(), utf16_output.data());
1591 }
1592}
1593 #endif // SIMDUTF_SPAN
1594#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1595
1596#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1607simdutf_warn_unused size_t convert_valid_utf8_to_utf32(
1608 const char *input, size_t length, char32_t *utf32_buffer) noexcept;
1609 #if SIMDUTF_SPAN
1610simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1611convert_valid_utf8_to_utf32(
1612 const detail::input_span_of_byte_like auto &valid_utf8_input,
1613 std::span<char32_t> utf32_output) noexcept {
1614 #if SIMDUTF_CPLUSPLUS23
1615 if consteval {
1616 return scalar::utf8_to_utf32::convert_valid(
1617 valid_utf8_input.data(), valid_utf8_input.size(), utf32_output.data());
1618 } else
1619 #endif
1620 {
1621 return convert_valid_utf8_to_utf32(
1622 reinterpret_cast<const char *>(valid_utf8_input.data()),
1623 valid_utf8_input.size(), utf32_output.data());
1624 }
1625}
1626 #endif // SIMDUTF_SPAN
1627#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1628
1629#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1638simdutf_warn_unused size_t utf8_length_from_latin1(const char *input,
1639 size_t length) noexcept;
1640 #if SIMDUTF_SPAN
1641simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1642utf8_length_from_latin1(
1643 const detail::input_span_of_byte_like auto &latin1_input) noexcept {
1644 #if SIMDUTF_CPLUSPLUS23
1645 if consteval {
1646 return scalar::latin1_to_utf8::utf8_length_from_latin1(latin1_input.data(),
1647 latin1_input.size());
1648 } else
1649 #endif
1650 {
1651 return utf8_length_from_latin1(
1652 reinterpret_cast<const char *>(latin1_input.data()),
1653 latin1_input.size());
1654 }
1655}
1656 #endif // SIMDUTF_SPAN
1657
1671simdutf_warn_unused size_t latin1_length_from_utf8(const char *input,
1672 size_t length) noexcept;
1673 #if SIMDUTF_SPAN
1674simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1675latin1_length_from_utf8(
1676 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
1677 #if SIMDUTF_CPLUSPLUS23
1678 if consteval {
1679 return scalar::utf8::count_code_points(valid_utf8_input.data(),
1680 valid_utf8_input.size());
1681 } else
1682 #endif
1683 {
1684 return latin1_length_from_utf8(
1685 reinterpret_cast<const char *>(valid_utf8_input.data()),
1686 valid_utf8_input.size());
1687 }
1688}
1689 #endif // SIMDUTF_SPAN
1690#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1691
1692#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1707simdutf_warn_unused size_t utf16_length_from_utf8(const char *input,
1708 size_t length) noexcept;
1709 #if SIMDUTF_SPAN
1710simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1711utf16_length_from_utf8(
1712 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
1713 #if SIMDUTF_CPLUSPLUS23
1714 if consteval {
1715 return scalar::utf8::utf16_length_from_utf8(valid_utf8_input.data(),
1716 valid_utf8_input.size());
1717 } else
1718 #endif
1719 {
1720 return utf16_length_from_utf8(
1721 reinterpret_cast<const char *>(valid_utf8_input.data()),
1722 valid_utf8_input.size());
1723 }
1724}
1725 #endif // SIMDUTF_SPAN
1726#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1727
1728#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1745simdutf_warn_unused size_t utf32_length_from_utf8(const char *input,
1746 size_t length) noexcept;
1747 #if SIMDUTF_SPAN
1748simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1749utf32_length_from_utf8(
1750 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
1751
1752 #if SIMDUTF_CPLUSPLUS23
1753 if consteval {
1754 return scalar::utf8::count_code_points(valid_utf8_input.data(),
1755 valid_utf8_input.size());
1756 } else
1757 #endif
1758 {
1759 return utf32_length_from_utf8(
1760 reinterpret_cast<const char *>(valid_utf8_input.data()),
1761 valid_utf8_input.size());
1762 }
1763}
1764 #endif // SIMDUTF_SPAN
1765#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1766
1767#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1783simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t *input,
1784 size_t length,
1785 char *utf8_buffer) noexcept;
1786 #if SIMDUTF_SPAN
1787simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1788convert_utf16_to_utf8(
1789 std::span<const char16_t> utf16_input,
1790 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1791 #if SIMDUTF_CPLUSPLUS23
1792 if consteval {
1793 return scalar::utf16_to_utf8::convert<endianness::NATIVE>(
1794 utf16_input.data(), utf16_input.size(), utf8_output.data());
1795 } else
1796 #endif
1797 {
1798 return convert_utf16_to_utf8(utf16_input.data(), utf16_input.size(),
1799 reinterpret_cast<char *>(utf8_output.data()));
1800 }
1801}
1802 #endif // SIMDUTF_SPAN
1803
1822simdutf_warn_unused size_t convert_utf16_to_utf8_safe(const char16_t *input,
1823 size_t length,
1824 char *utf8_output,
1825 size_t utf8_len) noexcept;
1826 #if SIMDUTF_SPAN
1827simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1828convert_utf16_to_utf8_safe(
1829 std::span<const char16_t> utf16_input,
1830 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1831 // implementation note: outputspan is a forwarding ref to avoid copying
1832 // and allow both lvalues and rvalues. std::span can be copied without
1833 // problems, but std::vector should not, and this function should accept
1834 // both. it will allow using an owning rvalue ref (example: passing a
1835 // temporary std::string) as output, but the user will quickly find out
1836 // that he has no way of getting the data out of the object in that case.
1837 #if SIMDUTF_CPLUSPLUS23
1838 if consteval {
1839 const full_result r =
1840 scalar::utf16_to_utf8::convert_with_errors<endianness::NATIVE, true>(
1841 utf16_input.data(), utf16_input.size(), utf8_output.data(),
1842 utf8_output.size());
1843 if (r.error != error_code::SUCCESS &&
1844 r.error != error_code::OUTPUT_BUFFER_TOO_SMALL) {
1845 return 0;
1846 }
1847 return r.output_count;
1848 } else
1849 #endif
1850 {
1851 return convert_utf16_to_utf8_safe(
1852 utf16_input.data(), utf16_input.size(),
1853 reinterpret_cast<char *>(utf8_output.data()), utf8_output.size());
1854 }
1855}
1856 #endif // SIMDUTF_SPAN
1857#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1858
1859#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1875simdutf_warn_unused size_t convert_utf16_to_latin1(
1876 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1877 #if SIMDUTF_SPAN
1878simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1879convert_utf16_to_latin1(
1880 std::span<const char16_t> utf16_input,
1881 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1882 #if SIMDUTF_CPLUSPLUS23
1883 if consteval {
1884 return scalar::utf16_to_latin1::convert<endianness::NATIVE>(
1885 utf16_input.data(), utf16_input.size(), latin1_output.data());
1886 } else
1887 #endif
1888 {
1889 return convert_utf16_to_latin1(
1890 utf16_input.data(), utf16_input.size(),
1891 reinterpret_cast<char *>(latin1_output.data()));
1892 }
1893}
1894 #endif // SIMDUTF_SPAN
1895
1912simdutf_warn_unused size_t convert_utf16le_to_latin1(
1913 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1914 #if SIMDUTF_SPAN
1915simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1916convert_utf16le_to_latin1(
1917 std::span<const char16_t> utf16_input,
1918 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1919 #if SIMDUTF_CPLUSPLUS23
1920 if consteval {
1921 return scalar::utf16_to_latin1::convert<endianness::LITTLE>(
1922 utf16_input.data(), utf16_input.size(), latin1_output.data());
1923 } else
1924 #endif
1925 {
1926 return convert_utf16le_to_latin1(
1927 utf16_input.data(), utf16_input.size(),
1928 reinterpret_cast<char *>(latin1_output.data()));
1929 }
1930}
1931 #endif // SIMDUTF_SPAN
1932
1947simdutf_warn_unused size_t convert_utf16be_to_latin1(
1948 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1949 #if SIMDUTF_SPAN
1950simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1951convert_utf16be_to_latin1(
1952 std::span<const char16_t> utf16_input,
1953 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1954 #if SIMDUTF_CPLUSPLUS23
1955 if consteval {
1956 return scalar::utf16_to_latin1::convert<endianness::BIG>(
1957 utf16_input.data(), utf16_input.size(), latin1_output.data());
1958 } else
1959 #endif
1960 {
1961 return convert_utf16be_to_latin1(
1962 utf16_input.data(), utf16_input.size(),
1963 reinterpret_cast<char *>(latin1_output.data()));
1964 }
1965}
1966 #endif // SIMDUTF_SPAN
1967#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1968
1969#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1984simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *input,
1985 size_t length,
1986 char *utf8_buffer) noexcept;
1987 #if SIMDUTF_SPAN
1988simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1989convert_utf16le_to_utf8(
1990 std::span<const char16_t> utf16_input,
1991 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1992 #if SIMDUTF_CPLUSPLUS23
1993 if consteval {
1994 return scalar::utf16_to_utf8::convert<endianness::LITTLE>(
1995 utf16_input.data(), utf16_input.size(), utf8_output.data());
1996 } else
1997 #endif
1998 {
1999 return convert_utf16le_to_utf8(
2000 utf16_input.data(), utf16_input.size(),
2001 reinterpret_cast<char *>(utf8_output.data()));
2002 }
2003}
2004 #endif // SIMDUTF_SPAN
2005
2020simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input,
2021 size_t length,
2022 char *utf8_buffer) noexcept;
2023 #if SIMDUTF_SPAN
2024simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2025convert_utf16be_to_utf8(
2026 std::span<const char16_t> utf16_input,
2027 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2028 #if SIMDUTF_CPLUSPLUS23
2029 if consteval {
2030 return scalar::utf16_to_utf8::convert<endianness::BIG>(
2031 utf16_input.data(), utf16_input.size(), utf8_output.data());
2032 } else
2033 #endif
2034 {
2035 return convert_utf16be_to_utf8(
2036 utf16_input.data(), utf16_input.size(),
2037 reinterpret_cast<char *>(utf8_output.data()));
2038 }
2039}
2040 #endif // SIMDUTF_SPAN
2041#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2042
2043#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
2060simdutf_warn_unused result convert_utf16_to_latin1_with_errors(
2061 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
2062 #if SIMDUTF_SPAN
2063simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2064convert_utf16_to_latin1_with_errors(
2065 std::span<const char16_t> utf16_input,
2066 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2067 #if SIMDUTF_CPLUSPLUS23
2068 if consteval {
2069 return scalar::utf16_to_latin1::convert_with_errors<endianness::NATIVE>(
2070 utf16_input.data(), utf16_input.size(), latin1_output.data());
2071 } else
2072 #endif
2073 {
2074 return convert_utf16_to_latin1_with_errors(
2075 utf16_input.data(), utf16_input.size(),
2076 reinterpret_cast<char *>(latin1_output.data()));
2077 }
2078}
2079 #endif // SIMDUTF_SPAN
2080
2096simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(
2097 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
2098 #if SIMDUTF_SPAN
2099simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2100convert_utf16le_to_latin1_with_errors(
2101 std::span<const char16_t> utf16_input,
2102 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2103 #if SIMDUTF_CPLUSPLUS23
2104 if consteval {
2105 return scalar::utf16_to_latin1::convert_with_errors<endianness::LITTLE>(
2106 utf16_input.data(), utf16_input.size(), latin1_output.data());
2107 } else
2108 #endif
2109 {
2110 return convert_utf16le_to_latin1_with_errors(
2111 utf16_input.data(), utf16_input.size(),
2112 reinterpret_cast<char *>(latin1_output.data()));
2113 }
2114}
2115 #endif // SIMDUTF_SPAN
2116
2134simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(
2135 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
2136 #if SIMDUTF_SPAN
2137simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2138convert_utf16be_to_latin1_with_errors(
2139 std::span<const char16_t> utf16_input,
2140 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2141 #if SIMDUTF_CPLUSPLUS23
2142 if consteval {
2143 return scalar::utf16_to_latin1::convert_with_errors<endianness::BIG>(
2144 utf16_input.data(), utf16_input.size(), latin1_output.data());
2145 } else
2146 #endif
2147 {
2148 return convert_utf16be_to_latin1_with_errors(
2149 utf16_input.data(), utf16_input.size(),
2150 reinterpret_cast<char *>(latin1_output.data()));
2151 }
2152}
2153 #endif // SIMDUTF_SPAN
2154#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
2155
2156#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2174simdutf_warn_unused result convert_utf16_to_utf8_with_errors(
2175 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2176 #if SIMDUTF_SPAN
2177simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2178convert_utf16_to_utf8_with_errors(
2179 std::span<const char16_t> utf16_input,
2180 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2181 #if SIMDUTF_CPLUSPLUS23
2182 if consteval {
2183 return scalar::utf16_to_utf8::convert_with_errors<endianness::NATIVE>(
2184 utf16_input.data(), utf16_input.size(), utf8_output.data());
2185 } else
2186 #endif
2187 {
2188 return convert_utf16_to_utf8_with_errors(
2189 utf16_input.data(), utf16_input.size(),
2190 reinterpret_cast<char *>(utf8_output.data()));
2191 }
2192}
2193 #endif // SIMDUTF_SPAN
2194
2211simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(
2212 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2213 #if SIMDUTF_SPAN
2214simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2215convert_utf16le_to_utf8_with_errors(
2216 std::span<const char16_t> utf16_input,
2217 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2218 #if SIMDUTF_CPLUSPLUS23
2219 if consteval {
2220 return scalar::utf16_to_utf8::convert_with_errors<endianness::LITTLE>(
2221 utf16_input.data(), utf16_input.size(), utf8_output.data());
2222 } else
2223 #endif
2224 {
2225 return convert_utf16le_to_utf8_with_errors(
2226 utf16_input.data(), utf16_input.size(),
2227 reinterpret_cast<char *>(utf8_output.data()));
2228 }
2229}
2230 #endif // SIMDUTF_SPAN
2231
2248simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(
2249 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2250 #if SIMDUTF_SPAN
2251simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2252convert_utf16be_to_utf8_with_errors(
2253 std::span<const char16_t> utf16_input,
2254 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2255 #if SIMDUTF_CPLUSPLUS23
2256 if consteval {
2257 return scalar::utf16_to_utf8::convert_with_errors<endianness::BIG>(
2258 utf16_input.data(), utf16_input.size(), utf8_output.data());
2259 } else
2260 #endif
2261 {
2262 return convert_utf16be_to_utf8_with_errors(
2263 utf16_input.data(), utf16_input.size(),
2264 reinterpret_cast<char *>(utf8_output.data()));
2265 }
2266}
2267 #endif // SIMDUTF_SPAN
2268
2283simdutf_warn_unused size_t convert_utf16le_to_utf8_with_replacement(
2284 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2285 #if SIMDUTF_SPAN
2286simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2287convert_utf16le_to_utf8_with_replacement(
2288 std::span<const char16_t> utf16_input,
2289 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2290 #if SIMDUTF_CPLUSPLUS23
2291 if consteval {
2292 return scalar::utf16_to_utf8::convert_with_replacement<endianness::LITTLE>(
2293 utf16_input.data(), utf16_input.size(), utf8_output.data());
2294 } else
2295 #endif
2296 {
2297 return convert_utf16le_to_utf8_with_replacement(
2298 utf16_input.data(), utf16_input.size(),
2299 reinterpret_cast<char *>(utf8_output.data()));
2300 }
2301}
2302 #endif // SIMDUTF_SPAN
2303
2318simdutf_warn_unused size_t convert_utf16be_to_utf8_with_replacement(
2319 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2320 #if SIMDUTF_SPAN
2321simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2322convert_utf16be_to_utf8_with_replacement(
2323 std::span<const char16_t> utf16_input,
2324 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2325 #if SIMDUTF_CPLUSPLUS23
2326 if consteval {
2327 return scalar::utf16_to_utf8::convert_with_replacement<endianness::BIG>(
2328 utf16_input.data(), utf16_input.size(), utf8_output.data());
2329 } else
2330 #endif
2331 {
2332 return convert_utf16be_to_utf8_with_replacement(
2333 utf16_input.data(), utf16_input.size(),
2334 reinterpret_cast<char *>(utf8_output.data()));
2335 }
2336}
2337 #endif // SIMDUTF_SPAN
2338
2353simdutf_warn_unused size_t convert_utf16_to_utf8_with_replacement(
2354 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2355 #if SIMDUTF_SPAN
2356simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2357convert_utf16_to_utf8_with_replacement(
2358 std::span<const char16_t> utf16_input,
2359 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2360 #if SIMDUTF_CPLUSPLUS23
2361 if consteval {
2362 return scalar::utf16_to_utf8::convert_with_replacement<endianness::NATIVE>(
2363 utf16_input.data(), utf16_input.size(), utf8_output.data());
2364 } else
2365 #endif
2366 {
2367 return convert_utf16_to_utf8_with_replacement(
2368 utf16_input.data(), utf16_input.size(),
2369 reinterpret_cast<char *>(utf8_output.data()));
2370 }
2371}
2372 #endif // SIMDUTF_SPAN
2373#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2374
2375#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2389simdutf_warn_unused size_t convert_valid_utf16_to_utf8(
2390 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2391 #if SIMDUTF_SPAN
2392simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2393convert_valid_utf16_to_utf8(
2394 std::span<const char16_t> valid_utf16_input,
2395 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2396 #if SIMDUTF_CPLUSPLUS23
2397 if consteval {
2398 return scalar::utf16_to_utf8::convert_valid<endianness::NATIVE>(
2399 valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data());
2400 } else
2401 #endif
2402 {
2403 return convert_valid_utf16_to_utf8(
2404 valid_utf16_input.data(), valid_utf16_input.size(),
2405 reinterpret_cast<char *>(utf8_output.data()));
2406 }
2407}
2408 #endif // SIMDUTF_SPAN
2409#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2410
2411#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
2431simdutf_warn_unused size_t convert_valid_utf16_to_latin1(
2432 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
2433 #if SIMDUTF_SPAN
2434simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2435convert_valid_utf16_to_latin1(
2436 std::span<const char16_t> valid_utf16_input,
2437 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2438 #if SIMDUTF_CPLUSPLUS23
2439 if consteval {
2440 return scalar::utf16_to_latin1::convert_valid_impl<endianness::NATIVE>(
2441 detail::constexpr_cast_ptr<uint16_t>(valid_utf16_input.data()),
2442 valid_utf16_input.size(),
2443 detail::constexpr_cast_writeptr<char>(latin1_output.data()));
2444 } else
2445 #endif
2446 {
2447 return convert_valid_utf16_to_latin1(
2448 valid_utf16_input.data(), valid_utf16_input.size(),
2449 reinterpret_cast<char *>(latin1_output.data()));
2450 }
2451}
2452 #endif // SIMDUTF_SPAN
2453
2473simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(
2474 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
2475 #if SIMDUTF_SPAN
2476simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused size_t
2477convert_valid_utf16le_to_latin1(
2478 std::span<const char16_t> valid_utf16_input,
2479 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2480 #if SIMDUTF_CPLUSPLUS23
2481 if consteval {
2482 return scalar::utf16_to_latin1::convert_valid_impl<endianness::LITTLE>(
2483 detail::constexpr_cast_ptr<uint16_t>(valid_utf16_input.data()),
2484 valid_utf16_input.size(),
2485 detail::constexpr_cast_writeptr<char>(latin1_output.data()));
2486 } else
2487 #endif
2488 {
2489 return convert_valid_utf16le_to_latin1(
2490 valid_utf16_input.data(), valid_utf16_input.size(),
2491 reinterpret_cast<char *>(latin1_output.data()));
2492 }
2493}
2494 #endif // SIMDUTF_SPAN
2495
2515simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(
2516 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
2517 #if SIMDUTF_SPAN
2518simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused size_t
2519convert_valid_utf16be_to_latin1(
2520 std::span<const char16_t> valid_utf16_input,
2521 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2522 #if SIMDUTF_CPLUSPLUS23
2523 if consteval {
2524 return scalar::utf16_to_latin1::convert_valid_impl<endianness::BIG>(
2525 detail::constexpr_cast_ptr<uint16_t>(valid_utf16_input.data()),
2526 valid_utf16_input.size(),
2527 detail::constexpr_cast_writeptr<char>(latin1_output.data()));
2528 } else
2529 #endif
2530 {
2531 return convert_valid_utf16be_to_latin1(
2532 valid_utf16_input.data(), valid_utf16_input.size(),
2533 reinterpret_cast<char *>(latin1_output.data()));
2534 }
2535}
2536 #endif // SIMDUTF_SPAN
2537#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
2538
2539#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2553simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(
2554 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2555 #if SIMDUTF_SPAN
2556simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2557convert_valid_utf16le_to_utf8(
2558 std::span<const char16_t> valid_utf16_input,
2559 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2560 #if SIMDUTF_CPLUSPLUS23
2561 if consteval {
2562 return scalar::utf16_to_utf8::convert_valid<endianness::NATIVE>(
2563 valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data());
2564 } else
2565 #endif
2566 {
2567 return convert_valid_utf16le_to_utf8(
2568 valid_utf16_input.data(), valid_utf16_input.size(),
2569 reinterpret_cast<char *>(utf8_output.data()));
2570 }
2571}
2572 #endif // SIMDUTF_SPAN
2573
2587simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(
2588 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2589 #if SIMDUTF_SPAN
2590simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2591convert_valid_utf16be_to_utf8(
2592 std::span<const char16_t> valid_utf16_input,
2593 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2594 #if SIMDUTF_CPLUSPLUS23
2595 if consteval {
2596 return scalar::utf16_to_utf8::convert_valid<endianness::BIG>(
2597 valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data());
2598 } else
2599 #endif
2600 {
2601 return convert_valid_utf16be_to_utf8(
2602 valid_utf16_input.data(), valid_utf16_input.size(),
2603 reinterpret_cast<char *>(utf8_output.data()));
2604 }
2605}
2606 #endif // SIMDUTF_SPAN
2607#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2608
2609#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2625simdutf_warn_unused size_t convert_utf16_to_utf32(
2626 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2627 #if SIMDUTF_SPAN
2628simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2629convert_utf16_to_utf32(std::span<const char16_t> utf16_input,
2630 std::span<char32_t> utf32_output) noexcept {
2631
2632 #if SIMDUTF_CPLUSPLUS23
2633 if consteval {
2634 return scalar::utf16_to_utf32::convert<endianness::NATIVE>(
2635 utf16_input.data(), utf16_input.size(), utf32_output.data());
2636 } else
2637 #endif
2638 {
2639 return convert_utf16_to_utf32(utf16_input.data(), utf16_input.size(),
2640 utf32_output.data());
2641 }
2642}
2643 #endif // SIMDUTF_SPAN
2644
2659simdutf_warn_unused size_t convert_utf16le_to_utf32(
2660 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2661 #if SIMDUTF_SPAN
2662simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2663convert_utf16le_to_utf32(std::span<const char16_t> utf16_input,
2664 std::span<char32_t> utf32_output) noexcept {
2665 #if SIMDUTF_CPLUSPLUS23
2666 if consteval {
2667 return scalar::utf16_to_utf32::convert<endianness::LITTLE>(
2668 utf16_input.data(), utf16_input.size(), utf32_output.data());
2669 } else
2670 #endif
2671 {
2672 return convert_utf16le_to_utf32(utf16_input.data(), utf16_input.size(),
2673 utf32_output.data());
2674 }
2675}
2676 #endif // SIMDUTF_SPAN
2677
2692simdutf_warn_unused size_t convert_utf16be_to_utf32(
2693 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2694 #if SIMDUTF_SPAN
2695simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2696convert_utf16be_to_utf32(std::span<const char16_t> utf16_input,
2697 std::span<char32_t> utf32_output) noexcept {
2698 #if SIMDUTF_CPLUSPLUS23
2699 if consteval {
2700 return scalar::utf16_to_utf32::convert<endianness::BIG>(
2701 utf16_input.data(), utf16_input.size(), utf32_output.data());
2702 } else
2703 #endif
2704 {
2705 return convert_utf16be_to_utf32(utf16_input.data(), utf16_input.size(),
2706 utf32_output.data());
2707 }
2708}
2709 #endif // SIMDUTF_SPAN
2710
2728simdutf_warn_unused result convert_utf16_to_utf32_with_errors(
2729 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2730 #if SIMDUTF_SPAN
2731simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2732convert_utf16_to_utf32_with_errors(std::span<const char16_t> utf16_input,
2733 std::span<char32_t> utf32_output) noexcept {
2734 #if SIMDUTF_CPLUSPLUS23
2735 if consteval {
2736 return scalar::utf16_to_utf32::convert_with_errors<endianness::NATIVE>(
2737 utf16_input.data(), utf16_input.size(), utf32_output.data());
2738 } else
2739 #endif
2740 {
2741 return convert_utf16_to_utf32_with_errors(
2742 utf16_input.data(), utf16_input.size(), utf32_output.data());
2743 }
2744}
2745 #endif // SIMDUTF_SPAN
2746
2763simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(
2764 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2765 #if SIMDUTF_SPAN
2766simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2767convert_utf16le_to_utf32_with_errors(
2768 std::span<const char16_t> utf16_input,
2769 std::span<char32_t> utf32_output) noexcept {
2770 #if SIMDUTF_CPLUSPLUS23
2771 if consteval {
2772 return scalar::utf16_to_utf32::convert_with_errors<endianness::LITTLE>(
2773 utf16_input.data(), utf16_input.size(), utf32_output.data());
2774 } else
2775 #endif
2776 {
2777 return convert_utf16le_to_utf32_with_errors(
2778 utf16_input.data(), utf16_input.size(), utf32_output.data());
2779 }
2780}
2781 #endif // SIMDUTF_SPAN
2782
2799simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(
2800 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2801 #if SIMDUTF_SPAN
2802simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2803convert_utf16be_to_utf32_with_errors(
2804 std::span<const char16_t> utf16_input,
2805 std::span<char32_t> utf32_output) noexcept {
2806 #if SIMDUTF_CPLUSPLUS23
2807 if consteval {
2808 return scalar::utf16_to_utf32::convert_with_errors<endianness::BIG>(
2809 utf16_input.data(), utf16_input.size(), utf32_output.data());
2810 } else
2811 #endif
2812 {
2813 return convert_utf16be_to_utf32_with_errors(
2814 utf16_input.data(), utf16_input.size(), utf32_output.data());
2815 }
2816}
2817 #endif // SIMDUTF_SPAN
2818
2833simdutf_warn_unused size_t convert_valid_utf16_to_utf32(
2834 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2835 #if SIMDUTF_SPAN
2836simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2837convert_valid_utf16_to_utf32(std::span<const char16_t> valid_utf16_input,
2838 std::span<char32_t> utf32_output) noexcept {
2839 #if SIMDUTF_CPLUSPLUS23
2840 if consteval {
2841 return scalar::utf16_to_utf32::convert_valid<endianness::NATIVE>(
2842 valid_utf16_input.data(), valid_utf16_input.size(),
2843 utf32_output.data());
2844 } else
2845 #endif
2846 {
2847 return convert_valid_utf16_to_utf32(valid_utf16_input.data(),
2848 valid_utf16_input.size(),
2849 utf32_output.data());
2850 }
2851}
2852 #endif // SIMDUTF_SPAN
2853
2867simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(
2868 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2869 #if SIMDUTF_SPAN
2870simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2871convert_valid_utf16le_to_utf32(std::span<const char16_t> valid_utf16_input,
2872 std::span<char32_t> utf32_output) noexcept {
2873 #if SIMDUTF_CPLUSPLUS23
2874 if consteval {
2875 return scalar::utf16_to_utf32::convert_valid<endianness::LITTLE>(
2876 valid_utf16_input.data(), valid_utf16_input.size(),
2877 utf32_output.data());
2878 } else
2879 #endif
2880 {
2881 return convert_valid_utf16le_to_utf32(valid_utf16_input.data(),
2882 valid_utf16_input.size(),
2883 utf32_output.data());
2884 }
2885}
2886 #endif // SIMDUTF_SPAN
2887
2901simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(
2902 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2903 #if SIMDUTF_SPAN
2904simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2905convert_valid_utf16be_to_utf32(std::span<const char16_t> valid_utf16_input,
2906 std::span<char32_t> utf32_output) noexcept {
2907 #if SIMDUTF_CPLUSPLUS23
2908 if consteval {
2909 return scalar::utf16_to_utf32::convert_valid<endianness::BIG>(
2910 valid_utf16_input.data(), valid_utf16_input.size(),
2911 utf32_output.data());
2912 } else
2913 #endif
2914 {
2915 return convert_valid_utf16be_to_utf32(valid_utf16_input.data(),
2916 valid_utf16_input.size(),
2917 utf32_output.data());
2918 }
2919}
2920 #endif // SIMDUTF_SPAN
2921#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2922
2923#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2935simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t *input,
2936 size_t length) noexcept;
2937 #if SIMDUTF_SPAN
2938simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2939utf8_length_from_utf16(std::span<const char16_t> valid_utf16_input) noexcept {
2940 #if SIMDUTF_CPLUSPLUS23
2941 if consteval {
2942 return scalar::utf16::utf8_length_from_utf16<endianness::NATIVE>(
2943 valid_utf16_input.data(), valid_utf16_input.size());
2944 } else
2945 #endif
2946 {
2947 return utf8_length_from_utf16(valid_utf16_input.data(),
2948 valid_utf16_input.size());
2949 }
2950}
2951 #endif // SIMDUTF_SPAN
2952
2971simdutf_warn_unused result utf8_length_from_utf16_with_replacement(
2972 const char16_t *input, size_t length) noexcept;
2973 #if SIMDUTF_SPAN
2974simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2975utf8_length_from_utf16_with_replacement(
2976 std::span<const char16_t> valid_utf16_input) noexcept {
2977 #if SIMDUTF_CPLUSPLUS23
2978 if consteval {
2979 return scalar::utf16::utf8_length_from_utf16_with_replacement<
2980 endianness::NATIVE>(valid_utf16_input.data(), valid_utf16_input.size());
2981 } else
2982 #endif
2983 {
2984 return utf8_length_from_utf16_with_replacement(valid_utf16_input.data(),
2985 valid_utf16_input.size());
2986 }
2987}
2988 #endif // SIMDUTF_SPAN
2989
3001simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input,
3002 size_t length) noexcept;
3003 #if SIMDUTF_SPAN
3004simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused size_t
3005utf8_length_from_utf16le(std::span<const char16_t> valid_utf16_input) noexcept {
3006 #if SIMDUTF_CPLUSPLUS23
3007 if consteval {
3008 return scalar::utf16::utf8_length_from_utf16<endianness::LITTLE>(
3009 valid_utf16_input.data(), valid_utf16_input.size());
3010 } else
3011 #endif
3012 {
3013 return utf8_length_from_utf16le(valid_utf16_input.data(),
3014 valid_utf16_input.size());
3015 }
3016}
3017 #endif // SIMDUTF_SPAN
3018
3030simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input,
3031 size_t length) noexcept;
3032 #if SIMDUTF_SPAN
3033simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3034utf8_length_from_utf16be(std::span<const char16_t> valid_utf16_input) noexcept {
3035 #if SIMDUTF_CPLUSPLUS23
3036 if consteval {
3037 return scalar::utf16::utf8_length_from_utf16<endianness::BIG>(
3038 valid_utf16_input.data(), valid_utf16_input.size());
3039 } else
3040 #endif
3041 {
3042 return utf8_length_from_utf16be(valid_utf16_input.data(),
3043 valid_utf16_input.size());
3044 }
3045}
3046 #endif // SIMDUTF_SPAN
3047#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
3048
3049#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
3063simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input,
3064 size_t length,
3065 char *utf8_buffer) noexcept;
3066 #if SIMDUTF_SPAN
3067simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3068convert_utf32_to_utf8(
3069 std::span<const char32_t> utf32_input,
3070 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
3071 #if SIMDUTF_CPLUSPLUS23
3072 if consteval {
3073 return scalar::utf32_to_utf8::convert(
3074 utf32_input.data(), utf32_input.size(), utf8_output.data());
3075 } else
3076 #endif
3077 {
3078 return convert_utf32_to_utf8(utf32_input.data(), utf32_input.size(),
3079 reinterpret_cast<char *>(utf8_output.data()));
3080 }
3081}
3082 #endif // SIMDUTF_SPAN
3083
3100simdutf_warn_unused result convert_utf32_to_utf8_with_errors(
3101 const char32_t *input, size_t length, char *utf8_buffer) noexcept;
3102 #if SIMDUTF_SPAN
3103simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3104convert_utf32_to_utf8_with_errors(
3105 std::span<const char32_t> utf32_input,
3106 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
3107 #if SIMDUTF_CPLUSPLUS23
3108 if consteval {
3109 return scalar::utf32_to_utf8::convert_with_errors(
3110 utf32_input.data(), utf32_input.size(), utf8_output.data());
3111 } else
3112 #endif
3113 {
3114 return convert_utf32_to_utf8_with_errors(
3115 utf32_input.data(), utf32_input.size(),
3116 reinterpret_cast<char *>(utf8_output.data()));
3117 }
3118}
3119 #endif // SIMDUTF_SPAN
3120
3134simdutf_warn_unused size_t convert_valid_utf32_to_utf8(
3135 const char32_t *input, size_t length, char *utf8_buffer) noexcept;
3136 #if SIMDUTF_SPAN
3137simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3138convert_valid_utf32_to_utf8(
3139 std::span<const char32_t> valid_utf32_input,
3140 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
3141 #if SIMDUTF_CPLUSPLUS23
3142 if consteval {
3143 return scalar::utf32_to_utf8::convert_valid(
3144 valid_utf32_input.data(), valid_utf32_input.size(), utf8_output.data());
3145 } else
3146 #endif
3147 {
3148 return convert_valid_utf32_to_utf8(
3149 valid_utf32_input.data(), valid_utf32_input.size(),
3150 reinterpret_cast<char *>(utf8_output.data()));
3151 }
3152}
3153 #endif // SIMDUTF_SPAN
3154#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
3155
3156#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
3171simdutf_warn_unused size_t convert_utf32_to_utf16(
3172 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3173 #if SIMDUTF_SPAN
3174simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3175convert_utf32_to_utf16(std::span<const char32_t> utf32_input,
3176 std::span<char16_t> utf16_output) noexcept {
3177 #if SIMDUTF_CPLUSPLUS23
3178 if consteval {
3179 return scalar::utf32_to_utf16::convert<endianness::NATIVE>(
3180 utf32_input.data(), utf32_input.size(), utf16_output.data());
3181 } else
3182 #endif
3183 {
3184 return convert_utf32_to_utf16(utf32_input.data(), utf32_input.size(),
3185 utf16_output.data());
3186 }
3187}
3188 #endif // SIMDUTF_SPAN
3189
3203simdutf_warn_unused size_t convert_utf32_to_utf16le(
3204 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3205 #if SIMDUTF_SPAN
3206simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3207convert_utf32_to_utf16le(std::span<const char32_t> utf32_input,
3208 std::span<char16_t> utf16_output) noexcept {
3209 #if SIMDUTF_CPLUSPLUS23
3210 if consteval {
3211 return scalar::utf32_to_utf16::convert<endianness::LITTLE>(
3212 utf32_input.data(), utf32_input.size(), utf16_output.data());
3213 } else
3214 #endif
3215 {
3216 return convert_utf32_to_utf16le(utf32_input.data(), utf32_input.size(),
3217 utf16_output.data());
3218 }
3219}
3220 #endif // SIMDUTF_SPAN
3221#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
3222
3223#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
3238simdutf_warn_unused size_t convert_utf32_to_latin1(
3239 const char32_t *input, size_t length, char *latin1_buffer) noexcept;
3240 #if SIMDUTF_SPAN
3241simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3242convert_utf32_to_latin1(
3243 std::span<const char32_t> utf32_input,
3244 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
3245 #if SIMDUTF_CPLUSPLUS23
3246 if consteval {
3247 return scalar::utf32_to_latin1::convert(
3248 utf32_input.data(), utf32_input.size(), latin1_output.data());
3249 } else
3250 #endif
3251 {
3252 return convert_utf32_to_latin1(
3253 utf32_input.data(), utf32_input.size(),
3254 reinterpret_cast<char *>(latin1_output.data()));
3255 }
3256}
3257 #endif // SIMDUTF_SPAN
3258
3276simdutf_warn_unused result convert_utf32_to_latin1_with_errors(
3277 const char32_t *input, size_t length, char *latin1_buffer) noexcept;
3278 #if SIMDUTF_SPAN
3279simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3280convert_utf32_to_latin1_with_errors(
3281 std::span<const char32_t> utf32_input,
3282 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
3283 #if SIMDUTF_CPLUSPLUS23
3284 if consteval {
3285 return scalar::utf32_to_latin1::convert_with_errors(
3286 utf32_input.data(), utf32_input.size(), latin1_output.data());
3287 } else
3288 #endif
3289 {
3290 return convert_utf32_to_latin1_with_errors(
3291 utf32_input.data(), utf32_input.size(),
3292 reinterpret_cast<char *>(latin1_output.data()));
3293 }
3294}
3295 #endif // SIMDUTF_SPAN
3296
3317simdutf_warn_unused size_t convert_valid_utf32_to_latin1(
3318 const char32_t *input, size_t length, char *latin1_buffer) noexcept;
3319 #if SIMDUTF_SPAN
3320simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused size_t
3321convert_valid_utf32_to_latin1(
3322 std::span<const char32_t> valid_utf32_input,
3323 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
3324 #if SIMDUTF_CPLUSPLUS23
3325 if consteval {
3326 return scalar::utf32_to_latin1::convert_valid(
3327 detail::constexpr_cast_ptr<uint32_t>(valid_utf32_input.data()),
3328 valid_utf32_input.size(),
3329 detail::constexpr_cast_writeptr<char>(latin1_output.data()));
3330 }
3331 #endif
3332 {
3333 return convert_valid_utf32_to_latin1(
3334 valid_utf32_input.data(), valid_utf32_input.size(),
3335 reinterpret_cast<char *>(latin1_output.data()));
3336 }
3337}
3338 #endif // SIMDUTF_SPAN
3339
3352simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 size_t
3353latin1_length_from_utf32(size_t length) noexcept {
3354 return length;
3355}
3356
3365simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 size_t
3366utf32_length_from_latin1(size_t length) noexcept {
3367 return length;
3368}
3369#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
3370
3371#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
3385simdutf_warn_unused size_t convert_utf32_to_utf16be(
3386 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3387 #if SIMDUTF_SPAN
3388simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3389convert_utf32_to_utf16be(std::span<const char32_t> utf32_input,
3390 std::span<char16_t> utf16_output) noexcept {
3391 #if SIMDUTF_CPLUSPLUS23
3392 if consteval {
3393 return scalar::utf32_to_utf16::convert<endianness::BIG>(
3394 utf32_input.data(), utf32_input.size(), utf16_output.data());
3395 } else
3396 #endif
3397 {
3398 return convert_utf32_to_utf16be(utf32_input.data(), utf32_input.size(),
3399 utf16_output.data());
3400 }
3401}
3402 #endif // SIMDUTF_SPAN
3403
3421simdutf_warn_unused result convert_utf32_to_utf16_with_errors(
3422 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3423 #if SIMDUTF_SPAN
3424simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3425convert_utf32_to_utf16_with_errors(std::span<const char32_t> utf32_input,
3426 std::span<char16_t> utf16_output) noexcept {
3427 #if SIMDUTF_CPLUSPLUS23
3428 if consteval {
3429 return scalar::utf32_to_utf16::convert_with_errors<endianness::NATIVE>(
3430 utf32_input.data(), utf32_input.size(), utf16_output.data());
3431 } else
3432 #endif
3433 {
3434 return convert_utf32_to_utf16_with_errors(
3435 utf32_input.data(), utf32_input.size(), utf16_output.data());
3436 }
3437}
3438 #endif // SIMDUTF_SPAN
3439
3456simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(
3457 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3458 #if SIMDUTF_SPAN
3459simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3460convert_utf32_to_utf16le_with_errors(
3461 std::span<const char32_t> utf32_input,
3462 std::span<char16_t> utf16_output) noexcept {
3463 #if SIMDUTF_CPLUSPLUS23
3464 if consteval {
3465 return scalar::utf32_to_utf16::convert_with_errors<endianness::LITTLE>(
3466 utf32_input.data(), utf32_input.size(), utf16_output.data());
3467 } else
3468 #endif
3469 {
3470 return convert_utf32_to_utf16le_with_errors(
3471 utf32_input.data(), utf32_input.size(), utf16_output.data());
3472 }
3473}
3474 #endif // SIMDUTF_SPAN
3475
3492simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(
3493 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3494 #if SIMDUTF_SPAN
3495simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3496convert_utf32_to_utf16be_with_errors(
3497 std::span<const char32_t> utf32_input,
3498 std::span<char16_t> utf16_output) noexcept {
3499 #if SIMDUTF_CPLUSPLUS23
3500 if consteval {
3501 return scalar::utf32_to_utf16::convert_with_errors<endianness::BIG>(
3502 utf32_input.data(), utf32_input.size(), utf16_output.data());
3503 } else
3504 #endif
3505 {
3506 return convert_utf32_to_utf16be_with_errors(
3507 utf32_input.data(), utf32_input.size(), utf16_output.data());
3508 }
3509}
3510 #endif // SIMDUTF_SPAN
3511
3525simdutf_warn_unused size_t convert_valid_utf32_to_utf16(
3526 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3527 #if SIMDUTF_SPAN
3528simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3529convert_valid_utf32_to_utf16(std::span<const char32_t> valid_utf32_input,
3530 std::span<char16_t> utf16_output) noexcept {
3531
3532 #if SIMDUTF_CPLUSPLUS23
3533 if consteval {
3534 return scalar::utf32_to_utf16::convert_valid<endianness::NATIVE>(
3535 valid_utf32_input.data(), valid_utf32_input.size(),
3536 utf16_output.data());
3537 } else
3538 #endif
3539 {
3540 return convert_valid_utf32_to_utf16(valid_utf32_input.data(),
3541 valid_utf32_input.size(),
3542 utf16_output.data());
3543 }
3544}
3545 #endif // SIMDUTF_SPAN
3546
3560simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(
3561 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3562 #if SIMDUTF_SPAN
3563simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3564convert_valid_utf32_to_utf16le(std::span<const char32_t> valid_utf32_input,
3565 std::span<char16_t> utf16_output) noexcept {
3566 #if SIMDUTF_CPLUSPLUS23
3567 if consteval {
3568 return scalar::utf32_to_utf16::convert_valid<endianness::LITTLE>(
3569 valid_utf32_input.data(), valid_utf32_input.size(),
3570 utf16_output.data());
3571 } else
3572 #endif
3573 {
3574 return convert_valid_utf32_to_utf16le(valid_utf32_input.data(),
3575 valid_utf32_input.size(),
3576 utf16_output.data());
3577 }
3578}
3579 #endif // SIMDUTF_SPAN
3580
3594simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(
3595 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3596 #if SIMDUTF_SPAN
3597simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3598convert_valid_utf32_to_utf16be(std::span<const char32_t> valid_utf32_input,
3599 std::span<char16_t> utf16_output) noexcept {
3600 #if SIMDUTF_CPLUSPLUS23
3601 if consteval {
3602 return scalar::utf32_to_utf16::convert_valid<endianness::BIG>(
3603 valid_utf32_input.data(), valid_utf32_input.size(),
3604 utf16_output.data());
3605 } else
3606 #endif
3607 {
3608 return convert_valid_utf32_to_utf16be(valid_utf32_input.data(),
3609 valid_utf32_input.size(),
3610 utf16_output.data());
3611 }
3612}
3613 #endif // SIMDUTF_SPAN
3614#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
3615
3616#if SIMDUTF_FEATURE_UTF16
/// Pointer-based UTF-16 endianness change writing into `output`.
/// Only declared in this header section; the definition lives elsewhere.
void change_endianness_utf16(const char16_t *input, size_t length,
                             char16_t *output) noexcept;
  #if SIMDUTF_SPAN
/// Span overload. During C++23 constant evaluation it runs
/// scalar::utf16::change_endianness_utf16; otherwise it forwards to the
/// pointer overload. The size of `utf16_output` is never consulted.
simdutf_really_inline simdutf_constexpr23 void
change_endianness_utf16(std::span<const char16_t> utf16_input,
                        std::span<char16_t> utf16_output) noexcept {
  const char16_t *src = utf16_input.data();
  const size_t src_len = utf16_input.size();
  char16_t *dst = utf16_output.data();
    #if SIMDUTF_CPLUSPLUS23
  if consteval {
    return scalar::utf16::change_endianness_utf16(src, src_len, dst);
  } else
    #endif
  {
    return change_endianness_utf16(src, src_len, dst);
  }
}
  #endif // SIMDUTF_SPAN
3648#endif // SIMDUTF_FEATURE_UTF16
3649
3650#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
3662simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input,
3663 size_t length) noexcept;
3664 #if SIMDUTF_SPAN
3665simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3666utf8_length_from_utf32(std::span<const char32_t> valid_utf32_input) noexcept {
3667 #if SIMDUTF_CPLUSPLUS23
3668 if consteval {
3669 return scalar::utf32::utf8_length_from_utf32(valid_utf32_input.data(),
3670 valid_utf32_input.size());
3671 } else
3672 #endif
3673 {
3674 return utf8_length_from_utf32(valid_utf32_input.data(),
3675 valid_utf32_input.size());
3676 }
3677}
3678 #endif // SIMDUTF_SPAN
3679#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
3680
3681#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
3693simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input,
3694 size_t length) noexcept;
3695 #if SIMDUTF_SPAN
3696simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3697utf16_length_from_utf32(std::span<const char32_t> valid_utf32_input) noexcept {
3698 #if SIMDUTF_CPLUSPLUS23
3699 if consteval {
3700 return scalar::utf32::utf16_length_from_utf32(valid_utf32_input.data(),
3701 valid_utf32_input.size());
3702 } else
3703 #endif
3704 {
3705 return utf16_length_from_utf32(valid_utf32_input.data(),
3706 valid_utf32_input.size());
3707 }
3708}
3709 #endif // SIMDUTF_SPAN
3710
3726simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t *input,
3727 size_t length) noexcept;
3728 #if SIMDUTF_SPAN
3729simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3730utf32_length_from_utf16(std::span<const char16_t> valid_utf16_input) noexcept {
3731 #if SIMDUTF_CPLUSPLUS23
3732 if consteval {
3733 return scalar::utf16::utf32_length_from_utf16<endianness::NATIVE>(
3734 valid_utf16_input.data(), valid_utf16_input.size());
3735 } else
3736 #endif
3737 {
3738 return utf32_length_from_utf16(valid_utf16_input.data(),
3739 valid_utf16_input.size());
3740 }
3741}
3742 #endif // SIMDUTF_SPAN
3743
3759simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input,
3760 size_t length) noexcept;
3761 #if SIMDUTF_SPAN
3762simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3763utf32_length_from_utf16le(
3764 std::span<const char16_t> valid_utf16_input) noexcept {
3765 #if SIMDUTF_CPLUSPLUS23
3766 if consteval {
3767 return scalar::utf16::utf32_length_from_utf16<endianness::LITTLE>(
3768 valid_utf16_input.data(), valid_utf16_input.size());
3769 } else
3770 #endif
3771 {
3772 return utf32_length_from_utf16le(valid_utf16_input.data(),
3773 valid_utf16_input.size());
3774 }
3775}
3776 #endif // SIMDUTF_SPAN
3777
3793simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input,
3794 size_t length) noexcept;
3795 #if SIMDUTF_SPAN
3796simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3797utf32_length_from_utf16be(
3798 std::span<const char16_t> valid_utf16_input) noexcept {
3799 #if SIMDUTF_CPLUSPLUS23
3800 if consteval {
3801 return scalar::utf16::utf32_length_from_utf16<endianness::BIG>(
3802 valid_utf16_input.data(), valid_utf16_input.size());
3803 } else
3804 #endif
3805 {
3806 return utf32_length_from_utf16be(valid_utf16_input.data(),
3807 valid_utf16_input.size());
3808 }
3809}
3810 #endif // SIMDUTF_SPAN
3811#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
3812
3813#if SIMDUTF_FEATURE_UTF16
3828simdutf_warn_unused size_t count_utf16(const char16_t *input,
3829 size_t length) noexcept;
3830 #if SIMDUTF_SPAN
3831simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3832count_utf16(std::span<const char16_t> valid_utf16_input) noexcept {
3833 #if SIMDUTF_CPLUSPLUS23
3834 if consteval {
3835 return scalar::utf16::count_code_points<endianness::NATIVE>(
3836 valid_utf16_input.data(), valid_utf16_input.size());
3837 } else
3838 #endif
3839 {
3840 return count_utf16(valid_utf16_input.data(), valid_utf16_input.size());
3841 }
3842}
3843 #endif // SIMDUTF_SPAN
3844
3859simdutf_warn_unused size_t count_utf16le(const char16_t *input,
3860 size_t length) noexcept;
3861 #if SIMDUTF_SPAN
3862simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3863count_utf16le(std::span<const char16_t> valid_utf16_input) noexcept {
3864 #if SIMDUTF_CPLUSPLUS23
3865 if consteval {
3866 return scalar::utf16::count_code_points<endianness::LITTLE>(
3867 valid_utf16_input.data(), valid_utf16_input.size());
3868 } else
3869 #endif
3870 {
3871 return count_utf16le(valid_utf16_input.data(), valid_utf16_input.size());
3872 }
3873}
3874 #endif // SIMDUTF_SPAN
3875
3890simdutf_warn_unused size_t count_utf16be(const char16_t *input,
3891 size_t length) noexcept;
3892 #if SIMDUTF_SPAN
3893simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3894count_utf16be(std::span<const char16_t> valid_utf16_input) noexcept {
3895 #if SIMDUTF_CPLUSPLUS23
3896 if consteval {
3897 return scalar::utf16::count_code_points<endianness::BIG>(
3898 valid_utf16_input.data(), valid_utf16_input.size());
3899 } else
3900 #endif
3901 {
3902 return count_utf16be(valid_utf16_input.data(), valid_utf16_input.size());
3903 }
3904}
3905 #endif // SIMDUTF_SPAN
3906#endif // SIMDUTF_FEATURE_UTF16
3907
3908#if SIMDUTF_FEATURE_UTF8
3921simdutf_warn_unused size_t count_utf8(const char *input,
3922 size_t length) noexcept;
3923 #if SIMDUTF_SPAN
3924simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t count_utf8(
3925 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
3926 #if SIMDUTF_CPLUSPLUS23
3927 if consteval {
3928 return scalar::utf8::count_code_points(valid_utf8_input.data(),
3929 valid_utf8_input.size());
3930 } else
3931 #endif
3932 {
3933 return count_utf8(reinterpret_cast<const char *>(valid_utf8_input.data()),
3934 valid_utf8_input.size());
3935 }
3936}
3937 #endif // SIMDUTF_SPAN
3938
3953simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length);
3954 #if SIMDUTF_SPAN
3955simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3956trim_partial_utf8(
3957 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
3958 #if SIMDUTF_CPLUSPLUS23
3959 if consteval {
3960 return scalar::utf8::trim_partial_utf8(valid_utf8_input.data(),
3961 valid_utf8_input.size());
3962 } else
3963 #endif
3964 {
3965 return trim_partial_utf8(
3966 reinterpret_cast<const char *>(valid_utf8_input.data()),
3967 valid_utf8_input.size());
3968 }
3969}
3970 #endif // SIMDUTF_SPAN
3971#endif // SIMDUTF_FEATURE_UTF8
3972
3973#if SIMDUTF_FEATURE_UTF16
3988simdutf_warn_unused size_t trim_partial_utf16be(const char16_t *input,
3989 size_t length);
3990 #if SIMDUTF_SPAN
3991simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3992trim_partial_utf16be(std::span<const char16_t> valid_utf16_input) noexcept {
3993 #if SIMDUTF_CPLUSPLUS23
3994 if consteval {
3995 return scalar::utf16::trim_partial_utf16<endianness::BIG>(
3996 valid_utf16_input.data(), valid_utf16_input.size());
3997 } else
3998 #endif
3999 {
4000 return trim_partial_utf16be(valid_utf16_input.data(),
4001 valid_utf16_input.size());
4002 }
4003}
4004 #endif // SIMDUTF_SPAN
4005
4020simdutf_warn_unused size_t trim_partial_utf16le(const char16_t *input,
4021 size_t length);
4022 #if SIMDUTF_SPAN
4023simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
4024trim_partial_utf16le(std::span<const char16_t> valid_utf16_input) noexcept {
4025 #if SIMDUTF_CPLUSPLUS23
4026 if consteval {
4027 return scalar::utf16::trim_partial_utf16<endianness::LITTLE>(
4028 valid_utf16_input.data(), valid_utf16_input.size());
4029 } else
4030 #endif
4031 {
4032 return trim_partial_utf16le(valid_utf16_input.data(),
4033 valid_utf16_input.size());
4034 }
4035}
4036 #endif // SIMDUTF_SPAN
4037
4052simdutf_warn_unused size_t trim_partial_utf16(const char16_t *input,
4053 size_t length);
4054 #if SIMDUTF_SPAN
4055simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
4056trim_partial_utf16(std::span<const char16_t> valid_utf16_input) noexcept {
4057 #if SIMDUTF_CPLUSPLUS23
4058 if consteval {
4059 return scalar::utf16::trim_partial_utf16<endianness::NATIVE>(
4060 valid_utf16_input.data(), valid_utf16_input.size());
4061 } else
4062 #endif
4063 {
4064 return trim_partial_utf16(valid_utf16_input.data(),
4065 valid_utf16_input.size());
4066 }
4067}
4068 #endif // SIMDUTF_SPAN
4069#endif // SIMDUTF_FEATURE_UTF16
4070
4071#if SIMDUTF_FEATURE_BASE64 || SIMDUTF_FEATURE_UTF16 || \
4072 SIMDUTF_FEATURE_DETECT_ENCODING
4073 #ifndef SIMDUTF_NEED_TRAILING_ZEROES
4074 #define SIMDUTF_NEED_TRAILING_ZEROES 1
4075 #endif
4076#endif // SIMDUTF_FEATURE_BASE64 || SIMDUTF_FEATURE_UTF16 ||
4077 // SIMDUTF_FEATURE_DETECT_ENCODING
4078
4079#if SIMDUTF_FEATURE_BASE64
// base64_options select the base64 variant used by the base64 routines.
// ASCII spaces are ' ', '\t', '\n', '\r', '\f'.
// Garbage characters are characters that are neither part of the base64
// alphabet nor ASCII spaces.

/// Modifier for base64_default and base64_url.
constexpr uint64_t base64_reverse_padding = 2;

enum base64_options : uint64_t {
  /// Standard base64 format (with padding).
  base64_default = 0,
  /// base64url format (no padding).
  base64_url = 1,
  /// Standard base64 format without padding.
  base64_default_no_padding = base64_default | base64_reverse_padding,
  /// base64url with padding.
  base64_url_with_padding = base64_url | base64_reverse_padding,
  /// Standard base64 format accepting garbage characters; the input stops
  /// with the first '=' if any.
  base64_default_accept_garbage = 4,
  /// base64url format accepting garbage characters; the input stops with the
  /// first '=' if any.
  base64_url_accept_garbage = 5,
  /// Standard/base64url hybrid format (only meaningful for decoding!).
  base64_default_or_url = 8,
  /// Standard/base64url hybrid format accepting garbage characters (only
  /// meaningful for decoding!); the input stops with the first '=' if any.
  base64_default_or_url_accept_garbage = 12,
};
4107
// last_chunk_handling_options specify how the last chunk is handled in
// base64 decoding.
// https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64
enum last_chunk_handling_options : uint64_t {
  /// Standard base64 format, decode partial final chunk.
  loose = 0,
  /// Error when the last chunk is partial, 2 or 3 chars, and unpadded, or
  /// non-zero bit padding.
  strict = 1,
  /// If the last chunk is partial, ignore it (no error).
  stop_before_partial = 2,
  /// Only decode full blocks (4 base64 characters, no padding).
  only_full_chunks = 3
};
4120
4121inline simdutf_constexpr23 bool
4122is_partial(last_chunk_handling_options options) {
4123 return (options == stop_before_partial) || (options == only_full_chunks);
4124}
4125
4126namespace detail {
4127simdutf_warn_unused const char *find(const char *start, const char *end,
4128 char character) noexcept;
4129simdutf_warn_unused const char16_t *
4130find(const char16_t *start, const char16_t *end, char16_t character) noexcept;
4131} // namespace detail
4132
4143simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 const char *
4144find(const char *start, const char *end, char character) noexcept {
4145 #if SIMDUTF_CPLUSPLUS23
4146 if consteval {
4147 for (; start != end; ++start)
4148 if (*start == character)
4149 return start;
4150 return end;
4151 } else
4152 #endif
4153 {
4154 return detail::find(start, end, character);
4155 }
4156}
4157simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 const char16_t *
4158find(const char16_t *start, const char16_t *end, char16_t character) noexcept {
4159 // implementation note: this is repeated instead of a template, to ensure
4160 // the api is still a function and compiles without concepts
4161 #if SIMDUTF_CPLUSPLUS23
4162 if consteval {
4163 for (; start != end; ++start)
4164 if (*start == character)
4165 return start;
4166 return end;
4167 } else
4168 #endif
4169 {
4170 return detail::find(start, end, character);
4171 }
4172}
4173}
4174 // We include base64_tables once.
4175 #include <simdutf/base64_tables.h>
4176 #include <simdutf/scalar/base64.h>
4177
4178namespace simdutf {
4179
4180inline std::string_view to_string(base64_options options) {
4181 switch (options) {
4182 case base64_default:
4183 return "base64_default";
4184 case base64_url:
4185 return "base64_url";
4186 case base64_reverse_padding:
4187 return "base64_reverse_padding";
4188 case base64_url_with_padding:
4189 return "base64_url_with_padding";
4190 case base64_default_accept_garbage:
4191 return "base64_default_accept_garbage";
4192 case base64_url_accept_garbage:
4193 return "base64_url_accept_garbage";
4194 case base64_default_or_url:
4195 return "base64_default_or_url";
4196 case base64_default_or_url_accept_garbage:
4197 return "base64_default_or_url_accept_garbage";
4198 }
4199 return "<unknown>";
4200}
4201
4202inline std::string_view to_string(last_chunk_handling_options options) {
4203 switch (options) {
4204 case loose:
4205 return "loose";
4206 case strict:
4207 return "strict";
4208 case stop_before_partial:
4209 return "stop_before_partial";
4210 case only_full_chunks:
4211 return "only_full_chunks";
4212 }
4213 return "<unknown>";
4214}
4215
4229simdutf_warn_unused size_t
4230maximal_binary_length_from_base64(const char *input, size_t length) noexcept;
4231 #if SIMDUTF_SPAN
4232simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
4233maximal_binary_length_from_base64(
4234 const detail::input_span_of_byte_like auto &input) noexcept {
4235 #if SIMDUTF_CPLUSPLUS23
4236 if consteval {
4237 return scalar::base64::maximal_binary_length_from_base64(
4238 detail::constexpr_cast_ptr<uint8_t>(input.data()), input.size());
4239 } else
4240 #endif
4241 {
4242 return maximal_binary_length_from_base64(
4243 reinterpret_cast<const char *>(input.data()), input.size());
4244 }
4245}
4246 #endif // SIMDUTF_SPAN
4247
4262simdutf_warn_unused size_t maximal_binary_length_from_base64(
4263 const char16_t *input, size_t length) noexcept;
4264 #if SIMDUTF_SPAN
4265simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
4266maximal_binary_length_from_base64(std::span<const char16_t> input) noexcept {
4267 #if SIMDUTF_CPLUSPLUS23
4268 if consteval {
4269 return scalar::base64::maximal_binary_length_from_base64(input.data(),
4270 input.size());
4271 } else
4272 #endif
4273 {
4274 return maximal_binary_length_from_base64(input.data(), input.size());
4275 }
4276}
4277 #endif // SIMDUTF_SPAN
4278
4293simdutf_warn_unused size_t binary_length_from_base64(const char *input,
4294 size_t length) noexcept;
4295 #if SIMDUTF_SPAN
4296simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
4297binary_length_from_base64(
4298 const detail::input_span_of_byte_like auto &input) noexcept {
4299 #if SIMDUTF_CPLUSPLUS23
4300 if consteval {
4301 return scalar::base64::binary_length_from_base64(input.data(),
4302 input.size());
4303 } else
4304 #endif
4305 {
4306 return binary_length_from_base64(
4307 reinterpret_cast<const char *>(input.data()), input.size());
4308 }
4309}
4310 #endif // SIMDUTF_SPAN
4311
4327simdutf_warn_unused size_t binary_length_from_base64(const char16_t *input,
4328 size_t length) noexcept;
4329 #if SIMDUTF_SPAN
4330simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
4331binary_length_from_base64(std::span<const char16_t> input) noexcept {
4332 #if SIMDUTF_CPLUSPLUS23
4333 if consteval {
4334 return scalar::base64::binary_length_from_base64(input.data(),
4335 input.size());
4336 } else
4337 #endif
4338 {
4339 return binary_length_from_base64(input.data(), input.size());
4340 }
4341}
4342 #endif // SIMDUTF_SPAN
4343
4398simdutf_warn_unused result base64_to_binary(
4399 const char *input, size_t length, char *output,
4400 base64_options options = base64_default,
4401 last_chunk_handling_options last_chunk_options = loose) noexcept;
4402 #if SIMDUTF_SPAN
4403simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
4404base64_to_binary(
4405 const detail::input_span_of_byte_like auto &input,
4406 detail::output_span_of_byte_like auto &&binary_output,
4407 base64_options options = base64_default,
4408 last_chunk_handling_options last_chunk_options = loose) noexcept {
4409 #if SIMDUTF_CPLUSPLUS23
4410 if consteval {
4411 return scalar::base64::base64_to_binary_details_impl(
4412 input.data(), input.size(), binary_output.data(), options,
4413 last_chunk_options);
4414 } else
4415 #endif
4416 {
4417 return base64_to_binary(reinterpret_cast<const char *>(input.data()),
4418 input.size(),
4419 reinterpret_cast<char *>(binary_output.data()),
4420 options, last_chunk_options);
4421 }
4422}
4423 #endif // SIMDUTF_SPAN
4424
4431inline simdutf_warn_unused simdutf_constexpr23 size_t base64_length_from_binary(
4432 size_t length, base64_options options = base64_default) noexcept {
4433 return scalar::base64::base64_length_from_binary(length, options);
4434}
4435
4445inline simdutf_warn_unused simdutf_constexpr23 size_t
4446base64_length_from_binary_with_lines(
4447 size_t length, base64_options options = base64_default,
4448 size_t line_length = default_line_length) noexcept {
4449 return scalar::base64::base64_length_from_binary_with_lines(length, options,
4450 line_length);
4451}
4452
4474size_t binary_to_base64(const char *input, size_t length, char *output,
4475 base64_options options = base64_default) noexcept;
4476 #if SIMDUTF_SPAN
4477simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
4478binary_to_base64(const detail::input_span_of_byte_like auto &input,
4479 detail::output_span_of_byte_like auto &&binary_output,
4480 base64_options options = base64_default) noexcept {
4481 #if SIMDUTF_CPLUSPLUS23
4482 if consteval {
4483 return scalar::base64::tail_encode_base64(
4484 binary_output.data(), input.data(), input.size(), options);
4485 } else
4486 #endif
4487 {
4488 return binary_to_base64(
4489 reinterpret_cast<const char *>(input.data()), input.size(),
4490 reinterpret_cast<char *>(binary_output.data()), options);
4491 }
4492}
4493 #endif // SIMDUTF_SPAN
4494
4519size_t
4520binary_to_base64_with_lines(const char *input, size_t length, char *output,
4521 size_t line_length = simdutf::default_line_length,
4522 base64_options options = base64_default) noexcept;
4523 #if SIMDUTF_SPAN
4524simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
4525binary_to_base64_with_lines(
4526 const detail::input_span_of_byte_like auto &input,
4527 detail::output_span_of_byte_like auto &&binary_output,
4528 size_t line_length = simdutf::default_line_length,
4529 base64_options options = base64_default) noexcept {
4530 #if SIMDUTF_CPLUSPLUS23
4531 if consteval {
4532 return scalar::base64::tail_encode_base64_impl<true>(
4533 binary_output.data(), input.data(), input.size(), options, line_length);
4534 } else
4535 #endif
4536 {
4537 return binary_to_base64_with_lines(
4538 reinterpret_cast<const char *>(input.data()), input.size(),
4539 reinterpret_cast<char *>(binary_output.data()), line_length, options);
4540 }
4541}
4542 #endif // SIMDUTF_SPAN
4543
4544 #if SIMDUTF_ATOMIC_REF
4586size_t
4587atomic_binary_to_base64(const char *input, size_t length, char *output,
4588 base64_options options = base64_default) noexcept;
4589 #if SIMDUTF_SPAN
4590simdutf_really_inline simdutf_warn_unused size_t
4591atomic_binary_to_base64(const detail::input_span_of_byte_like auto &input,
4592 detail::output_span_of_byte_like auto &&binary_output,
4593 base64_options options = base64_default) noexcept {
4594 return atomic_binary_to_base64(
4595 reinterpret_cast<const char *>(input.data()), input.size(),
4596 reinterpret_cast<char *>(binary_output.data()), options);
4597}
4598 #endif // SIMDUTF_SPAN
4599 #endif // SIMDUTF_ATOMIC_REF
4600
4657simdutf_warn_unused result
4658base64_to_binary(const char16_t *input, size_t length, char *output,
4659 base64_options options = base64_default,
4660 last_chunk_handling_options last_chunk_options =
4661 last_chunk_handling_options::loose) noexcept;
4662 #if SIMDUTF_SPAN
4663simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
4664base64_to_binary(
4665 std::span<const char16_t> input,
4666 detail::output_span_of_byte_like auto &&binary_output,
4667 base64_options options = base64_default,
4668 last_chunk_handling_options last_chunk_options = loose) noexcept {
4669 #if SIMDUTF_CPLUSPLUS23
4670 if consteval {
4671 return scalar::base64::base64_to_binary_details_impl(
4672 input.data(), input.size(), binary_output.data(), options,
4673 last_chunk_options);
4674 } else
4675 #endif
4676 {
4677 return base64_to_binary(input.data(), input.size(),
4678 reinterpret_cast<char *>(binary_output.data()),
4679 options, last_chunk_options);
4680 }
4681}
4682 #endif // SIMDUTF_SPAN
4683
4731simdutf_warn_unused full_result
4732base64_to_binary_details(const char *input, size_t length, char *output,
4733 base64_options options = base64_default,
4734 last_chunk_handling_options last_chunk_options =
4735 last_chunk_handling_options::loose) noexcept;
4736 #if SIMDUTF_SPAN
4737simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 full_result
4738base64_to_binary_details(
4739 const detail::input_span_of_byte_like auto &input,
4740 detail::output_span_of_byte_like auto &&binary_output,
4741 base64_options options = base64_default,
4742 last_chunk_handling_options last_chunk_options = loose) noexcept {
4743 #if SIMDUTF_CPLUSPLUS23
4744 if consteval {
4745 return scalar::base64::base64_to_binary_details_impl(
4746 input.data(), input.size(), binary_output.data(), options,
4747 last_chunk_options);
4748 } else
4749 #endif
4750 {
4751 return base64_to_binary_details(
4752 reinterpret_cast<const char *>(input.data()), input.size(),
4753 reinterpret_cast<char *>(binary_output.data()), options,
4754 last_chunk_options);
4755 }
4756}
4757 #endif // SIMDUTF_SPAN
4758
4807simdutf_warn_unused full_result
4808base64_to_binary_details(const char16_t *input, size_t length, char *output,
4809 base64_options options = base64_default,
4810 last_chunk_handling_options last_chunk_options =
4811 last_chunk_handling_options::loose) noexcept;
4812 #if SIMDUTF_SPAN
4813simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 full_result
4814base64_to_binary_details(
4815 std::span<const char16_t> input,
4816 detail::output_span_of_byte_like auto &&binary_output,
4817 base64_options options = base64_default,
4818 last_chunk_handling_options last_chunk_options = loose) noexcept {
4819 #if SIMDUTF_CPLUSPLUS23
4820 if consteval {
4821 return scalar::base64::base64_to_binary_details_impl(
4822 input.data(), input.size(), binary_output.data(), options,
4823 last_chunk_options);
4824 } else
4825 #endif
4826 {
4827 return base64_to_binary_details(
4828 input.data(), input.size(),
4829 reinterpret_cast<char *>(binary_output.data()), options,
4830 last_chunk_options);
4831 }
4832}
4833 #endif // SIMDUTF_SPAN
4834
4845simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool
4846base64_ignorable(char input, base64_options options = base64_default) noexcept {
4847 return scalar::base64::is_ignorable(input, options);
4848}
4849simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool
4850base64_ignorable(char16_t input,
4851 base64_options options = base64_default) noexcept {
4852 return scalar::base64::is_ignorable(input, options);
4853}
4854
4866simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool
4867base64_valid(char input, base64_options options = base64_default) noexcept {
4868 return scalar::base64::is_base64(input, options);
4869}
4870simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool
4871base64_valid(char16_t input, base64_options options = base64_default) noexcept {
4872 return scalar::base64::is_base64(input, options);
4873}
4874
4884simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool
4885base64_valid_or_padding(char input,
4886 base64_options options = base64_default) noexcept {
4887 return scalar::base64::is_base64_or_padding(input, options);
4888}
4889simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool
4890base64_valid_or_padding(char16_t input,
4891 base64_options options = base64_default) noexcept {
4892 return scalar::base64::is_base64_or_padding(input, options);
4893}
4894
// Declaration only: decode variant that takes the output size by reference
// (`outlen`). Defaults: base64_default, loose last-chunk handling, and
// decode_up_to_bad_char = false.
// NOTE(review): `outlen` is presumably in/out (capacity in, bytes written
// out), as the atomic span wrappers in this header use it - confirm against
// the API documentation.
4962simdutf_warn_unused result
4963base64_to_binary_safe(const char *input, size_t length, char *output,
4964 size_t &outlen, base64_options options = base64_default,
4965 last_chunk_handling_options last_chunk_options =
4966 last_chunk_handling_options::loose,
4967 bool decode_up_to_bad_char = false) noexcept;
4968// the span overload of the function above is defined at the bottom of the file
4969
// Same contract as above, for base64 text stored as char16_t.
4970simdutf_warn_unused result
4971base64_to_binary_safe(const char16_t *input, size_t length, char *output,
4972 size_t &outlen, base64_options options = base64_default,
4973 last_chunk_handling_options last_chunk_options =
4974 last_chunk_handling_options::loose,
4975 bool decode_up_to_bad_char = false) noexcept;
4976 // the span overload of the function above is defined at the bottom of the file
4977
4978 #if SIMDUTF_ATOMIC_REF
5018simdutf_warn_unused result atomic_base64_to_binary_safe(
5019 const char *input, size_t length, char *output, size_t &outlen,
5020 base64_options options = base64_default,
5021 last_chunk_handling_options last_chunk_options =
5022 last_chunk_handling_options::loose,
5023 bool decode_up_to_bad_char = false) noexcept;
5024simdutf_warn_unused result atomic_base64_to_binary_safe(
5025 const char16_t *input, size_t length, char *output, size_t &outlen,
5026 base64_options options = base64_default,
5027 last_chunk_handling_options last_chunk_options = loose,
5028 bool decode_up_to_bad_char = false) noexcept;
5029 #if SIMDUTF_SPAN
5034simdutf_really_inline simdutf_warn_unused std::tuple<result, std::size_t>
5035atomic_base64_to_binary_safe(
5036 const detail::input_span_of_byte_like auto &binary_input,
5037 detail::output_span_of_byte_like auto &&output,
5038 base64_options options = base64_default,
5039 last_chunk_handling_options last_chunk_options =
5040 last_chunk_handling_options::loose,
5041 bool decode_up_to_bad_char = false) noexcept {
5042 size_t outlen = output.size();
5043 auto ret = atomic_base64_to_binary_safe(
5044 reinterpret_cast<const char *>(binary_input.data()), binary_input.size(),
5045 reinterpret_cast<char *>(output.data()), outlen, options,
5046 last_chunk_options, decode_up_to_bad_char);
5047 return {ret, outlen};
5048}
5053simdutf_warn_unused std::tuple<result, std::size_t>
5054atomic_base64_to_binary_safe(
5055 std::span<const char16_t> base64_input,
5056 detail::output_span_of_byte_like auto &&binary_output,
5057 base64_options options = base64_default,
5058 last_chunk_handling_options last_chunk_options = loose,
5059 bool decode_up_to_bad_char = false) noexcept {
5060 size_t outlen = binary_output.size();
5061 auto ret = atomic_base64_to_binary_safe(
5062 base64_input.data(), base64_input.size(),
5063 reinterpret_cast<char *>(binary_output.data()), outlen, options,
5064 last_chunk_options, decode_up_to_bad_char);
5065 return {ret, outlen};
5066}
5067 #endif // SIMDUTF_SPAN
5068 #endif // SIMDUTF_ATOMIC_REF
5069
5070#endif // SIMDUTF_FEATURE_BASE64
5071
5080public:
5090 virtual std::string_view name() const noexcept { return _name; }
5091
5101 virtual std::string_view description() const noexcept { return _description; }
5102
5113
// Encoding-detection hooks, available only when
// SIMDUTF_FEATURE_DETECT_ENCODING is enabled.
5114#if SIMDUTF_FEATURE_DETECT_ENCODING
// Virtual but not pure: a definition exists outside this chunk, subclasses
// may override it. Returns a single encoding_type for input[0..length).
5121 virtual encoding_type autodetect_encoding(const char *input,
5122 size_t length) const noexcept;
5123
// Pure virtual: every concrete implementation must provide it.
// NOTE(review): the int result is presumably a bit set of candidate
// encodings - confirm against the API documentation.
5130 virtual int detect_encodings(const char *input,
5131 size_t length) const noexcept = 0;
5132#endif // SIMDUTF_FEATURE_DETECT_ENCODING
5133
5141 virtual uint32_t required_instruction_sets() const {
5142 return _required_instruction_sets;
5143 }
5144
// UTF-8 and ASCII validation hooks. All are pure virtual: every concrete
// implementation must provide them.
// NOTE(review): the one-line descriptions below are inferred from names and
// signatures - confirm against the API documentation.
5145#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
// Yes/no validation of buf[0..len) as UTF-8 (also needed by detection).
5155 simdutf_warn_unused virtual bool validate_utf8(const char *buf,
5156 size_t len) const noexcept = 0;
5157#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
5158
5159#if SIMDUTF_FEATURE_UTF8
// Same check, reporting a `result` instead of a bool.
5172 simdutf_warn_unused virtual result
5173 validate_utf8_with_errors(const char *buf, size_t len) const noexcept = 0;
5174#endif // SIMDUTF_FEATURE_UTF8
5175
5176#if SIMDUTF_FEATURE_ASCII
// Yes/no validation of buf[0..len) as ASCII.
5186 simdutf_warn_unused virtual bool
5187 validate_ascii(const char *buf, size_t len) const noexcept = 0;
5188
// Same check, reporting a `result` instead of a bool.
5201 simdutf_warn_unused virtual result
5202 validate_ascii_with_errors(const char *buf, size_t len) const noexcept = 0;
5203
5204#endif // SIMDUTF_FEATURE_ASCII
5205
// UTF-16 validation hooks. All are pure virtual. `len` counts char16_t
// elements of `buf`/`input`.
// NOTE(review): the one-line descriptions below are inferred from names and
// signatures - confirm against the API documentation.
5206#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
// Presumably: is the big-endian UTF-16 buffer made only of ASCII characters?
5218 simdutf_warn_unused virtual bool
5219 validate_utf16be_as_ascii(const char16_t *buf, size_t len) const noexcept = 0;
5220
// Little-endian counterpart of the above.
5232 simdutf_warn_unused virtual bool
5233 validate_utf16le_as_ascii(const char16_t *buf, size_t len) const noexcept = 0;
5234#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
5235
5236#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
// Yes/no validation as UTF-16LE (also needed by detection).
5251 simdutf_warn_unused virtual bool
5252 validate_utf16le(const char16_t *buf, size_t len) const noexcept = 0;
5253#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
5254
5255#if SIMDUTF_FEATURE_UTF16
// Yes/no validation as UTF-16BE.
5270 simdutf_warn_unused virtual bool
5271 validate_utf16be(const char16_t *buf, size_t len) const noexcept = 0;
5272
// UTF-16LE validation reporting a `result`.
5289 simdutf_warn_unused virtual result
5290 validate_utf16le_with_errors(const char16_t *buf,
5291 size_t len) const noexcept = 0;
5292
// UTF-16BE validation reporting a `result`.
5309 simdutf_warn_unused virtual result
5310 validate_utf16be_with_errors(const char16_t *buf,
5311 size_t len) const noexcept = 0;
// Writes `len` code units derived from `input` to `output`; returns nothing.
// Presumably repairs ill-formed sequences - the exact rule is not visible
// here.
5324 virtual void to_well_formed_utf16le(const char16_t *input, size_t len,
5325 char16_t *output) const noexcept = 0;
// Big-endian counterpart of the above.
5338 virtual void to_well_formed_utf16be(const char16_t *input, size_t len,
5339 char16_t *output) const noexcept = 0;
5340#endif // SIMDUTF_FEATURE_UTF16
5341
// UTF-32 validation hooks, both pure virtual. `len` counts char32_t
// elements of `buf`.
5342#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
// Yes/no validation (also needed by encoding detection).
5355 simdutf_warn_unused virtual bool
5356 validate_utf32(const char32_t *buf, size_t len) const noexcept = 0;
5357#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
5358
5359#if SIMDUTF_FEATURE_UTF32
// Same check, reporting a `result` instead of a bool.
5375 simdutf_warn_unused virtual result
5376 validate_utf32_with_errors(const char32_t *buf,
5377 size_t len) const noexcept = 0;
5378#endif // SIMDUTF_FEATURE_UTF32
5379
// Latin-1 -> UTF-8 / UTF-16 / UTF-32 conversion hooks, all pure virtual.
// Each takes `length` input bytes and an output buffer and returns a size_t.
// NOTE(review): the return value is presumably the number of output units
// written, and the output buffer must be sized by the caller - confirm
// against the API documentation.
5380#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
5391 simdutf_warn_unused virtual size_t
5392 convert_latin1_to_utf8(const char *input, size_t length,
5393 char *utf8_output) const noexcept = 0;
5394#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
5395
5396#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
// Little-endian UTF-16 output.
5407 simdutf_warn_unused virtual size_t
5408 convert_latin1_to_utf16le(const char *input, size_t length,
5409 char16_t *utf16_output) const noexcept = 0;
5410
// Big-endian UTF-16 output.
5421 simdutf_warn_unused virtual size_t
5422 convert_latin1_to_utf16be(const char *input, size_t length,
5423 char16_t *utf16_output) const noexcept = 0;
5424#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
5425
5426#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
5437 simdutf_warn_unused virtual size_t
5438 convert_latin1_to_utf32(const char *input, size_t length,
5439 char32_t *utf32_buffer) const noexcept = 0;
5440#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
5441
// UTF-8 -> Latin-1 conversion hooks, all pure virtual. Three flavours share
// the same parameters and differ in name and return type.
// NOTE(review): the flavour descriptions are inferred from the names -
// confirm against the API documentation.
5442#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
// Plain flavour: returns a size_t.
5455 simdutf_warn_unused virtual size_t
5456 convert_utf8_to_latin1(const char *input, size_t length,
5457 char *latin1_output) const noexcept = 0;
5458
// Error-reporting flavour: returns a `result`.
5475 simdutf_warn_unused virtual result
5476 convert_utf8_to_latin1_with_errors(const char *input, size_t length,
5477 char *latin1_output) const noexcept = 0;
5478
// "valid" flavour: presumably assumes the input was already validated.
5498 simdutf_warn_unused virtual size_t
5499 convert_valid_utf8_to_latin1(const char *input, size_t length,
5500 char *latin1_output) const noexcept = 0;
5501#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
5502
5503#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// UTF-8 -> UTF-16 conversion hooks, all pure virtual. `length` counts input
// bytes; the output is a char16_t buffer supplied by the caller.
// NOTE(review): return-value meaning is not visible here - confirm against
// the API documentation.
5516 simdutf_warn_unused virtual size_t
5517 convert_utf8_to_utf16le(const char *input, size_t length,
5518 char16_t *utf16_output) const noexcept = 0;
5519
// Big-endian counterpart.
5532 simdutf_warn_unused virtual size_t
5533 convert_utf8_to_utf16be(const char *input, size_t length,
5534 char16_t *utf16_output) const noexcept = 0;
5535
// Little-endian flavour reporting a `result`.
5551 simdutf_warn_unused virtual result convert_utf8_to_utf16le_with_errors(
5552 const char *input, size_t length,
5553 char16_t *utf16_output) const noexcept = 0;
5554
// Big-endian flavour reporting a `result`.
5570 simdutf_warn_unused virtual result convert_utf8_to_utf16be_with_errors(
5571 const char *input, size_t length,
5572 char16_t *utf16_output) const noexcept = 0;
5593 const char16_t *input, size_t length) const noexcept = 0;
5594
5615 const char16_t *input, size_t length) const noexcept = 0;
5616
5617#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5618
// UTF-8 -> UTF-32 conversion, "valid" UTF-8 conversions, and length queries
// taking UTF-8 input. All pure virtual; `length` counts input bytes.
// NOTE(review): descriptions are inferred from names and signatures -
// confirm against the API documentation.
5619#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5632 simdutf_warn_unused virtual size_t
5633 convert_utf8_to_utf32(const char *input, size_t length,
5634 char32_t *utf32_output) const noexcept = 0;
5635
// Flavour reporting a `result`.
5650 simdutf_warn_unused virtual result
5651 convert_utf8_to_utf32_with_errors(const char *input, size_t length,
5652 char32_t *utf32_output) const noexcept = 0;
5653#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5654
5655#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// "valid" flavours: presumably assume the input was already validated.
5666 simdutf_warn_unused virtual size_t
5667 convert_valid_utf8_to_utf16le(const char *input, size_t length,
5668 char16_t *utf16_buffer) const noexcept = 0;
5669
5680 simdutf_warn_unused virtual size_t
5681 convert_valid_utf8_to_utf16be(const char *input, size_t length,
5682 char16_t *utf16_buffer) const noexcept = 0;
5683#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5684
5685#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5696 simdutf_warn_unused virtual size_t
5697 convert_valid_utf8_to_utf32(const char *input, size_t length,
5698 char32_t *utf32_buffer) const noexcept = 0;
5699#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5700
5701#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// Length query: no output buffer, only a size_t result.
5714 simdutf_warn_unused virtual size_t
5715 utf16_length_from_utf8(const char *input, size_t length) const noexcept = 0;
5716#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5717
5718#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
// Length query: no output buffer, only a size_t result.
5733 simdutf_warn_unused virtual size_t
5734 utf32_length_from_utf8(const char *input, size_t length) const noexcept = 0;
5735#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5736
// UTF-16 -> Latin-1 conversion hooks, all pure virtual. `length` counts
// char16_t elements of `input`; each hook exists in an LE and a BE variant.
// NOTE(review): flavour descriptions are inferred from the names - confirm
// against the API documentation.
5737#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
// Plain flavours: return a size_t.
5754 simdutf_warn_unused virtual size_t
5755 convert_utf16le_to_latin1(const char16_t *input, size_t length,
5756 char *latin1_buffer) const noexcept = 0;
5757
5774 simdutf_warn_unused virtual size_t
5775 convert_utf16be_to_latin1(const char16_t *input, size_t length,
5776 char *latin1_buffer) const noexcept = 0;
5777
// Error-reporting flavours: return a `result`.
5797 simdutf_warn_unused virtual result
5798 convert_utf16le_to_latin1_with_errors(const char16_t *input, size_t length,
5799 char *latin1_buffer) const noexcept = 0;
5800
5820 simdutf_warn_unused virtual result
5821 convert_utf16be_to_latin1_with_errors(const char16_t *input, size_t length,
5822 char *latin1_buffer) const noexcept = 0;
5823
// "valid" flavours: presumably assume the input was already validated.
5844 simdutf_warn_unused virtual size_t
5845 convert_valid_utf16le_to_latin1(const char16_t *input, size_t length,
5846 char *latin1_buffer) const noexcept = 0;
5847
5868 simdutf_warn_unused virtual size_t
5869 convert_valid_utf16be_to_latin1(const char16_t *input, size_t length,
5870 char *latin1_buffer) const noexcept = 0;
5871#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
5872
// UTF-16 -> UTF-8 conversion hooks, all pure virtual. `length` counts
// char16_t elements of `input`; each hook exists in an LE and a BE variant.
// NOTE(review): flavour descriptions are inferred from the names - confirm
// against the API documentation.
5873#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// Plain flavours: return a size_t.
5889 simdutf_warn_unused virtual size_t
5890 convert_utf16le_to_utf8(const char16_t *input, size_t length,
5891 char *utf8_buffer) const noexcept = 0;
5892
5908 simdutf_warn_unused virtual size_t
5909 convert_utf16be_to_utf8(const char16_t *input, size_t length,
5910 char *utf8_buffer) const noexcept = 0;
5911
// Error-reporting flavours: return a `result`.
5930 simdutf_warn_unused virtual result
5931 convert_utf16le_to_utf8_with_errors(const char16_t *input, size_t length,
5932 char *utf8_buffer) const noexcept = 0;
5933
5952 simdutf_warn_unused virtual result
5953 convert_utf16be_to_utf8_with_errors(const char16_t *input, size_t length,
5954 char *utf8_buffer) const noexcept = 0;
5955
// "with_replacement" flavours: presumably substitute a replacement
// character for invalid input instead of failing.
5971 simdutf_warn_unused virtual size_t convert_utf16le_to_utf8_with_replacement(
5972 const char16_t *input, size_t length,
5973 char *utf8_buffer) const noexcept = 0;
5974
5990 simdutf_warn_unused virtual size_t convert_utf16be_to_utf8_with_replacement(
5991 const char16_t *input, size_t length,
5992 char *utf8_buffer) const noexcept = 0;
5993
// "valid" flavours: presumably assume the input was already validated.
6008 simdutf_warn_unused virtual size_t
6009 convert_valid_utf16le_to_utf8(const char16_t *input, size_t length,
6010 char *utf8_buffer) const noexcept = 0;
6011
6026 simdutf_warn_unused virtual size_t
6027 convert_valid_utf16be_to_utf8(const char16_t *input, size_t length,
6028 char *utf8_buffer) const noexcept = 0;
6029#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
6030
6031#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
// UTF-16 -> UTF-32 conversion hooks (LE and BE input), both pure virtual.
// `length` counts char16_t elements of `input`.
// NOTE(review): return-value meaning is not visible here - confirm against
// the API documentation.
6047 simdutf_warn_unused virtual size_t
6048 convert_utf16le_to_utf32(const char16_t *input, size_t length,
6049 char32_t *utf32_buffer) const noexcept = 0;
6050
6066 simdutf_warn_unused virtual size_t
6067 convert_utf16be_to_utf32(const char16_t *input, size_t length,
6068 char32_t *utf32_buffer) const noexcept = 0;
6069
6089 const char16_t *input, size_t length,
6090 char32_t *utf32_buffer) const noexcept = 0;
6091
6111 const char16_t *input, size_t length,
6112 char32_t *utf32_buffer) const noexcept = 0;
6113
// "valid" UTF-16 -> UTF-32 conversion hooks (LE and BE input), both pure
// virtual. Presumably assume the input was already validated - confirm
// against the API documentation.
6128 simdutf_warn_unused virtual size_t
6129 convert_valid_utf16le_to_utf32(const char16_t *input, size_t length,
6130 char32_t *utf32_buffer) const noexcept = 0;
6131
6146 simdutf_warn_unused virtual size_t
6147 convert_valid_utf16be_to_utf32(const char16_t *input, size_t length,
6148 char32_t *utf32_buffer) const noexcept = 0;
6149#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
6150
// UTF-8 length queries for UTF-16 input, then UTF-32 -> Latin-1 and
// UTF-32 -> UTF-8 conversion hooks. All pure virtual.
// NOTE(review): descriptions are inferred from names and signatures -
// confirm against the API documentation.
6151#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// Length queries: no output buffer, only a size_t result.
6166 simdutf_warn_unused virtual size_t
6167 utf8_length_from_utf16le(const char16_t *input,
6168 size_t length) const noexcept = 0;
6169
6184 simdutf_warn_unused virtual size_t
6185 utf8_length_from_utf16be(const char16_t *input,
6186 size_t length) const noexcept = 0;
6187#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
6188
6189#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6206 simdutf_warn_unused virtual size_t
6207 convert_utf32_to_latin1(const char32_t *input, size_t length,
6208 char *latin1_buffer) const noexcept = 0;
6209#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6210
6211#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
// Error-reporting flavour: returns a `result`.
6231 simdutf_warn_unused virtual result
6232 convert_utf32_to_latin1_with_errors(const char32_t *input, size_t length,
6233 char *latin1_buffer) const noexcept = 0;
6234
// "valid" flavour: presumably assumes the input was already validated.
6255 simdutf_warn_unused virtual size_t
6256 convert_valid_utf32_to_latin1(const char32_t *input, size_t length,
6257 char *latin1_buffer) const noexcept = 0;
6258#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6259
6260#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
6276 simdutf_warn_unused virtual size_t
6277 convert_utf32_to_utf8(const char32_t *input, size_t length,
6278 char *utf8_buffer) const noexcept = 0;
6279
// Error-reporting flavour: returns a `result`.
6297 simdutf_warn_unused virtual result
6298 convert_utf32_to_utf8_with_errors(const char32_t *input, size_t length,
6299 char *utf8_buffer) const noexcept = 0;
6300
// "valid" flavour: presumably assumes the input was already validated.
6315 simdutf_warn_unused virtual size_t
6316 convert_valid_utf32_to_utf8(const char32_t *input, size_t length,
6317 char *utf8_buffer) const noexcept = 0;
6318#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
6319
6320#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
6331 simdutf_warn_unused virtual size_t
6332 utf16_length_from_latin1(size_t length) const noexcept {
6333 return length;
6334 }
6335#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
6336
6337#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
// UTF-32 -> UTF-16 conversion hooks (LE and BE output), both pure virtual.
// `length` counts char32_t elements of `input`.
// NOTE(review): return-value meaning is not visible here - confirm against
// the API documentation.
6353 simdutf_warn_unused virtual size_t
6354 convert_utf32_to_utf16le(const char32_t *input, size_t length,
6355 char16_t *utf16_buffer) const noexcept = 0;
6356
6372 simdutf_warn_unused virtual size_t
6373 convert_utf32_to_utf16be(const char32_t *input, size_t length,
6374 char16_t *utf16_buffer) const noexcept = 0;
6375
6395 const char32_t *input, size_t length,
6396 char16_t *utf16_buffer) const noexcept = 0;
6397
6417 const char32_t *input, size_t length,
6418 char16_t *utf16_buffer) const noexcept = 0;
6419
// "valid" UTF-32 -> UTF-16 conversion hooks (LE and BE output), both pure
// virtual. Presumably assume the input was already validated - confirm
// against the API documentation.
6434 simdutf_warn_unused virtual size_t
6435 convert_valid_utf32_to_utf16le(const char32_t *input, size_t length,
6436 char16_t *utf16_buffer) const noexcept = 0;
6437
6452 simdutf_warn_unused virtual size_t
6453 convert_valid_utf32_to_utf16be(const char32_t *input, size_t length,
6454 char16_t *utf16_buffer) const noexcept = 0;
6455#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
6456
// Endianness swap and two UTF-8 length queries, all pure virtual.
// NOTE(review): descriptions are inferred from names and signatures -
// confirm against the API documentation.
6457#if SIMDUTF_FEATURE_UTF16
// Writes `length` code units derived from `input` to `output`; returns
// nothing. Presumably byte-swaps each code unit.
6472 virtual void change_endianness_utf16(const char16_t *input, size_t length,
6473 char16_t *output) const noexcept = 0;
6474#endif // SIMDUTF_FEATURE_UTF16
6475
6476#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
// Length query over Latin-1 bytes: no output buffer, only a size_t result.
6485 simdutf_warn_unused virtual size_t
6486 utf8_length_from_latin1(const char *input, size_t length) const noexcept = 0;
6487#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
6488
6489#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
// Length query over UTF-32 code points: only a size_t result.
6502 simdutf_warn_unused virtual size_t
6503 utf8_length_from_utf32(const char32_t *input,
6504 size_t length) const noexcept = 0;
6505#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
6506
6507#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6519 simdutf_warn_unused virtual size_t
6520 latin1_length_from_utf32(size_t length) const noexcept {
6521 return length;
6522 }
6523#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6524
6525#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
// Pure virtual length query over UTF-8 bytes: no output buffer, only a
// size_t result. Unlike the neighbouring identity helpers it needs the
// input data, not just its length.
6537 simdutf_warn_unused virtual size_t
6538 latin1_length_from_utf8(const char *input, size_t length) const noexcept = 0;
6539#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
6540
6541#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
6557 simdutf_warn_unused virtual size_t
6558 latin1_length_from_utf16(size_t length) const noexcept {
6559 return length;
6560 }
6561#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
6562
6563#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
// Pure virtual length query over UTF-32 code points: no output buffer, only
// a size_t result.
6576 simdutf_warn_unused virtual size_t
6577 utf16_length_from_utf32(const char32_t *input,
6578 size_t length) const noexcept = 0;
6579#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
6580
6581#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6590 simdutf_warn_unused virtual size_t
6591 utf32_length_from_latin1(size_t length) const noexcept {
6592 return length;
6593 }
6594#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6595
// Length queries and counters, all pure virtual. None takes an output
// buffer; each returns a size_t.
// NOTE(review): descriptions are inferred from names and signatures -
// confirm against the API documentation.
6596#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
// UTF-32 length of UTF-16 input (LE / BE).
6614 simdutf_warn_unused virtual size_t
6615 utf32_length_from_utf16le(const char16_t *input,
6616 size_t length) const noexcept = 0;
6617
6635 simdutf_warn_unused virtual size_t
6636 utf32_length_from_utf16be(const char16_t *input,
6637 size_t length) const noexcept = 0;
6638#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
6639
6640#if SIMDUTF_FEATURE_UTF16
// Presumably counts code points in UTF-16 input (LE / BE).
6656 simdutf_warn_unused virtual size_t
6657 count_utf16le(const char16_t *input, size_t length) const noexcept = 0;
6658
6674 simdutf_warn_unused virtual size_t
6675 count_utf16be(const char16_t *input, size_t length) const noexcept = 0;
6676#endif // SIMDUTF_FEATURE_UTF16
6677
6678#if SIMDUTF_FEATURE_UTF8
// Presumably counts code points in UTF-8 input.
6691 simdutf_warn_unused virtual size_t
6692 count_utf8(const char *input, size_t length) const noexcept = 0;
6693#endif // SIMDUTF_FEATURE_UTF8
6694
6695#if SIMDUTF_FEATURE_BASE64
// Base64 members. Note the mix: some are non-virtual, some virtual with a
// definition elsewhere, and the decode/encode hooks are pure virtual.

// Non-virtual (char and char16_t input); defined outside this chunk.
6709 simdutf_warn_unused size_t maximal_binary_length_from_base64(
6710 const char *input, size_t length) const noexcept;
6711
6726 simdutf_warn_unused size_t maximal_binary_length_from_base64(
6727 const char16_t *input, size_t length) const noexcept;
6728
// Virtual but not pure: a definition exists outside this chunk, subclasses
// may override.
6741 simdutf_warn_unused virtual size_t
6742 binary_length_from_base64(const char *input, size_t length) const noexcept;
6743
6757 simdutf_warn_unused virtual size_t
6758 binary_length_from_base64(const char16_t *input,
6759 size_t length) const noexcept;
6760
// Pure virtual decode hooks. The default arguments sit on virtual
// functions, so they are chosen by the static type used at the call site.
6793 simdutf_warn_unused virtual result
6794 base64_to_binary(const char *input, size_t length, char *output,
6795 base64_options options = base64_default,
6796 last_chunk_handling_options last_chunk_options =
6797 last_chunk_handling_options::loose) const noexcept = 0;
6798
// Same inputs, returning a full_result instead of a result.
6830 simdutf_warn_unused virtual full_result base64_to_binary_details(
6831 const char *input, size_t length, char *output,
6832 base64_options options = base64_default,
6833 last_chunk_handling_options last_chunk_options =
6834 last_chunk_handling_options::loose) const noexcept = 0;
6835
// char16_t-input counterparts of the two hooks above.
6869 simdutf_warn_unused virtual result
6870 base64_to_binary(const char16_t *input, size_t length, char *output,
6871 base64_options options = base64_default,
6872 last_chunk_handling_options last_chunk_options =
6873 last_chunk_handling_options::loose) const noexcept = 0;
6874
6906 simdutf_warn_unused virtual full_result base64_to_binary_details(
6907 const char16_t *input, size_t length, char *output,
6908 base64_options options = base64_default,
6909 last_chunk_handling_options last_chunk_options =
6910 last_chunk_handling_options::loose) const noexcept = 0;
6911
// Non-virtual; defined outside this chunk.
6920 simdutf_warn_unused size_t base64_length_from_binary(
6921 size_t length, base64_options options = base64_default) const noexcept;
6922
// Pure virtual encode hook; not marked simdutf_warn_unused.
6944 virtual size_t
6945 binary_to_base64(const char *input, size_t length, char *output,
6946 base64_options options = base64_default) const noexcept = 0;
6947
/**
 * Convert a binary input to a base64 output with lines of given length.
 *
 * Pure virtual: every concrete implementation must provide it.
 *
 * @param input       the binary input
 * @param length      the length of the input
 * @param output      destination buffer for the base64 text
 * @param line_length length of each output line; defaults to
 *                    `simdutf::default_line_length`
 * @param options     base64 variant; defaults to `base64_default`
 * @return a size_t; presumably the number of characters written -- confirm
 *         against the implementations
 */
6973 virtual size_t binary_to_base64_with_lines(
6974 const char *input, size_t length, char *output,
6975 size_t line_length = simdutf::default_line_length,
6976 base64_options options = base64_default) const noexcept = 0;
6977
/**
 * Find the first occurrence of a character in a string.
 *
 * Pure virtual: every concrete implementation must provide it.
 *
 * @param start     pointer to the first character to examine
 * @param end       pointer marking the end of the string
 * @param character the character to look for
 * @return pointer to the match; presumably `end` when there is none --
 *         confirm against the implementations
 */
6988 virtual const char *find(const char *start, const char *end,
6989 char character) const noexcept = 0;
/**
 * Find the first occurrence of a character in a string (16-bit characters).
 *
 * Pure virtual: every concrete implementation must provide it.
 *
 * @param start     pointer to the first character to examine
 * @param end       pointer marking the end of the string
 * @param character the character to look for
 * @return pointer to the match; presumably `end` when there is none --
 *         confirm against the implementations
 */
6990 virtual const char16_t *find(const char16_t *start, const char16_t *end,
6991 char16_t character) const noexcept = 0;
6992#endif // SIMDUTF_FEATURE_BASE64
6993
6994#ifdef SIMDUTF_INTERNAL_TESTS
6995 // This method is exported only in developer mode, its purpose
6996 // is to expose some internal test procedures from the given
6997 // implementation and then use them through our standard test
6998 // framework.
6999 //
7000 // Regular users should not use it, the tests of the public
7001 // API are enough.
7002
// One internal test exposed by an implementation: a display name plus the
// function that runs the test.
7003 struct TestProcedure {
7004 // display name
7005 std::string_view name;
7006
7007 // test body, invoked with the implementation under test
// NOTE(review): the pointer type returns void, so pass/fail is not reported
// through a return value (the earlier wording claimed it was).
7008 void (*procedure)(const implementation &);
7009 };
7010
// Returns the internal test procedures of this implementation. Only declared
// when SIMDUTF_INTERNAL_TESTS is defined; virtual but not pure, so a
// definition is expected elsewhere.
7011 virtual std::vector<TestProcedure> internal_tests() const;
7012#endif
7013
7014protected:
/**
 * Protected constructor, reachable only from derived classes.
 *
 * The two string pointers are stored as given; the strings themselves are
 * not copied.
 *
 * @param name                      stored in `_name`
 * @param description               stored in `_description`
 * @param required_instruction_sets stored in `_required_instruction_sets`
 */
7017 simdutf_really_inline implementation(const char *name,
7018 const char *description,
7019 uint32_t required_instruction_sets)
7020 : _name(name), _description(description),
7021 _required_instruction_sets(required_instruction_sets) {}
7022
7023protected:
// Defaulted destructor, declared in a protected section and without
// `virtual`: code outside the class hierarchy cannot destroy an object
// through an `implementation` pointer.
7024 ~implementation() = default;
7025
7026private:
// Name of this implementation, as passed to the constructor (pointer only).
7030 const char *_name;
7031
// Description of this implementation, as passed to the constructor
// (pointer only).
7035 const char *_description;
7036
// Required instruction sets, as passed to the constructor; const, so fixed
// for the lifetime of the object.
7040 const uint32_t _required_instruction_sets;
7041};
7042
7044namespace internal {
7045
7049class available_implementation_list {
7050public:
7052 simdutf_really_inline available_implementation_list() {}
7054 size_t size() const noexcept;
7056 const implementation *const *begin() const noexcept;
7058 const implementation *const *end() const noexcept;
7059
7073 const implementation *operator[](std::string_view name) const noexcept {
7074 for (const implementation *impl : *this) {
7075 if (impl->name() == name) {
7076 return impl;
7077 }
7078 }
7079 return nullptr;
7080 }
7081
7095 const implementation *detect_best_supported() const noexcept;
7096};
7097
/**
 * Thin wrapper around a `T *` that can be read, dereferenced and reassigned.
 *
 * When SIMDUTF_NO_THREADS is defined the pointer is a plain `T *`; otherwise
 * it is a `std::atomic<T *>` that is loaded and stored with the default
 * memory order.
 */
template <typename T> class atomic_ptr {
#if defined(SIMDUTF_NO_THREADS)
  using storage_type = T *;
#else
  using storage_type = std::atomic<T *>;
#endif

public:
  /** Wrap the given pointer. */
  atomic_ptr(T *_ptr) : ptr{_ptr} {}

  // read-only access
  operator const T *() const { return current(); }
  const T &operator*() const { return *current(); }
  const T *operator->() const { return current(); }

  // mutable access
  operator T *() { return current(); }
  T &operator*() { return *current(); }
  T *operator->() { return current(); }

  /** Replace the wrapped pointer. */
  atomic_ptr &operator=(T *_ptr) {
#if defined(SIMDUTF_NO_THREADS)
    ptr = _ptr;
#else
    ptr.store(_ptr);
#endif
    return *this;
  }

private:
  /** Read the wrapped pointer once. */
  T *current() const {
#if defined(SIMDUTF_NO_THREADS)
    return ptr;
#else
    return ptr.load();
#endif
  }

  storage_type ptr;
};
7137
// Forward declaration only; the class is defined elsewhere.
7138class detect_best_supported_implementation_on_first_use;
7139
7140} // namespace internal
7141
/**
 * Access the list of available implementations.
 *
 * @return a const reference to the `internal::available_implementation_list`
 */
7145extern SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list &
7146get_available_implementations();
7147
/**
 * Access the currently active implementation.
 *
 * The return type is a non-const reference to an
 * `internal::atomic_ptr<const implementation>`, which has `operator=(T *)`,
 * so callers can both read the active implementation and assign a different
 * one through the returned reference.
 */
7154extern SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation> &
7155get_active_implementation();
7156
7157} // namespace simdutf
7158
7159#if SIMDUTF_FEATURE_BASE64
7160 // this header is not part of the public api
7161 #include <simdutf/base64_implementation.h>
7162
7163namespace simdutf {
7164 #if SIMDUTF_SPAN
/**
 * Span overload of base64_to_binary_safe for byte-like input.
 *
 * @param input                 span of byte-like elements holding the base64
 *                              text
 * @param binary_output         span of byte-like elements receiving the
 *                              decoded bytes; its size() is used as the
 *                              output capacity
 * @param options               base64 variant; defaults to `base64_default`
 * @param last_chunk_options    handling of the final chunk; defaults to
 *                              `loose`
 * @param decode_up_to_bad_char forwarded unchanged to the implementation
 * @return a tuple of the `result` and `outlen`; `outlen` starts as
 *         binary_output.size() and is presumably updated by
 *         base64_to_binary_safe_impl (taken by reference?) -- confirm in
 *         base64_implementation.h
 */
7169simdutf_really_inline
7170 simdutf_constexpr23 simdutf_warn_unused std::tuple<result, std::size_t>
7171 base64_to_binary_safe(
7172 const detail::input_span_of_byte_like auto &input,
7173 detail::output_span_of_byte_like auto &&binary_output,
7174 base64_options options = base64_default,
7175 last_chunk_handling_options last_chunk_options = loose,
7176 bool decode_up_to_bad_char = false) noexcept {
7177 size_t outlen = binary_output.size();
7178 #if SIMDUTF_CPLUSPLUS23
// Constant-evaluated path: the pointers are passed without any cast.
// NOTE(review): `if consteval` does not discard its branch the way
// `if constexpr` does, so these static_asserts look like they are checked
// whenever the template is instantiated -- which would reject non-char
// element types in C++23 builds even for runtime calls. Confirm.
7179 if consteval {
7180 using CInput = std::decay_t<decltype(*input.data())>;
7181 static_assert(std::is_same_v<CInput, char>,
7182 "sorry, the constexpr implementation is for now limited to "
7183 "input of type char");
7184 using COutput = std::decay_t<decltype(*binary_output.data())>;
7185 static_assert(std::is_same_v<COutput, char>,
7186 "sorry, the constexpr implementation is for now limited to "
7187 "output of type char");
7188 auto r = base64_to_binary_safe_impl(
7189 input.data(), input.size(), binary_output.data(), outlen, options,
7190 last_chunk_options, decode_up_to_bad_char);
7191 return {r, outlen};
7192 } else
7193 #endif
// Runtime path: both buffers are viewed as char before calling the
// implementation.
7194 {
7195 auto r = base64_to_binary_safe_impl<char>(
7196 reinterpret_cast<const char *>(input.data()), input.size(),
7197 reinterpret_cast<char *>(binary_output.data()), outlen, options,
7198 last_chunk_options, decode_up_to_bad_char);
7199 return {r, outlen};
7200 }
7201}
7202
7203 #if SIMDUTF_SPAN
/**
 * Span overload of base64_to_binary_safe for base64 text stored as char16_t.
 *
 * @param input                 span of char16_t holding the base64 text
 * @param binary_output         span of byte-like elements receiving the
 *                              decoded bytes; its size() is used as the
 *                              output capacity
 * @param options               base64 variant; defaults to `base64_default`
 * @param last_chunk_options    handling of the final chunk; defaults to
 *                              `loose`
 * @param decode_up_to_bad_char forwarded unchanged to the callee
 * @return a tuple of the `result` and `outlen`; `outlen` starts as
 *         binary_output.size() and is presumably updated by the callee
 *         (taken by reference?) -- confirm against its declaration
 */
7208simdutf_really_inline
7209 simdutf_warn_unused simdutf_constexpr23 std::tuple<result, std::size_t>
7210 base64_to_binary_safe(
7211 std::span<const char16_t> input,
7212 detail::output_span_of_byte_like auto &&binary_output,
7213 base64_options options = base64_default,
7214 last_chunk_handling_options last_chunk_options = loose,
7215 bool decode_up_to_bad_char = false) noexcept {
7216 size_t outlen = binary_output.size();
7217 #if SIMDUTF_CPLUSPLUS23
// Constant-evaluated path: calls base64_to_binary_safe_impl with the output
// pointer passed without a cast.
7218 if consteval {
7219 auto r = base64_to_binary_safe_impl(
7220 input.data(), input.size(), binary_output.data(), outlen, options,
7221 last_chunk_options, decode_up_to_bad_char);
7222 return {r, outlen};
7223 } else
7224 #endif
// Runtime path: calls base64_to_binary_safe with raw pointers (an overload
// declared outside this block), viewing the output buffer as char.
7225 {
7226 auto r = base64_to_binary_safe(
7227 input.data(), input.size(),
7228 reinterpret_cast<char *>(binary_output.data()), outlen, options,
7229 last_chunk_options, decode_up_to_bad_char);
7230 return {r, outlen};
7231 }
7232}
7233 #endif // SIMDUTF_SPAN
7234
7235 #endif // SIMDUTF_SPAN
7236} // namespace simdutf
7237
7238#endif // SIMDUTF_FEATURE_BASE64
7239
7240#if SIMDUTF_CPLUSPLUS23 && SIMDUTF_FEATURE_BASE64
7241
7242namespace simdutf {
7243namespace literals {
7244
7245namespace detail {
7246
7247// the detail namespace is not part of the public api
7248
// Holds a copy of the first N - 1 characters of a character array of length
// N. For a string literal the dropped last element is the terminating '\0'.
// Used below as the type of a non-type template parameter.
7249template <std::size_t N> struct base64_literal_helper {
7250 std::array<char, N - 1> storage{};
// Number of stored characters (N - 1).
7251 static constexpr std::size_t size() noexcept { return N - 1; }
// Copies str[0 .. N-2] into storage; compile-time only (consteval).
7252 consteval base64_literal_helper(const char (&str)[N]) {
7253 for (std::size_t i = 0; i < size(); i++) {
7254 storage[i] = str[i];
7255 }
7256 }
7257};
7258
// Output of decoding InputLen base64 characters: a fixed-size buffer plus
// the count of bytes reported by the decoder.
7259template <std::size_t InputLen> struct base64_decode_result {
// Buffer capacity: InputLen rounded up to a multiple of 4 characters, with
// 3 bytes for each group of 4.
7260 static constexpr std::size_t max_out = (InputLen + 3) / 4 * 3;
7261 std::array<char, max_out> buffer{};
// Set from the decoder's output_count; zero until then.
7262 std::size_t output_count{};
7263};
7264
// Decodes InputLen base64 characters starting at str, at compile time, using
// the scalar decoder with base64_default and loose last-chunk handling.
// Returns a base64_decode_result holding the decoded bytes and their count.
7265template <std::size_t InputLen>
7266consteval auto base64_decode_literal(const char *str) {
7267 base64_decode_result<InputLen> result{};
7268 auto r = scalar::base64::base64_to_binary_details_impl(
7269 str, InputLen, result.buffer.data(), base64_default, loose);
// Any decoder error ends in std::unreachable(); presumably this is meant to
// make the constant evaluation fail, i.e. reject the literal at compile time.
7270 if (r.error != error_code::SUCCESS) {
7271 std::unreachable(); // invalid base64 input in _base64 literal
7272 }
7273 result.output_count = r.output_count;
7274 return result;
7275}
7276
// Decodes the literal held in `a` and returns the decoded bytes as a
// std::array<char, K>, where K is the decoder's reported output_count.
7277template <base64_literal_helper a> consteval auto base64_make_array() {
// `decoded` is constexpr so that its output_count can be used as the array
// size on the next line.
7278 constexpr auto decoded = base64_decode_literal<a.size()>(a.storage.data());
7279 std::array<char, decoded.output_count> ret{};
// Copy only the bytes actually produced; the decode buffer may be larger.
7280 for (std::size_t i = 0; i < decoded.output_count; i++) {
7281 ret[i] = decoded.buffer[i];
7282 }
7283 return ret;
7284}
7285
7286} // namespace detail
7287
// String literal operator template for the `_base64` suffix: the literal's
// characters arrive as the template argument `a`, and the result is whatever
// detail::base64_make_array<a>() returns (a std::array<char, K> of decoded
// bytes). consteval, so it is evaluated at compile time only.
7299template <detail::base64_literal_helper a> consteval auto operator""_base64() {
7300 return detail::base64_make_array<a>();
7301}
7302
7303} // namespace literals
7304} // namespace simdutf
7305
7306#endif // SIMDUTF_CPLUSPLUS23 && SIMDUTF_FEATURE_BASE64
7307
7308#endif // SIMDUTF_IMPLEMENTATION_H
An implementation of simdutf for a particular CPU architecture.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-32 string into Latin1 string.
virtual simdutf_warn_unused size_t binary_length_from_base64(const char *input, size_t length) const noexcept
Compute the binary length from a base64 input with ASCII spaces.
virtual const char * find(const char *start, const char *end, char character) const noexcept=0
Find the first occurrence of a character in a string.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-16LE string.
virtual simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept
Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
virtual simdutf_warn_unused size_t convert_utf16le_to_utf8_with_replacement(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-8 string, replacing unpaired surrogates with the Uni...
virtual simdutf_warn_unused size_t binary_length_from_base64(const char16_t *input, size_t length) const noexcept
Compute the binary length from a base64 input with ASCII spaces.
virtual simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16BE string would require in UTF-32 format.
virtual simdutf_warn_unused size_t convert_utf8_to_utf16be(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16BE string.
virtual simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-16BE string into UTF-32 string.
virtual std::string_view name() const noexcept
The name of this implementation.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-16BE string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_latin1_to_utf16le(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert Latin1 string into UTF-16LE string.
virtual simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16BE string.
virtual simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into Latin1 string.
virtual simdutf_warn_unused full_result base64_to_binary_details(const char *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output while returning more details than base64_to_binary.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-16LE string into UTF-32 string.
virtual simdutf_warn_unused size_t latin1_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-8 string would require in Latin1 format.
virtual simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-32 string into Latin1 string and stop on error.
virtual simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
virtual simdutf_warn_unused size_t convert_latin1_to_utf16be(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert Latin1 string into UTF-16BE string.
virtual simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept
Compute the number of bytes that this UTF-32 string would require in Latin1 format.
virtual simdutf_warn_unused full_result base64_to_binary_details(const char16_t *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output while returning more details than base64_to_binary.
virtual simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16LE string.
virtual simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept=0
Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
virtual simdutf_warn_unused size_t count_utf8(const char *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-16BE string.
virtual simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of 4-byte code units that this UTF-8 string would require in UTF-32 format.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-16LE string into UTF-8 string.
virtual void to_well_formed_utf16le(const char16_t *input, size_t len, char16_t *output) const noexcept=0
Copies the UTF-16LE string while replacing mismatched surrogates with the Unicode replacement charact...
virtual simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16BE string and stop on error.
virtual simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept=0
Validate the UTF-8 string and stop on errors.
virtual simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
virtual size_t binary_to_base64_with_lines(const char *input, size_t length, char *output, size_t line_length=simdutf::default_line_length, base64_options options=base64_default) const noexcept=0
Convert a binary input to a base64 output with lines of given length.
virtual simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf8_to_utf16le(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16LE string.
virtual simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused result utf8_length_from_utf16le_with_replacement(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16LE string would require in UTF-8 format even when the UTF...
virtual simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16LE string and stop on error.
virtual simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-8 string and stop on error.
virtual size_t binary_to_base64(const char *input, size_t length, char *output, base64_options options=base64_default) const noexcept=0
Convert a binary input to a base64 output.
virtual simdutf_warn_unused result utf8_length_from_utf16be_with_replacement(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16BE string would require in UTF-8 format even when the UTF...
virtual simdutf_warn_unused size_t convert_latin1_to_utf32(const char *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert Latin1 string into UTF-32 string.
virtual simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char *input, size_t length, char32_t *utf32_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-8 string.
virtual simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept
Return the number of 4-byte code units that this Latin1 string would require in UTF-32 format.
virtual std::string_view description() const noexcept
The description of this implementation.
virtual simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16LE string. This function may be best when you expect the input to be almost always ...
virtual simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16LE string and stop on error.
virtual simdutf_warn_unused size_t count_utf16le(const char16_t *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-32 string into Latin1 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-16BE string into Latin1 string.
bool supported_by_runtime_system() const
The instruction sets this implementation is compiled against and the current CPU match.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16BE string and stop on error.
virtual simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-8 string and stop on error.
virtual simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-32 string and stop on error.
virtual simdutf_warn_unused result base64_to_binary(const char *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output.
virtual simdutf_warn_unused bool validate_utf16be_as_ascii(const char16_t *buf, size_t len) const noexcept=0
Validate the ASCII string as a UTF-16BE sequence.
virtual simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16BE string.
virtual simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept=0
Validate the UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16LE string would require in UTF-32 format.
virtual void change_endianness_utf16(const char16_t *input, size_t length, char16_t *output) const noexcept=0
Change the endianness of the input.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-16BE string.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert valid UTF-8 string into Latin1 string.
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char *input, size_t length) const noexcept
Provide the maximal binary length in bytes given the base64 input.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-16LE string.
virtual simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept=0
Validate the ASCII string and stop on error.
virtual simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept=0
Validate the UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf8_to_utf32(const char *input, size_t length, char32_t *utf32_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-32 string.
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char16_t *input, size_t length) const noexcept
Provide the maximal binary length in bytes given the base64 input.
virtual simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept=0
Validate the UTF-32 string.
virtual simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t convert_utf16be_to_utf8_with_replacement(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-8 string, replacing unpaired surrogates with the Uni...
virtual simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-8 string.
virtual simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
virtual simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into Latin1 string.
virtual simdutf_warn_unused size_t convert_utf8_to_latin1(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert possibly broken UTF-8 string into Latin1 string.
virtual simdutf_warn_unused result base64_to_binary(const char16_t *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output.
simdutf_warn_unused size_t base64_length_from_binary(size_t length, base64_options options=base64_default) const noexcept
Provide the base64 length in bytes given the length of a binary input.
virtual int detect_encodings(const char *input, size_t length) const noexcept=0
This function will try to detect the possible encodings in one pass.
virtual simdutf_warn_unused size_t count_utf16be(const char16_t *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-8 string and stop on error.
virtual simdutf_warn_unused bool validate_utf16le_as_ascii(const char16_t *buf, size_t len) const noexcept=0
Validate the ASCII string as a UTF-16LE sequence.
virtual void to_well_formed_utf16be(const char16_t *input, size_t len, char16_t *output) const noexcept=0
Copies the UTF-16BE string while replacing mismatched surrogates with the Unicode replacement charact...
virtual simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept=0
Validate the ASCII string.
virtual simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
virtual simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16BE string and stop on error.
virtual simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert possibly broken UTF-8 string into Latin1 string and stop on error.
virtual simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept
Return the number of 2-byte code units that this Latin1 string would require in UTF-16 format.
virtual simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16LE string and stop on error.
virtual simdutf_warn_unused size_t convert_latin1_to_utf8(const char *input, size_t length, char *utf8_output) const noexcept=0
Convert Latin1 string into UTF-8 string.
virtual encoding_type autodetect_encoding(const char *input, size_t length) const noexcept
This function will try to detect the encoding.
virtual simdutf_warn_unused size_t utf8_length_from_latin1(const char *input, size_t length) const noexcept=0
Return the number of bytes that this Latin1 string would require in UTF-8 format.