simdutf 8.2.0
Unicode at GB/s.
Loading...
Searching...
No Matches
implementation.h
1#ifndef SIMDUTF_IMPLEMENTATION_H
2#define SIMDUTF_IMPLEMENTATION_H
3#if !defined(SIMDUTF_NO_THREADS)
4 #include <atomic>
5#endif
6#include <string>
7#ifdef SIMDUTF_INTERNAL_TESTS
8 #include <vector>
9#endif
10#include "simdutf/common_defs.h"
11#include "simdutf/compiler_check.h"
12#include "simdutf/encoding_types.h"
13#include "simdutf/error.h"
14#include "simdutf/internal/isadetection.h"
15
16#if SIMDUTF_SPAN
17 #include <concepts>
18 #include <type_traits>
19 #include <span>
20 #include <tuple>
21#endif
22#if SIMDUTF_CPLUSPLUS17
23 #include <string_view>
24#endif
25// The following defines are conditionally enabled/disabled during amalgamation.
26// By default all features are enabled, regular code shouldn't check them. Only
27// when user code really relies on a selected subset, it's good to verify these
28// flags, like:
29//
30// #if !SIMDUTF_FEATURE_UTF16
31// # error("Please amalgamate simdutf with UTF-16 support")
32// #endif
33//
34#define SIMDUTF_FEATURE_DETECT_ENCODING 1
35#define SIMDUTF_FEATURE_ASCII 1
36#define SIMDUTF_FEATURE_LATIN1 1
37#define SIMDUTF_FEATURE_UTF8 1
38#define SIMDUTF_FEATURE_UTF16 1
39#define SIMDUTF_FEATURE_UTF32 1
40#define SIMDUTF_FEATURE_BASE64 1
41
42#if SIMDUTF_CPLUSPLUS23
43 #include <simdutf/constexpr_ptr.h>
44#endif
45
46#if SIMDUTF_SPAN
48namespace simdutf {
49namespace detail {
54template <typename T>
55concept byte_like = std::is_same_v<T, std::byte> || //
56 std::is_same_v<T, char> || //
57 std::is_same_v<T, signed char> || //
58 std::is_same_v<T, unsigned char> || //
59 std::is_same_v<T, char8_t>;
60
61template <typename T>
62concept is_byte_like = byte_like<std::remove_cvref_t<T>>;
63
64template <typename T>
65concept is_pointer = std::is_pointer_v<T>;
66
72template <typename T>
73concept input_span_of_byte_like = requires(const T &t) {
74 { t.size() } noexcept -> std::convertible_to<std::size_t>;
75 { t.data() } noexcept -> is_pointer;
76 { *t.data() } noexcept -> is_byte_like;
77};
78
79template <typename T>
80concept is_mutable = !std::is_const_v<std::remove_reference_t<T>>;
81
85template <typename T>
86concept output_span_of_byte_like = requires(T &t) {
87 { t.size() } noexcept -> std::convertible_to<std::size_t>;
88 { t.data() } noexcept -> is_pointer;
89 { *t.data() } noexcept -> is_byte_like;
90 { *t.data() } noexcept -> is_mutable;
91};
92
98template <class InputPtr>
99concept indexes_into_byte_like = requires(InputPtr p) {
100 { std::decay_t<decltype(p[0])>{} } -> simdutf::detail::byte_like;
101};
102template <class InputPtr>
103concept indexes_into_utf16 = requires(InputPtr p) {
104 { std::decay_t<decltype(p[0])>{} } -> std::same_as<char16_t>;
105};
106template <class InputPtr>
107concept indexes_into_utf32 = requires(InputPtr p) {
108 { std::decay_t<decltype(p[0])>{} } -> std::same_as<char32_t>;
109};
110
111template <class InputPtr>
112concept index_assignable_from_char = requires(InputPtr p, char s) {
113 { p[0] = s };
114};
115
120template <class InputPtr>
121concept indexes_into_uint32 = requires(InputPtr p) {
122 { std::decay_t<decltype(p[0])>{} } -> std::same_as<std::uint32_t>;
123};
124} // namespace detail
125} // namespace simdutf
126#endif // SIMDUTF_SPAN
127
128// these includes are needed for constexpr support. they are
129// not part of the public api.
130#include <simdutf/scalar/swap_bytes.h>
131#include <simdutf/scalar/ascii.h>
132#include <simdutf/scalar/atomic_util.h>
133#include <simdutf/scalar/latin1.h>
134#include <simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h>
135#include <simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h>
136#include <simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h>
137#include <simdutf/scalar/utf16.h>
138#include <simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h>
139#include <simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h>
140#include <simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h>
141#include <simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h>
142#include <simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h>
143#include <simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h>
144#include <simdutf/scalar/utf32.h>
145#include <simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h>
146#include <simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h>
147#include <simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h>
148#include <simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h>
149#include <simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h>
150#include <simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h>
151#include <simdutf/scalar/utf8.h>
152#include <simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h>
153#include <simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h>
154#include <simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h>
155#include <simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h>
156#include <simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h>
157#include <simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h>
158
159namespace simdutf {
160
/// Default line length constant.
/// NOTE(review): presumably the base64 line-wrapping default (76 matches
/// MIME) -- confirm against the base64 API that consumes it.
constexpr size_t default_line_length{76};
163
164#if SIMDUTF_FEATURE_DETECT_ENCODING
175simdutf_warn_unused simdutf::encoding_type
176autodetect_encoding(const char *input, size_t length) noexcept;
177simdutf_really_inline simdutf_warn_unused simdutf::encoding_type
178autodetect_encoding(const uint8_t *input, size_t length) noexcept {
179 return autodetect_encoding(reinterpret_cast<const char *>(input), length);
180}
181 #if SIMDUTF_SPAN
193simdutf_really_inline simdutf_warn_unused simdutf::encoding_type
194autodetect_encoding(
195 const detail::input_span_of_byte_like auto &input) noexcept {
196 return autodetect_encoding(reinterpret_cast<const char *>(input.data()),
197 input.size());
198}
199 #endif // SIMDUTF_SPAN
200
212simdutf_warn_unused int detect_encodings(const char *input,
213 size_t length) noexcept;
214simdutf_really_inline simdutf_warn_unused int
215detect_encodings(const uint8_t *input, size_t length) noexcept {
216 return detect_encodings(reinterpret_cast<const char *>(input), length);
217}
218 #if SIMDUTF_SPAN
219simdutf_really_inline simdutf_warn_unused int
220detect_encodings(const detail::input_span_of_byte_like auto &input) noexcept {
221 return detect_encodings(reinterpret_cast<const char *>(input.data()),
222 input.size());
223}
224 #endif // SIMDUTF_SPAN
225#endif // SIMDUTF_FEATURE_DETECT_ENCODING
226
227#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
239simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept;
240 #if SIMDUTF_SPAN
241simdutf_constexpr23 simdutf_really_inline simdutf_warn_unused bool
242validate_utf8(const detail::input_span_of_byte_like auto &input) noexcept {
243 #if SIMDUTF_CPLUSPLUS23
244 if consteval {
245 return scalar::utf8::validate(
246 detail::constexpr_cast_ptr<uint8_t>(input.data()), input.size());
247 } else
248 #endif
249 {
250 return validate_utf8(reinterpret_cast<const char *>(input.data()),
251 input.size());
252 }
253}
254 #endif // SIMDUTF_SPAN
255#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
256
257#if SIMDUTF_FEATURE_UTF8
270simdutf_warn_unused result validate_utf8_with_errors(const char *buf,
271 size_t len) noexcept;
272 #if SIMDUTF_SPAN
273simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused result
274validate_utf8_with_errors(
275 const detail::input_span_of_byte_like auto &input) noexcept {
276 #if SIMDUTF_CPLUSPLUS23
277 if consteval {
278 return scalar::utf8::validate_with_errors(
279 detail::constexpr_cast_ptr<uint8_t>(input.data()), input.size());
280 } else
281 #endif
282 {
283 return validate_utf8_with_errors(
284 reinterpret_cast<const char *>(input.data()), input.size());
285 }
286}
287 #endif // SIMDUTF_SPAN
288#endif // SIMDUTF_FEATURE_UTF8
289
290#if SIMDUTF_FEATURE_ASCII
300simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept;
301 #if SIMDUTF_SPAN
302simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
303validate_ascii(const detail::input_span_of_byte_like auto &input) noexcept {
304 #if SIMDUTF_CPLUSPLUS23
305 if consteval {
306 return scalar::ascii::validate(
307 detail::constexpr_cast_ptr<std::uint8_t>(input.data()), input.size());
308 } else
309 #endif
310 {
311 return validate_ascii(reinterpret_cast<const char *>(input.data()),
312 input.size());
313 }
314}
315 #endif // SIMDUTF_SPAN
316
330simdutf_warn_unused result validate_ascii_with_errors(const char *buf,
331 size_t len) noexcept;
332 #if SIMDUTF_SPAN
333simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
334validate_ascii_with_errors(
335 const detail::input_span_of_byte_like auto &input) noexcept {
336 #if SIMDUTF_CPLUSPLUS23
337 if consteval {
338 return scalar::ascii::validate_with_errors(
339 detail::constexpr_cast_ptr<std::uint8_t>(input.data()), input.size());
340 } else
341 #endif
342 {
343 return validate_ascii_with_errors(
344 reinterpret_cast<const char *>(input.data()), input.size());
345 }
346}
347 #endif // SIMDUTF_SPAN
348#endif // SIMDUTF_FEATURE_ASCII
349
350#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
362simdutf_warn_unused bool validate_utf16_as_ascii(const char16_t *buf,
363 size_t len) noexcept;
364 #if SIMDUTF_SPAN
365simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
366validate_utf16_as_ascii(std::span<const char16_t> input) noexcept {
367 #if SIMDUTF_CPLUSPLUS23
368 if consteval {
369 return scalar::utf16::validate_as_ascii<endianness::NATIVE>(input.data(),
370 input.size());
371 } else
372 #endif
373 {
374 return validate_utf16_as_ascii(input.data(), input.size());
375 }
376}
377 #endif // SIMDUTF_SPAN
378
390simdutf_warn_unused bool validate_utf16be_as_ascii(const char16_t *buf,
391 size_t len) noexcept;
392 #if SIMDUTF_SPAN
393simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
394validate_utf16be_as_ascii(std::span<const char16_t> input) noexcept {
395 #if SIMDUTF_CPLUSPLUS23
396 if consteval {
397 return scalar::utf16::validate_as_ascii<endianness::BIG>(input.data(),
398 input.size());
399 } else
400 #endif
401 {
402 return validate_utf16be_as_ascii(input.data(), input.size());
403 }
404}
405 #endif // SIMDUTF_SPAN
406
418simdutf_warn_unused bool validate_utf16le_as_ascii(const char16_t *buf,
419 size_t len) noexcept;
420 #if SIMDUTF_SPAN
421simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
422validate_utf16le_as_ascii(std::span<const char16_t> input) noexcept {
423 #if SIMDUTF_CPLUSPLUS23
424 if consteval {
425 return scalar::utf16::validate_as_ascii<endianness::LITTLE>(input.data(),
426 input.size());
427 } else
428 #endif
429 {
430 return validate_utf16le_as_ascii(input.data(), input.size());
431 }
432}
433 #endif // SIMDUTF_SPAN
434#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
435
436#if SIMDUTF_FEATURE_UTF16
451simdutf_warn_unused bool validate_utf16(const char16_t *buf,
452 size_t len) noexcept;
453 #if SIMDUTF_SPAN
454simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
455validate_utf16(std::span<const char16_t> input) noexcept {
456 #if SIMDUTF_CPLUSPLUS23
457 if consteval {
458 return scalar::utf16::validate<endianness::NATIVE>(input.data(),
459 input.size());
460 } else
461 #endif
462 {
463 return validate_utf16(input.data(), input.size());
464 }
465}
466 #endif // SIMDUTF_SPAN
467#endif // SIMDUTF_FEATURE_UTF16
468
469#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
484simdutf_warn_unused bool validate_utf16le(const char16_t *buf,
485 size_t len) noexcept;
486 #if SIMDUTF_SPAN
487simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused bool
488validate_utf16le(std::span<const char16_t> input) noexcept {
489 #if SIMDUTF_CPLUSPLUS23
490 if consteval {
491 return scalar::utf16::validate<endianness::LITTLE>(input.data(),
492 input.size());
493 } else
494 #endif
495 {
496 return validate_utf16le(input.data(), input.size());
497 }
498}
499 #endif // SIMDUTF_SPAN
500#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
501
502#if SIMDUTF_FEATURE_UTF16
517simdutf_warn_unused bool validate_utf16be(const char16_t *buf,
518 size_t len) noexcept;
519 #if SIMDUTF_SPAN
520simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
521validate_utf16be(std::span<const char16_t> input) noexcept {
522 #if SIMDUTF_CPLUSPLUS23
523 if consteval {
524 return scalar::utf16::validate<endianness::BIG>(input.data(), input.size());
525 } else
526 #endif
527 {
528 return validate_utf16be(input.data(), input.size());
529 }
530}
531 #endif // SIMDUTF_SPAN
532
550simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf,
551 size_t len) noexcept;
552 #if SIMDUTF_SPAN
553simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
554validate_utf16_with_errors(std::span<const char16_t> input) noexcept {
555 #if SIMDUTF_CPLUSPLUS23
556 if consteval {
557 return scalar::utf16::validate_with_errors<endianness::NATIVE>(
558 input.data(), input.size());
559 } else
560 #endif
561 {
562 return validate_utf16_with_errors(input.data(), input.size());
563 }
564}
565 #endif // SIMDUTF_SPAN
566
583simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf,
584 size_t len) noexcept;
585 #if SIMDUTF_SPAN
586simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
587validate_utf16le_with_errors(std::span<const char16_t> input) noexcept {
588 #if SIMDUTF_CPLUSPLUS23
589 if consteval {
590 return scalar::utf16::validate_with_errors<endianness::LITTLE>(
591 input.data(), input.size());
592 } else
593 #endif
594 {
595 return validate_utf16le_with_errors(input.data(), input.size());
596 }
597}
598 #endif // SIMDUTF_SPAN
599
616simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf,
617 size_t len) noexcept;
618 #if SIMDUTF_SPAN
619simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
620validate_utf16be_with_errors(std::span<const char16_t> input) noexcept {
621 #if SIMDUTF_CPLUSPLUS23
622 if consteval {
623 return scalar::utf16::validate_with_errors<endianness::BIG>(input.data(),
624 input.size());
625 } else
626 #endif
627 {
628 return validate_utf16be_with_errors(input.data(), input.size());
629 }
630}
631 #endif // SIMDUTF_SPAN
632
645void to_well_formed_utf16le(const char16_t *input, size_t len,
646 char16_t *output) noexcept;
647 #if SIMDUTF_SPAN
648simdutf_really_inline simdutf_constexpr23 void
649to_well_formed_utf16le(std::span<const char16_t> input,
650 std::span<char16_t> output) noexcept {
651 #if SIMDUTF_CPLUSPLUS23
652 if consteval {
653 scalar::utf16::to_well_formed_utf16<endianness::LITTLE>(
654 input.data(), input.size(), output.data());
655 } else
656 #endif
657 {
658 to_well_formed_utf16le(input.data(), input.size(), output.data());
659 }
660}
661 #endif // SIMDUTF_SPAN
662
675void to_well_formed_utf16be(const char16_t *input, size_t len,
676 char16_t *output) noexcept;
677 #if SIMDUTF_SPAN
678simdutf_really_inline simdutf_constexpr23 void
679to_well_formed_utf16be(std::span<const char16_t> input,
680 std::span<char16_t> output) noexcept {
681 #if SIMDUTF_CPLUSPLUS23
682 if consteval {
683 scalar::utf16::to_well_formed_utf16<endianness::BIG>(
684 input.data(), input.size(), output.data());
685 } else
686 #endif
687 {
688 to_well_formed_utf16be(input.data(), input.size(), output.data());
689 }
690}
691 #endif // SIMDUTF_SPAN
692
705void to_well_formed_utf16(const char16_t *input, size_t len,
706 char16_t *output) noexcept;
707 #if SIMDUTF_SPAN
708simdutf_really_inline simdutf_constexpr23 void
709to_well_formed_utf16(std::span<const char16_t> input,
710 std::span<char16_t> output) noexcept {
711 #if SIMDUTF_CPLUSPLUS23
712 if consteval {
713 scalar::utf16::to_well_formed_utf16<endianness::NATIVE>(
714 input.data(), input.size(), output.data());
715 } else
716 #endif
717 {
718 to_well_formed_utf16(input.data(), input.size(), output.data());
719 }
720}
721 #endif // SIMDUTF_SPAN
722
723#endif // SIMDUTF_FEATURE_UTF16
724
725#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
740simdutf_warn_unused bool validate_utf32(const char32_t *buf,
741 size_t len) noexcept;
742 #if SIMDUTF_SPAN
743simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
744validate_utf32(std::span<const char32_t> input) noexcept {
745 #if SIMDUTF_CPLUSPLUS23
746 if consteval {
747 return scalar::utf32::validate(
748 detail::constexpr_cast_ptr<std::uint32_t>(input.data()), input.size());
749 } else
750 #endif
751 {
752 return validate_utf32(input.data(), input.size());
753 }
754}
755 #endif // SIMDUTF_SPAN
756#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
757
758#if SIMDUTF_FEATURE_UTF32
775simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf,
776 size_t len) noexcept;
777 #if SIMDUTF_SPAN
778simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
779validate_utf32_with_errors(std::span<const char32_t> input) noexcept {
780 #if SIMDUTF_CPLUSPLUS23
781 if consteval {
782 return scalar::utf32::validate_with_errors(
783 detail::constexpr_cast_ptr<std::uint32_t>(input.data()), input.size());
784 } else
785 #endif
786 {
787 return validate_utf32_with_errors(input.data(), input.size());
788 }
789}
790 #endif // SIMDUTF_SPAN
791#endif // SIMDUTF_FEATURE_UTF32
792
793#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
804simdutf_warn_unused size_t convert_latin1_to_utf8(const char *input,
805 size_t length,
806 char *utf8_output) noexcept;
807 #if SIMDUTF_SPAN
808simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
809convert_latin1_to_utf8(
810 const detail::input_span_of_byte_like auto &latin1_input,
811 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
812 #if SIMDUTF_CPLUSPLUS23
813 if consteval {
814 return scalar::latin1_to_utf8::convert(
815 detail::constexpr_cast_ptr<char>(latin1_input.data()),
816 latin1_input.size(),
817 detail::constexpr_cast_writeptr<char>(utf8_output.data()));
818 } else
819 #endif
820 {
821 return convert_latin1_to_utf8(
822 reinterpret_cast<const char *>(latin1_input.data()),
823 latin1_input.size(), reinterpret_cast<char *>(utf8_output.data()));
824 }
825}
826 #endif // SIMDUTF_SPAN
827
841simdutf_warn_unused size_t
842convert_latin1_to_utf8_safe(const char *input, size_t length, char *utf8_output,
843 size_t utf8_len) noexcept;
844 #if SIMDUTF_SPAN
845simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
846convert_latin1_to_utf8_safe(
847 const detail::input_span_of_byte_like auto &input,
848 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
849 // implementation note: outputspan is a forwarding ref to avoid copying
850 // and allow both lvalues and rvalues. std::span can be copied without
851 // problems, but std::vector should not, and this function should accept
852 // both. it will allow using an owning rvalue ref (example: passing a
853 // temporary std::string) as output, but the user will quickly find out
854 // that he has no way of getting the data out of the object in that case.
855 #if SIMDUTF_CPLUSPLUS23
856 if consteval {
857 return scalar::latin1_to_utf8::convert_safe_constexpr(
858 input.data(), input.size(), utf8_output.data(), utf8_output.size());
859 } else
860 #endif
861 {
862 return convert_latin1_to_utf8_safe(
863 reinterpret_cast<const char *>(input.data()), input.size(),
864 reinterpret_cast<char *>(utf8_output.data()), utf8_output.size());
865 }
866}
867 #endif // SIMDUTF_SPAN
868#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
869
870#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
881simdutf_warn_unused size_t convert_latin1_to_utf16le(
882 const char *input, size_t length, char16_t *utf16_output) noexcept;
883 #if SIMDUTF_SPAN
884simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
885convert_latin1_to_utf16le(
886 const detail::input_span_of_byte_like auto &latin1_input,
887 std::span<char16_t> utf16_output) noexcept {
888 #if SIMDUTF_CPLUSPLUS23
889 if consteval {
890 return scalar::latin1_to_utf16::convert<endianness::LITTLE>(
891 latin1_input.data(), latin1_input.size(), utf16_output.data());
892 } else
893 #endif
894 {
895 return convert_latin1_to_utf16le(
896 reinterpret_cast<const char *>(latin1_input.data()),
897 latin1_input.size(), utf16_output.data());
898 }
899}
900 #endif // SIMDUTF_SPAN
901
912simdutf_warn_unused size_t convert_latin1_to_utf16be(
913 const char *input, size_t length, char16_t *utf16_output) noexcept;
914 #if SIMDUTF_SPAN
915simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
916convert_latin1_to_utf16be(const detail::input_span_of_byte_like auto &input,
917 std::span<char16_t> output) noexcept {
918 #if SIMDUTF_CPLUSPLUS23
919 if consteval {
920 return scalar::latin1_to_utf16::convert<endianness::BIG>(
921 input.data(), input.size(), output.data());
922 } else
923 #endif
924 {
925 return convert_latin1_to_utf16be(
926 reinterpret_cast<const char *>(input.data()), input.size(),
927 output.data());
928 }
929}
930 #endif // SIMDUTF_SPAN
939simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
940latin1_length_from_utf16(size_t length) noexcept {
941 return length;
942}
943
952simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
953utf16_length_from_latin1(size_t length) noexcept {
954 return length;
955}
956#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
957
958#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
969simdutf_warn_unused size_t convert_latin1_to_utf32(
970 const char *input, size_t length, char32_t *utf32_buffer) noexcept;
971 #if SIMDUTF_SPAN
972simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
973convert_latin1_to_utf32(
974 const detail::input_span_of_byte_like auto &latin1_input,
975 std::span<char32_t> utf32_output) noexcept {
976 #if SIMDUTF_CPLUSPLUS23
977 if consteval {
978 return scalar::latin1_to_utf32::convert(
979 latin1_input.data(), latin1_input.size(), utf32_output.data());
980 } else
981 #endif
982 {
983 return convert_latin1_to_utf32(
984 reinterpret_cast<const char *>(latin1_input.data()),
985 latin1_input.size(), utf32_output.data());
986 }
987}
988 #endif // SIMDUTF_SPAN
989#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
990
991#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1004simdutf_warn_unused size_t convert_utf8_to_latin1(const char *input,
1005 size_t length,
1006 char *latin1_output) noexcept;
1007 #if SIMDUTF_SPAN
1008simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1009convert_utf8_to_latin1(
1010 const detail::input_span_of_byte_like auto &input,
1011 detail::output_span_of_byte_like auto &&output) noexcept {
1012 #if SIMDUTF_CPLUSPLUS23
1013 if consteval {
1014 return scalar::utf8_to_latin1::convert(input.data(), input.size(),
1015 output.data());
1016 } else
1017 #endif
1018 {
1019 return convert_utf8_to_latin1(reinterpret_cast<const char *>(input.data()),
1020 input.size(),
1021 reinterpret_cast<char *>(output.data()));
1022 }
1023}
1024 #endif // SIMDUTF_SPAN
1025#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1026
1027#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1041simdutf_warn_unused size_t convert_utf8_to_utf16(
1042 const char *input, size_t length, char16_t *utf16_output) noexcept;
1043 #if SIMDUTF_SPAN
1044simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1045convert_utf8_to_utf16(const detail::input_span_of_byte_like auto &input,
1046 std::span<char16_t> output) noexcept {
1047 #if SIMDUTF_CPLUSPLUS23
1048 if consteval {
1049 return scalar::utf8_to_utf16::convert<endianness::NATIVE>(
1050 input.data(), input.size(), output.data());
1051 } else
1052 #endif
1053 {
1054 return convert_utf8_to_utf16(reinterpret_cast<const char *>(input.data()),
1055 input.size(), output.data());
1056 }
1057}
1058 #endif // SIMDUTF_SPAN
1059
1077simdutf_warn_unused result utf8_length_from_utf16le_with_replacement(
1078 const char16_t *input, size_t length) noexcept;
1079 #if SIMDUTF_SPAN
1080simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused result
1081utf8_length_from_utf16le_with_replacement(
1082 std::span<const char16_t> valid_utf16_input) noexcept {
1083 #if SIMDUTF_CPLUSPLUS23
1084 if consteval {
1085 return scalar::utf16::utf8_length_from_utf16_with_replacement<
1086 endianness::LITTLE>(valid_utf16_input.data(), valid_utf16_input.size());
1087 } else
1088 #endif
1089 {
1090 return utf8_length_from_utf16le_with_replacement(valid_utf16_input.data(),
1091 valid_utf16_input.size());
1092 }
1093}
1094 #endif // SIMDUTF_SPAN
1095
1113simdutf_warn_unused result utf8_length_from_utf16be_with_replacement(
1114 const char16_t *input, size_t length) noexcept;
1115 #if SIMDUTF_SPAN
1116simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1117utf8_length_from_utf16be_with_replacement(
1118 std::span<const char16_t> valid_utf16_input) noexcept {
1119 #if SIMDUTF_CPLUSPLUS23
1120 if consteval {
1121 return scalar::utf16::utf8_length_from_utf16_with_replacement<
1122 endianness::BIG>(valid_utf16_input.data(), valid_utf16_input.size());
1123 } else
1124 #endif
1125 {
1126 return utf8_length_from_utf16be_with_replacement(valid_utf16_input.data(),
1127 valid_utf16_input.size());
1128 }
1129}
1130 #endif // SIMDUTF_SPAN
1131
1132#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1133
1134#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1143simdutf_warn_unused size_t convert_latin1_to_utf16(
1144 const char *input, size_t length, char16_t *utf16_output) noexcept;
1145 #if SIMDUTF_SPAN
1146simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1147convert_latin1_to_utf16(const detail::input_span_of_byte_like auto &input,
1148 std::span<char16_t> output) noexcept {
1149 #if SIMDUTF_CPLUSPLUS23
1150 if consteval {
1151 return scalar::latin1_to_utf16::convert<endianness::NATIVE>(
1152 input.data(), input.size(), output.data());
1153 } else
1154 #endif
1155 {
1156 return convert_latin1_to_utf16(reinterpret_cast<const char *>(input.data()),
1157 input.size(), output.data());
1158 }
1159}
1160 #endif // SIMDUTF_SPAN
1161#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1162
1163#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1176simdutf_warn_unused size_t convert_utf8_to_utf16le(
1177 const char *input, size_t length, char16_t *utf16_output) noexcept;
1178 #if SIMDUTF_SPAN
1179simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1180convert_utf8_to_utf16le(const detail::input_span_of_byte_like auto &utf8_input,
1181 std::span<char16_t> utf16_output) noexcept {
1182 #if SIMDUTF_CPLUSPLUS23
1183 if consteval {
1184 return scalar::utf8_to_utf16::convert<endianness::LITTLE>(
1185 utf8_input.data(), utf8_input.size(), utf16_output.data());
1186 } else
1187 #endif
1188 {
1189 return convert_utf8_to_utf16le(
1190 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1191 utf16_output.data());
1192 }
1193}
1194 #endif // SIMDUTF_SPAN
1195
1208simdutf_warn_unused size_t convert_utf8_to_utf16be(
1209 const char *input, size_t length, char16_t *utf16_output) noexcept;
1210 #if SIMDUTF_SPAN
1211simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1212convert_utf8_to_utf16be(const detail::input_span_of_byte_like auto &utf8_input,
1213 std::span<char16_t> utf16_output) noexcept {
1214
1215 #if SIMDUTF_CPLUSPLUS23
1216 if consteval {
1217 return scalar::utf8_to_utf16::convert<endianness::BIG>(
1218 utf8_input.data(), utf8_input.size(), utf16_output.data());
1219 } else
1220 #endif
1221 {
1222 return convert_utf8_to_utf16be(
1223 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1224 utf16_output.data());
1225 }
1226}
1227 #endif // SIMDUTF_SPAN
1228#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1229
1230#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1247simdutf_warn_unused result convert_utf8_to_latin1_with_errors(
1248 const char *input, size_t length, char *latin1_output) noexcept;
1249 #if SIMDUTF_SPAN
1250simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1251convert_utf8_to_latin1_with_errors(
1252 const detail::input_span_of_byte_like auto &utf8_input,
1253 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1254 #if SIMDUTF_CPLUSPLUS23
1255 if consteval {
1256 return scalar::utf8_to_latin1::convert_with_errors(
1257 utf8_input.data(), utf8_input.size(), latin1_output.data());
1258 } else
1259 #endif
1260 {
1261 return convert_utf8_to_latin1_with_errors(
1262 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1263 reinterpret_cast<char *>(latin1_output.data()));
1264 }
1265}
1266 #endif // SIMDUTF_SPAN
1267#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1268
1269#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1285simdutf_warn_unused result convert_utf8_to_utf16_with_errors(
1286 const char *input, size_t length, char16_t *utf16_output) noexcept;
1287 #if SIMDUTF_SPAN
1288simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1289convert_utf8_to_utf16_with_errors(
1290 const detail::input_span_of_byte_like auto &utf8_input,
1291 std::span<char16_t> utf16_output) noexcept {
1292 #if SIMDUTF_CPLUSPLUS23
1293 if consteval {
1294 return scalar::utf8_to_utf16::convert_with_errors<endianness::NATIVE>(
1295 utf8_input.data(), utf8_input.size(), utf16_output.data());
1296 } else
1297 #endif
1298 {
1299 return convert_utf8_to_utf16_with_errors(
1300 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1301 utf16_output.data());
1302 }
1303}
1304 #endif // SIMDUTF_SPAN
1305
1320simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(
1321 const char *input, size_t length, char16_t *utf16_output) noexcept;
1322 #if SIMDUTF_SPAN
1323simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1324convert_utf8_to_utf16le_with_errors(
1325 const detail::input_span_of_byte_like auto &utf8_input,
1326 std::span<char16_t> utf16_output) noexcept {
1327 #if SIMDUTF_CPLUSPLUS23
1328 if consteval {
1329 return scalar::utf8_to_utf16::convert_with_errors<endianness::LITTLE>(
1330 utf8_input.data(), utf8_input.size(), utf16_output.data());
1331 } else
1332 #endif
1333 {
1334 return convert_utf8_to_utf16le_with_errors(
1335 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1336 utf16_output.data());
1337 }
1338}
1339 #endif // SIMDUTF_SPAN
1340
1355simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(
1356 const char *input, size_t length, char16_t *utf16_output) noexcept;
1357 #if SIMDUTF_SPAN
1358simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1359convert_utf8_to_utf16be_with_errors(
1360 const detail::input_span_of_byte_like auto &utf8_input,
1361 std::span<char16_t> utf16_output) noexcept {
1362 #if SIMDUTF_CPLUSPLUS23
1363 if consteval {
1364 return scalar::utf8_to_utf16::convert_with_errors<endianness::BIG>(
1365 utf8_input.data(), utf8_input.size(), utf16_output.data());
1366 } else
1367 #endif
1368 {
1369 return convert_utf8_to_utf16be_with_errors(
1370 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1371 utf16_output.data());
1372 }
1373}
1374 #endif // SIMDUTF_SPAN
1375#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1376
1377#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1390simdutf_warn_unused size_t convert_utf8_to_utf32(
1391 const char *input, size_t length, char32_t *utf32_output) noexcept;
1392 #if SIMDUTF_SPAN
1393simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1394convert_utf8_to_utf32(const detail::input_span_of_byte_like auto &utf8_input,
1395 std::span<char32_t> utf32_output) noexcept {
1396 #if SIMDUTF_CPLUSPLUS23
1397 if consteval {
1398 return scalar::utf8_to_utf32::convert(utf8_input.data(), utf8_input.size(),
1399 utf32_output.data());
1400 } else
1401 #endif
1402 {
1403 return convert_utf8_to_utf32(
1404 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1405 utf32_output.data());
1406 }
1407}
1408 #endif // SIMDUTF_SPAN
1409
1424simdutf_warn_unused result convert_utf8_to_utf32_with_errors(
1425 const char *input, size_t length, char32_t *utf32_output) noexcept;
1426 #if SIMDUTF_SPAN
1427simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1428convert_utf8_to_utf32_with_errors(
1429 const detail::input_span_of_byte_like auto &utf8_input,
1430 std::span<char32_t> utf32_output) noexcept {
1431 #if SIMDUTF_CPLUSPLUS23
1432 if consteval {
1433 return scalar::utf8_to_utf32::convert_with_errors(
1434 utf8_input.data(), utf8_input.size(), utf32_output.data());
1435 } else
1436 #endif
1437 {
1438 return convert_utf8_to_utf32_with_errors(
1439 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1440 utf32_output.data());
1441 }
1442}
1443 #endif // SIMDUTF_SPAN
1444#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1445
1446#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1466simdutf_warn_unused size_t convert_valid_utf8_to_latin1(
1467 const char *input, size_t length, char *latin1_output) noexcept;
1468 #if SIMDUTF_SPAN
1469simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1470convert_valid_utf8_to_latin1(
1471 const detail::input_span_of_byte_like auto &valid_utf8_input,
1472 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1473 #if SIMDUTF_CPLUSPLUS23
1474 if consteval {
1475 return scalar::utf8_to_latin1::convert_valid(
1476 valid_utf8_input.data(), valid_utf8_input.size(), latin1_output.data());
1477 } else
1478 #endif
1479 {
1480 return convert_valid_utf8_to_latin1(
1481 reinterpret_cast<const char *>(valid_utf8_input.data()),
1482 valid_utf8_input.size(), latin1_output.data());
1483 }
1484}
1485 #endif // SIMDUTF_SPAN
1486#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1487
1488#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1499simdutf_warn_unused size_t convert_valid_utf8_to_utf16(
1500 const char *input, size_t length, char16_t *utf16_buffer) noexcept;
1501 #if SIMDUTF_SPAN
1502simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1503convert_valid_utf8_to_utf16(
1504 const detail::input_span_of_byte_like auto &valid_utf8_input,
1505 std::span<char16_t> utf16_output) noexcept {
1506 #if SIMDUTF_CPLUSPLUS23
1507 if consteval {
1508 return scalar::utf8_to_utf16::convert_valid<endianness::NATIVE>(
1509 valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data());
1510 } else
1511 #endif
1512 {
1513 return convert_valid_utf8_to_utf16(
1514 reinterpret_cast<const char *>(valid_utf8_input.data()),
1515 valid_utf8_input.size(), utf16_output.data());
1516 }
1517}
1518 #endif // SIMDUTF_SPAN
1519
1530simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(
1531 const char *input, size_t length, char16_t *utf16_buffer) noexcept;
1532 #if SIMDUTF_SPAN
1533simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1534convert_valid_utf8_to_utf16le(
1535 const detail::input_span_of_byte_like auto &valid_utf8_input,
1536 std::span<char16_t> utf16_output) noexcept {
1537
1538 #if SIMDUTF_CPLUSPLUS23
1539 if consteval {
1540 return scalar::utf8_to_utf16::convert_valid<endianness::LITTLE>(
1541 valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data());
1542 } else
1543 #endif
1544 {
1545 return convert_valid_utf8_to_utf16le(
1546 reinterpret_cast<const char *>(valid_utf8_input.data()),
1547 valid_utf8_input.size(), utf16_output.data());
1548 }
1549}
1550 #endif // SIMDUTF_SPAN
1551
1562simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(
1563 const char *input, size_t length, char16_t *utf16_buffer) noexcept;
1564 #if SIMDUTF_SPAN
1565simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1566convert_valid_utf8_to_utf16be(
1567 const detail::input_span_of_byte_like auto &valid_utf8_input,
1568 std::span<char16_t> utf16_output) noexcept {
1569 #if SIMDUTF_CPLUSPLUS23
1570 if consteval {
1571 return scalar::utf8_to_utf16::convert_valid<endianness::BIG>(
1572 valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data());
1573 } else
1574 #endif
1575 {
1576 return convert_valid_utf8_to_utf16be(
1577 reinterpret_cast<const char *>(valid_utf8_input.data()),
1578 valid_utf8_input.size(), utf16_output.data());
1579 }
1580}
1581 #endif // SIMDUTF_SPAN
1582#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1583
1584#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1595simdutf_warn_unused size_t convert_valid_utf8_to_utf32(
1596 const char *input, size_t length, char32_t *utf32_buffer) noexcept;
1597 #if SIMDUTF_SPAN
1598simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1599convert_valid_utf8_to_utf32(
1600 const detail::input_span_of_byte_like auto &valid_utf8_input,
1601 std::span<char32_t> utf32_output) noexcept {
1602 #if SIMDUTF_CPLUSPLUS23
1603 if consteval {
1604 return scalar::utf8_to_utf32::convert_valid(
1605 valid_utf8_input.data(), valid_utf8_input.size(), utf32_output.data());
1606 } else
1607 #endif
1608 {
1609 return convert_valid_utf8_to_utf32(
1610 reinterpret_cast<const char *>(valid_utf8_input.data()),
1611 valid_utf8_input.size(), utf32_output.data());
1612 }
1613}
1614 #endif // SIMDUTF_SPAN
1615#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1616
1617#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1626simdutf_warn_unused size_t utf8_length_from_latin1(const char *input,
1627 size_t length) noexcept;
1628 #if SIMDUTF_SPAN
1629simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1630utf8_length_from_latin1(
1631 const detail::input_span_of_byte_like auto &latin1_input) noexcept {
1632 #if SIMDUTF_CPLUSPLUS23
1633 if consteval {
1634 return scalar::latin1_to_utf8::utf8_length_from_latin1(latin1_input.data(),
1635 latin1_input.size());
1636 } else
1637 #endif
1638 {
1639 return utf8_length_from_latin1(
1640 reinterpret_cast<const char *>(latin1_input.data()),
1641 latin1_input.size());
1642 }
1643}
1644 #endif // SIMDUTF_SPAN
1645
1659simdutf_warn_unused size_t latin1_length_from_utf8(const char *input,
1660 size_t length) noexcept;
1661 #if SIMDUTF_SPAN
1662simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1663latin1_length_from_utf8(
1664 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
1665 #if SIMDUTF_CPLUSPLUS23
1666 if consteval {
1667 return scalar::utf8::count_code_points(valid_utf8_input.data(),
1668 valid_utf8_input.size());
1669 } else
1670 #endif
1671 {
1672 return latin1_length_from_utf8(
1673 reinterpret_cast<const char *>(valid_utf8_input.data()),
1674 valid_utf8_input.size());
1675 }
1676}
1677 #endif // SIMDUTF_SPAN
1678#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1679
1680#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1695simdutf_warn_unused size_t utf16_length_from_utf8(const char *input,
1696 size_t length) noexcept;
1697 #if SIMDUTF_SPAN
1698simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1699utf16_length_from_utf8(
1700 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
1701 #if SIMDUTF_CPLUSPLUS23
1702 if consteval {
1703 return scalar::utf8::utf16_length_from_utf8(valid_utf8_input.data(),
1704 valid_utf8_input.size());
1705 } else
1706 #endif
1707 {
1708 return utf16_length_from_utf8(
1709 reinterpret_cast<const char *>(valid_utf8_input.data()),
1710 valid_utf8_input.size());
1711 }
1712}
1713 #endif // SIMDUTF_SPAN
1714#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1715
1716#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1733simdutf_warn_unused size_t utf32_length_from_utf8(const char *input,
1734 size_t length) noexcept;
1735 #if SIMDUTF_SPAN
1736simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1737utf32_length_from_utf8(
1738 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
1739
1740 #if SIMDUTF_CPLUSPLUS23
1741 if consteval {
1742 return scalar::utf8::count_code_points(valid_utf8_input.data(),
1743 valid_utf8_input.size());
1744 } else
1745 #endif
1746 {
1747 return utf32_length_from_utf8(
1748 reinterpret_cast<const char *>(valid_utf8_input.data()),
1749 valid_utf8_input.size());
1750 }
1751}
1752 #endif // SIMDUTF_SPAN
1753#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1754
1755#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1771simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t *input,
1772 size_t length,
1773 char *utf8_buffer) noexcept;
1774 #if SIMDUTF_SPAN
1775simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1776convert_utf16_to_utf8(
1777 std::span<const char16_t> utf16_input,
1778 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1779 #if SIMDUTF_CPLUSPLUS23
1780 if consteval {
1781 return scalar::utf16_to_utf8::convert<endianness::NATIVE>(
1782 utf16_input.data(), utf16_input.size(), utf8_output.data());
1783 } else
1784 #endif
1785 {
1786 return convert_utf16_to_utf8(utf16_input.data(), utf16_input.size(),
1787 reinterpret_cast<char *>(utf8_output.data()));
1788 }
1789}
1790 #endif // SIMDUTF_SPAN
1791
1810simdutf_warn_unused size_t convert_utf16_to_utf8_safe(const char16_t *input,
1811 size_t length,
1812 char *utf8_output,
1813 size_t utf8_len) noexcept;
1814 #if SIMDUTF_SPAN
1815simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1816convert_utf16_to_utf8_safe(
1817 std::span<const char16_t> utf16_input,
1818 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1819 // implementation note: outputspan is a forwarding ref to avoid copying
1820 // and allow both lvalues and rvalues. std::span can be copied without
1821 // problems, but std::vector should not, and this function should accept
1822 // both. it will allow using an owning rvalue ref (example: passing a
1823 // temporary std::string) as output, but the user will quickly find out
1824 // that he has no way of getting the data out of the object in that case.
1825 #if SIMDUTF_CPLUSPLUS23
1826 if consteval {
1827 const full_result r =
1828 scalar::utf16_to_utf8::convert_with_errors<endianness::NATIVE, true>(
1829 utf16_input.data(), utf16_input.size(), utf8_output.data(),
1830 utf8_output.size());
1831 if (r.error != error_code::SUCCESS &&
1832 r.error != error_code::OUTPUT_BUFFER_TOO_SMALL) {
1833 return 0;
1834 }
1835 return r.output_count;
1836 } else
1837 #endif
1838 {
1839 return convert_utf16_to_utf8_safe(
1840 utf16_input.data(), utf16_input.size(),
1841 reinterpret_cast<char *>(utf8_output.data()), utf8_output.size());
1842 }
1843}
1844 #endif // SIMDUTF_SPAN
1845#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1846
1847#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1863simdutf_warn_unused size_t convert_utf16_to_latin1(
1864 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1865 #if SIMDUTF_SPAN
1866simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1867convert_utf16_to_latin1(
1868 std::span<const char16_t> utf16_input,
1869 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1870 #if SIMDUTF_CPLUSPLUS23
1871 if consteval {
1872 return scalar::utf16_to_latin1::convert<endianness::NATIVE>(
1873 utf16_input.data(), utf16_input.size(), latin1_output.data());
1874 } else
1875 #endif
1876 {
1877 return convert_utf16_to_latin1(
1878 utf16_input.data(), utf16_input.size(),
1879 reinterpret_cast<char *>(latin1_output.data()));
1880 }
1881}
1882 #endif // SIMDUTF_SPAN
1883
1900simdutf_warn_unused size_t convert_utf16le_to_latin1(
1901 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1902 #if SIMDUTF_SPAN
1903simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1904convert_utf16le_to_latin1(
1905 std::span<const char16_t> utf16_input,
1906 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1907 #if SIMDUTF_CPLUSPLUS23
1908 if consteval {
1909 return scalar::utf16_to_latin1::convert<endianness::LITTLE>(
1910 utf16_input.data(), utf16_input.size(), latin1_output.data());
1911 } else
1912 #endif
1913 {
1914 return convert_utf16le_to_latin1(
1915 utf16_input.data(), utf16_input.size(),
1916 reinterpret_cast<char *>(latin1_output.data()));
1917 }
1918}
1919 #endif // SIMDUTF_SPAN
1920
1935simdutf_warn_unused size_t convert_utf16be_to_latin1(
1936 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1937 #if SIMDUTF_SPAN
1938simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1939convert_utf16be_to_latin1(
1940 std::span<const char16_t> utf16_input,
1941 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1942 #if SIMDUTF_CPLUSPLUS23
1943 if consteval {
1944 return scalar::utf16_to_latin1::convert<endianness::BIG>(
1945 utf16_input.data(), utf16_input.size(), latin1_output.data());
1946 } else
1947 #endif
1948 {
1949 return convert_utf16be_to_latin1(
1950 utf16_input.data(), utf16_input.size(),
1951 reinterpret_cast<char *>(latin1_output.data()));
1952 }
1953}
1954 #endif // SIMDUTF_SPAN
1955#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1956
1957#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1972simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *input,
1973 size_t length,
1974 char *utf8_buffer) noexcept;
1975 #if SIMDUTF_SPAN
1976simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1977convert_utf16le_to_utf8(
1978 std::span<const char16_t> utf16_input,
1979 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1980 #if SIMDUTF_CPLUSPLUS23
1981 if consteval {
1982 return scalar::utf16_to_utf8::convert<endianness::LITTLE>(
1983 utf16_input.data(), utf16_input.size(), utf8_output.data());
1984 } else
1985 #endif
1986 {
1987 return convert_utf16le_to_utf8(
1988 utf16_input.data(), utf16_input.size(),
1989 reinterpret_cast<char *>(utf8_output.data()));
1990 }
1991}
1992 #endif // SIMDUTF_SPAN
1993
2008simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input,
2009 size_t length,
2010 char *utf8_buffer) noexcept;
2011 #if SIMDUTF_SPAN
2012simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2013convert_utf16be_to_utf8(
2014 std::span<const char16_t> utf16_input,
2015 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2016 #if SIMDUTF_CPLUSPLUS23
2017 if consteval {
2018 return scalar::utf16_to_utf8::convert<endianness::BIG>(
2019 utf16_input.data(), utf16_input.size(), utf8_output.data());
2020 } else
2021 #endif
2022 {
2023 return convert_utf16be_to_utf8(
2024 utf16_input.data(), utf16_input.size(),
2025 reinterpret_cast<char *>(utf8_output.data()));
2026 }
2027}
2028 #endif // SIMDUTF_SPAN
2029#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2030
2031#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
2048simdutf_warn_unused result convert_utf16_to_latin1_with_errors(
2049 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
2050 #if SIMDUTF_SPAN
2051simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2052convert_utf16_to_latin1_with_errors(
2053 std::span<const char16_t> utf16_input,
2054 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2055 #if SIMDUTF_CPLUSPLUS23
2056 if consteval {
2057 return scalar::utf16_to_latin1::convert_with_errors<endianness::NATIVE>(
2058 utf16_input.data(), utf16_input.size(), latin1_output.data());
2059 } else
2060 #endif
2061 {
2062 return convert_utf16_to_latin1_with_errors(
2063 utf16_input.data(), utf16_input.size(),
2064 reinterpret_cast<char *>(latin1_output.data()));
2065 }
2066}
2067 #endif // SIMDUTF_SPAN
2068
2084simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(
2085 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
2086 #if SIMDUTF_SPAN
2087simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2088convert_utf16le_to_latin1_with_errors(
2089 std::span<const char16_t> utf16_input,
2090 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2091 #if SIMDUTF_CPLUSPLUS23
2092 if consteval {
2093 return scalar::utf16_to_latin1::convert_with_errors<endianness::LITTLE>(
2094 utf16_input.data(), utf16_input.size(), latin1_output.data());
2095 } else
2096 #endif
2097 {
2098 return convert_utf16le_to_latin1_with_errors(
2099 utf16_input.data(), utf16_input.size(),
2100 reinterpret_cast<char *>(latin1_output.data()));
2101 }
2102}
2103 #endif // SIMDUTF_SPAN
2104
2122simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(
2123 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
2124 #if SIMDUTF_SPAN
2125simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2126convert_utf16be_to_latin1_with_errors(
2127 std::span<const char16_t> utf16_input,
2128 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2129 #if SIMDUTF_CPLUSPLUS23
2130 if consteval {
2131 return scalar::utf16_to_latin1::convert_with_errors<endianness::BIG>(
2132 utf16_input.data(), utf16_input.size(), latin1_output.data());
2133 } else
2134 #endif
2135 {
2136 return convert_utf16be_to_latin1_with_errors(
2137 utf16_input.data(), utf16_input.size(),
2138 reinterpret_cast<char *>(latin1_output.data()));
2139 }
2140}
2141 #endif // SIMDUTF_SPAN
2142#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
2143
2144#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2162simdutf_warn_unused result convert_utf16_to_utf8_with_errors(
2163 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2164 #if SIMDUTF_SPAN
2165simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2166convert_utf16_to_utf8_with_errors(
2167 std::span<const char16_t> utf16_input,
2168 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2169 #if SIMDUTF_CPLUSPLUS23
2170 if consteval {
2171 return scalar::utf16_to_utf8::convert_with_errors<endianness::NATIVE>(
2172 utf16_input.data(), utf16_input.size(), utf8_output.data());
2173 } else
2174 #endif
2175 {
2176 return convert_utf16_to_utf8_with_errors(
2177 utf16_input.data(), utf16_input.size(),
2178 reinterpret_cast<char *>(utf8_output.data()));
2179 }
2180}
2181 #endif // SIMDUTF_SPAN
2182
2199simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(
2200 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2201 #if SIMDUTF_SPAN
2202simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2203convert_utf16le_to_utf8_with_errors(
2204 std::span<const char16_t> utf16_input,
2205 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2206 #if SIMDUTF_CPLUSPLUS23
2207 if consteval {
2208 return scalar::utf16_to_utf8::convert_with_errors<endianness::LITTLE>(
2209 utf16_input.data(), utf16_input.size(), utf8_output.data());
2210 } else
2211 #endif
2212 {
2213 return convert_utf16le_to_utf8_with_errors(
2214 utf16_input.data(), utf16_input.size(),
2215 reinterpret_cast<char *>(utf8_output.data()));
2216 }
2217}
2218 #endif // SIMDUTF_SPAN
2219
2236simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(
2237 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2238 #if SIMDUTF_SPAN
2239simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2240convert_utf16be_to_utf8_with_errors(
2241 std::span<const char16_t> utf16_input,
2242 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2243 #if SIMDUTF_CPLUSPLUS23
2244 if consteval {
2245 return scalar::utf16_to_utf8::convert_with_errors<endianness::BIG>(
2246 utf16_input.data(), utf16_input.size(), utf8_output.data());
2247 } else
2248 #endif
2249 {
2250 return convert_utf16be_to_utf8_with_errors(
2251 utf16_input.data(), utf16_input.size(),
2252 reinterpret_cast<char *>(utf8_output.data()));
2253 }
2254}
2255 #endif // SIMDUTF_SPAN
2256
2271simdutf_warn_unused size_t convert_utf16le_to_utf8_with_replacement(
2272 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2273 #if SIMDUTF_SPAN
2274simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2275convert_utf16le_to_utf8_with_replacement(
2276 std::span<const char16_t> utf16_input,
2277 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2278 #if SIMDUTF_CPLUSPLUS23
2279 if consteval {
2280 return scalar::utf16_to_utf8::convert_with_replacement<endianness::LITTLE>(
2281 utf16_input.data(), utf16_input.size(), utf8_output.data());
2282 } else
2283 #endif
2284 {
2285 return convert_utf16le_to_utf8_with_replacement(
2286 utf16_input.data(), utf16_input.size(),
2287 reinterpret_cast<char *>(utf8_output.data()));
2288 }
2289}
2290 #endif // SIMDUTF_SPAN
2291
2306simdutf_warn_unused size_t convert_utf16be_to_utf8_with_replacement(
2307 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2308 #if SIMDUTF_SPAN
2309simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2310convert_utf16be_to_utf8_with_replacement(
2311 std::span<const char16_t> utf16_input,
2312 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2313 #if SIMDUTF_CPLUSPLUS23
2314 if consteval {
2315 return scalar::utf16_to_utf8::convert_with_replacement<endianness::BIG>(
2316 utf16_input.data(), utf16_input.size(), utf8_output.data());
2317 } else
2318 #endif
2319 {
2320 return convert_utf16be_to_utf8_with_replacement(
2321 utf16_input.data(), utf16_input.size(),
2322 reinterpret_cast<char *>(utf8_output.data()));
2323 }
2324}
2325 #endif // SIMDUTF_SPAN
2326
2341simdutf_warn_unused size_t convert_utf16_to_utf8_with_replacement(
2342 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2343 #if SIMDUTF_SPAN
2344simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2345convert_utf16_to_utf8_with_replacement(
2346 std::span<const char16_t> utf16_input,
2347 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2348 #if SIMDUTF_CPLUSPLUS23
2349 if consteval {
2350 return scalar::utf16_to_utf8::convert_with_replacement<endianness::NATIVE>(
2351 utf16_input.data(), utf16_input.size(), utf8_output.data());
2352 } else
2353 #endif
2354 {
2355 return convert_utf16_to_utf8_with_replacement(
2356 utf16_input.data(), utf16_input.size(),
2357 reinterpret_cast<char *>(utf8_output.data()));
2358 }
2359}
2360 #endif // SIMDUTF_SPAN
2361#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2362
2363#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2377simdutf_warn_unused size_t convert_valid_utf16_to_utf8(
2378 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2379 #if SIMDUTF_SPAN
2380simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2381convert_valid_utf16_to_utf8(
2382 std::span<const char16_t> valid_utf16_input,
2383 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2384 #if SIMDUTF_CPLUSPLUS23
2385 if consteval {
2386 return scalar::utf16_to_utf8::convert_valid<endianness::NATIVE>(
2387 valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data());
2388 } else
2389 #endif
2390 {
2391 return convert_valid_utf16_to_utf8(
2392 valid_utf16_input.data(), valid_utf16_input.size(),
2393 reinterpret_cast<char *>(utf8_output.data()));
2394 }
2395}
2396 #endif // SIMDUTF_SPAN
2397#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2398
2399#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
2419simdutf_warn_unused size_t convert_valid_utf16_to_latin1(
2420 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
2421 #if SIMDUTF_SPAN
2422simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2423convert_valid_utf16_to_latin1(
2424 std::span<const char16_t> valid_utf16_input,
2425 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2426 #if SIMDUTF_CPLUSPLUS23
2427 if consteval {
2428 return scalar::utf16_to_latin1::convert_valid_impl<endianness::NATIVE>(
2429 detail::constexpr_cast_ptr<uint16_t>(valid_utf16_input.data()),
2430 valid_utf16_input.size(),
2431 detail::constexpr_cast_writeptr<char>(latin1_output.data()));
2432 } else
2433 #endif
2434 {
2435 return convert_valid_utf16_to_latin1(
2436 valid_utf16_input.data(), valid_utf16_input.size(),
2437 reinterpret_cast<char *>(latin1_output.data()));
2438 }
2439}
2440 #endif // SIMDUTF_SPAN
2441
2461simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(
2462 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
2463 #if SIMDUTF_SPAN
2464simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused size_t
2465convert_valid_utf16le_to_latin1(
2466 std::span<const char16_t> valid_utf16_input,
2467 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2468 #if SIMDUTF_CPLUSPLUS23
2469 if consteval {
2470 return scalar::utf16_to_latin1::convert_valid_impl<endianness::LITTLE>(
2471 detail::constexpr_cast_ptr<uint16_t>(valid_utf16_input.data()),
2472 valid_utf16_input.size(),
2473 detail::constexpr_cast_writeptr<char>(latin1_output.data()));
2474 } else
2475 #endif
2476 {
2477 return convert_valid_utf16le_to_latin1(
2478 valid_utf16_input.data(), valid_utf16_input.size(),
2479 reinterpret_cast<char *>(latin1_output.data()));
2480 }
2481}
2482 #endif // SIMDUTF_SPAN
2483
2503simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(
2504 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
2505 #if SIMDUTF_SPAN
2506simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused size_t
2507convert_valid_utf16be_to_latin1(
2508 std::span<const char16_t> valid_utf16_input,
2509 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2510 #if SIMDUTF_CPLUSPLUS23
2511 if consteval {
2512 return scalar::utf16_to_latin1::convert_valid_impl<endianness::BIG>(
2513 detail::constexpr_cast_ptr<uint16_t>(valid_utf16_input.data()),
2514 valid_utf16_input.size(),
2515 detail::constexpr_cast_writeptr<char>(latin1_output.data()));
2516 } else
2517 #endif
2518 {
2519 return convert_valid_utf16be_to_latin1(
2520 valid_utf16_input.data(), valid_utf16_input.size(),
2521 reinterpret_cast<char *>(latin1_output.data()));
2522 }
2523}
2524 #endif // SIMDUTF_SPAN
2525#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
2526
2527#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2541simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(
2542 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2543 #if SIMDUTF_SPAN
2544simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2545convert_valid_utf16le_to_utf8(
2546 std::span<const char16_t> valid_utf16_input,
2547 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2548 #if SIMDUTF_CPLUSPLUS23
2549 if consteval {
2550 return scalar::utf16_to_utf8::convert_valid<endianness::NATIVE>(
2551 valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data());
2552 } else
2553 #endif
2554 {
2555 return convert_valid_utf16le_to_utf8(
2556 valid_utf16_input.data(), valid_utf16_input.size(),
2557 reinterpret_cast<char *>(utf8_output.data()));
2558 }
2559}
2560 #endif // SIMDUTF_SPAN
2561
2575simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(
2576 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2577 #if SIMDUTF_SPAN
2578simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2579convert_valid_utf16be_to_utf8(
2580 std::span<const char16_t> valid_utf16_input,
2581 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2582 #if SIMDUTF_CPLUSPLUS23
2583 if consteval {
2584 return scalar::utf16_to_utf8::convert_valid<endianness::BIG>(
2585 valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data());
2586 } else
2587 #endif
2588 {
2589 return convert_valid_utf16be_to_utf8(
2590 valid_utf16_input.data(), valid_utf16_input.size(),
2591 reinterpret_cast<char *>(utf8_output.data()));
2592 }
2593}
2594 #endif // SIMDUTF_SPAN
2595#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2596
2597#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2613simdutf_warn_unused size_t convert_utf16_to_utf32(
2614 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2615 #if SIMDUTF_SPAN
2616simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2617convert_utf16_to_utf32(std::span<const char16_t> utf16_input,
2618 std::span<char32_t> utf32_output) noexcept {
2619
2620 #if SIMDUTF_CPLUSPLUS23
2621 if consteval {
2622 return scalar::utf16_to_utf32::convert<endianness::NATIVE>(
2623 utf16_input.data(), utf16_input.size(), utf32_output.data());
2624 } else
2625 #endif
2626 {
2627 return convert_utf16_to_utf32(utf16_input.data(), utf16_input.size(),
2628 utf32_output.data());
2629 }
2630}
2631 #endif // SIMDUTF_SPAN
2632
2647simdutf_warn_unused size_t convert_utf16le_to_utf32(
2648 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2649 #if SIMDUTF_SPAN
2650simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2651convert_utf16le_to_utf32(std::span<const char16_t> utf16_input,
2652 std::span<char32_t> utf32_output) noexcept {
2653 #if SIMDUTF_CPLUSPLUS23
2654 if consteval {
2655 return scalar::utf16_to_utf32::convert<endianness::LITTLE>(
2656 utf16_input.data(), utf16_input.size(), utf32_output.data());
2657 } else
2658 #endif
2659 {
2660 return convert_utf16le_to_utf32(utf16_input.data(), utf16_input.size(),
2661 utf32_output.data());
2662 }
2663}
2664 #endif // SIMDUTF_SPAN
2665
2680simdutf_warn_unused size_t convert_utf16be_to_utf32(
2681 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2682 #if SIMDUTF_SPAN
2683simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2684convert_utf16be_to_utf32(std::span<const char16_t> utf16_input,
2685 std::span<char32_t> utf32_output) noexcept {
2686 #if SIMDUTF_CPLUSPLUS23
2687 if consteval {
2688 return scalar::utf16_to_utf32::convert<endianness::BIG>(
2689 utf16_input.data(), utf16_input.size(), utf32_output.data());
2690 } else
2691 #endif
2692 {
2693 return convert_utf16be_to_utf32(utf16_input.data(), utf16_input.size(),
2694 utf32_output.data());
2695 }
2696}
2697 #endif // SIMDUTF_SPAN
2698
2716simdutf_warn_unused result convert_utf16_to_utf32_with_errors(
2717 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2718 #if SIMDUTF_SPAN
2719simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2720convert_utf16_to_utf32_with_errors(std::span<const char16_t> utf16_input,
2721 std::span<char32_t> utf32_output) noexcept {
2722 #if SIMDUTF_CPLUSPLUS23
2723 if consteval {
2724 return scalar::utf16_to_utf32::convert_with_errors<endianness::NATIVE>(
2725 utf16_input.data(), utf16_input.size(), utf32_output.data());
2726 } else
2727 #endif
2728 {
2729 return convert_utf16_to_utf32_with_errors(
2730 utf16_input.data(), utf16_input.size(), utf32_output.data());
2731 }
2732}
2733 #endif // SIMDUTF_SPAN
2734
2751simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(
2752 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2753 #if SIMDUTF_SPAN
2754simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2755convert_utf16le_to_utf32_with_errors(
2756 std::span<const char16_t> utf16_input,
2757 std::span<char32_t> utf32_output) noexcept {
2758 #if SIMDUTF_CPLUSPLUS23
2759 if consteval {
2760 return scalar::utf16_to_utf32::convert_with_errors<endianness::LITTLE>(
2761 utf16_input.data(), utf16_input.size(), utf32_output.data());
2762 } else
2763 #endif
2764 {
2765 return convert_utf16le_to_utf32_with_errors(
2766 utf16_input.data(), utf16_input.size(), utf32_output.data());
2767 }
2768}
2769 #endif // SIMDUTF_SPAN
2770
2787simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(
2788 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2789 #if SIMDUTF_SPAN
2790simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2791convert_utf16be_to_utf32_with_errors(
2792 std::span<const char16_t> utf16_input,
2793 std::span<char32_t> utf32_output) noexcept {
2794 #if SIMDUTF_CPLUSPLUS23
2795 if consteval {
2796 return scalar::utf16_to_utf32::convert_with_errors<endianness::BIG>(
2797 utf16_input.data(), utf16_input.size(), utf32_output.data());
2798 } else
2799 #endif
2800 {
2801 return convert_utf16be_to_utf32_with_errors(
2802 utf16_input.data(), utf16_input.size(), utf32_output.data());
2803 }
2804}
2805 #endif // SIMDUTF_SPAN
2806
2821simdutf_warn_unused size_t convert_valid_utf16_to_utf32(
2822 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2823 #if SIMDUTF_SPAN
2824simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2825convert_valid_utf16_to_utf32(std::span<const char16_t> valid_utf16_input,
2826 std::span<char32_t> utf32_output) noexcept {
2827 #if SIMDUTF_CPLUSPLUS23
2828 if consteval {
2829 return scalar::utf16_to_utf32::convert_valid<endianness::NATIVE>(
2830 valid_utf16_input.data(), valid_utf16_input.size(),
2831 utf32_output.data());
2832 } else
2833 #endif
2834 {
2835 return convert_valid_utf16_to_utf32(valid_utf16_input.data(),
2836 valid_utf16_input.size(),
2837 utf32_output.data());
2838 }
2839}
2840 #endif // SIMDUTF_SPAN
2841
2855simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(
2856 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2857 #if SIMDUTF_SPAN
2858simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2859convert_valid_utf16le_to_utf32(std::span<const char16_t> valid_utf16_input,
2860 std::span<char32_t> utf32_output) noexcept {
2861 #if SIMDUTF_CPLUSPLUS23
2862 if consteval {
2863 return scalar::utf16_to_utf32::convert_valid<endianness::LITTLE>(
2864 valid_utf16_input.data(), valid_utf16_input.size(),
2865 utf32_output.data());
2866 } else
2867 #endif
2868 {
2869 return convert_valid_utf16le_to_utf32(valid_utf16_input.data(),
2870 valid_utf16_input.size(),
2871 utf32_output.data());
2872 }
2873}
2874 #endif // SIMDUTF_SPAN
2875
2889simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(
2890 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2891 #if SIMDUTF_SPAN
2892simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2893convert_valid_utf16be_to_utf32(std::span<const char16_t> valid_utf16_input,
2894 std::span<char32_t> utf32_output) noexcept {
2895 #if SIMDUTF_CPLUSPLUS23
2896 if consteval {
2897 return scalar::utf16_to_utf32::convert_valid<endianness::BIG>(
2898 valid_utf16_input.data(), valid_utf16_input.size(),
2899 utf32_output.data());
2900 } else
2901 #endif
2902 {
2903 return convert_valid_utf16be_to_utf32(valid_utf16_input.data(),
2904 valid_utf16_input.size(),
2905 utf32_output.data());
2906 }
2907}
2908 #endif // SIMDUTF_SPAN
2909#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2910
2911#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2923simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t *input,
2924 size_t length) noexcept;
2925 #if SIMDUTF_SPAN
2926simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2927utf8_length_from_utf16(std::span<const char16_t> valid_utf16_input) noexcept {
2928 #if SIMDUTF_CPLUSPLUS23
2929 if consteval {
2930 return scalar::utf16::utf8_length_from_utf16<endianness::NATIVE>(
2931 valid_utf16_input.data(), valid_utf16_input.size());
2932 } else
2933 #endif
2934 {
2935 return utf8_length_from_utf16(valid_utf16_input.data(),
2936 valid_utf16_input.size());
2937 }
2938}
2939 #endif // SIMDUTF_SPAN
2940
2959simdutf_warn_unused result utf8_length_from_utf16_with_replacement(
2960 const char16_t *input, size_t length) noexcept;
2961 #if SIMDUTF_SPAN
2962simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2963utf8_length_from_utf16_with_replacement(
2964 std::span<const char16_t> valid_utf16_input) noexcept {
2965 #if SIMDUTF_CPLUSPLUS23
2966 if consteval {
2967 return scalar::utf16::utf8_length_from_utf16_with_replacement<
2968 endianness::NATIVE>(valid_utf16_input.data(), valid_utf16_input.size());
2969 } else
2970 #endif
2971 {
2972 return utf8_length_from_utf16_with_replacement(valid_utf16_input.data(),
2973 valid_utf16_input.size());
2974 }
2975}
2976 #endif // SIMDUTF_SPAN
2977
2989simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input,
2990 size_t length) noexcept;
2991 #if SIMDUTF_SPAN
2992simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused size_t
2993utf8_length_from_utf16le(std::span<const char16_t> valid_utf16_input) noexcept {
2994 #if SIMDUTF_CPLUSPLUS23
2995 if consteval {
2996 return scalar::utf16::utf8_length_from_utf16<endianness::LITTLE>(
2997 valid_utf16_input.data(), valid_utf16_input.size());
2998 } else
2999 #endif
3000 {
3001 return utf8_length_from_utf16le(valid_utf16_input.data(),
3002 valid_utf16_input.size());
3003 }
3004}
3005 #endif // SIMDUTF_SPAN
3006
3018simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input,
3019 size_t length) noexcept;
3020 #if SIMDUTF_SPAN
3021simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3022utf8_length_from_utf16be(std::span<const char16_t> valid_utf16_input) noexcept {
3023 #if SIMDUTF_CPLUSPLUS23
3024 if consteval {
3025 return scalar::utf16::utf8_length_from_utf16<endianness::BIG>(
3026 valid_utf16_input.data(), valid_utf16_input.size());
3027 } else
3028 #endif
3029 {
3030 return utf8_length_from_utf16be(valid_utf16_input.data(),
3031 valid_utf16_input.size());
3032 }
3033}
3034 #endif // SIMDUTF_SPAN
3035#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
3036
3037#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
3051simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input,
3052 size_t length,
3053 char *utf8_buffer) noexcept;
3054 #if SIMDUTF_SPAN
3055simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3056convert_utf32_to_utf8(
3057 std::span<const char32_t> utf32_input,
3058 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
3059 #if SIMDUTF_CPLUSPLUS23
3060 if consteval {
3061 return scalar::utf32_to_utf8::convert(
3062 utf32_input.data(), utf32_input.size(), utf8_output.data());
3063 } else
3064 #endif
3065 {
3066 return convert_utf32_to_utf8(utf32_input.data(), utf32_input.size(),
3067 reinterpret_cast<char *>(utf8_output.data()));
3068 }
3069}
3070 #endif // SIMDUTF_SPAN
3071
3088simdutf_warn_unused result convert_utf32_to_utf8_with_errors(
3089 const char32_t *input, size_t length, char *utf8_buffer) noexcept;
3090 #if SIMDUTF_SPAN
3091simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3092convert_utf32_to_utf8_with_errors(
3093 std::span<const char32_t> utf32_input,
3094 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
3095 #if SIMDUTF_CPLUSPLUS23
3096 if consteval {
3097 return scalar::utf32_to_utf8::convert_with_errors(
3098 utf32_input.data(), utf32_input.size(), utf8_output.data());
3099 } else
3100 #endif
3101 {
3102 return convert_utf32_to_utf8_with_errors(
3103 utf32_input.data(), utf32_input.size(),
3104 reinterpret_cast<char *>(utf8_output.data()));
3105 }
3106}
3107 #endif // SIMDUTF_SPAN
3108
3122simdutf_warn_unused size_t convert_valid_utf32_to_utf8(
3123 const char32_t *input, size_t length, char *utf8_buffer) noexcept;
3124 #if SIMDUTF_SPAN
3125simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3126convert_valid_utf32_to_utf8(
3127 std::span<const char32_t> valid_utf32_input,
3128 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
3129 #if SIMDUTF_CPLUSPLUS23
3130 if consteval {
3131 return scalar::utf32_to_utf8::convert_valid(
3132 valid_utf32_input.data(), valid_utf32_input.size(), utf8_output.data());
3133 } else
3134 #endif
3135 {
3136 return convert_valid_utf32_to_utf8(
3137 valid_utf32_input.data(), valid_utf32_input.size(),
3138 reinterpret_cast<char *>(utf8_output.data()));
3139 }
3140}
3141 #endif // SIMDUTF_SPAN
3142#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
3143
3144#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
3159simdutf_warn_unused size_t convert_utf32_to_utf16(
3160 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3161 #if SIMDUTF_SPAN
3162simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3163convert_utf32_to_utf16(std::span<const char32_t> utf32_input,
3164 std::span<char16_t> utf16_output) noexcept {
3165 #if SIMDUTF_CPLUSPLUS23
3166 if consteval {
3167 return scalar::utf32_to_utf16::convert<endianness::NATIVE>(
3168 utf32_input.data(), utf32_input.size(), utf16_output.data());
3169 } else
3170 #endif
3171 {
3172 return convert_utf32_to_utf16(utf32_input.data(), utf32_input.size(),
3173 utf16_output.data());
3174 }
3175}
3176 #endif // SIMDUTF_SPAN
3177
3191simdutf_warn_unused size_t convert_utf32_to_utf16le(
3192 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3193 #if SIMDUTF_SPAN
3194simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3195convert_utf32_to_utf16le(std::span<const char32_t> utf32_input,
3196 std::span<char16_t> utf16_output) noexcept {
3197 #if SIMDUTF_CPLUSPLUS23
3198 if consteval {
3199 return scalar::utf32_to_utf16::convert<endianness::LITTLE>(
3200 utf32_input.data(), utf32_input.size(), utf16_output.data());
3201 } else
3202 #endif
3203 {
3204 return convert_utf32_to_utf16le(utf32_input.data(), utf32_input.size(),
3205 utf16_output.data());
3206 }
3207}
3208 #endif // SIMDUTF_SPAN
3209#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
3210
3211#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
3226simdutf_warn_unused size_t convert_utf32_to_latin1(
3227 const char32_t *input, size_t length, char *latin1_buffer) noexcept;
3228 #if SIMDUTF_SPAN
3229simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3230convert_utf32_to_latin1(
3231 std::span<const char32_t> utf32_input,
3232 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
3233 #if SIMDUTF_CPLUSPLUS23
3234 if consteval {
3235 return scalar::utf32_to_latin1::convert(
3236 utf32_input.data(), utf32_input.size(), latin1_output.data());
3237 } else
3238 #endif
3239 {
3240 return convert_utf32_to_latin1(
3241 utf32_input.data(), utf32_input.size(),
3242 reinterpret_cast<char *>(latin1_output.data()));
3243 }
3244}
3245 #endif // SIMDUTF_SPAN
3246
3264simdutf_warn_unused result convert_utf32_to_latin1_with_errors(
3265 const char32_t *input, size_t length, char *latin1_buffer) noexcept;
3266 #if SIMDUTF_SPAN
3267simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3268convert_utf32_to_latin1_with_errors(
3269 std::span<const char32_t> utf32_input,
3270 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
3271 #if SIMDUTF_CPLUSPLUS23
3272 if consteval {
3273 return scalar::utf32_to_latin1::convert_with_errors(
3274 utf32_input.data(), utf32_input.size(), latin1_output.data());
3275 } else
3276 #endif
3277 {
3278 return convert_utf32_to_latin1_with_errors(
3279 utf32_input.data(), utf32_input.size(),
3280 reinterpret_cast<char *>(latin1_output.data()));
3281 }
3282}
3283 #endif // SIMDUTF_SPAN
3284
3305simdutf_warn_unused size_t convert_valid_utf32_to_latin1(
3306 const char32_t *input, size_t length, char *latin1_buffer) noexcept;
3307 #if SIMDUTF_SPAN
3308simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused size_t
3309convert_valid_utf32_to_latin1(
3310 std::span<const char32_t> valid_utf32_input,
3311 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
3312 #if SIMDUTF_CPLUSPLUS23
3313 if consteval {
3314 return scalar::utf32_to_latin1::convert_valid(
3315 detail::constexpr_cast_ptr<uint32_t>(valid_utf32_input.data()),
3316 valid_utf32_input.size(),
3317 detail::constexpr_cast_writeptr<char>(latin1_output.data()));
3318 }
3319 #endif
3320 {
3321 return convert_valid_utf32_to_latin1(
3322 valid_utf32_input.data(), valid_utf32_input.size(),
3323 reinterpret_cast<char *>(latin1_output.data()));
3324 }
3325}
3326 #endif // SIMDUTF_SPAN
3327
3340simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 size_t
3341latin1_length_from_utf32(size_t length) noexcept {
3342 return length;
3343}
3344
3353simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 size_t
3354utf32_length_from_latin1(size_t length) noexcept {
3355 return length;
3356}
3357#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
3358
3359#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
3373simdutf_warn_unused size_t convert_utf32_to_utf16be(
3374 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3375 #if SIMDUTF_SPAN
3376simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3377convert_utf32_to_utf16be(std::span<const char32_t> utf32_input,
3378 std::span<char16_t> utf16_output) noexcept {
3379 #if SIMDUTF_CPLUSPLUS23
3380 if consteval {
3381 return scalar::utf32_to_utf16::convert<endianness::BIG>(
3382 utf32_input.data(), utf32_input.size(), utf16_output.data());
3383 } else
3384 #endif
3385 {
3386 return convert_utf32_to_utf16be(utf32_input.data(), utf32_input.size(),
3387 utf16_output.data());
3388 }
3389}
3390 #endif // SIMDUTF_SPAN
3391
3409simdutf_warn_unused result convert_utf32_to_utf16_with_errors(
3410 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3411 #if SIMDUTF_SPAN
3412simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3413convert_utf32_to_utf16_with_errors(std::span<const char32_t> utf32_input,
3414 std::span<char16_t> utf16_output) noexcept {
3415 #if SIMDUTF_CPLUSPLUS23
3416 if consteval {
3417 return scalar::utf32_to_utf16::convert_with_errors<endianness::NATIVE>(
3418 utf32_input.data(), utf32_input.size(), utf16_output.data());
3419 } else
3420 #endif
3421 {
3422 return convert_utf32_to_utf16_with_errors(
3423 utf32_input.data(), utf32_input.size(), utf16_output.data());
3424 }
3425}
3426 #endif // SIMDUTF_SPAN
3427
3444simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(
3445 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3446 #if SIMDUTF_SPAN
3447simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3448convert_utf32_to_utf16le_with_errors(
3449 std::span<const char32_t> utf32_input,
3450 std::span<char16_t> utf16_output) noexcept {
3451 #if SIMDUTF_CPLUSPLUS23
3452 if consteval {
3453 return scalar::utf32_to_utf16::convert_with_errors<endianness::LITTLE>(
3454 utf32_input.data(), utf32_input.size(), utf16_output.data());
3455 } else
3456 #endif
3457 {
3458 return convert_utf32_to_utf16le_with_errors(
3459 utf32_input.data(), utf32_input.size(), utf16_output.data());
3460 }
3461}
3462 #endif // SIMDUTF_SPAN
3463
3480simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(
3481 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3482 #if SIMDUTF_SPAN
3483simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3484convert_utf32_to_utf16be_with_errors(
3485 std::span<const char32_t> utf32_input,
3486 std::span<char16_t> utf16_output) noexcept {
3487 #if SIMDUTF_CPLUSPLUS23
3488 if consteval {
3489 return scalar::utf32_to_utf16::convert_with_errors<endianness::BIG>(
3490 utf32_input.data(), utf32_input.size(), utf16_output.data());
3491 } else
3492 #endif
3493 {
3494 return convert_utf32_to_utf16be_with_errors(
3495 utf32_input.data(), utf32_input.size(), utf16_output.data());
3496 }
3497}
3498 #endif // SIMDUTF_SPAN
3499
3513simdutf_warn_unused size_t convert_valid_utf32_to_utf16(
3514 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3515 #if SIMDUTF_SPAN
3516simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3517convert_valid_utf32_to_utf16(std::span<const char32_t> valid_utf32_input,
3518 std::span<char16_t> utf16_output) noexcept {
3519
3520 #if SIMDUTF_CPLUSPLUS23
3521 if consteval {
3522 return scalar::utf32_to_utf16::convert_valid<endianness::NATIVE>(
3523 valid_utf32_input.data(), valid_utf32_input.size(),
3524 utf16_output.data());
3525 } else
3526 #endif
3527 {
3528 return convert_valid_utf32_to_utf16(valid_utf32_input.data(),
3529 valid_utf32_input.size(),
3530 utf16_output.data());
3531 }
3532}
3533 #endif // SIMDUTF_SPAN
3534
3548simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(
3549 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3550 #if SIMDUTF_SPAN
3551simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3552convert_valid_utf32_to_utf16le(std::span<const char32_t> valid_utf32_input,
3553 std::span<char16_t> utf16_output) noexcept {
3554 #if SIMDUTF_CPLUSPLUS23
3555 if consteval {
3556 return scalar::utf32_to_utf16::convert_valid<endianness::LITTLE>(
3557 valid_utf32_input.data(), valid_utf32_input.size(),
3558 utf16_output.data());
3559 } else
3560 #endif
3561 {
3562 return convert_valid_utf32_to_utf16le(valid_utf32_input.data(),
3563 valid_utf32_input.size(),
3564 utf16_output.data());
3565 }
3566}
3567 #endif // SIMDUTF_SPAN
3568
3582simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(
3583 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3584 #if SIMDUTF_SPAN
3585simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3586convert_valid_utf32_to_utf16be(std::span<const char32_t> valid_utf32_input,
3587 std::span<char16_t> utf16_output) noexcept {
3588 #if SIMDUTF_CPLUSPLUS23
3589 if consteval {
3590 return scalar::utf32_to_utf16::convert_valid<endianness::BIG>(
3591 valid_utf32_input.data(), valid_utf32_input.size(),
3592 utf16_output.data());
3593 } else
3594 #endif
3595 {
3596 return convert_valid_utf32_to_utf16be(valid_utf32_input.data(),
3597 valid_utf32_input.size(),
3598 utf16_output.data());
3599 }
3600}
3601 #endif // SIMDUTF_SPAN
3602#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
3603
3604#if SIMDUTF_FEATURE_UTF16
// Pointer/length overload; declared here, defined elsewhere.
void change_endianness_utf16(const char16_t *input, size_t length,
                             char16_t *output) noexcept;
  #if SIMDUTF_SPAN
// Span overload. Only utf16_output.data() is used; utf16_output.size() is
// never read. During C++23 constant evaluation the scalar routine runs;
// otherwise the pointer overload is called.
simdutf_really_inline simdutf_constexpr23 void
change_endianness_utf16(std::span<const char16_t> utf16_input,
                        std::span<char16_t> utf16_output) noexcept {
  const char16_t *const src = utf16_input.data();
  const size_t src_len = utf16_input.size();
  char16_t *const dst = utf16_output.data();
    #if SIMDUTF_CPLUSPLUS23
  if consteval {
    return scalar::utf16::change_endianness_utf16(src, src_len, dst);
  } else
    #endif
  {
    return change_endianness_utf16(src, src_len, dst);
  }
}
  #endif // SIMDUTF_SPAN
3636#endif // SIMDUTF_FEATURE_UTF16
3637
3638#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
3650simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input,
3651 size_t length) noexcept;
3652 #if SIMDUTF_SPAN
3653simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3654utf8_length_from_utf32(std::span<const char32_t> valid_utf32_input) noexcept {
3655 #if SIMDUTF_CPLUSPLUS23
3656 if consteval {
3657 return scalar::utf32::utf8_length_from_utf32(valid_utf32_input.data(),
3658 valid_utf32_input.size());
3659 } else
3660 #endif
3661 {
3662 return utf8_length_from_utf32(valid_utf32_input.data(),
3663 valid_utf32_input.size());
3664 }
3665}
3666 #endif // SIMDUTF_SPAN
3667#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
3668
3669#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
3681simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input,
3682 size_t length) noexcept;
3683 #if SIMDUTF_SPAN
3684simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3685utf16_length_from_utf32(std::span<const char32_t> valid_utf32_input) noexcept {
3686 #if SIMDUTF_CPLUSPLUS23
3687 if consteval {
3688 return scalar::utf32::utf16_length_from_utf32(valid_utf32_input.data(),
3689 valid_utf32_input.size());
3690 } else
3691 #endif
3692 {
3693 return utf16_length_from_utf32(valid_utf32_input.data(),
3694 valid_utf32_input.size());
3695 }
3696}
3697 #endif // SIMDUTF_SPAN
3698
3714simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t *input,
3715 size_t length) noexcept;
3716 #if SIMDUTF_SPAN
3717simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3718utf32_length_from_utf16(std::span<const char16_t> valid_utf16_input) noexcept {
3719 #if SIMDUTF_CPLUSPLUS23
3720 if consteval {
3721 return scalar::utf16::utf32_length_from_utf16<endianness::NATIVE>(
3722 valid_utf16_input.data(), valid_utf16_input.size());
3723 } else
3724 #endif
3725 {
3726 return utf32_length_from_utf16(valid_utf16_input.data(),
3727 valid_utf16_input.size());
3728 }
3729}
3730 #endif // SIMDUTF_SPAN
3731
3747simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input,
3748 size_t length) noexcept;
3749 #if SIMDUTF_SPAN
3750simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3751utf32_length_from_utf16le(
3752 std::span<const char16_t> valid_utf16_input) noexcept {
3753 #if SIMDUTF_CPLUSPLUS23
3754 if consteval {
3755 return scalar::utf16::utf32_length_from_utf16<endianness::LITTLE>(
3756 valid_utf16_input.data(), valid_utf16_input.size());
3757 } else
3758 #endif
3759 {
3760 return utf32_length_from_utf16le(valid_utf16_input.data(),
3761 valid_utf16_input.size());
3762 }
3763}
3764 #endif // SIMDUTF_SPAN
3765
3781simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input,
3782 size_t length) noexcept;
3783 #if SIMDUTF_SPAN
3784simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3785utf32_length_from_utf16be(
3786 std::span<const char16_t> valid_utf16_input) noexcept {
3787 #if SIMDUTF_CPLUSPLUS23
3788 if consteval {
3789 return scalar::utf16::utf32_length_from_utf16<endianness::BIG>(
3790 valid_utf16_input.data(), valid_utf16_input.size());
3791 } else
3792 #endif
3793 {
3794 return utf32_length_from_utf16be(valid_utf16_input.data(),
3795 valid_utf16_input.size());
3796 }
3797}
3798 #endif // SIMDUTF_SPAN
3799#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
3800
3801#if SIMDUTF_FEATURE_UTF16
3816simdutf_warn_unused size_t count_utf16(const char16_t *input,
3817 size_t length) noexcept;
3818 #if SIMDUTF_SPAN
3819simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3820count_utf16(std::span<const char16_t> valid_utf16_input) noexcept {
3821 #if SIMDUTF_CPLUSPLUS23
3822 if consteval {
3823 return scalar::utf16::count_code_points<endianness::NATIVE>(
3824 valid_utf16_input.data(), valid_utf16_input.size());
3825 } else
3826 #endif
3827 {
3828 return count_utf16(valid_utf16_input.data(), valid_utf16_input.size());
3829 }
3830}
3831 #endif // SIMDUTF_SPAN
3832
3847simdutf_warn_unused size_t count_utf16le(const char16_t *input,
3848 size_t length) noexcept;
3849 #if SIMDUTF_SPAN
3850simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3851count_utf16le(std::span<const char16_t> valid_utf16_input) noexcept {
3852 #if SIMDUTF_CPLUSPLUS23
3853 if consteval {
3854 return scalar::utf16::count_code_points<endianness::LITTLE>(
3855 valid_utf16_input.data(), valid_utf16_input.size());
3856 } else
3857 #endif
3858 {
3859 return count_utf16le(valid_utf16_input.data(), valid_utf16_input.size());
3860 }
3861}
3862 #endif // SIMDUTF_SPAN
3863
3878simdutf_warn_unused size_t count_utf16be(const char16_t *input,
3879 size_t length) noexcept;
3880 #if SIMDUTF_SPAN
3881simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3882count_utf16be(std::span<const char16_t> valid_utf16_input) noexcept {
3883 #if SIMDUTF_CPLUSPLUS23
3884 if consteval {
3885 return scalar::utf16::count_code_points<endianness::BIG>(
3886 valid_utf16_input.data(), valid_utf16_input.size());
3887 } else
3888 #endif
3889 {
3890 return count_utf16be(valid_utf16_input.data(), valid_utf16_input.size());
3891 }
3892}
3893 #endif // SIMDUTF_SPAN
3894#endif // SIMDUTF_FEATURE_UTF16
3895
3896#if SIMDUTF_FEATURE_UTF8
3909simdutf_warn_unused size_t count_utf8(const char *input,
3910 size_t length) noexcept;
3911 #if SIMDUTF_SPAN
3912simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t count_utf8(
3913 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
3914 #if SIMDUTF_CPLUSPLUS23
3915 if consteval {
3916 return scalar::utf8::count_code_points(valid_utf8_input.data(),
3917 valid_utf8_input.size());
3918 } else
3919 #endif
3920 {
3921 return count_utf8(reinterpret_cast<const char *>(valid_utf8_input.data()),
3922 valid_utf8_input.size());
3923 }
3924}
3925 #endif // SIMDUTF_SPAN
3926
3941simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length);
3942 #if SIMDUTF_SPAN
3943simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3944trim_partial_utf8(
3945 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
3946 #if SIMDUTF_CPLUSPLUS23
3947 if consteval {
3948 return scalar::utf8::trim_partial_utf8(valid_utf8_input.data(),
3949 valid_utf8_input.size());
3950 } else
3951 #endif
3952 {
3953 return trim_partial_utf8(
3954 reinterpret_cast<const char *>(valid_utf8_input.data()),
3955 valid_utf8_input.size());
3956 }
3957}
3958 #endif // SIMDUTF_SPAN
3959#endif // SIMDUTF_FEATURE_UTF8
3960
3961#if SIMDUTF_FEATURE_UTF16
3976simdutf_warn_unused size_t trim_partial_utf16be(const char16_t *input,
3977 size_t length);
3978 #if SIMDUTF_SPAN
3979simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3980trim_partial_utf16be(std::span<const char16_t> valid_utf16_input) noexcept {
3981 #if SIMDUTF_CPLUSPLUS23
3982 if consteval {
3983 return scalar::utf16::trim_partial_utf16<endianness::BIG>(
3984 valid_utf16_input.data(), valid_utf16_input.size());
3985 } else
3986 #endif
3987 {
3988 return trim_partial_utf16be(valid_utf16_input.data(),
3989 valid_utf16_input.size());
3990 }
3991}
3992 #endif // SIMDUTF_SPAN
3993
4008simdutf_warn_unused size_t trim_partial_utf16le(const char16_t *input,
4009 size_t length);
4010 #if SIMDUTF_SPAN
4011simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
4012trim_partial_utf16le(std::span<const char16_t> valid_utf16_input) noexcept {
4013 #if SIMDUTF_CPLUSPLUS23
4014 if consteval {
4015 return scalar::utf16::trim_partial_utf16<endianness::LITTLE>(
4016 valid_utf16_input.data(), valid_utf16_input.size());
4017 } else
4018 #endif
4019 {
4020 return trim_partial_utf16le(valid_utf16_input.data(),
4021 valid_utf16_input.size());
4022 }
4023}
4024 #endif // SIMDUTF_SPAN
4025
4040simdutf_warn_unused size_t trim_partial_utf16(const char16_t *input,
4041 size_t length);
4042 #if SIMDUTF_SPAN
4043simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
4044trim_partial_utf16(std::span<const char16_t> valid_utf16_input) noexcept {
4045 #if SIMDUTF_CPLUSPLUS23
4046 if consteval {
4047 return scalar::utf16::trim_partial_utf16<endianness::NATIVE>(
4048 valid_utf16_input.data(), valid_utf16_input.size());
4049 } else
4050 #endif
4051 {
4052 return trim_partial_utf16(valid_utf16_input.data(),
4053 valid_utf16_input.size());
4054 }
4055}
4056 #endif // SIMDUTF_SPAN
4057#endif // SIMDUTF_FEATURE_UTF16
4058
4059#if SIMDUTF_FEATURE_BASE64 || SIMDUTF_FEATURE_UTF16 || \
4060 SIMDUTF_FEATURE_DETECT_ENCODING
4061 #ifndef SIMDUTF_NEED_TRAILING_ZEROES
4062 #define SIMDUTF_NEED_TRAILING_ZEROES 1
4063 #endif
4064#endif // SIMDUTF_FEATURE_BASE64 || SIMDUTF_FEATURE_UTF16 ||
4065 // SIMDUTF_FEATURE_DETECT_ENCODING
4066
4067#if SIMDUTF_FEATURE_BASE64
// base64_options select the base64 encoding options.
// ASCII spaces are ' ', '\t', '\n', '\r', '\f'.
// Garbage characters are characters that are neither part of the base64
// alphabet nor ASCII spaces.

// Modifier for base64_default and base64_url.
constexpr uint64_t base64_reverse_padding = 2;

enum base64_options : uint64_t {
  // Standard base64 format (with padding).
  base64_default = 0,
  // base64url format (no padding).
  base64_url = 1,
  // Standard base64 format without padding.
  base64_default_no_padding = base64_default | base64_reverse_padding,
  // base64url with padding.
  base64_url_with_padding = base64_url | base64_reverse_padding,
  // Standard base64 format accepting garbage characters; the input stops with
  // the first '=' if any.
  base64_default_accept_garbage = 4,
  // base64url format accepting garbage characters; the input stops with the
  // first '=' if any.
  base64_url_accept_garbage = 5,
  // Standard/base64url hybrid format (only meaningful for decoding!).
  base64_default_or_url = 8,
  // Standard/base64url hybrid format accepting garbage characters (only
  // meaningful for decoding!); the input stops with the first '=' if any.
  base64_default_or_url_accept_garbage = 12,
};
4095
// last_chunk_handling_options specify how the last chunk is handled in base64
// decoding.
// https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64
enum last_chunk_handling_options : uint64_t {
  // Standard base64 format, decode partial final chunk.
  loose = 0,
  // Error when the last chunk is partial, 2 or 3 chars, and unpadded, or
  // non-zero bit padding.
  strict = 1,
  // If the last chunk is partial, ignore it (no error).
  stop_before_partial = 2,
  // Only decode full blocks (4 base64 characters, no padding).
  only_full_chunks = 3
};
4108
4109inline simdutf_constexpr23 bool
4110is_partial(last_chunk_handling_options options) {
4111 return (options == stop_before_partial) || (options == only_full_chunks);
4112}
4113
4114namespace detail {
4115simdutf_warn_unused const char *find(const char *start, const char *end,
4116 char character) noexcept;
4117simdutf_warn_unused const char16_t *
4118find(const char16_t *start, const char16_t *end, char16_t character) noexcept;
4119} // namespace detail
4120
4131simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 const char *
4132find(const char *start, const char *end, char character) noexcept {
4133 #if SIMDUTF_CPLUSPLUS23
4134 if consteval {
4135 for (; start != end; ++start)
4136 if (*start == character)
4137 return start;
4138 return end;
4139 } else
4140 #endif
4141 {
4142 return detail::find(start, end, character);
4143 }
4144}
4145simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 const char16_t *
4146find(const char16_t *start, const char16_t *end, char16_t character) noexcept {
4147 // implementation note: this is repeated instead of a template, to ensure
4148 // the api is still a function and compiles without concepts
4149 #if SIMDUTF_CPLUSPLUS23
4150 if consteval {
4151 for (; start != end; ++start)
4152 if (*start == character)
4153 return start;
4154 return end;
4155 } else
4156 #endif
4157 {
4158 return detail::find(start, end, character);
4159 }
4160}
4161}
4162 // We include base64_tables once.
4163 #include <simdutf/base64_tables.h>
4164 #include <simdutf/scalar/base64.h>
4165
4166namespace simdutf {
4167
4168 #if SIMDUTF_CPLUSPLUS17
4169inline std::string_view to_string(base64_options options) {
4170 switch (options) {
4171 case base64_default:
4172 return "base64_default";
4173 case base64_url:
4174 return "base64_url";
4175 case base64_reverse_padding:
4176 return "base64_reverse_padding";
4177 case base64_url_with_padding:
4178 return "base64_url_with_padding";
4179 case base64_default_accept_garbage:
4180 return "base64_default_accept_garbage";
4181 case base64_url_accept_garbage:
4182 return "base64_url_accept_garbage";
4183 case base64_default_or_url:
4184 return "base64_default_or_url";
4185 case base64_default_or_url_accept_garbage:
4186 return "base64_default_or_url_accept_garbage";
4187 }
4188 return "<unknown>";
4189}
4190 #endif // SIMDUTF_CPLUSPLUS17
4191
4192 #if SIMDUTF_CPLUSPLUS17
4193inline std::string_view to_string(last_chunk_handling_options options) {
4194 switch (options) {
4195 case loose:
4196 return "loose";
4197 case strict:
4198 return "strict";
4199 case stop_before_partial:
4200 return "stop_before_partial";
4201 case only_full_chunks:
4202 return "only_full_chunks";
4203 }
4204 return "<unknown>";
4205}
4206 #endif
4207
// Pointer-and-length overload for 8-bit input. Only declared here; its
// definition is not part of this listing.
4221simdutf_warn_unused size_t
4222maximal_binary_length_from_base64(const char *input, size_t length) noexcept;
4223 #if SIMDUTF_SPAN
// Span overload accepting any type that satisfies
// detail::input_span_of_byte_like. Returns whatever the selected callee
// returns.
4224simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
4225maximal_binary_length_from_base64(
4226 const detail::input_span_of_byte_like auto &input) noexcept {
4227 #if SIMDUTF_CPLUSPLUS23
// Constant evaluation: call the scalar routine, viewing the bytes as
// uint8_t through detail::constexpr_cast_ptr.
4228 if consteval {
4229 return scalar::base64::maximal_binary_length_from_base64(
4230 detail::constexpr_cast_ptr<uint8_t>(input.data()), input.size());
4231 } else
4232 #endif
// Run time (or consteval branch compiled out): forward to the pointer
// overload declared at the top of this block.
4233 {
4234 return maximal_binary_length_from_base64(
4235 reinterpret_cast<const char *>(input.data()), input.size());
4236 }
4237}
4238 #endif // SIMDUTF_SPAN
4239
// Pointer-and-length overload for char16_t input. Declaration only.
4254simdutf_warn_unused size_t maximal_binary_length_from_base64(
4255 const char16_t *input, size_t length) noexcept;
4256 #if SIMDUTF_SPAN
// Span overload for char16_t input; the span is taken by value.
4257simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
4258maximal_binary_length_from_base64(std::span<const char16_t> input) noexcept {
4259 #if SIMDUTF_CPLUSPLUS23
// Constant evaluation: call the scalar routine directly on the span data.
4260 if consteval {
4261 return scalar::base64::maximal_binary_length_from_base64(input.data(),
4262 input.size());
4263 } else
4264 #endif
// Otherwise forward to the pointer overload declared above in this block.
4265 {
4266 return maximal_binary_length_from_base64(input.data(), input.size());
4267 }
4268}
4269 #endif // SIMDUTF_SPAN
4270
// Pointer-and-length overload for 8-bit input. Declaration only.
4285simdutf_warn_unused size_t binary_length_from_base64(const char *input,
4286 size_t length) noexcept;
4287 #if SIMDUTF_SPAN
// Span overload accepting any detail::input_span_of_byte_like type.
4288simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
4289binary_length_from_base64(
4290 const detail::input_span_of_byte_like auto &input) noexcept {
4291 #if SIMDUTF_CPLUSPLUS23
// Constant evaluation: the scalar routine receives input.data() as-is.
// NOTE(review): the sibling maximal_binary_length_from_base64 wrapper goes
// through detail::constexpr_cast_ptr<uint8_t> here — confirm the
// difference is intentional.
4292 if consteval {
4293 return scalar::base64::binary_length_from_base64(input.data(),
4294 input.size());
4295 } else
4296 #endif
// Otherwise forward to the pointer overload after viewing the data as
// const char *.
4297 {
4298 return binary_length_from_base64(
4299 reinterpret_cast<const char *>(input.data()), input.size());
4300 }
4301}
4302 #endif // SIMDUTF_SPAN
4303
// Pointer-and-length overload for char16_t input. Declaration only.
4319simdutf_warn_unused size_t binary_length_from_base64(const char16_t *input,
4320 size_t length) noexcept;
4321 #if SIMDUTF_SPAN
// Span overload for char16_t input; the span is taken by value.
4322simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
4323binary_length_from_base64(std::span<const char16_t> input) noexcept {
4324 #if SIMDUTF_CPLUSPLUS23
// Constant evaluation: call the scalar routine directly.
4325 if consteval {
4326 return scalar::base64::binary_length_from_base64(input.data(),
4327 input.size());
4328 } else
4329 #endif
// Otherwise forward to the pointer overload declared above in this block.
4330 {
4331 return binary_length_from_base64(input.data(), input.size());
4332 }
4333}
4334 #endif // SIMDUTF_SPAN
4335
// Pointer-based overload for 8-bit input. Declaration only.
// Defaults: options = base64_default, last_chunk_options = loose.
4390simdutf_warn_unused result base64_to_binary(
4391 const char *input, size_t length, char *output,
4392 base64_options options = base64_default,
4393 last_chunk_handling_options last_chunk_options = loose) noexcept;
4394 #if SIMDUTF_SPAN
// Span overload: byte-like input span plus byte-like output span.
// Only binary_output.data() is used; binary_output.size() is never read
// here, so the caller is responsible for providing enough room.
4395simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
4396base64_to_binary(
4397 const detail::input_span_of_byte_like auto &input,
4398 detail::output_span_of_byte_like auto &&binary_output,
4399 base64_options options = base64_default,
4400 last_chunk_handling_options last_chunk_options = loose) noexcept {
4401 #if SIMDUTF_CPLUSPLUS23
// Constant evaluation: use the scalar implementation.
4402 if consteval {
4403 return scalar::base64::base64_to_binary_details_impl(
4404 input.data(), input.size(), binary_output.data(), options,
4405 last_chunk_options);
4406 } else
4407 #endif
// Otherwise forward to the pointer overload, viewing both buffers as char.
4408 {
4409 return base64_to_binary(reinterpret_cast<const char *>(input.data()),
4410 input.size(),
4411 reinterpret_cast<char *>(binary_output.data()),
4412 options, last_chunk_options);
4413 }
4414}
4415 #endif // SIMDUTF_SPAN
4416
// Thin inline wrapper: returns
// scalar::base64::base64_length_from_binary(length, options).
// `options` defaults to base64_default.
4423inline simdutf_warn_unused simdutf_constexpr23 size_t base64_length_from_binary(
4424 size_t length, base64_options options = base64_default) noexcept {
4425 return scalar::base64::base64_length_from_binary(length, options);
4426}
4427
// Thin inline wrapper around
// scalar::base64::base64_length_from_binary_with_lines.
// Defaults: options = base64_default, line_length = default_line_length.
// Note the parameter order here is (length, options, line_length).
4437inline simdutf_warn_unused simdutf_constexpr23 size_t
4438base64_length_from_binary_with_lines(
4439 size_t length, base64_options options = base64_default,
4440 size_t line_length = default_line_length) noexcept {
4441 return scalar::base64::base64_length_from_binary_with_lines(length, options,
4442 line_length);
4443}
4444
// Pointer-based overload. Declaration only; unlike the span overload below
// it carries no simdutf_warn_unused.
4466size_t binary_to_base64(const char *input, size_t length, char *output,
4467 base64_options options = base64_default) noexcept;
4468 #if SIMDUTF_SPAN
// Span overload. Despite its name, `binary_output` is the destination
// buffer passed as the callee's output argument. Its size() is not
// consulted here.
4469simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
4470binary_to_base64(const detail::input_span_of_byte_like auto &input,
4471 detail::output_span_of_byte_like auto &&binary_output,
4472 base64_options options = base64_default) noexcept {
4473 #if SIMDUTF_CPLUSPLUS23
// Constant evaluation: scalar encoder; note it takes the destination first.
4474 if consteval {
4475 return scalar::base64::tail_encode_base64(
4476 binary_output.data(), input.data(), input.size(), options);
4477 } else
4478 #endif
// Otherwise forward to the pointer overload, viewing both buffers as char.
4479 {
4480 return binary_to_base64(
4481 reinterpret_cast<const char *>(input.data()), input.size(),
4482 reinterpret_cast<char *>(binary_output.data()), options);
4483 }
4484}
4485 #endif // SIMDUTF_SPAN
4486
// Pointer-based overload. Declaration only.
// Parameter order is (input, length, output, line_length, options);
// line_length defaults to simdutf::default_line_length.
4511size_t
4512binary_to_base64_with_lines(const char *input, size_t length, char *output,
4513 size_t line_length = simdutf::default_line_length,
4514 base64_options options = base64_default) noexcept;
4515 #if SIMDUTF_SPAN
// Span overload. `binary_output` is the destination buffer; its size() is
// not consulted here.
4516simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
4517binary_to_base64_with_lines(
4518 const detail::input_span_of_byte_like auto &input,
4519 detail::output_span_of_byte_like auto &&binary_output,
4520 size_t line_length = simdutf::default_line_length,
4521 base64_options options = base64_default) noexcept {
4522 #if SIMDUTF_CPLUSPLUS23
// Constant evaluation: scalar encoder instantiated with <true>; it takes
// the destination first and (options, line_length) last.
4523 if consteval {
4524 return scalar::base64::tail_encode_base64_impl<true>(
4525 binary_output.data(), input.data(), input.size(), options, line_length);
4526 } else
4527 #endif
// Otherwise forward to the pointer overload.
4528 {
4529 return binary_to_base64_with_lines(
4530 reinterpret_cast<const char *>(input.data()), input.size(),
4531 reinterpret_cast<char *>(binary_output.data()), line_length, options);
4532 }
4533}
4534 #endif // SIMDUTF_SPAN
4535
// Everything in this region exists only when SIMDUTF_ATOMIC_REF is set.
4536 #if SIMDUTF_ATOMIC_REF
// Pointer-based overload. Declaration only.
// NOTE(review): presumably reads the input with atomic accesses (inferred
// from the name and the SIMDUTF_ATOMIC_REF guard) — confirm upstream.
4578size_t
4579atomic_binary_to_base64(const char *input, size_t length, char *output,
4580 base64_options options = base64_default) noexcept;
4581 #if SIMDUTF_SPAN
// Span overload: forwards to the pointer overload, viewing both buffers as
// char. There is no consteval branch and no simdutf_constexpr23 here.
4582simdutf_really_inline simdutf_warn_unused size_t
4583atomic_binary_to_base64(const detail::input_span_of_byte_like auto &input,
4584 detail::output_span_of_byte_like auto &&binary_output,
4585 base64_options options = base64_default) noexcept {
4586 return atomic_binary_to_base64(
4587 reinterpret_cast<const char *>(input.data()), input.size(),
4588 reinterpret_cast<char *>(binary_output.data()), options);
4589}
4590 #endif // SIMDUTF_SPAN
4591 #endif // SIMDUTF_ATOMIC_REF
4592
// Pointer-based overload for char16_t input. Declaration only.
// Defaults: options = base64_default,
// last_chunk_options = last_chunk_handling_options::loose.
4649simdutf_warn_unused result
4650base64_to_binary(const char16_t *input, size_t length, char *output,
4651 base64_options options = base64_default,
4652 last_chunk_handling_options last_chunk_options =
4653 last_chunk_handling_options::loose) noexcept;
4654 #if SIMDUTF_SPAN
// Span overload for char16_t input with a byte-like output span.
// binary_output.size() is never read here.
4655simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
4656base64_to_binary(
4657 std::span<const char16_t> input,
4658 detail::output_span_of_byte_like auto &&binary_output,
4659 base64_options options = base64_default,
4660 last_chunk_handling_options last_chunk_options = loose) noexcept {
4661 #if SIMDUTF_CPLUSPLUS23
// Constant evaluation: use the scalar implementation.
4662 if consteval {
4663 return scalar::base64::base64_to_binary_details_impl(
4664 input.data(), input.size(), binary_output.data(), options,
4665 last_chunk_options);
4666 } else
4667 #endif
// Otherwise forward to the pointer overload; only the output needs a cast.
4668 {
4669 return base64_to_binary(input.data(), input.size(),
4670 reinterpret_cast<char *>(binary_output.data()),
4671 options, last_chunk_options);
4672 }
4673}
4674 #endif // SIMDUTF_SPAN
4675
// Thin wrapper: returns scalar::base64::is_ignorable(input, options) for an
// 8-bit character. `options` defaults to base64_default.
4686simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool
4687base64_ignorable(char input, base64_options options = base64_default) noexcept {
4688 return scalar::base64::is_ignorable(input, options);
4689}
// Same wrapper for a char16_t character.
4690simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool
4691base64_ignorable(char16_t input,
4692 base64_options options = base64_default) noexcept {
4693 return scalar::base64::is_ignorable(input, options);
4694}
4695
// Thin wrapper: returns scalar::base64::is_base64(input, options) for an
// 8-bit character. `options` defaults to base64_default.
4707simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool
4708base64_valid(char input, base64_options options = base64_default) noexcept {
4709 return scalar::base64::is_base64(input, options);
4710}
// Same wrapper for a char16_t character.
4711simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool
4712base64_valid(char16_t input, base64_options options = base64_default) noexcept {
4713 return scalar::base64::is_base64(input, options);
4714}
4715
// Thin wrapper: returns scalar::base64::is_base64_or_padding(input, options)
// for an 8-bit character. `options` defaults to base64_default.
4725simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool
4726base64_valid_or_padding(char input,
4727 base64_options options = base64_default) noexcept {
4728 return scalar::base64::is_base64_or_padding(input, options);
4729}
// Same wrapper for a char16_t character.
4730simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool
4731base64_valid_or_padding(char16_t input,
4732 base64_options options = base64_default) noexcept {
4733 return scalar::base64::is_base64_or_padding(input, options);
4734}
4735
// Pointer-based overload for 8-bit input. Declaration only.
// `outlen` is taken by non-const reference, so it is an in/out parameter.
// NOTE(review): presumably capacity on entry and bytes written on exit —
// confirm against the upstream documentation.
// Defaults: base64_default, last_chunk_handling_options::loose,
// decode_up_to_bad_char = false.
4803simdutf_warn_unused result
4804base64_to_binary_safe(const char *input, size_t length, char *output,
4805 size_t &outlen, base64_options options = base64_default,
4806 last_chunk_handling_options last_chunk_options =
4807 last_chunk_handling_options::loose,
4808 bool decode_up_to_bad_char = false) noexcept;
4809// the span overload has moved to the bottom of the file
4810
// Same declaration for char16_t input.
4811simdutf_warn_unused result
4812base64_to_binary_safe(const char16_t *input, size_t length, char *output,
4813 size_t &outlen, base64_options options = base64_default,
4814 last_chunk_handling_options last_chunk_options =
4815 last_chunk_handling_options::loose,
4816 bool decode_up_to_bad_char = false) noexcept;
4817 // span overload moved to bottom of file
4818
4819 #if SIMDUTF_ATOMIC_REF
// Pointer-based overloads (8-bit and char16_t input). Declarations only;
// both sit inside the SIMDUTF_ATOMIC_REF region.
// `outlen` is an in/out parameter (non-const reference).
// The two declarations spell the same default differently
// (last_chunk_handling_options::loose vs. loose).
4859simdutf_warn_unused result atomic_base64_to_binary_safe(
4860 const char *input, size_t length, char *output, size_t &outlen,
4861 base64_options options = base64_default,
4862 last_chunk_handling_options last_chunk_options =
4863 last_chunk_handling_options::loose,
4864 bool decode_up_to_bad_char = false) noexcept;
4865simdutf_warn_unused result atomic_base64_to_binary_safe(
4866 const char16_t *input, size_t length, char *output, size_t &outlen,
4867 base64_options options = base64_default,
4868 last_chunk_handling_options last_chunk_options = loose,
4869 bool decode_up_to_bad_char = false) noexcept;
4870 #if SIMDUTF_SPAN
// Span overload for byte-like input. Note the naming: `binary_input` holds
// the data handed to the callee as its input, `output` is the destination.
// Returns {result, outlen} as a tuple instead of using an out-parameter.
4875simdutf_really_inline simdutf_warn_unused std::tuple<result, std::size_t>
4876atomic_base64_to_binary_safe(
4877 const detail::input_span_of_byte_like auto &binary_input,
4878 detail::output_span_of_byte_like auto &&output,
4879 base64_options options = base64_default,
4880 last_chunk_handling_options last_chunk_options =
4881 last_chunk_handling_options::loose,
4882 bool decode_up_to_bad_char = false) noexcept {
// Seed the in/out length with the destination span's size.
4883 size_t outlen = output.size();
4884 auto ret = atomic_base64_to_binary_safe(
4885 reinterpret_cast<const char *>(binary_input.data()), binary_input.size(),
4886 reinterpret_cast<char *>(output.data()), outlen, options,
4887 last_chunk_options, decode_up_to_bad_char);
// outlen now carries whatever the pointer overload stored into it.
4888 return {ret, outlen};
4889}
4894simdutf_warn_unused std::tuple<result, std::size_t>
4895atomic_base64_to_binary_safe(
4896 std::span<const char16_t> base64_input,
4897 detail::output_span_of_byte_like auto &&binary_output,
4898 base64_options options = base64_default,
4899 last_chunk_handling_options last_chunk_options = loose,
4900 bool decode_up_to_bad_char = false) noexcept {
4901 size_t outlen = binary_output.size();
4902 auto ret = atomic_base64_to_binary_safe(
4903 base64_input.data(), base64_input.size(),
4904 reinterpret_cast<char *>(binary_output.data()), outlen, options,
4905 last_chunk_options, decode_up_to_bad_char);
4906 return {ret, outlen};
4907}
4908 #endif // SIMDUTF_SPAN
4909 #endif // SIMDUTF_ATOMIC_REF
4910
4911#endif // SIMDUTF_FEATURE_BASE64
4912
4921public:
// Returns a std::string built from the stored _name pointer.
4931 virtual std::string name() const { return std::string(_name); }
4932
// Returns a std::string built from the stored _description pointer.
4942 virtual std::string description() const { return std::string(_description); }
4943
4954
4955#if SIMDUTF_FEATURE_DETECT_ENCODING
// Virtual but not pure, so a default definition must exist elsewhere.
// NOTE(review): presumably picks one encoding for input[0..length) —
// confirm against the upstream documentation.
4962 virtual encoding_type autodetect_encoding(const char *input,
4963 size_t length) const noexcept;
4964
// Pure virtual; returns an int.
// NOTE(review): presumably a bitmask of candidate encodings — confirm.
4971 virtual int detect_encodings(const char *input,
4972 size_t length) const noexcept = 0;
4973#endif // SIMDUTF_FEATURE_DETECT_ENCODING
4974
// Returns the _required_instruction_sets value stored by the constructor.
4982 virtual uint32_t required_instruction_sets() const {
4983 return _required_instruction_sets;
4984 }
4985
// Also compiled for encoding detection, not only for the UTF-8 feature.
4986#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
// Pure virtual; bool verdict for buf[0..len).
4996 simdutf_warn_unused virtual bool validate_utf8(const char *buf,
4997 size_t len) const noexcept = 0;
4998#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
4999
5000#if SIMDUTF_FEATURE_UTF8
// Pure virtual; same inputs but returns a `result` object instead of bool.
// NOTE(review): the meaning of the result fields is defined in
// simdutf/error.h, not visible here.
5013 simdutf_warn_unused virtual result
5014 validate_utf8_with_errors(const char *buf, size_t len) const noexcept = 0;
5015#endif // SIMDUTF_FEATURE_UTF8
5016
5017#if SIMDUTF_FEATURE_ASCII
// Pure virtual; bool verdict for the 8-bit buffer buf[0..len).
5027 simdutf_warn_unused virtual bool
5028 validate_ascii(const char *buf, size_t len) const noexcept = 0;
5029
// Pure virtual; same inputs, returns a `result` object.
5042 simdutf_warn_unused virtual result
5043 validate_ascii_with_errors(const char *buf, size_t len) const noexcept = 0;
5044
5045#endif // SIMDUTF_FEATURE_ASCII
5046
5047#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
// Pure virtual; bool verdict for a char16_t buffer.
// NOTE(review): presumably checks that big-endian UTF-16 input contains
// only ASCII code points — inferred from the name; confirm.
5059 simdutf_warn_unused virtual bool
5060 validate_utf16be_as_ascii(const char16_t *buf, size_t len) const noexcept = 0;
5061
// Little-endian counterpart of the declaration above.
5073 simdutf_warn_unused virtual bool
5074 validate_utf16le_as_ascii(const char16_t *buf, size_t len) const noexcept = 0;
5075#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
5076
// validate_utf16le is also needed by encoding detection, hence the wider
// guard than the rest of the UTF-16 validators.
5077#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
// Pure virtual; bool verdict for buf[0..len), len counted in char16_t units.
5092 simdutf_warn_unused virtual bool
5093 validate_utf16le(const char16_t *buf, size_t len) const noexcept = 0;
5094#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
5095
5096#if SIMDUTF_FEATURE_UTF16
// Pure virtual; big-endian counterpart.
5111 simdutf_warn_unused virtual bool
5112 validate_utf16be(const char16_t *buf, size_t len) const noexcept = 0;
5113
// Pure virtual; variants returning a `result` object instead of bool.
5130 simdutf_warn_unused virtual result
5131 validate_utf16le_with_errors(const char16_t *buf,
5132 size_t len) const noexcept = 0;
5133
5150 simdutf_warn_unused virtual result
5151 validate_utf16be_with_errors(const char16_t *buf,
5152 size_t len) const noexcept = 0;
// Pure virtual; write to `output`, return nothing and are not marked
// simdutf_warn_unused.
// NOTE(review): presumably copy `len` units while repairing ill-formed
// sequences — inferred from the name; confirm.
5165 virtual void to_well_formed_utf16le(const char16_t *input, size_t len,
5166 char16_t *output) const noexcept = 0;
5179 virtual void to_well_formed_utf16be(const char16_t *input, size_t len,
5180 char16_t *output) const noexcept = 0;
5181#endif // SIMDUTF_FEATURE_UTF16
5182
// Also compiled for encoding detection, not only for the UTF-32 feature.
5183#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
// Pure virtual; bool verdict for buf[0..len), len counted in char32_t units.
5196 simdutf_warn_unused virtual bool
5197 validate_utf32(const char32_t *buf, size_t len) const noexcept = 0;
5198#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
5199
5200#if SIMDUTF_FEATURE_UTF32
// Pure virtual; same inputs, returns a `result` object.
5216 simdutf_warn_unused virtual result
5217 validate_utf32_with_errors(const char32_t *buf,
5218 size_t len) const noexcept = 0;
5219#endif // SIMDUTF_FEATURE_UTF32
5220
// Latin-1 source conversions. All are pure virtual, take (input, length,
// output pointer) and return size_t.
// NOTE(review): the return value is presumably the number of output units
// written, and the caller presumably must size the output buffer — neither
// is visible here; confirm upstream.
5221#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
5232 simdutf_warn_unused virtual size_t
5233 convert_latin1_to_utf8(const char *input, size_t length,
5234 char *utf8_output) const noexcept = 0;
5235#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
5236
5237#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
// Output is char16_t; one declaration per byte order.
5248 simdutf_warn_unused virtual size_t
5249 convert_latin1_to_utf16le(const char *input, size_t length,
5250 char16_t *utf16_output) const noexcept = 0;
5251
5262 simdutf_warn_unused virtual size_t
5263 convert_latin1_to_utf16be(const char *input, size_t length,
5264 char16_t *utf16_output) const noexcept = 0;
5265#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
5266
5267#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
// Output is char32_t.
5278 simdutf_warn_unused virtual size_t
5279 convert_latin1_to_utf32(const char *input, size_t length,
5280 char32_t *utf32_buffer) const noexcept = 0;
5281#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
5282
// UTF-8 -> Latin-1 conversions; all pure virtual.
5283#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
// Returns size_t.
5296 simdutf_warn_unused virtual size_t
5297 convert_utf8_to_latin1(const char *input, size_t length,
5298 char *latin1_output) const noexcept = 0;
5299
// Returns a `result` object rather than a bare count.
5316 simdutf_warn_unused virtual result
5317 convert_utf8_to_latin1_with_errors(const char *input, size_t length,
5318 char *latin1_output) const noexcept = 0;
5319
// NOTE(review): the "valid" prefix suggests the input is assumed to be
// valid already and is not re-checked — confirm upstream.
5339 simdutf_warn_unused virtual size_t
5340 convert_valid_utf8_to_latin1(const char *input, size_t length,
5341 char *latin1_output) const noexcept = 0;
5342#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
5343
// UTF-8 -> UTF-16 conversions; all pure virtual, one per byte order.
5344#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// Return size_t.
5357 simdutf_warn_unused virtual size_t
5358 convert_utf8_to_utf16le(const char *input, size_t length,
5359 char16_t *utf16_output) const noexcept = 0;
5360
5373 simdutf_warn_unused virtual size_t
5374 convert_utf8_to_utf16be(const char *input, size_t length,
5375 char16_t *utf16_output) const noexcept = 0;
5376
// Return a `result` object rather than a bare count.
5392 simdutf_warn_unused virtual result convert_utf8_to_utf16le_with_errors(
5393 const char *input, size_t length,
5394 char16_t *utf16_output) const noexcept = 0;
5395
5411 simdutf_warn_unused virtual result convert_utf8_to_utf16be_with_errors(
5412 const char *input, size_t length,
5413 char16_t *utf16_output) const noexcept = 0;
// NOTE(review): the next two lines are the tails of two declarations whose
// opening lines (return type and name) are missing from this listing.
// Only the (const char16_t *input, size_t length) parameter list is
// visible; restore the full declarations from upstream.
5434 const char16_t *input, size_t length) const noexcept = 0;
5435
5456 const char16_t *input, size_t length) const noexcept = 0;
5457
5458#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5459
// UTF-8 -> UTF-32; both pure virtual.
5460#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
// Returns size_t.
5473 simdutf_warn_unused virtual size_t
5474 convert_utf8_to_utf32(const char *input, size_t length,
5475 char32_t *utf32_output) const noexcept = 0;
5476
// Returns a `result` object.
5491 simdutf_warn_unused virtual result
5492 convert_utf8_to_utf32_with_errors(const char *input, size_t length,
5493 char32_t *utf32_output) const noexcept = 0;
5494#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5495
// "valid" variants: pure virtual, return size_t.
// NOTE(review): presumably skip validation and require well-formed input —
// inferred from the name; confirm.
5496#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5507 simdutf_warn_unused virtual size_t
5508 convert_valid_utf8_to_utf16le(const char *input, size_t length,
5509 char16_t *utf16_buffer) const noexcept = 0;
5510
5521 simdutf_warn_unused virtual size_t
5522 convert_valid_utf8_to_utf16be(const char *input, size_t length,
5523 char16_t *utf16_buffer) const noexcept = 0;
5524#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5525
5526#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5537 simdutf_warn_unused virtual size_t
5538 convert_valid_utf8_to_utf32(const char *input, size_t length,
5539 char32_t *utf32_buffer) const noexcept = 0;
5540#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5541
// Length queries over UTF-8 input; pure virtual, return size_t, write
// nothing (no output pointer).
// NOTE(review): presumably the number of char16_t / char32_t units a
// conversion of the input would produce — confirm upstream.
5542#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5555 simdutf_warn_unused virtual size_t
5556 utf16_length_from_utf8(const char *input, size_t length) const noexcept = 0;
5557#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5558
5559#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5574 simdutf_warn_unused virtual size_t
5575 utf32_length_from_utf8(const char *input, size_t length) const noexcept = 0;
5576#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5577
// UTF-16 -> Latin-1 conversions; all pure virtual, one per byte order.
// `length` accompanies a char16_t pointer.
5578#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
// Return size_t.
5595 simdutf_warn_unused virtual size_t
5596 convert_utf16le_to_latin1(const char16_t *input, size_t length,
5597 char *latin1_buffer) const noexcept = 0;
5598
5615 simdutf_warn_unused virtual size_t
5616 convert_utf16be_to_latin1(const char16_t *input, size_t length,
5617 char *latin1_buffer) const noexcept = 0;
5618
// Return a `result` object.
5638 simdutf_warn_unused virtual result
5639 convert_utf16le_to_latin1_with_errors(const char16_t *input, size_t length,
5640 char *latin1_buffer) const noexcept = 0;
5641
5661 simdutf_warn_unused virtual result
5662 convert_utf16be_to_latin1_with_errors(const char16_t *input, size_t length,
5663 char *latin1_buffer) const noexcept = 0;
5664
// "valid" variants; NOTE(review): presumably assume well-formed input.
5685 simdutf_warn_unused virtual size_t
5686 convert_valid_utf16le_to_latin1(const char16_t *input, size_t length,
5687 char *latin1_buffer) const noexcept = 0;
5688
5709 simdutf_warn_unused virtual size_t
5710 convert_valid_utf16be_to_latin1(const char16_t *input, size_t length,
5711 char *latin1_buffer) const noexcept = 0;
5712#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
5713
// UTF-16 -> UTF-8 conversions; all pure virtual, one per byte order.
5714#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// Return size_t.
5730 simdutf_warn_unused virtual size_t
5731 convert_utf16le_to_utf8(const char16_t *input, size_t length,
5732 char *utf8_buffer) const noexcept = 0;
5733
5749 simdutf_warn_unused virtual size_t
5750 convert_utf16be_to_utf8(const char16_t *input, size_t length,
5751 char *utf8_buffer) const noexcept = 0;
5752
// Return a `result` object.
5771 simdutf_warn_unused virtual result
5772 convert_utf16le_to_utf8_with_errors(const char16_t *input, size_t length,
5773 char *utf8_buffer) const noexcept = 0;
5774
5793 simdutf_warn_unused virtual result
5794 convert_utf16be_to_utf8_with_errors(const char16_t *input, size_t length,
5795 char *utf8_buffer) const noexcept = 0;
5796
// "with_replacement" variants return size_t.
// NOTE(review): presumably substitute a replacement character for
// ill-formed input instead of failing — inferred from the name; confirm.
5812 simdutf_warn_unused virtual size_t convert_utf16le_to_utf8_with_replacement(
5813 const char16_t *input, size_t length,
5814 char *utf8_buffer) const noexcept = 0;
5815
5831 simdutf_warn_unused virtual size_t convert_utf16be_to_utf8_with_replacement(
5832 const char16_t *input, size_t length,
5833 char *utf8_buffer) const noexcept = 0;
5834
// "valid" variants; NOTE(review): presumably assume well-formed input.
5849 simdutf_warn_unused virtual size_t
5850 convert_valid_utf16le_to_utf8(const char16_t *input, size_t length,
5851 char *utf8_buffer) const noexcept = 0;
5852
5867 simdutf_warn_unused virtual size_t
5868 convert_valid_utf16be_to_utf8(const char16_t *input, size_t length,
5869 char *utf8_buffer) const noexcept = 0;
5870#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5871
// UTF-16 -> UTF-32 conversions; all pure virtual, one per byte order.
5872#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
// Return size_t.
5888 simdutf_warn_unused virtual size_t
5889 convert_utf16le_to_utf32(const char16_t *input, size_t length,
5890 char32_t *utf32_buffer) const noexcept = 0;
5891
5907 simdutf_warn_unused virtual size_t
5908 convert_utf16be_to_utf32(const char16_t *input, size_t length,
5909 char32_t *utf32_buffer) const noexcept = 0;
5910
// NOTE(review): the next two fragments are the tails of declarations whose
// opening lines are missing from this listing. By position they are
// presumably the *_with_errors variants — restore from upstream.
5930 const char16_t *input, size_t length,
5931 char32_t *utf32_buffer) const noexcept = 0;
5932
5952 const char16_t *input, size_t length,
5953 char32_t *utf32_buffer) const noexcept = 0;
5954
// "valid" variants; NOTE(review): presumably assume well-formed input.
5969 simdutf_warn_unused virtual size_t
5970 convert_valid_utf16le_to_utf32(const char16_t *input, size_t length,
5971 char32_t *utf32_buffer) const noexcept = 0;
5972
5987 simdutf_warn_unused virtual size_t
5988 convert_valid_utf16be_to_utf32(const char16_t *input, size_t length,
5989 char32_t *utf32_buffer) const noexcept = 0;
5990#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
5991
// Length queries over UTF-16 input; pure virtual, return size_t, no output
// buffer.
// NOTE(review): presumably the UTF-8 byte count a conversion would
// produce — confirm upstream.
5992#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
6007 simdutf_warn_unused virtual size_t
6008 utf8_length_from_utf16le(const char16_t *input,
6009 size_t length) const noexcept = 0;
6010
6025 simdutf_warn_unused virtual size_t
6026 utf8_length_from_utf16be(const char16_t *input,
6027 size_t length) const noexcept = 0;
6028#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
6029
// UTF-32 -> Latin-1 conversions; all pure virtual. The same feature guard
// is opened twice in a row.
6030#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
// Returns size_t.
6047 simdutf_warn_unused virtual size_t
6048 convert_utf32_to_latin1(const char32_t *input, size_t length,
6049 char *latin1_buffer) const noexcept = 0;
6050#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6051
6052#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
// Returns a `result` object.
6072 simdutf_warn_unused virtual result
6073 convert_utf32_to_latin1_with_errors(const char32_t *input, size_t length,
6074 char *latin1_buffer) const noexcept = 0;
6075
// "valid" variant; NOTE(review): presumably assumes well-formed input.
6096 simdutf_warn_unused virtual size_t
6097 convert_valid_utf32_to_latin1(const char32_t *input, size_t length,
6098 char *latin1_buffer) const noexcept = 0;
6099#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6100
// UTF-32 -> UTF-8 conversions; all pure virtual.
6101#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
// Returns size_t.
6117 simdutf_warn_unused virtual size_t
6118 convert_utf32_to_utf8(const char32_t *input, size_t length,
6119 char *utf8_buffer) const noexcept = 0;
6120
// Returns a `result` object.
6138 simdutf_warn_unused virtual result
6139 convert_utf32_to_utf8_with_errors(const char32_t *input, size_t length,
6140 char *utf8_buffer) const noexcept = 0;
6141
// "valid" variant; NOTE(review): presumably assumes well-formed input.
6156 simdutf_warn_unused virtual size_t
6157 convert_valid_utf32_to_utf8(const char32_t *input, size_t length,
6158 char *utf8_buffer) const noexcept = 0;
6159#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
6160
6161#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
// Virtual with a default body: returns `length` unchanged. No input
// pointer is needed because the answer depends only on the length.
6172 simdutf_warn_unused virtual size_t
6173 utf16_length_from_latin1(size_t length) const noexcept {
6174 return length;
6175 }
6176#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
6177
// UTF-32 -> UTF-16 conversions; all pure virtual, one per byte order.
6178#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
// Return size_t.
6194 simdutf_warn_unused virtual size_t
6195 convert_utf32_to_utf16le(const char32_t *input, size_t length,
6196 char16_t *utf16_buffer) const noexcept = 0;
6197
6213 simdutf_warn_unused virtual size_t
6214 convert_utf32_to_utf16be(const char32_t *input, size_t length,
6215 char16_t *utf16_buffer) const noexcept = 0;
6216
// NOTE(review): the next two fragments are the tails of declarations whose
// opening lines are missing from this listing. By position they are
// presumably the *_with_errors variants — restore from upstream.
6236 const char32_t *input, size_t length,
6237 char16_t *utf16_buffer) const noexcept = 0;
6238
6258 const char32_t *input, size_t length,
6259 char16_t *utf16_buffer) const noexcept = 0;
6260
// "valid" variants; NOTE(review): presumably assume well-formed input.
6275 simdutf_warn_unused virtual size_t
6276 convert_valid_utf32_to_utf16le(const char32_t *input, size_t length,
6277 char16_t *utf16_buffer) const noexcept = 0;
6278
6293 simdutf_warn_unused virtual size_t
6294 convert_valid_utf32_to_utf16be(const char32_t *input, size_t length,
6295 char16_t *utf16_buffer) const noexcept = 0;
6296#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
6297
6298#if SIMDUTF_FEATURE_UTF16
// Pure virtual; writes to `output` and returns nothing.
// NOTE(review): presumably byte-swaps `length` char16_t units from input
// into output — inferred from the name; confirm.
6313 virtual void change_endianness_utf16(const char16_t *input, size_t length,
6314 char16_t *output) const noexcept = 0;
6315#endif // SIMDUTF_FEATURE_UTF16
6316
// Length queries. Those taking an input pointer are pure virtual; those
// taking only `length` have a default body that returns it unchanged.
6317#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
// Pure virtual: needs to inspect the input.
6326 simdutf_warn_unused virtual size_t
6327 utf8_length_from_latin1(const char *input, size_t length) const noexcept = 0;
6328#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
6329
6330#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
// Pure virtual: needs to inspect the input.
6343 simdutf_warn_unused virtual size_t
6344 utf8_length_from_utf32(const char32_t *input,
6345 size_t length) const noexcept = 0;
6346#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
6347
6348#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
// Default body: returns `length` unchanged.
6360 simdutf_warn_unused virtual size_t
6361 latin1_length_from_utf32(size_t length) const noexcept {
6362 return length;
6363 }
6364#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6365
6366#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
// Pure virtual: needs to inspect the input.
6378 simdutf_warn_unused virtual size_t
6379 latin1_length_from_utf8(const char *input, size_t length) const noexcept = 0;
6380#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
6381
6382#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
// Default body: returns `length` unchanged.
6398 simdutf_warn_unused virtual size_t
6399 latin1_length_from_utf16(size_t length) const noexcept {
6400 return length;
6401 }
6402#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
6403
6404#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
// Pure virtual: needs to inspect the input.
6417 simdutf_warn_unused virtual size_t
6418 utf16_length_from_utf32(const char32_t *input,
6419 size_t length) const noexcept = 0;
6420#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
6421
6422#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
// Default body: returns `length` unchanged.
6431 simdutf_warn_unused virtual size_t
6432 utf32_length_from_latin1(size_t length) const noexcept {
6433 return length;
6434 }
6435#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6436
// More read-only queries; all pure virtual, return size_t, no output
// buffer.
6437#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
// NOTE(review): presumably the number of char32_t units a conversion of
// the UTF-16 input would produce — confirm upstream.
6455 simdutf_warn_unused virtual size_t
6456 utf32_length_from_utf16le(const char16_t *input,
6457 size_t length) const noexcept = 0;
6458
6476 simdutf_warn_unused virtual size_t
6477 utf32_length_from_utf16be(const char16_t *input,
6478 size_t length) const noexcept = 0;
6479#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
6480
6481#if SIMDUTF_FEATURE_UTF16
// NOTE(review): presumably count code points in the UTF-16 input — inferred
// from the name; confirm.
6497 simdutf_warn_unused virtual size_t
6498 count_utf16le(const char16_t *input, size_t length) const noexcept = 0;
6499
6515 simdutf_warn_unused virtual size_t
6516 count_utf16be(const char16_t *input, size_t length) const noexcept = 0;
6517#endif // SIMDUTF_FEATURE_UTF16
6518
6519#if SIMDUTF_FEATURE_UTF8
// NOTE(review): presumably counts code points in the UTF-8 input — confirm.
6532 simdutf_warn_unused virtual size_t
6533 count_utf8(const char *input, size_t length) const noexcept = 0;
6534#endif // SIMDUTF_FEATURE_UTF8
6535
// Base64 members. Virtuality differs per member, so check each one:
// some are non-virtual, some virtual with a default definition elsewhere,
// and the rest pure virtual.
6536#if SIMDUTF_FEATURE_BASE64
// Non-virtual (8-bit and char16_t input); defined elsewhere.
6550 simdutf_warn_unused size_t maximal_binary_length_from_base64(
6551 const char *input, size_t length) const noexcept;
6552
6567 simdutf_warn_unused size_t maximal_binary_length_from_base64(
6568 const char16_t *input, size_t length) const noexcept;
6569
// Virtual but not pure: a default definition exists elsewhere and may be
// overridden.
6582 simdutf_warn_unused virtual size_t
6583 binary_length_from_base64(const char *input, size_t length) const noexcept;
6584
6598 simdutf_warn_unused virtual size_t
6599 binary_length_from_base64(const char16_t *input,
6600 size_t length) const noexcept;
6601
// Pure virtual decoders for 8-bit input. Defaults: base64_default and
// last_chunk_handling_options::loose. Note these are default arguments on
// virtual functions, so the defaults used depend on the static type of
// the object expression at the call site.
6634 simdutf_warn_unused virtual result
6635 base64_to_binary(const char *input, size_t length, char *output,
6636 base64_options options = base64_default,
6637 last_chunk_handling_options last_chunk_options =
6638 last_chunk_handling_options::loose) const noexcept = 0;
6639
// Same inputs, but returns a full_result instead of result.
6671 simdutf_warn_unused virtual full_result base64_to_binary_details(
6672 const char *input, size_t length, char *output,
6673 base64_options options = base64_default,
6674 last_chunk_handling_options last_chunk_options =
6675 last_chunk_handling_options::loose) const noexcept = 0;
6676
// Pure virtual decoders for char16_t input.
6710 simdutf_warn_unused virtual result
6711 base64_to_binary(const char16_t *input, size_t length, char *output,
6712 base64_options options = base64_default,
6713 last_chunk_handling_options last_chunk_options =
6714 last_chunk_handling_options::loose) const noexcept = 0;
6715
6747 simdutf_warn_unused virtual full_result base64_to_binary_details(
6748 const char16_t *input, size_t length, char *output,
6749 base64_options options = base64_default,
6750 last_chunk_handling_options last_chunk_options =
6751 last_chunk_handling_options::loose) const noexcept = 0;
6752
// Non-virtual; defined elsewhere.
6761 simdutf_warn_unused size_t base64_length_from_binary(
6762 size_t length, base64_options options = base64_default) const noexcept;
6763
// Pure virtual encoder; not marked simdutf_warn_unused.
6785 virtual size_t
6786 binary_to_base64(const char *input, size_t length, char *output,
6787 base64_options options = base64_default) const noexcept = 0;
6788
// NOTE(review): the opening line(s) of the next declaration are missing
// from this listing. Judging by its parameters (line_length, options) it
// is presumably binary_to_base64_with_lines — restore from upstream.
6815 const char *input, size_t length, char *output,
6816 size_t line_length = simdutf::default_line_length,
6817 base64_options options = base64_default) const noexcept = 0;
6818
// Pure virtual character search over [start, end), for char and char16_t.
// NOTE(review): presumably returns a pointer to the first match, or `end`
// when there is none — confirm upstream.
6829 virtual const char *find(const char *start, const char *end,
6830 char character) const noexcept = 0;
6831 virtual const char16_t *find(const char16_t *start, const char16_t *end,
6832 char16_t character) const noexcept = 0;
6833#endif // SIMDUTF_FEATURE_BASE64
6834
6835#ifdef SIMDUTF_INTERNAL_TESTS
6836 // This method is exported only in developer mode, its purpose
6837 // is to expose some internal test procedures from the given
6838 // implementation and then use them through our standard test
6839 // framework.
6840 //
6841 // Regular users should not use it, the tests of the public
6842 // API are enough.
6843
6844 struct TestProcedure {
6845 // display name
6846 std::string name;
6847
6848 // test entry point, invoked with the implementation under test.
// NOTE(review): the pointer type returns void, so pass/fail is not
// reported through a return value — confirm how a failing test signals
// its failure.
6849 void (*procedure)(const implementation &);
6850 };
6851
// Virtual but not pure: a default definition exists elsewhere.
6852 virtual std::vector<TestProcedure> internal_tests() const;
6853#endif
6854
6855protected:
// Protected constructor: only derived classes can create an implementation.
// Stores the two pointers as given (no copy of the strings is made), so
// the pointed-to text must outlive the object.
6858 simdutf_really_inline implementation(const char *name,
6859 const char *description,
6860 uint32_t required_instruction_sets)
6861 : _name(name), _description(description),
6862 _required_instruction_sets(required_instruction_sets) {}
6863
6864protected:
// Protected, non-virtual destructor: objects cannot be destroyed through a
// pointer to this base class.
6865 ~implementation() = default;
6866
6867private:
// Text returned (as std::string) by name().
6871 const char *_name;
6872
// Text returned (as std::string) by description().
6876 const char *_description;
6877
// Value returned by required_instruction_sets(); fixed at construction.
6881 const uint32_t _required_instruction_sets;
6882};
6883
6885namespace internal {
6886
6890class available_implementation_list {
6891public:
6893 simdutf_really_inline available_implementation_list() {}
6895 size_t size() const noexcept;
6897 const implementation *const *begin() const noexcept;
6899 const implementation *const *end() const noexcept;
6900
6914 const implementation *operator[](const std::string &name) const noexcept {
6915 for (const implementation *impl : *this) {
6916 if (impl->name() == name) {
6917 return impl;
6918 }
6919 }
6920 return nullptr;
6921 }
6922
6936 const implementation *detect_best_supported() const noexcept;
6937};
6938
// Pointer holder that behaves like a T*. The pointer is stored in a
// std::atomic<T *> unless SIMDUTF_NO_THREADS is defined, in which case a
// plain T* is used. Every read goes through the private get() helper, so
// the two configurations share a single set of accessors and differ only
// in how the pointer is stored and loaded.
template <typename T> class atomic_ptr {
public:
  // Intentionally implicit: allows construction directly from a raw
  // pointer.
  atomic_ptr(T *_ptr) : ptr{_ptr} {}

  // Read-only access.
  operator const T *() const { return get(); }
  const T &operator*() const { return *get(); }
  const T *operator->() const { return get(); }

  // Mutable access.
  operator T *() { return get(); }
  T &operator*() { return *get(); }
  T *operator->() { return get(); }

  // Replaces the stored pointer (an atomic store in the threaded build).
  atomic_ptr &operator=(T *_ptr) {
    ptr = _ptr;
    return *this;
  }

private:
  // Single read path for the stored pointer.
  T *get() const {
#if defined(SIMDUTF_NO_THREADS)
    return ptr;
#else
    return ptr.load();
#endif
  }

#if defined(SIMDUTF_NO_THREADS)
  T *ptr;
#else
  std::atomic<T *> ptr;
#endif
};
6978
// Forward declaration only; no definition appears in this part of the header.
6979class detect_best_supported_implementation_on_first_use;
6980
6981} // namespace internal
6982
/**
 * Returns a read-only reference to the list of available implementations.
 */
6986extern SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list &
6987get_available_implementations();
6988
/**
 * Returns a reference to the pointer wrapper that holds the active
 * implementation. The reference is non-const, so a caller can assign a
 * different `const implementation *` through it.
 */
6995extern SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation> &
6996get_active_implementation();
6997
6998} // namespace simdutf
6999
7000#if SIMDUTF_FEATURE_BASE64
7001 // this header is not part of the public api
7002 #include <simdutf/base64_implementation.h>
7003
7004namespace simdutf {
7005 #if SIMDUTF_SPAN
7010simdutf_really_inline
7011 simdutf_constexpr23 simdutf_warn_unused std::tuple<result, std::size_t>
7012 base64_to_binary_safe(
7013 const detail::input_span_of_byte_like auto &input,
7014 detail::output_span_of_byte_like auto &&binary_output,
7015 base64_options options = base64_default,
7016 last_chunk_handling_options last_chunk_options = loose,
7017 bool decode_up_to_bad_char = false) noexcept {
7018 size_t outlen = binary_output.size();
7019 #if SIMDUTF_CPLUSPLUS23
7020 if consteval {
7021 using CInput = std::decay_t<decltype(*input.data())>;
7022 static_assert(std::is_same_v<CInput, char>,
7023 "sorry, the constexpr implementation is for now limited to "
7024 "input of type char");
7025 using COutput = std::decay_t<decltype(*binary_output.data())>;
7026 static_assert(std::is_same_v<COutput, char>,
7027 "sorry, the constexpr implementation is for now limited to "
7028 "output of type char");
7029 auto r = base64_to_binary_safe_impl(
7030 input.data(), input.size(), binary_output.data(), outlen, options,
7031 last_chunk_options, decode_up_to_bad_char);
7032 return {r, outlen};
7033 } else
7034 #endif
7035 {
7036 auto r = base64_to_binary_safe_impl<char>(
7037 reinterpret_cast<const char *>(input.data()), input.size(),
7038 reinterpret_cast<char *>(binary_output.data()), outlen, options,
7039 last_chunk_options, decode_up_to_bad_char);
7040 return {r, outlen};
7041 }
7042}
7043
7044 #if SIMDUTF_SPAN
7049simdutf_really_inline
7050 simdutf_warn_unused simdutf_constexpr23 std::tuple<result, std::size_t>
7051 base64_to_binary_safe(
7052 std::span<const char16_t> input,
7053 detail::output_span_of_byte_like auto &&binary_output,
7054 base64_options options = base64_default,
7055 last_chunk_handling_options last_chunk_options = loose,
7056 bool decode_up_to_bad_char = false) noexcept {
7057 size_t outlen = binary_output.size();
7058 #if SIMDUTF_CPLUSPLUS23
7059 if consteval {
7060 auto r = base64_to_binary_safe_impl(
7061 input.data(), input.size(), binary_output.data(), outlen, options,
7062 last_chunk_options, decode_up_to_bad_char);
7063 return {r, outlen};
7064 } else
7065 #endif
7066 {
7067 auto r = base64_to_binary_safe(
7068 input.data(), input.size(),
7069 reinterpret_cast<char *>(binary_output.data()), outlen, options,
7070 last_chunk_options, decode_up_to_bad_char);
7071 return {r, outlen};
7072 }
7073}
7074 #endif // SIMDUTF_SPAN
7075
7076 #endif // SIMDUTF_SPAN
7077} // namespace simdutf
7078
7079#endif // SIMDUTF_FEATURE_BASE64
7080
7081#endif // SIMDUTF_IMPLEMENTATION_H
An implementation of simdutf for a particular CPU architecture.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-32 string into Latin1 string.
virtual simdutf_warn_unused size_t binary_length_from_base64(const char *input, size_t length) const noexcept
Compute the binary length from a base64 input with ASCII spaces.
virtual const char * find(const char *start, const char *end, char character) const noexcept=0
Find the first occurrence of a character in a string.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-16LE string.
virtual simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept
Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
virtual simdutf_warn_unused size_t convert_utf16le_to_utf8_with_replacement(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-8 string, replacing unpaired surrogates with the Uni...
virtual simdutf_warn_unused size_t binary_length_from_base64(const char16_t *input, size_t length) const noexcept
Compute the binary length from a base64 input with ASCII spaces.
virtual simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, size_t length) const noexcept=0
Compute the number of 4-byte code units that this UTF-16BE string would require in UTF-32 format.
virtual simdutf_warn_unused size_t convert_utf8_to_utf16be(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16BE string.
virtual simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-16BE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-16BE string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_latin1_to_utf16le(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert Latin1 string into UTF-16LE string.
virtual simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16BE string.
virtual simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into Latin1 string.
virtual simdutf_warn_unused full_result base64_to_binary_details(const char *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output while returning more details than base64_to_binary.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-16LE string into UTF-32 string.
virtual simdutf_warn_unused size_t latin1_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-8 string would require in Latin1 format.
virtual simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-32 string into Latin1 string and stop on error.
virtual simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
virtual simdutf_warn_unused size_t convert_latin1_to_utf16be(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert Latin1 string into UTF-16BE string.
virtual simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept
Compute the number of bytes that this UTF-32 string would require in Latin1 format.
virtual simdutf_warn_unused full_result base64_to_binary_details(const char16_t *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output while returning more details than base64_to_binary.
virtual simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16LE string.
virtual simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept=0
Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
virtual simdutf_warn_unused size_t count_utf8(const char *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-16BE string.
virtual simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of 4-byte code units that this UTF-8 string would require in UTF-32 format.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-16LE string into UTF-8 string.
virtual void to_well_formed_utf16le(const char16_t *input, size_t len, char16_t *output) const noexcept=0
Copies the UTF-16LE string while replacing mismatched surrogates with the Unicode replacement charact...
virtual simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16BE string and stop on error.
virtual simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept=0
Validate the UTF-8 string and stop on errors.
virtual simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
virtual size_t binary_to_base64_with_lines(const char *input, size_t length, char *output, size_t line_length=simdutf::default_line_length, base64_options options=base64_default) const noexcept=0
Convert a binary input to a base64 output with lines of given length.
virtual simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf8_to_utf16le(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16LE string.
virtual simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused result utf8_length_from_utf16le_with_replacement(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16LE string would require in UTF-8 format even when the UTF...
virtual simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16LE string and stop on error.
virtual simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-8 string and stop on error.
virtual size_t binary_to_base64(const char *input, size_t length, char *output, base64_options options=base64_default) const noexcept=0
Convert a binary input to a base64 output.
virtual simdutf_warn_unused result utf8_length_from_utf16be_with_replacement(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16BE string would require in UTF-8 format even when the UTF...
virtual simdutf_warn_unused size_t convert_latin1_to_utf32(const char *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert Latin1 string into UTF-32 string.
virtual simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char *input, size_t length, char32_t *utf32_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-8 string.
virtual simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept
Return the number of 4-byte code units that this Latin1 string would require in UTF-32 format.
virtual simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16LE string. This function may be best when you expect the input to be almost always ...
virtual simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16LE string and stop on error.
virtual simdutf_warn_unused size_t count_utf16le(const char16_t *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-32 string into Latin1 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-16BE string into Latin1 string.
bool supported_by_runtime_system() const
The instruction sets this implementation is compiled against and the current CPU match.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16BE string and stop on error.
virtual simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-8 string and stop on error.
virtual simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-32 string and stop on error.
virtual simdutf_warn_unused result base64_to_binary(const char *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output.
virtual simdutf_warn_unused bool validate_utf16be_as_ascii(const char16_t *buf, size_t len) const noexcept=0
Validate the ASCII string as a UTF-16BE sequence.
virtual simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16BE string.
virtual simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept=0
Validate the UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, size_t length) const noexcept=0
Compute the number of 4-byte code units that this UTF-16LE string would require in UTF-32 format.
virtual void change_endianness_utf16(const char16_t *input, size_t length, char16_t *output) const noexcept=0
Change the endianness of the input.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-16BE string.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert valid UTF-8 string into Latin1 string.
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char *input, size_t length) const noexcept
Provide the maximal binary length in bytes given the base64 input.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-16LE string.
virtual std::string name() const
The name of this implementation.
virtual simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept=0
Validate the ASCII string and stop on error.
virtual simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept=0
Validate the UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf8_to_utf32(const char *input, size_t length, char32_t *utf32_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-32 string.
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char16_t *input, size_t length) const noexcept
Provide the maximal binary length in bytes given the base64 input.
virtual simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept=0
Validate the UTF-32 string.
virtual simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t convert_utf16be_to_utf8_with_replacement(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-8 string, replacing unpaired surrogates with the Uni...
virtual simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-8 string.
virtual simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
virtual simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into Latin1 string.
virtual simdutf_warn_unused size_t convert_utf8_to_latin1(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert possibly broken UTF-8 string into Latin1 string.
virtual simdutf_warn_unused result base64_to_binary(const char16_t *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output.
simdutf_warn_unused size_t base64_length_from_binary(size_t length, base64_options options=base64_default) const noexcept
Provide the base64 length in bytes given the length of a binary input.
virtual int detect_encodings(const char *input, size_t length) const noexcept=0
This function will try to detect the possible encodings in one pass.
virtual simdutf_warn_unused size_t count_utf16be(const char16_t *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-8 string and stop on error.
virtual simdutf_warn_unused bool validate_utf16le_as_ascii(const char16_t *buf, size_t len) const noexcept=0
Validate the ASCII string as a UTF-16LE sequence.
virtual void to_well_formed_utf16be(const char16_t *input, size_t len, char16_t *output) const noexcept=0
Copies the UTF-16BE string while replacing mismatched surrogates with the Unicode replacement charact...
virtual simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept=0
Validate the ASCII string.
virtual simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
virtual simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16BE string and stop on error.
virtual simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert possibly broken UTF-8 string into Latin1 string and stop on error.
virtual std::string description() const
The description of this implementation.
virtual simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept
Return the number of 2-byte code units that this Latin1 string would require in UTF-16 format.
virtual simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16LE string and stop on error.
virtual simdutf_warn_unused size_t convert_latin1_to_utf8(const char *input, size_t length, char *utf8_output) const noexcept=0
Convert Latin1 string into UTF-8 string.
virtual encoding_type autodetect_encoding(const char *input, size_t length) const noexcept
This function will try to detect the encoding.
virtual simdutf_warn_unused size_t utf8_length_from_latin1(const char *input, size_t length) const noexcept=0
Return the number of bytes that this Latin1 string would require in UTF-8 format.