simdutf 8.0.0
Unicode at GB/s.
Loading...
Searching...
No Matches
implementation.h
1#ifndef SIMDUTF_IMPLEMENTATION_H
2#define SIMDUTF_IMPLEMENTATION_H
3#if !defined(SIMDUTF_NO_THREADS)
4 #include <atomic>
5#endif
6#include <string>
7#ifdef SIMDUTF_INTERNAL_TESTS
8 #include <vector>
9#endif
10#include "simdutf/common_defs.h"
11#include "simdutf/compiler_check.h"
12#include "simdutf/encoding_types.h"
13#include "simdutf/error.h"
14#include "simdutf/internal/isadetection.h"
15
16#if SIMDUTF_SPAN
17 #include <concepts>
18 #include <type_traits>
19 #include <span>
20 #include <tuple>
21#endif
22#if SIMDUTF_CPLUSPLUS17
23 #include <string_view>
24#endif
25// The following defines are conditionally enabled/disabled during amalgamation.
26// By default all features are enabled, regular code shouldn't check them. Only
27// when user code really relies on a selected subset, it's good to verify these
28// flags, like:
29//
30// #if !SIMDUTF_FEATURE_UTF16
31// # error("Please amalgamate simdutf with UTF-16 support")
32// #endif
33//
34#define SIMDUTF_FEATURE_DETECT_ENCODING 1
35#define SIMDUTF_FEATURE_ASCII 1
36#define SIMDUTF_FEATURE_LATIN1 1
37#define SIMDUTF_FEATURE_UTF8 1
38#define SIMDUTF_FEATURE_UTF16 1
39#define SIMDUTF_FEATURE_UTF32 1
40#define SIMDUTF_FEATURE_BASE64 1
41
42#if SIMDUTF_CPLUSPLUS23
43 #include <simdutf/constexpr_ptr.h>
44#endif
45
46#if SIMDUTF_SPAN
48namespace simdutf {
49namespace detail {
/// Satisfied when T is exactly one of std::byte, char, signed char,
/// unsigned char or char8_t. No cv-qualifier or reference stripping is done
/// here, so e.g. `const char` does not match.
54template <typename T>
55concept byte_like = std::is_same_v<T, std::byte> || //
56 std::is_same_v<T, char> || //
57 std::is_same_v<T, signed char> || //
58 std::is_same_v<T, unsigned char> || //
59 std::is_same_v<T, char8_t>;
60
/// byte_like applied to T after removing references and cv-qualifiers, so
/// `const char &` and similar are accepted.
61template <typename T>
62concept is_byte_like = byte_like<std::remove_cvref_t<T>>;
63
/// Concept form of std::is_pointer_v, usable as a return-type constraint in
/// requires-expressions.
64template <typename T>
65concept is_pointer = std::is_pointer_v<T>;
66
/// Read-only byte range. Checked on a `const T &`:
///  - size() is noexcept and convertible to std::size_t,
///  - data() is noexcept and returns a pointer,
///  - dereferencing that pointer is noexcept and yields a byte-like type
///    (cv/ref-qualification ignored).
72template <typename T>
73concept input_span_of_byte_like = requires(const T &t) {
74 { t.size() } noexcept -> std::convertible_to<std::size_t>;
75 { t.data() } noexcept -> is_pointer;
76 { *t.data() } noexcept -> is_byte_like;
77};
78
/// True when T, with any reference removed, is not const-qualified.
79template <typename T>
80concept is_mutable = !std::is_const_v<std::remove_reference_t<T>>;
81
/// Writable byte range. Checked on a non-const `T &`: the same size()/data()
/// requirements as input_span_of_byte_like, plus the element obtained through
/// *data() must not be const (is_mutable).
85template <typename T>
86concept output_span_of_byte_like = requires(T &t) {
87 { t.size() } noexcept -> std::convertible_to<std::size_t>;
88 { t.data() } noexcept -> is_pointer;
89 { *t.data() } noexcept -> is_byte_like;
90 { *t.data() } noexcept -> is_mutable;
91};
92
/// InputPtr supports p[0], and the decayed type of p[0] is value-initializable
/// and satisfies byte_like.
98template <class InputPtr>
99concept indexes_into_byte_like = requires(InputPtr p) {
100 { std::decay_t<decltype(p[0])>{} } -> simdutf::detail::byte_like;
101};
/// InputPtr supports p[0], and the decayed type of p[0] is exactly char16_t.
102template <class InputPtr>
103concept indexes_into_utf16 = requires(InputPtr p) {
104 { std::decay_t<decltype(p[0])>{} } -> std::same_as<char16_t>;
105};
/// InputPtr supports p[0], and the decayed type of p[0] is exactly char32_t.
106template <class InputPtr>
107concept indexes_into_utf32 = requires(InputPtr p) {
108 { std::decay_t<decltype(p[0])>{} } -> std::same_as<char32_t>;
109};
110
/// InputPtr supports `p[0] = s` for a `char` s, i.e. a char can be stored
/// through the subscript.
111template <class InputPtr>
112concept index_assignable_from_char = requires(InputPtr p, char s) {
113 { p[0] = s };
114};
115
/// InputPtr supports p[0], and the decayed type of p[0] is exactly
/// std::uint32_t.
120template <class InputPtr>
121concept indexes_into_uint32 = requires(InputPtr p) {
122 { std::decay_t<decltype(p[0])>{} } -> std::same_as<std::uint32_t>;
123};
124} // namespace detail
125} // namespace simdutf
126#endif // SIMDUTF_SPAN
127
128// these includes are needed for constexpr support. they are
129// not part of the public api.
130#include <simdutf/scalar/swap_bytes.h>
131#include <simdutf/scalar/ascii.h>
132#include <simdutf/scalar/atomic_util.h>
133#include <simdutf/scalar/latin1.h>
134#include <simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h>
135#include <simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h>
136#include <simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h>
137#include <simdutf/scalar/utf16.h>
138#include <simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h>
139#include <simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h>
140#include <simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h>
141#include <simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h>
142#include <simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h>
143#include <simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h>
144#include <simdutf/scalar/utf32.h>
145#include <simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h>
146#include <simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h>
147#include <simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h>
148#include <simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h>
149#include <simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h>
150#include <simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h>
151#include <simdutf/scalar/utf8.h>
152#include <simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h>
153#include <simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h>
154#include <simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h>
155#include <simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h>
156#include <simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h>
157#include <simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h>
158
159namespace simdutf {
160
// Library-wide default line length of 76.
// NOTE(review): presumably the line-wrap width used by the base64 routines;
// its users are not visible in this part of the header, so confirm.
161constexpr size_t default_line_length =
162 76;
163
164#if SIMDUTF_FEATURE_DETECT_ENCODING
/**
 * Pointer/length form of autodetect_encoding, returning a single
 * simdutf::encoding_type. Only the declaration appears here; no body is given
 * at this point. The uint8_t and range overloads forward to this function.
 */
175simdutf_warn_unused simdutf::encoding_type
176autodetect_encoding(const char *input, size_t length) noexcept;
177simdutf_really_inline simdutf_warn_unused simdutf::encoding_type
178autodetect_encoding(const uint8_t *input, size_t length) noexcept {
179 return autodetect_encoding(reinterpret_cast<const char *>(input), length);
180}
181 #if SIMDUTF_SPAN
193simdutf_really_inline simdutf_warn_unused simdutf::encoding_type
194autodetect_encoding(
195 const detail::input_span_of_byte_like auto &input) noexcept {
196 return autodetect_encoding(reinterpret_cast<const char *>(input.data()),
197 input.size());
198}
199 #endif // SIMDUTF_SPAN
200
/**
 * Pointer/length form of detect_encodings, returning an int.
 * NOTE(review): the int is presumably a bit-set of encoding_type values;
 * confirm against the definition, which is not shown here.
 * Only the declaration appears here; the uint8_t and range overloads forward
 * to this function.
 */
212simdutf_warn_unused int detect_encodings(const char *input,
213 size_t length) noexcept;
214simdutf_really_inline simdutf_warn_unused int
215detect_encodings(const uint8_t *input, size_t length) noexcept {
216 return detect_encodings(reinterpret_cast<const char *>(input), length);
217}
218 #if SIMDUTF_SPAN
219simdutf_really_inline simdutf_warn_unused int
220detect_encodings(const detail::input_span_of_byte_like auto &input) noexcept {
221 return detect_encodings(reinterpret_cast<const char *>(input.data()),
222 input.size());
223}
224 #endif // SIMDUTF_SPAN
225#endif // SIMDUTF_FEATURE_DETECT_ENCODING
226
227#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
/** Pointer/length form of validate_utf8. Declaration only; no body here. */
239simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept;
240 #if SIMDUTF_SPAN
/**
 * Range form of validate_utf8 for any detail::input_span_of_byte_like.
 * In a C++23 constant-evaluated call it runs scalar::utf8::validate on the
 * bytes viewed as uint8_t; otherwise it forwards data()/size() to the
 * pointer/length form declared just above.
 */
241simdutf_constexpr23 simdutf_really_inline simdutf_warn_unused bool
242validate_utf8(const detail::input_span_of_byte_like auto &input) noexcept {
243 #if SIMDUTF_CPLUSPLUS23
244 if consteval {
245 return scalar::utf8::validate(
246 detail::constexpr_cast_ptr<uint8_t>(input.data()), input.size());
247 } else
248 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
249 {
250 return validate_utf8(reinterpret_cast<const char *>(input.data()),
251 input.size());
252 }
253}
254 #endif // SIMDUTF_SPAN
255#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
256
257#if SIMDUTF_FEATURE_UTF8
/**
 * Pointer/length form of validate_utf8_with_errors, returning a `result`.
 * Declaration only; no body here.
 */
270simdutf_warn_unused result validate_utf8_with_errors(const char *buf,
271 size_t len) noexcept;
272 #if SIMDUTF_SPAN
/**
 * Range form of validate_utf8_with_errors for any
 * detail::input_span_of_byte_like. In a C++23 constant-evaluated call it runs
 * scalar::utf8::validate_with_errors on the bytes viewed as uint8_t;
 * otherwise it forwards data()/size() to the pointer/length form above.
 */
273simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused result
274validate_utf8_with_errors(
275 const detail::input_span_of_byte_like auto &input) noexcept {
276 #if SIMDUTF_CPLUSPLUS23
277 if consteval {
278 return scalar::utf8::validate_with_errors(
279 detail::constexpr_cast_ptr<uint8_t>(input.data()), input.size());
280 } else
281 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
282 {
283 return validate_utf8_with_errors(
284 reinterpret_cast<const char *>(input.data()), input.size());
285 }
286}
287 #endif // SIMDUTF_SPAN
288#endif // SIMDUTF_FEATURE_UTF8
289
290#if SIMDUTF_FEATURE_ASCII
/** Pointer/length form of validate_ascii. Declaration only; no body here. */
300simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept;
301 #if SIMDUTF_SPAN
/**
 * Range form of validate_ascii for any detail::input_span_of_byte_like.
 * In a C++23 constant-evaluated call it runs scalar::ascii::validate on the
 * bytes viewed as std::uint8_t; otherwise it forwards data()/size() to the
 * pointer/length form declared just above.
 */
302simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
303validate_ascii(const detail::input_span_of_byte_like auto &input) noexcept {
304 #if SIMDUTF_CPLUSPLUS23
305 if consteval {
306 return scalar::ascii::validate(
307 detail::constexpr_cast_ptr<std::uint8_t>(input.data()), input.size());
308 } else
309 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
310 {
311 return validate_ascii(reinterpret_cast<const char *>(input.data()),
312 input.size());
313 }
314}
315 #endif // SIMDUTF_SPAN
316
/**
 * Pointer/length form of validate_ascii_with_errors, returning a `result`.
 * Declaration only; no body here.
 */
330simdutf_warn_unused result validate_ascii_with_errors(const char *buf,
331 size_t len) noexcept;
332 #if SIMDUTF_SPAN
/**
 * Range form of validate_ascii_with_errors for any
 * detail::input_span_of_byte_like. In a C++23 constant-evaluated call it runs
 * scalar::ascii::validate_with_errors on the bytes viewed as std::uint8_t;
 * otherwise it forwards data()/size() to the pointer/length form above.
 */
333simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
334validate_ascii_with_errors(
335 const detail::input_span_of_byte_like auto &input) noexcept {
336 #if SIMDUTF_CPLUSPLUS23
337 if consteval {
338 return scalar::ascii::validate_with_errors(
339 detail::constexpr_cast_ptr<std::uint8_t>(input.data()), input.size());
340 } else
341 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
342 {
343 return validate_ascii_with_errors(
344 reinterpret_cast<const char *>(input.data()), input.size());
345 }
346}
347 #endif // SIMDUTF_SPAN
348#endif // SIMDUTF_FEATURE_ASCII
349
350#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
/**
 * Pointer/length form of validate_utf16_as_ascii over char16_t units.
 * Declaration only; no body here.
 */
362simdutf_warn_unused bool validate_utf16_as_ascii(const char16_t *buf,
363 size_t len) noexcept;
364 #if SIMDUTF_SPAN
/**
 * std::span form of validate_utf16_as_ascii. In a C++23 constant-evaluated
 * call it runs scalar::utf16::validate_as_ascii<endianness::NATIVE>;
 * otherwise it forwards data()/size() to the pointer/length form above.
 */
365simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
366validate_utf16_as_ascii(std::span<const char16_t> input) noexcept {
367 #if SIMDUTF_CPLUSPLUS23
368 if consteval {
369 return scalar::utf16::validate_as_ascii<endianness::NATIVE>(input.data(),
370 input.size());
371 } else
372 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
373 {
374 return validate_utf16_as_ascii(input.data(), input.size());
375 }
376}
377 #endif // SIMDUTF_SPAN
378
/**
 * Pointer/length form of validate_utf16be_as_ascii over char16_t units.
 * Declaration only; no body here.
 */
390simdutf_warn_unused bool validate_utf16be_as_ascii(const char16_t *buf,
391 size_t len) noexcept;
392 #if SIMDUTF_SPAN
/**
 * std::span form of validate_utf16be_as_ascii. In a C++23 constant-evaluated
 * call it runs scalar::utf16::validate_as_ascii<endianness::BIG>; otherwise
 * it forwards data()/size() to the pointer/length form above.
 */
393simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
394validate_utf16be_as_ascii(std::span<const char16_t> input) noexcept {
395 #if SIMDUTF_CPLUSPLUS23
396 if consteval {
397 return scalar::utf16::validate_as_ascii<endianness::BIG>(input.data(),
398 input.size());
399 } else
400 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
401 {
402 return validate_utf16be_as_ascii(input.data(), input.size());
403 }
404}
405 #endif // SIMDUTF_SPAN
406
/**
 * Pointer/length form of validate_utf16le_as_ascii over char16_t units.
 * Declaration only; no body here.
 */
418simdutf_warn_unused bool validate_utf16le_as_ascii(const char16_t *buf,
419 size_t len) noexcept;
420 #if SIMDUTF_SPAN
/**
 * std::span form of validate_utf16le_as_ascii. In a C++23 constant-evaluated
 * call it runs scalar::utf16::validate_as_ascii<endianness::LITTLE>;
 * otherwise it forwards data()/size() to the pointer/length form above.
 */
421simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
422validate_utf16le_as_ascii(std::span<const char16_t> input) noexcept {
423 #if SIMDUTF_CPLUSPLUS23
424 if consteval {
425 return scalar::utf16::validate_as_ascii<endianness::LITTLE>(input.data(),
426 input.size());
427 } else
428 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
429 {
430 return validate_utf16le_as_ascii(input.data(), input.size());
431 }
432}
433 #endif // SIMDUTF_SPAN
434#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
435
436#if SIMDUTF_FEATURE_UTF16
/**
 * Pointer/length form of validate_utf16 over char16_t units.
 * Declaration only; no body here.
 */
451simdutf_warn_unused bool validate_utf16(const char16_t *buf,
452 size_t len) noexcept;
453 #if SIMDUTF_SPAN
/**
 * std::span form of validate_utf16. In a C++23 constant-evaluated call it
 * runs scalar::utf16::validate<endianness::NATIVE>; otherwise it forwards
 * data()/size() to the pointer/length form above.
 */
454simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
455validate_utf16(std::span<const char16_t> input) noexcept {
456 #if SIMDUTF_CPLUSPLUS23
457 if consteval {
458 return scalar::utf16::validate<endianness::NATIVE>(input.data(),
459 input.size());
460 } else
461 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
462 {
463 return validate_utf16(input.data(), input.size());
464 }
465}
466 #endif // SIMDUTF_SPAN
467#endif // SIMDUTF_FEATURE_UTF16
468
469#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
/**
 * Pointer/length form of validate_utf16le over char16_t units.
 * Declaration only; no body here.
 */
484simdutf_warn_unused bool validate_utf16le(const char16_t *buf,
485 size_t len) noexcept;
486 #if SIMDUTF_SPAN
/**
 * std::span form of validate_utf16le. In a C++23 constant-evaluated call it
 * runs scalar::utf16::validate<endianness::LITTLE>; otherwise it forwards
 * data()/size() to the pointer/length form above.
 */
487simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused bool
488validate_utf16le(std::span<const char16_t> input) noexcept {
489 #if SIMDUTF_CPLUSPLUS23
490 if consteval {
491 return scalar::utf16::validate<endianness::LITTLE>(input.data(),
492 input.size());
493 } else
494 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
495 {
496 return validate_utf16le(input.data(), input.size());
497 }
498}
499 #endif // SIMDUTF_SPAN
500#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
501
502#if SIMDUTF_FEATURE_UTF16
/**
 * Pointer/length form of validate_utf16be over char16_t units.
 * Declaration only; no body here.
 */
517simdutf_warn_unused bool validate_utf16be(const char16_t *buf,
518 size_t len) noexcept;
519 #if SIMDUTF_SPAN
/**
 * std::span form of validate_utf16be. In a C++23 constant-evaluated call it
 * runs scalar::utf16::validate<endianness::BIG>; otherwise it forwards
 * data()/size() to the pointer/length form above.
 */
520simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
521validate_utf16be(std::span<const char16_t> input) noexcept {
522 #if SIMDUTF_CPLUSPLUS23
523 if consteval {
524 return scalar::utf16::validate<endianness::BIG>(input.data(), input.size());
525 } else
526 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
527 {
528 return validate_utf16be(input.data(), input.size());
529 }
530}
531 #endif // SIMDUTF_SPAN
532
/**
 * Pointer/length form of validate_utf16_with_errors, returning a `result`.
 * Declaration only; no body here.
 */
550simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf,
551 size_t len) noexcept;
552 #if SIMDUTF_SPAN
/**
 * std::span form of validate_utf16_with_errors. In a C++23
 * constant-evaluated call it runs
 * scalar::utf16::validate_with_errors<endianness::NATIVE>; otherwise it
 * forwards data()/size() to the pointer/length form above.
 */
553simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
554validate_utf16_with_errors(std::span<const char16_t> input) noexcept {
555 #if SIMDUTF_CPLUSPLUS23
556 if consteval {
557 return scalar::utf16::validate_with_errors<endianness::NATIVE>(
558 input.data(), input.size());
559 } else
560 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
561 {
562 return validate_utf16_with_errors(input.data(), input.size());
563 }
564}
565 #endif // SIMDUTF_SPAN
566
/**
 * Pointer/length form of validate_utf16le_with_errors, returning a `result`.
 * Declaration only; no body here.
 */
583simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf,
584 size_t len) noexcept;
585 #if SIMDUTF_SPAN
/**
 * std::span form of validate_utf16le_with_errors. In a C++23
 * constant-evaluated call it runs
 * scalar::utf16::validate_with_errors<endianness::LITTLE>; otherwise it
 * forwards data()/size() to the pointer/length form above.
 */
586simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
587validate_utf16le_with_errors(std::span<const char16_t> input) noexcept {
588 #if SIMDUTF_CPLUSPLUS23
589 if consteval {
590 return scalar::utf16::validate_with_errors<endianness::LITTLE>(
591 input.data(), input.size());
592 } else
593 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
594 {
595 return validate_utf16le_with_errors(input.data(), input.size());
596 }
597}
598 #endif // SIMDUTF_SPAN
599
/**
 * Pointer/length form of validate_utf16be_with_errors, returning a `result`.
 * Declaration only; no body here.
 */
616simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf,
617 size_t len) noexcept;
618 #if SIMDUTF_SPAN
/**
 * std::span form of validate_utf16be_with_errors. In a C++23
 * constant-evaluated call it runs
 * scalar::utf16::validate_with_errors<endianness::BIG>; otherwise it
 * forwards data()/size() to the pointer/length form above.
 */
619simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
620validate_utf16be_with_errors(std::span<const char16_t> input) noexcept {
621 #if SIMDUTF_CPLUSPLUS23
622 if consteval {
623 return scalar::utf16::validate_with_errors<endianness::BIG>(input.data(),
624 input.size());
625 } else
626 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
627 {
628 return validate_utf16be_with_errors(input.data(), input.size());
629 }
630}
631 #endif // SIMDUTF_SPAN
632
/**
 * Pointer form of to_well_formed_utf16le: reads `len` units from `input` and
 * writes through `output`. Declaration only; no body here.
 */
645void to_well_formed_utf16le(const char16_t *input, size_t len,
646 char16_t *output) noexcept;
647 #if SIMDUTF_SPAN
/**
 * std::span form of to_well_formed_utf16le. In a C++23 constant-evaluated
 * call it runs scalar::utf16::to_well_formed_utf16<endianness::LITTLE>;
 * otherwise it forwards to the pointer form above.
 * Only output.data() is passed on: output.size() is never consulted, so this
 * wrapper performs no capacity check of its own.
 */
648simdutf_really_inline simdutf_constexpr23 void
649to_well_formed_utf16le(std::span<const char16_t> input,
650 std::span<char16_t> output) noexcept {
651 #if SIMDUTF_CPLUSPLUS23
652 if consteval {
653 scalar::utf16::to_well_formed_utf16<endianness::LITTLE>(
654 input.data(), input.size(), output.data());
655 } else
656 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
657 {
658 to_well_formed_utf16le(input.data(), input.size(), output.data());
659 }
660}
661 #endif // SIMDUTF_SPAN
662
/**
 * Pointer form of to_well_formed_utf16be: reads `len` units from `input` and
 * writes through `output`. Declaration only; no body here.
 */
675void to_well_formed_utf16be(const char16_t *input, size_t len,
676 char16_t *output) noexcept;
677 #if SIMDUTF_SPAN
/**
 * std::span form of to_well_formed_utf16be. In a C++23 constant-evaluated
 * call it runs scalar::utf16::to_well_formed_utf16<endianness::BIG>;
 * otherwise it forwards to the pointer form above.
 * Only output.data() is passed on: output.size() is never consulted, so this
 * wrapper performs no capacity check of its own.
 */
678simdutf_really_inline simdutf_constexpr23 void
679to_well_formed_utf16be(std::span<const char16_t> input,
680 std::span<char16_t> output) noexcept {
681 #if SIMDUTF_CPLUSPLUS23
682 if consteval {
683 scalar::utf16::to_well_formed_utf16<endianness::BIG>(
684 input.data(), input.size(), output.data());
685 } else
686 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
687 {
688 to_well_formed_utf16be(input.data(), input.size(), output.data());
689 }
690}
691 #endif // SIMDUTF_SPAN
692
/**
 * Pointer form of to_well_formed_utf16: reads `len` units from `input` and
 * writes through `output`. Declaration only; no body here.
 */
705void to_well_formed_utf16(const char16_t *input, size_t len,
706 char16_t *output) noexcept;
707 #if SIMDUTF_SPAN
/**
 * std::span form of to_well_formed_utf16. In a C++23 constant-evaluated call
 * it runs scalar::utf16::to_well_formed_utf16<endianness::NATIVE>; otherwise
 * it forwards to the pointer form above.
 * Only output.data() is passed on: output.size() is never consulted, so this
 * wrapper performs no capacity check of its own.
 */
708simdutf_really_inline simdutf_constexpr23 void
709to_well_formed_utf16(std::span<const char16_t> input,
710 std::span<char16_t> output) noexcept {
711 #if SIMDUTF_CPLUSPLUS23
712 if consteval {
713 scalar::utf16::to_well_formed_utf16<endianness::NATIVE>(
714 input.data(), input.size(), output.data());
715 } else
716 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
717 {
718 to_well_formed_utf16(input.data(), input.size(), output.data());
719 }
720}
721 #endif // SIMDUTF_SPAN
722
723#endif // SIMDUTF_FEATURE_UTF16
724
725#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
/**
 * Pointer/length form of validate_utf32 over char32_t units.
 * Declaration only; no body here.
 */
740simdutf_warn_unused bool validate_utf32(const char32_t *buf,
741 size_t len) noexcept;
742 #if SIMDUTF_SPAN
/**
 * std::span form of validate_utf32. In a C++23 constant-evaluated call it
 * runs scalar::utf32::validate on the units viewed as std::uint32_t;
 * otherwise it forwards data()/size() to the pointer/length form above.
 */
743simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 bool
744validate_utf32(std::span<const char32_t> input) noexcept {
745 #if SIMDUTF_CPLUSPLUS23
746 if consteval {
747 return scalar::utf32::validate(
748 detail::constexpr_cast_ptr<std::uint32_t>(input.data()), input.size());
749 } else
750 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
751 {
752 return validate_utf32(input.data(), input.size());
753 }
754}
755 #endif // SIMDUTF_SPAN
756#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
757
758#if SIMDUTF_FEATURE_UTF32
/**
 * Pointer/length form of validate_utf32_with_errors, returning a `result`.
 * Declaration only; no body here.
 */
775simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf,
776 size_t len) noexcept;
777 #if SIMDUTF_SPAN
/**
 * std::span form of validate_utf32_with_errors. In a C++23
 * constant-evaluated call it runs scalar::utf32::validate_with_errors on the
 * units viewed as std::uint32_t; otherwise it forwards data()/size() to the
 * pointer/length form above.
 */
778simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
779validate_utf32_with_errors(std::span<const char32_t> input) noexcept {
780 #if SIMDUTF_CPLUSPLUS23
781 if consteval {
782 return scalar::utf32::validate_with_errors(
783 detail::constexpr_cast_ptr<std::uint32_t>(input.data()), input.size());
784 } else
785 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
786 {
787 return validate_utf32_with_errors(input.data(), input.size());
788 }
789}
790 #endif // SIMDUTF_SPAN
791#endif // SIMDUTF_FEATURE_UTF32
792
793#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
/**
 * Pointer form of convert_latin1_to_utf8: reads `length` bytes from `input`,
 * writes through `utf8_output` and returns a size_t.
 * Declaration only; no body here.
 */
804simdutf_warn_unused size_t convert_latin1_to_utf8(const char *input,
805 size_t length,
806 char *utf8_output) noexcept;
807 #if SIMDUTF_SPAN
/**
 * Range form of convert_latin1_to_utf8 for byte-like input and output ranges.
 * In a C++23 constant-evaluated call it runs scalar::latin1_to_utf8::convert
 * with both pointers adapted to char; otherwise it forwards to the pointer
 * form above after casting both pointers to char.
 * utf8_output.size() is never consulted, so this wrapper performs no capacity
 * check of its own.
 */
808simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
809convert_latin1_to_utf8(
810 const detail::input_span_of_byte_like auto &latin1_input,
811 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
812 #if SIMDUTF_CPLUSPLUS23
813 if consteval {
814 return scalar::latin1_to_utf8::convert(
815 detail::constexpr_cast_ptr<char>(latin1_input.data()),
816 latin1_input.size(),
817 detail::constexpr_cast_writeptr<char>(utf8_output.data()));
818 } else
819 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
820 {
821 return convert_latin1_to_utf8(
822 reinterpret_cast<const char *>(latin1_input.data()),
823 latin1_input.size(), reinterpret_cast<char *>(utf8_output.data()));
824 }
825}
826 #endif // SIMDUTF_SPAN
827
/**
 * Pointer form of convert_latin1_to_utf8_safe. Unlike convert_latin1_to_utf8
 * it also receives the output capacity `utf8_len`.
 * Declaration only; no body here.
 */
841simdutf_warn_unused size_t
842convert_latin1_to_utf8_safe(const char *input, size_t length, char *utf8_output,
843 size_t utf8_len) noexcept;
844 #if SIMDUTF_SPAN
/**
 * Range form of convert_latin1_to_utf8_safe. The output capacity is taken
 * from utf8_output.size(). In a C++23 constant-evaluated call it runs
 * scalar::latin1_to_utf8::convert_safe_constexpr; otherwise it forwards to
 * the pointer form above after casting both pointers to char.
 */
845simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
846convert_latin1_to_utf8_safe(
847 const detail::input_span_of_byte_like auto &input,
848 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
849 // implementation note: outputspan is a forwarding ref to avoid copying
850 // and allow both lvalues and rvalues. std::span can be copied without
851 // problems, but std::vector should not, and this function should accept
852 // both. it will allow using an owning rvalue ref (example: passing a
853 // temporary std::string) as output, but the user will quickly find out
854 // that he has no way of getting the data out of the object in that case.
855 #if SIMDUTF_CPLUSPLUS23
856 if consteval {
857 return scalar::latin1_to_utf8::convert_safe_constexpr(
858 input.data(), input.size(), utf8_output.data(), utf8_output.size());
859 } else
860 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
861 {
862 return convert_latin1_to_utf8_safe(
863 reinterpret_cast<const char *>(input.data()), input.size(),
864 reinterpret_cast<char *>(utf8_output.data()), utf8_output.size());
865 }
866}
867 #endif // SIMDUTF_SPAN
868#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
869
870#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
/**
 * Pointer form of convert_latin1_to_utf16le: reads `length` bytes from
 * `input`, writes char16_t units through `utf16_output`, returns a size_t.
 * Declaration only; no body here.
 */
881simdutf_warn_unused size_t convert_latin1_to_utf16le(
882 const char *input, size_t length, char16_t *utf16_output) noexcept;
883 #if SIMDUTF_SPAN
/**
 * Range form of convert_latin1_to_utf16le. In a C++23 constant-evaluated
 * call it runs scalar::latin1_to_utf16::convert<endianness::LITTLE>;
 * otherwise it forwards to the pointer form above.
 * utf16_output.size() is never consulted by this wrapper.
 */
884simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
885convert_latin1_to_utf16le(
886 const detail::input_span_of_byte_like auto &latin1_input,
887 std::span<char16_t> utf16_output) noexcept {
888 #if SIMDUTF_CPLUSPLUS23
889 if consteval {
890 return scalar::latin1_to_utf16::convert<endianness::LITTLE>(
891 latin1_input.data(), latin1_input.size(), utf16_output.data());
892 } else
893 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
894 {
895 return convert_latin1_to_utf16le(
896 reinterpret_cast<const char *>(latin1_input.data()),
897 latin1_input.size(), utf16_output.data());
898 }
899}
900 #endif // SIMDUTF_SPAN
901
/**
 * Pointer form of convert_latin1_to_utf16be: reads `length` bytes from
 * `input`, writes char16_t units through `utf16_output`, returns a size_t.
 * Declaration only; no body here.
 */
912simdutf_warn_unused size_t convert_latin1_to_utf16be(
913 const char *input, size_t length, char16_t *utf16_output) noexcept;
914 #if SIMDUTF_SPAN
/**
 * Range form of convert_latin1_to_utf16be. In a C++23 constant-evaluated
 * call it runs scalar::latin1_to_utf16::convert<endianness::BIG>; otherwise
 * it forwards to the pointer form above.
 * output.size() is never consulted by this wrapper.
 */
915simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
916convert_latin1_to_utf16be(const detail::input_span_of_byte_like auto &input,
917 std::span<char16_t> output) noexcept {
918 #if SIMDUTF_CPLUSPLUS23
919 if consteval {
920 return scalar::latin1_to_utf16::convert<endianness::BIG>(
921 input.data(), input.size(), output.data());
922 } else
923 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
924 {
925 return convert_latin1_to_utf16be(
926 reinterpret_cast<const char *>(input.data()), input.size(),
927 output.data());
928 }
929}
930 #endif // SIMDUTF_SPAN
/**
 * Returns `length` unchanged: the Latin-1 length reported for a UTF-16 input
 * of `length` units is the same number. No input data is inspected.
 */
939simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
940latin1_length_from_utf16(size_t length) noexcept {
941 return length;
942}
943
/**
 * Returns `length` unchanged: the UTF-16 length reported for a Latin-1 input
 * of `length` bytes is the same number. No input data is inspected.
 */
952simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
953utf16_length_from_latin1(size_t length) noexcept {
954 return length;
955}
956#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
957
958#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
/**
 * Pointer form of convert_latin1_to_utf32: reads `length` bytes from `input`,
 * writes char32_t units through `utf32_buffer`, returns a size_t.
 * Declaration only; no body here.
 */
969simdutf_warn_unused size_t convert_latin1_to_utf32(
970 const char *input, size_t length, char32_t *utf32_buffer) noexcept;
971 #if SIMDUTF_SPAN
/**
 * Range form of convert_latin1_to_utf32. In a C++23 constant-evaluated call
 * it runs scalar::latin1_to_utf32::convert; otherwise it forwards to the
 * pointer form above.
 * utf32_output.size() is never consulted by this wrapper.
 */
972simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
973convert_latin1_to_utf32(
974 const detail::input_span_of_byte_like auto &latin1_input,
975 std::span<char32_t> utf32_output) noexcept {
976 #if SIMDUTF_CPLUSPLUS23
977 if consteval {
978 return scalar::latin1_to_utf32::convert(
979 latin1_input.data(), latin1_input.size(), utf32_output.data());
980 } else
981 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
982 {
983 return convert_latin1_to_utf32(
984 reinterpret_cast<const char *>(latin1_input.data()),
985 latin1_input.size(), utf32_output.data());
986 }
987}
988 #endif // SIMDUTF_SPAN
989#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
990
991#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
/**
 * Pointer form of convert_utf8_to_latin1: reads `length` bytes from `input`,
 * writes through `latin1_output`, returns a size_t.
 * Declaration only; no body here.
 */
1004simdutf_warn_unused size_t convert_utf8_to_latin1(const char *input,
1005 size_t length,
1006 char *latin1_output) noexcept;
1007 #if SIMDUTF_SPAN
/**
 * Range form of convert_utf8_to_latin1 for byte-like input and output
 * ranges. In a C++23 constant-evaluated call it runs
 * scalar::utf8_to_latin1::convert; otherwise it forwards to the pointer form
 * above after casting both pointers to char.
 * output.size() is never consulted by this wrapper.
 */
1008simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1009convert_utf8_to_latin1(
1010 const detail::input_span_of_byte_like auto &input,
1011 detail::output_span_of_byte_like auto &&output) noexcept {
1012 #if SIMDUTF_CPLUSPLUS23
1013 if consteval {
1014 return scalar::utf8_to_latin1::convert(input.data(), input.size(),
1015 output.data());
1016 } else
1017 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
1018 {
1019 return convert_utf8_to_latin1(reinterpret_cast<const char *>(input.data()),
1020 input.size(),
1021 reinterpret_cast<char *>(output.data()));
1022 }
1023}
1024 #endif // SIMDUTF_SPAN
1025#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1026
1027#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
/**
 * Pointer form of convert_utf8_to_utf16: reads `length` bytes from `input`,
 * writes char16_t units through `utf16_output`, returns a size_t.
 * Declaration only; no body here.
 */
1041simdutf_warn_unused size_t convert_utf8_to_utf16(
1042 const char *input, size_t length, char16_t *utf16_output) noexcept;
1043 #if SIMDUTF_SPAN
/**
 * Range form of convert_utf8_to_utf16. In a C++23 constant-evaluated call it
 * runs scalar::utf8_to_utf16::convert<endianness::NATIVE>; otherwise it
 * forwards to the pointer form above.
 * output.size() is never consulted by this wrapper.
 */
1044simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1045convert_utf8_to_utf16(const detail::input_span_of_byte_like auto &input,
1046 std::span<char16_t> output) noexcept {
1047 #if SIMDUTF_CPLUSPLUS23
1048 if consteval {
1049 return scalar::utf8_to_utf16::convert<endianness::NATIVE>(
1050 input.data(), input.size(), output.data());
1051 } else
1052 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
1053 {
1054 return convert_utf8_to_utf16(reinterpret_cast<const char *>(input.data()),
1055 input.size(), output.data());
1056 }
1057}
1058 #endif // SIMDUTF_SPAN
1059
/**
 * Pointer/length form of utf8_length_from_utf16le_with_replacement, returning
 * a `result`. Declaration only; no body here.
 */
1077simdutf_warn_unused result utf8_length_from_utf16le_with_replacement(
1078 const char16_t *input, size_t length) noexcept;
1079 #if SIMDUTF_SPAN
/**
 * std::span form of utf8_length_from_utf16le_with_replacement. In a C++23
 * constant-evaluated call it runs
 * scalar::utf16::utf8_length_from_utf16_with_replacement<endianness::LITTLE>;
 * otherwise it forwards data()/size() to the pointer/length form above.
 * NOTE(review): the parameter is named `valid_utf16_input`, yet a
 * "with_replacement" routine presumably tolerates invalid input; confirm.
 */
1080simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused result
1081utf8_length_from_utf16le_with_replacement(
1082 std::span<const char16_t> valid_utf16_input) noexcept {
1083 #if SIMDUTF_CPLUSPLUS23
1084 if consteval {
1085 return scalar::utf16::utf8_length_from_utf16_with_replacement<
1086 endianness::LITTLE>(valid_utf16_input.data(), valid_utf16_input.size());
1087 } else
1088 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
1089 {
1090 return utf8_length_from_utf16le_with_replacement(valid_utf16_input.data(),
1091 valid_utf16_input.size());
1092 }
1093}
1094 #endif // SIMDUTF_SPAN
1095
/**
 * Pointer/length form of utf8_length_from_utf16be_with_replacement, returning
 * a `result`. Declaration only; no body here.
 */
1113simdutf_warn_unused result utf8_length_from_utf16be_with_replacement(
1114 const char16_t *input, size_t length) noexcept;
1115 #if SIMDUTF_SPAN
/**
 * std::span form of utf8_length_from_utf16be_with_replacement. In a C++23
 * constant-evaluated call it runs
 * scalar::utf16::utf8_length_from_utf16_with_replacement<endianness::BIG>;
 * otherwise it forwards data()/size() to the pointer/length form above.
 * NOTE(review): the parameter is named `valid_utf16_input`, yet a
 * "with_replacement" routine presumably tolerates invalid input; confirm.
 */
1116simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1117utf8_length_from_utf16be_with_replacement(
1118 std::span<const char16_t> valid_utf16_input) noexcept {
1119 #if SIMDUTF_CPLUSPLUS23
1120 if consteval {
1121 return scalar::utf16::utf8_length_from_utf16_with_replacement<
1122 endianness::BIG>(valid_utf16_input.data(), valid_utf16_input.size());
1123 } else
1124 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
1125 {
1126 return utf8_length_from_utf16be_with_replacement(valid_utf16_input.data(),
1127 valid_utf16_input.size());
1128 }
1129}
1130 #endif // SIMDUTF_SPAN
1131
1132#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1133
1134#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
/**
 * Pointer form of convert_latin1_to_utf16: reads `length` bytes from `input`,
 * writes char16_t units through `utf16_output`, returns a size_t.
 * Declaration only; no body here.
 */
1143simdutf_warn_unused size_t convert_latin1_to_utf16(
1144 const char *input, size_t length, char16_t *utf16_output) noexcept;
1145 #if SIMDUTF_SPAN
/**
 * Range form of convert_latin1_to_utf16. In a C++23 constant-evaluated call
 * it runs scalar::latin1_to_utf16::convert<endianness::NATIVE>; otherwise it
 * forwards to the pointer form above.
 * output.size() is never consulted by this wrapper.
 */
1146simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1147convert_latin1_to_utf16(const detail::input_span_of_byte_like auto &input,
1148 std::span<char16_t> output) noexcept {
1149 #if SIMDUTF_CPLUSPLUS23
1150 if consteval {
1151 return scalar::latin1_to_utf16::convert<endianness::NATIVE>(
1152 input.data(), input.size(), output.data());
1153 } else
1154 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
1155 {
1156 return convert_latin1_to_utf16(reinterpret_cast<const char *>(input.data()),
1157 input.size(), output.data());
1158 }
1159}
1160 #endif // SIMDUTF_SPAN
1161#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1162
1163#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
/**
 * Pointer form of convert_utf8_to_utf16le: reads `length` bytes from `input`,
 * writes char16_t units through `utf16_output`, returns a size_t.
 * Declaration only; no body here.
 */
1176simdutf_warn_unused size_t convert_utf8_to_utf16le(
1177 const char *input, size_t length, char16_t *utf16_output) noexcept;
1178 #if SIMDUTF_SPAN
/**
 * Range form of convert_utf8_to_utf16le. In a C++23 constant-evaluated call
 * it runs scalar::utf8_to_utf16::convert<endianness::LITTLE>; otherwise it
 * forwards to the pointer form above.
 * utf16_output.size() is never consulted by this wrapper.
 */
1179simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1180convert_utf8_to_utf16le(const detail::input_span_of_byte_like auto &utf8_input,
1181 std::span<char16_t> utf16_output) noexcept {
1182 #if SIMDUTF_CPLUSPLUS23
1183 if consteval {
1184 return scalar::utf8_to_utf16::convert<endianness::LITTLE>(
1185 utf8_input.data(), utf8_input.size(), utf16_output.data());
1186 } else
1187 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
1188 {
1189 return convert_utf8_to_utf16le(
1190 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1191 utf16_output.data());
1192 }
1193}
1194 #endif // SIMDUTF_SPAN
1195
/**
 * Pointer form of convert_utf8_to_utf16be: reads `length` bytes from `input`,
 * writes char16_t units through `utf16_output`, returns a size_t.
 * Declaration only; no body here.
 */
1208simdutf_warn_unused size_t convert_utf8_to_utf16be(
1209 const char *input, size_t length, char16_t *utf16_output) noexcept;
1210 #if SIMDUTF_SPAN
/**
 * Range form of convert_utf8_to_utf16be. In a C++23 constant-evaluated call
 * it runs scalar::utf8_to_utf16::convert<endianness::BIG>; otherwise it
 * forwards to the pointer form above.
 * utf16_output.size() is never consulted by this wrapper.
 */
1211simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1212convert_utf8_to_utf16be(const detail::input_span_of_byte_like auto &utf8_input,
1213 std::span<char16_t> utf16_output) noexcept {
1214
1215 #if SIMDUTF_CPLUSPLUS23
1216 if consteval {
1217 return scalar::utf8_to_utf16::convert<endianness::BIG>(
1218 utf8_input.data(), utf8_input.size(), utf16_output.data());
1219 } else
1220 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
1221 {
1222 return convert_utf8_to_utf16be(
1223 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1224 utf16_output.data());
1225 }
1226}
1227 #endif // SIMDUTF_SPAN
1228#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1229
1230#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
/**
 * Pointer form of convert_utf8_to_latin1_with_errors: reads `length` bytes
 * from `input`, writes through `latin1_output`, returns a `result`.
 * Declaration only; no body here.
 */
1247simdutf_warn_unused result convert_utf8_to_latin1_with_errors(
1248 const char *input, size_t length, char *latin1_output) noexcept;
1249 #if SIMDUTF_SPAN
/**
 * Range form of convert_utf8_to_latin1_with_errors for byte-like input and
 * output ranges. In a C++23 constant-evaluated call it runs
 * scalar::utf8_to_latin1::convert_with_errors; otherwise it forwards to the
 * pointer form above after casting both pointers to char.
 * latin1_output.size() is never consulted by this wrapper.
 */
1250simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1251convert_utf8_to_latin1_with_errors(
1252 const detail::input_span_of_byte_like auto &utf8_input,
1253 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1254 #if SIMDUTF_CPLUSPLUS23
1255 if consteval {
1256 return scalar::utf8_to_latin1::convert_with_errors(
1257 utf8_input.data(), utf8_input.size(), latin1_output.data());
1258 } else
1259 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
1260 {
1261 return convert_utf8_to_latin1_with_errors(
1262 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1263 reinterpret_cast<char *>(latin1_output.data()));
1264 }
1265}
1266 #endif // SIMDUTF_SPAN
1267#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1268
1269#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
/**
 * Pointer form of convert_utf8_to_utf16_with_errors: reads `length` bytes
 * from `input`, writes char16_t units through `utf16_output`, returns a
 * `result`. Declaration only; no body here.
 */
1285simdutf_warn_unused result convert_utf8_to_utf16_with_errors(
1286 const char *input, size_t length, char16_t *utf16_output) noexcept;
1287 #if SIMDUTF_SPAN
/**
 * Range form of convert_utf8_to_utf16_with_errors. In a C++23
 * constant-evaluated call it runs
 * scalar::utf8_to_utf16::convert_with_errors<endianness::NATIVE>; otherwise
 * it forwards to the pointer form above.
 * utf16_output.size() is never consulted by this wrapper.
 */
1288simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1289convert_utf8_to_utf16_with_errors(
1290 const detail::input_span_of_byte_like auto &utf8_input,
1291 std::span<char16_t> utf16_output) noexcept {
1292 #if SIMDUTF_CPLUSPLUS23
1293 if consteval {
1294 return scalar::utf8_to_utf16::convert_with_errors<endianness::NATIVE>(
1295 utf8_input.data(), utf8_input.size(), utf16_output.data());
1296 } else
1297 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
1298 {
1299 return convert_utf8_to_utf16_with_errors(
1300 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1301 utf16_output.data());
1302 }
1303}
1304 #endif // SIMDUTF_SPAN
1305
/**
 * Pointer form of convert_utf8_to_utf16le_with_errors: reads `length` bytes
 * from `input`, writes char16_t units through `utf16_output`, returns a
 * `result`. Declaration only; no body here.
 */
1320simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(
1321 const char *input, size_t length, char16_t *utf16_output) noexcept;
1322 #if SIMDUTF_SPAN
/**
 * Range form of convert_utf8_to_utf16le_with_errors. In a C++23
 * constant-evaluated call it runs
 * scalar::utf8_to_utf16::convert_with_errors<endianness::LITTLE>; otherwise
 * it forwards to the pointer form above.
 * utf16_output.size() is never consulted by this wrapper.
 */
1323simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1324convert_utf8_to_utf16le_with_errors(
1325 const detail::input_span_of_byte_like auto &utf8_input,
1326 std::span<char16_t> utf16_output) noexcept {
1327 #if SIMDUTF_CPLUSPLUS23
1328 if consteval {
1329 return scalar::utf8_to_utf16::convert_with_errors<endianness::LITTLE>(
1330 utf8_input.data(), utf8_input.size(), utf16_output.data());
1331 } else
1332 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
1333 {
1334 return convert_utf8_to_utf16le_with_errors(
1335 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1336 utf16_output.data());
1337 }
1338}
1339 #endif // SIMDUTF_SPAN
1340
/**
 * Pointer form of convert_utf8_to_utf16be_with_errors: reads `length` bytes
 * from `input`, writes char16_t units through `utf16_output`, returns a
 * `result`. Declaration only; no body here.
 */
1355simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(
1356 const char *input, size_t length, char16_t *utf16_output) noexcept;
1357 #if SIMDUTF_SPAN
/**
 * Range form of convert_utf8_to_utf16be_with_errors. In a C++23
 * constant-evaluated call it runs
 * scalar::utf8_to_utf16::convert_with_errors<endianness::BIG>; otherwise it
 * forwards to the pointer form above.
 * utf16_output.size() is never consulted by this wrapper.
 */
1358simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1359convert_utf8_to_utf16be_with_errors(
1360 const detail::input_span_of_byte_like auto &utf8_input,
1361 std::span<char16_t> utf16_output) noexcept {
1362 #if SIMDUTF_CPLUSPLUS23
1363 if consteval {
1364 return scalar::utf8_to_utf16::convert_with_errors<endianness::BIG>(
1365 utf8_input.data(), utf8_input.size(), utf16_output.data());
1366 } else
1367 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
1368 {
1369 return convert_utf8_to_utf16be_with_errors(
1370 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1371 utf16_output.data());
1372 }
1373}
1374 #endif // SIMDUTF_SPAN
1375#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1376
1377#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
/**
 * Pointer form of convert_utf8_to_utf32: reads `length` bytes from `input`,
 * writes char32_t units through `utf32_output`, returns a size_t.
 * Declaration only; no body here.
 */
1390simdutf_warn_unused size_t convert_utf8_to_utf32(
1391 const char *input, size_t length, char32_t *utf32_output) noexcept;
1392 #if SIMDUTF_SPAN
/**
 * Range form of convert_utf8_to_utf32. In a C++23 constant-evaluated call it
 * runs scalar::utf8_to_utf32::convert; otherwise it forwards to the pointer
 * form above.
 * utf32_output.size() is never consulted by this wrapper.
 */
1393simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1394convert_utf8_to_utf32(const detail::input_span_of_byte_like auto &utf8_input,
1395 std::span<char32_t> utf32_output) noexcept {
1396 #if SIMDUTF_CPLUSPLUS23
1397 if consteval {
1398 return scalar::utf8_to_utf32::convert(utf8_input.data(), utf8_input.size(),
1399 utf32_output.data());
1400 } else
1401 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
1402 {
1403 return convert_utf8_to_utf32(
1404 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1405 utf32_output.data());
1406 }
1407}
1408 #endif // SIMDUTF_SPAN
1409
/**
 * Pointer form of convert_utf8_to_utf32_with_errors: reads `length` bytes
 * from `input`, writes char32_t units through `utf32_output`, returns a
 * `result`. Declaration only; no body here.
 */
1424simdutf_warn_unused result convert_utf8_to_utf32_with_errors(
1425 const char *input, size_t length, char32_t *utf32_output) noexcept;
1426 #if SIMDUTF_SPAN
/**
 * Range form of convert_utf8_to_utf32_with_errors. In a C++23
 * constant-evaluated call it runs
 * scalar::utf8_to_utf32::convert_with_errors; otherwise it forwards to the
 * pointer form above.
 * utf32_output.size() is never consulted by this wrapper.
 */
1427simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1428convert_utf8_to_utf32_with_errors(
1429 const detail::input_span_of_byte_like auto &utf8_input,
1430 std::span<char32_t> utf32_output) noexcept {
1431 #if SIMDUTF_CPLUSPLUS23
1432 if consteval {
1433 return scalar::utf8_to_utf32::convert_with_errors(
1434 utf8_input.data(), utf8_input.size(), utf32_output.data());
1435 } else
1436 #endif
 // Run-time path (and the only path when SIMDUTF_CPLUSPLUS23 is off).
1437 {
1438 return convert_utf8_to_utf32_with_errors(
1439 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
1440 utf32_output.data());
1441 }
1442}
1443 #endif // SIMDUTF_SPAN
1444#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1445
1446#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1466simdutf_warn_unused size_t convert_valid_utf8_to_latin1(
1467 const char *input, size_t length, char *latin1_output) noexcept;
1468 #if SIMDUTF_SPAN
1469simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1470convert_valid_utf8_to_latin1(
1471 const detail::input_span_of_byte_like auto &valid_utf8_input,
1472 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1473 #if SIMDUTF_CPLUSPLUS23
1474 if consteval {
1475 return scalar::utf8_to_latin1::convert_valid(
1476 valid_utf8_input.data(), valid_utf8_input.size(), latin1_output.data());
1477 } else
1478 #endif
1479 {
1480 return convert_valid_utf8_to_latin1(
1481 reinterpret_cast<const char *>(valid_utf8_input.data()),
1482 valid_utf8_input.size(), latin1_output.data());
1483 }
1484}
1485 #endif // SIMDUTF_SPAN
1486#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1487
1488#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1499simdutf_warn_unused size_t convert_valid_utf8_to_utf16(
1500 const char *input, size_t length, char16_t *utf16_buffer) noexcept;
1501 #if SIMDUTF_SPAN
1502simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1503convert_valid_utf8_to_utf16(
1504 const detail::input_span_of_byte_like auto &valid_utf8_input,
1505 std::span<char16_t> utf16_output) noexcept {
1506 #if SIMDUTF_CPLUSPLUS23
1507 if consteval {
1508 return scalar::utf8_to_utf16::convert_valid<endianness::NATIVE>(
1509 valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data());
1510 } else
1511 #endif
1512 {
1513 return convert_valid_utf8_to_utf16(
1514 reinterpret_cast<const char *>(valid_utf8_input.data()),
1515 valid_utf8_input.size(), utf16_output.data());
1516 }
1517}
1518 #endif // SIMDUTF_SPAN
1519
1530simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(
1531 const char *input, size_t length, char16_t *utf16_buffer) noexcept;
1532 #if SIMDUTF_SPAN
1533simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1534convert_valid_utf8_to_utf16le(
1535 const detail::input_span_of_byte_like auto &valid_utf8_input,
1536 std::span<char16_t> utf16_output) noexcept {
1537
1538 #if SIMDUTF_CPLUSPLUS23
1539 if consteval {
1540 return scalar::utf8_to_utf16::convert_valid<endianness::LITTLE>(
1541 valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data());
1542 } else
1543 #endif
1544 {
1545 return convert_valid_utf8_to_utf16le(
1546 reinterpret_cast<const char *>(valid_utf8_input.data()),
1547 valid_utf8_input.size(), utf16_output.data());
1548 }
1549}
1550 #endif // SIMDUTF_SPAN
1551
1562simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(
1563 const char *input, size_t length, char16_t *utf16_buffer) noexcept;
1564 #if SIMDUTF_SPAN
1565simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1566convert_valid_utf8_to_utf16be(
1567 const detail::input_span_of_byte_like auto &valid_utf8_input,
1568 std::span<char16_t> utf16_output) noexcept {
1569 #if SIMDUTF_CPLUSPLUS23
1570 if consteval {
1571 return scalar::utf8_to_utf16::convert_valid<endianness::BIG>(
1572 valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data());
1573 } else
1574 #endif
1575 {
1576 return convert_valid_utf8_to_utf16be(
1577 reinterpret_cast<const char *>(valid_utf8_input.data()),
1578 valid_utf8_input.size(), utf16_output.data());
1579 }
1580}
1581 #endif // SIMDUTF_SPAN
1582#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1583
1584#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1595simdutf_warn_unused size_t convert_valid_utf8_to_utf32(
1596 const char *input, size_t length, char32_t *utf32_buffer) noexcept;
1597 #if SIMDUTF_SPAN
1598simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1599convert_valid_utf8_to_utf32(
1600 const detail::input_span_of_byte_like auto &valid_utf8_input,
1601 std::span<char32_t> utf32_output) noexcept {
1602 #if SIMDUTF_CPLUSPLUS23
1603 if consteval {
1604 return scalar::utf8_to_utf32::convert_valid(
1605 valid_utf8_input.data(), valid_utf8_input.size(), utf32_output.data());
1606 } else
1607 #endif
1608 {
1609 return convert_valid_utf8_to_utf32(
1610 reinterpret_cast<const char *>(valid_utf8_input.data()),
1611 valid_utf8_input.size(), utf32_output.data());
1612 }
1613}
1614 #endif // SIMDUTF_SPAN
1615#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1616
1617#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1626simdutf_warn_unused size_t utf8_length_from_latin1(const char *input,
1627 size_t length) noexcept;
1628 #if SIMDUTF_SPAN
1629simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1630utf8_length_from_latin1(
1631 const detail::input_span_of_byte_like auto &latin1_input) noexcept {
1632 #if SIMDUTF_CPLUSPLUS23
1633 if consteval {
1634 return scalar::latin1_to_utf8::utf8_length_from_latin1(latin1_input.data(),
1635 latin1_input.size());
1636 } else
1637 #endif
1638 {
1639 return utf8_length_from_latin1(
1640 reinterpret_cast<const char *>(latin1_input.data()),
1641 latin1_input.size());
1642 }
1643}
1644 #endif // SIMDUTF_SPAN
1645
1659simdutf_warn_unused size_t latin1_length_from_utf8(const char *input,
1660 size_t length) noexcept;
1661 #if SIMDUTF_SPAN
1662simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1663latin1_length_from_utf8(
1664 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
1665 #if SIMDUTF_CPLUSPLUS23
1666 if consteval {
1667 return scalar::utf8::count_code_points(valid_utf8_input.data(),
1668 valid_utf8_input.size());
1669 } else
1670 #endif
1671 {
1672 return latin1_length_from_utf8(
1673 reinterpret_cast<const char *>(valid_utf8_input.data()),
1674 valid_utf8_input.size());
1675 }
1676}
1677 #endif // SIMDUTF_SPAN
1678#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1679
1680#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1695simdutf_warn_unused size_t utf16_length_from_utf8(const char *input,
1696 size_t length) noexcept;
1697 #if SIMDUTF_SPAN
1698simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1699utf16_length_from_utf8(
1700 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
1701 #if SIMDUTF_CPLUSPLUS23
1702 if consteval {
1703 return scalar::utf8::utf16_length_from_utf8(valid_utf8_input.data(),
1704 valid_utf8_input.size());
1705 } else
1706 #endif
1707 {
1708 return utf16_length_from_utf8(
1709 reinterpret_cast<const char *>(valid_utf8_input.data()),
1710 valid_utf8_input.size());
1711 }
1712}
1713 #endif // SIMDUTF_SPAN
1714#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1715
1716#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1733simdutf_warn_unused size_t utf32_length_from_utf8(const char *input,
1734 size_t length) noexcept;
1735 #if SIMDUTF_SPAN
1736simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1737utf32_length_from_utf8(
1738 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
1739
1740 #if SIMDUTF_CPLUSPLUS23
1741 if consteval {
1742 return scalar::utf8::count_code_points(valid_utf8_input.data(),
1743 valid_utf8_input.size());
1744 } else
1745 #endif
1746 {
1747 return utf32_length_from_utf8(
1748 reinterpret_cast<const char *>(valid_utf8_input.data()),
1749 valid_utf8_input.size());
1750 }
1751}
1752 #endif // SIMDUTF_SPAN
1753#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1754
1755#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1771simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t *input,
1772 size_t length,
1773 char *utf8_buffer) noexcept;
1774 #if SIMDUTF_SPAN
1775simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1776convert_utf16_to_utf8(
1777 std::span<const char16_t> utf16_input,
1778 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1779 #if SIMDUTF_CPLUSPLUS23
1780 if consteval {
1781 return scalar::utf16_to_utf8::convert<endianness::NATIVE>(
1782 utf16_input.data(), utf16_input.size(), utf8_output.data());
1783 } else
1784 #endif
1785 {
1786 return convert_utf16_to_utf8(utf16_input.data(), utf16_input.size(),
1787 reinterpret_cast<char *>(utf8_output.data()));
1788 }
1789}
1790 #endif // SIMDUTF_SPAN
1791
1810simdutf_warn_unused size_t convert_utf16_to_utf8_safe(const char16_t *input,
1811 size_t length,
1812 char *utf8_output,
1813 size_t utf8_len) noexcept;
1814 #if SIMDUTF_SPAN
1815simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1816convert_utf16_to_utf8_safe(
1817 std::span<const char16_t> utf16_input,
1818 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1819 // implementation note: outputspan is a forwarding ref to avoid copying
1820 // and allow both lvalues and rvalues. std::span can be copied without
1821 // problems, but std::vector should not, and this function should accept
1822 // both. it will allow using an owning rvalue ref (example: passing a
1823 // temporary std::string) as output, but the user will quickly find out
1824 // that he has no way of getting the data out of the object in that case.
1825 #if SIMDUTF_CPLUSPLUS23
1826 if consteval {
1827 const full_result r =
1828 scalar::utf16_to_utf8::convert_with_errors<endianness::NATIVE, true>(
1829 utf16_input.data(), utf16_input.size(), utf8_output.data(),
1830 utf8_output.size());
1831 if (r.error != error_code::SUCCESS &&
1832 r.error != error_code::OUTPUT_BUFFER_TOO_SMALL) {
1833 return 0;
1834 }
1835 return r.output_count;
1836 } else
1837 #endif
1838 {
1839 return convert_utf16_to_utf8_safe(
1840 utf16_input.data(), utf16_input.size(),
1841 reinterpret_cast<char *>(utf8_output.data()), utf8_output.size());
1842 }
1843}
1844 #endif // SIMDUTF_SPAN
1845#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1846
1847#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1863simdutf_warn_unused size_t convert_utf16_to_latin1(
1864 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1865 #if SIMDUTF_SPAN
1866simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1867convert_utf16_to_latin1(
1868 std::span<const char16_t> utf16_input,
1869 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1870 #if SIMDUTF_CPLUSPLUS23
1871 if consteval {
1872 return scalar::utf16_to_latin1::convert<endianness::NATIVE>(
1873 utf16_input.data(), utf16_input.size(), latin1_output.data());
1874 } else
1875 #endif
1876 {
1877 return convert_utf16_to_latin1(
1878 utf16_input.data(), utf16_input.size(),
1879 reinterpret_cast<char *>(latin1_output.data()));
1880 }
1881}
1882 #endif // SIMDUTF_SPAN
1883
1900simdutf_warn_unused size_t convert_utf16le_to_latin1(
1901 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1902 #if SIMDUTF_SPAN
1903simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1904convert_utf16le_to_latin1(
1905 std::span<const char16_t> utf16_input,
1906 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1907 #if SIMDUTF_CPLUSPLUS23
1908 if consteval {
1909 return scalar::utf16_to_latin1::convert<endianness::LITTLE>(
1910 utf16_input.data(), utf16_input.size(), latin1_output.data());
1911 } else
1912 #endif
1913 {
1914 return convert_utf16le_to_latin1(
1915 utf16_input.data(), utf16_input.size(),
1916 reinterpret_cast<char *>(latin1_output.data()));
1917 }
1918}
1919 #endif // SIMDUTF_SPAN
1920
1935simdutf_warn_unused size_t convert_utf16be_to_latin1(
1936 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1937 #if SIMDUTF_SPAN
1938simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1939convert_utf16be_to_latin1(
1940 std::span<const char16_t> utf16_input,
1941 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1942 #if SIMDUTF_CPLUSPLUS23
1943 if consteval {
1944 return scalar::utf16_to_latin1::convert<endianness::BIG>(
1945 utf16_input.data(), utf16_input.size(), latin1_output.data());
1946 } else
1947 #endif
1948 {
1949 return convert_utf16be_to_latin1(
1950 utf16_input.data(), utf16_input.size(),
1951 reinterpret_cast<char *>(latin1_output.data()));
1952 }
1953}
1954 #endif // SIMDUTF_SPAN
1955#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1956
1957#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1972simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *input,
1973 size_t length,
1974 char *utf8_buffer) noexcept;
1975 #if SIMDUTF_SPAN
1976simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
1977convert_utf16le_to_utf8(
1978 std::span<const char16_t> utf16_input,
1979 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1980 #if SIMDUTF_CPLUSPLUS23
1981 if consteval {
1982 return scalar::utf16_to_utf8::convert<endianness::LITTLE>(
1983 utf16_input.data(), utf16_input.size(), utf8_output.data());
1984 } else
1985 #endif
1986 {
1987 return convert_utf16le_to_utf8(
1988 utf16_input.data(), utf16_input.size(),
1989 reinterpret_cast<char *>(utf8_output.data()));
1990 }
1991}
1992 #endif // SIMDUTF_SPAN
1993
2008simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input,
2009 size_t length,
2010 char *utf8_buffer) noexcept;
2011 #if SIMDUTF_SPAN
2012simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2013convert_utf16be_to_utf8(
2014 std::span<const char16_t> utf16_input,
2015 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2016 #if SIMDUTF_CPLUSPLUS23
2017 if consteval {
2018 return scalar::utf16_to_utf8::convert<endianness::BIG>(
2019 utf16_input.data(), utf16_input.size(), utf8_output.data());
2020 } else
2021 #endif
2022 {
2023 return convert_utf16be_to_utf8(
2024 utf16_input.data(), utf16_input.size(),
2025 reinterpret_cast<char *>(utf8_output.data()));
2026 }
2027}
2028 #endif // SIMDUTF_SPAN
2029#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2030
2031#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
2048simdutf_warn_unused result convert_utf16_to_latin1_with_errors(
2049 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
2050 #if SIMDUTF_SPAN
2051simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2052convert_utf16_to_latin1_with_errors(
2053 std::span<const char16_t> utf16_input,
2054 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2055 #if SIMDUTF_CPLUSPLUS23
2056 if consteval {
2057 return scalar::utf16_to_latin1::convert_with_errors<endianness::NATIVE>(
2058 utf16_input.data(), utf16_input.size(), latin1_output.data());
2059 } else
2060 #endif
2061 {
2062 return convert_utf16_to_latin1_with_errors(
2063 utf16_input.data(), utf16_input.size(),
2064 reinterpret_cast<char *>(latin1_output.data()));
2065 }
2066}
2067 #endif // SIMDUTF_SPAN
2068
2084simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(
2085 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
2086 #if SIMDUTF_SPAN
2087simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2088convert_utf16le_to_latin1_with_errors(
2089 std::span<const char16_t> utf16_input,
2090 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2091 #if SIMDUTF_CPLUSPLUS23
2092 if consteval {
2093 return scalar::utf16_to_latin1::convert_with_errors<endianness::LITTLE>(
2094 utf16_input.data(), utf16_input.size(), latin1_output.data());
2095 } else
2096 #endif
2097 {
2098 return convert_utf16le_to_latin1_with_errors(
2099 utf16_input.data(), utf16_input.size(),
2100 reinterpret_cast<char *>(latin1_output.data()));
2101 }
2102}
2103 #endif // SIMDUTF_SPAN
2104
2122simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(
2123 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
2124 #if SIMDUTF_SPAN
2125simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2126convert_utf16be_to_latin1_with_errors(
2127 std::span<const char16_t> utf16_input,
2128 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2129 #if SIMDUTF_CPLUSPLUS23
2130 if consteval {
2131 return scalar::utf16_to_latin1::convert_with_errors<endianness::BIG>(
2132 utf16_input.data(), utf16_input.size(), latin1_output.data());
2133 } else
2134 #endif
2135 {
2136 return convert_utf16be_to_latin1_with_errors(
2137 utf16_input.data(), utf16_input.size(),
2138 reinterpret_cast<char *>(latin1_output.data()));
2139 }
2140}
2141 #endif // SIMDUTF_SPAN
2142#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
2143
2144#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2162simdutf_warn_unused result convert_utf16_to_utf8_with_errors(
2163 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2164 #if SIMDUTF_SPAN
2165simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2166convert_utf16_to_utf8_with_errors(
2167 std::span<const char16_t> utf16_input,
2168 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2169 #if SIMDUTF_CPLUSPLUS23
2170 if consteval {
2171 return scalar::utf16_to_utf8::convert_with_errors<endianness::NATIVE>(
2172 utf16_input.data(), utf16_input.size(), utf8_output.data());
2173 } else
2174 #endif
2175 {
2176 return convert_utf16_to_utf8_with_errors(
2177 utf16_input.data(), utf16_input.size(),
2178 reinterpret_cast<char *>(utf8_output.data()));
2179 }
2180}
2181 #endif // SIMDUTF_SPAN
2182
2199simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(
2200 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2201 #if SIMDUTF_SPAN
2202simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2203convert_utf16le_to_utf8_with_errors(
2204 std::span<const char16_t> utf16_input,
2205 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2206 #if SIMDUTF_CPLUSPLUS23
2207 if consteval {
2208 return scalar::utf16_to_utf8::convert_with_errors<endianness::LITTLE>(
2209 utf16_input.data(), utf16_input.size(), utf8_output.data());
2210 } else
2211 #endif
2212 {
2213 return convert_utf16le_to_utf8_with_errors(
2214 utf16_input.data(), utf16_input.size(),
2215 reinterpret_cast<char *>(utf8_output.data()));
2216 }
2217}
2218 #endif // SIMDUTF_SPAN
2219
2236simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(
2237 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2238 #if SIMDUTF_SPAN
2239simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2240convert_utf16be_to_utf8_with_errors(
2241 std::span<const char16_t> utf16_input,
2242 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2243 #if SIMDUTF_CPLUSPLUS23
2244 if consteval {
2245 return scalar::utf16_to_utf8::convert_with_errors<endianness::BIG>(
2246 utf16_input.data(), utf16_input.size(), utf8_output.data());
2247 } else
2248 #endif
2249 {
2250 return convert_utf16be_to_utf8_with_errors(
2251 utf16_input.data(), utf16_input.size(),
2252 reinterpret_cast<char *>(utf8_output.data()));
2253 }
2254}
2255 #endif // SIMDUTF_SPAN
2256
2270simdutf_warn_unused size_t convert_valid_utf16_to_utf8(
2271 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2272 #if SIMDUTF_SPAN
2273simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2274convert_valid_utf16_to_utf8(
2275 std::span<const char16_t> valid_utf16_input,
2276 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2277 #if SIMDUTF_CPLUSPLUS23
2278 if consteval {
2279 return scalar::utf16_to_utf8::convert_valid<endianness::NATIVE>(
2280 valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data());
2281 } else
2282 #endif
2283 {
2284 return convert_valid_utf16_to_utf8(
2285 valid_utf16_input.data(), valid_utf16_input.size(),
2286 reinterpret_cast<char *>(utf8_output.data()));
2287 }
2288}
2289 #endif // SIMDUTF_SPAN
2290#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2291
2292#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
2312simdutf_warn_unused size_t convert_valid_utf16_to_latin1(
2313 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
2314 #if SIMDUTF_SPAN
2315simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2316convert_valid_utf16_to_latin1(
2317 std::span<const char16_t> valid_utf16_input,
2318 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2319 #if SIMDUTF_CPLUSPLUS23
2320 if consteval {
2321 return scalar::utf16_to_latin1::convert_valid_impl<endianness::NATIVE>(
2322 detail::constexpr_cast_ptr<uint16_t>(valid_utf16_input.data()),
2323 valid_utf16_input.size(),
2324 detail::constexpr_cast_writeptr<char>(latin1_output.data()));
2325 } else
2326 #endif
2327 {
2328 return convert_valid_utf16_to_latin1(
2329 valid_utf16_input.data(), valid_utf16_input.size(),
2330 reinterpret_cast<char *>(latin1_output.data()));
2331 }
2332}
2333 #endif // SIMDUTF_SPAN
2334
2354simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(
2355 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
2356 #if SIMDUTF_SPAN
2357simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused size_t
2358convert_valid_utf16le_to_latin1(
2359 std::span<const char16_t> valid_utf16_input,
2360 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2361 #if SIMDUTF_CPLUSPLUS23
2362 if consteval {
2363 return scalar::utf16_to_latin1::convert_valid_impl<endianness::LITTLE>(
2364 detail::constexpr_cast_ptr<uint16_t>(valid_utf16_input.data()),
2365 valid_utf16_input.size(),
2366 detail::constexpr_cast_writeptr<char>(latin1_output.data()));
2367 } else
2368 #endif
2369 {
2370 return convert_valid_utf16le_to_latin1(
2371 valid_utf16_input.data(), valid_utf16_input.size(),
2372 reinterpret_cast<char *>(latin1_output.data()));
2373 }
2374}
2375 #endif // SIMDUTF_SPAN
2376
2396simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(
2397 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
2398 #if SIMDUTF_SPAN
2399simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused size_t
2400convert_valid_utf16be_to_latin1(
2401 std::span<const char16_t> valid_utf16_input,
2402 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2403 #if SIMDUTF_CPLUSPLUS23
2404 if consteval {
2405 return scalar::utf16_to_latin1::convert_valid_impl<endianness::BIG>(
2406 detail::constexpr_cast_ptr<uint16_t>(valid_utf16_input.data()),
2407 valid_utf16_input.size(),
2408 detail::constexpr_cast_writeptr<char>(latin1_output.data()));
2409 } else
2410 #endif
2411 {
2412 return convert_valid_utf16be_to_latin1(
2413 valid_utf16_input.data(), valid_utf16_input.size(),
2414 reinterpret_cast<char *>(latin1_output.data()));
2415 }
2416}
2417 #endif // SIMDUTF_SPAN
2418#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
2419
2420#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2434simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(
2435 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2436 #if SIMDUTF_SPAN
2437simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2438convert_valid_utf16le_to_utf8(
2439 std::span<const char16_t> valid_utf16_input,
2440 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2441 #if SIMDUTF_CPLUSPLUS23
2442 if consteval {
2443 return scalar::utf16_to_utf8::convert_valid<endianness::NATIVE>(
2444 valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data());
2445 } else
2446 #endif
2447 {
2448 return convert_valid_utf16le_to_utf8(
2449 valid_utf16_input.data(), valid_utf16_input.size(),
2450 reinterpret_cast<char *>(utf8_output.data()));
2451 }
2452}
2453 #endif // SIMDUTF_SPAN
2454
2468simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(
2469 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
2470 #if SIMDUTF_SPAN
2471simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2472convert_valid_utf16be_to_utf8(
2473 std::span<const char16_t> valid_utf16_input,
2474 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2475 #if SIMDUTF_CPLUSPLUS23
2476 if consteval {
2477 return scalar::utf16_to_utf8::convert_valid<endianness::BIG>(
2478 valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data());
2479 } else
2480 #endif
2481 {
2482 return convert_valid_utf16be_to_utf8(
2483 valid_utf16_input.data(), valid_utf16_input.size(),
2484 reinterpret_cast<char *>(utf8_output.data()));
2485 }
2486}
2487 #endif // SIMDUTF_SPAN
2488#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2489
2490#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2506simdutf_warn_unused size_t convert_utf16_to_utf32(
2507 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2508 #if SIMDUTF_SPAN
2509simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2510convert_utf16_to_utf32(std::span<const char16_t> utf16_input,
2511 std::span<char32_t> utf32_output) noexcept {
2512
2513 #if SIMDUTF_CPLUSPLUS23
2514 if consteval {
2515 return scalar::utf16_to_utf32::convert<endianness::NATIVE>(
2516 utf16_input.data(), utf16_input.size(), utf32_output.data());
2517 } else
2518 #endif
2519 {
2520 return convert_utf16_to_utf32(utf16_input.data(), utf16_input.size(),
2521 utf32_output.data());
2522 }
2523}
2524 #endif // SIMDUTF_SPAN
2525
2540simdutf_warn_unused size_t convert_utf16le_to_utf32(
2541 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2542 #if SIMDUTF_SPAN
2543simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2544convert_utf16le_to_utf32(std::span<const char16_t> utf16_input,
2545 std::span<char32_t> utf32_output) noexcept {
2546 #if SIMDUTF_CPLUSPLUS23
2547 if consteval {
2548 return scalar::utf16_to_utf32::convert<endianness::LITTLE>(
2549 utf16_input.data(), utf16_input.size(), utf32_output.data());
2550 } else
2551 #endif
2552 {
2553 return convert_utf16le_to_utf32(utf16_input.data(), utf16_input.size(),
2554 utf32_output.data());
2555 }
2556}
2557 #endif // SIMDUTF_SPAN
2558
2573simdutf_warn_unused size_t convert_utf16be_to_utf32(
2574 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2575 #if SIMDUTF_SPAN
2576simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2577convert_utf16be_to_utf32(std::span<const char16_t> utf16_input,
2578 std::span<char32_t> utf32_output) noexcept {
2579 #if SIMDUTF_CPLUSPLUS23
2580 if consteval {
2581 return scalar::utf16_to_utf32::convert<endianness::BIG>(
2582 utf16_input.data(), utf16_input.size(), utf32_output.data());
2583 } else
2584 #endif
2585 {
2586 return convert_utf16be_to_utf32(utf16_input.data(), utf16_input.size(),
2587 utf32_output.data());
2588 }
2589}
2590 #endif // SIMDUTF_SPAN
2591
2609simdutf_warn_unused result convert_utf16_to_utf32_with_errors(
2610 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2611 #if SIMDUTF_SPAN
2612simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2613convert_utf16_to_utf32_with_errors(std::span<const char16_t> utf16_input,
2614 std::span<char32_t> utf32_output) noexcept {
2615 #if SIMDUTF_CPLUSPLUS23
2616 if consteval {
2617 return scalar::utf16_to_utf32::convert_with_errors<endianness::NATIVE>(
2618 utf16_input.data(), utf16_input.size(), utf32_output.data());
2619 } else
2620 #endif
2621 {
2622 return convert_utf16_to_utf32_with_errors(
2623 utf16_input.data(), utf16_input.size(), utf32_output.data());
2624 }
2625}
2626 #endif // SIMDUTF_SPAN
2627
2644simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(
2645 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2646 #if SIMDUTF_SPAN
2647simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2648convert_utf16le_to_utf32_with_errors(
2649 std::span<const char16_t> utf16_input,
2650 std::span<char32_t> utf32_output) noexcept {
2651 #if SIMDUTF_CPLUSPLUS23
2652 if consteval {
2653 return scalar::utf16_to_utf32::convert_with_errors<endianness::LITTLE>(
2654 utf16_input.data(), utf16_input.size(), utf32_output.data());
2655 } else
2656 #endif
2657 {
2658 return convert_utf16le_to_utf32_with_errors(
2659 utf16_input.data(), utf16_input.size(), utf32_output.data());
2660 }
2661}
2662 #endif // SIMDUTF_SPAN
2663
2680simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(
2681 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2682 #if SIMDUTF_SPAN
2683simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2684convert_utf16be_to_utf32_with_errors(
2685 std::span<const char16_t> utf16_input,
2686 std::span<char32_t> utf32_output) noexcept {
2687 #if SIMDUTF_CPLUSPLUS23
2688 if consteval {
2689 return scalar::utf16_to_utf32::convert_with_errors<endianness::BIG>(
2690 utf16_input.data(), utf16_input.size(), utf32_output.data());
2691 } else
2692 #endif
2693 {
2694 return convert_utf16be_to_utf32_with_errors(
2695 utf16_input.data(), utf16_input.size(), utf32_output.data());
2696 }
2697}
2698 #endif // SIMDUTF_SPAN
2699
2714simdutf_warn_unused size_t convert_valid_utf16_to_utf32(
2715 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2716 #if SIMDUTF_SPAN
2717simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2718convert_valid_utf16_to_utf32(std::span<const char16_t> valid_utf16_input,
2719 std::span<char32_t> utf32_output) noexcept {
2720 #if SIMDUTF_CPLUSPLUS23
2721 if consteval {
2722 return scalar::utf16_to_utf32::convert_valid<endianness::NATIVE>(
2723 valid_utf16_input.data(), valid_utf16_input.size(),
2724 utf32_output.data());
2725 } else
2726 #endif
2727 {
2728 return convert_valid_utf16_to_utf32(valid_utf16_input.data(),
2729 valid_utf16_input.size(),
2730 utf32_output.data());
2731 }
2732}
2733 #endif // SIMDUTF_SPAN
2734
2748simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(
2749 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2750 #if SIMDUTF_SPAN
2751simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2752convert_valid_utf16le_to_utf32(std::span<const char16_t> valid_utf16_input,
2753 std::span<char32_t> utf32_output) noexcept {
2754 #if SIMDUTF_CPLUSPLUS23
2755 if consteval {
2756 return scalar::utf16_to_utf32::convert_valid<endianness::LITTLE>(
2757 valid_utf16_input.data(), valid_utf16_input.size(),
2758 utf32_output.data());
2759 } else
2760 #endif
2761 {
2762 return convert_valid_utf16le_to_utf32(valid_utf16_input.data(),
2763 valid_utf16_input.size(),
2764 utf32_output.data());
2765 }
2766}
2767 #endif // SIMDUTF_SPAN
2768
2782simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(
2783 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
2784 #if SIMDUTF_SPAN
2785simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2786convert_valid_utf16be_to_utf32(std::span<const char16_t> valid_utf16_input,
2787 std::span<char32_t> utf32_output) noexcept {
2788 #if SIMDUTF_CPLUSPLUS23
2789 if consteval {
2790 return scalar::utf16_to_utf32::convert_valid<endianness::BIG>(
2791 valid_utf16_input.data(), valid_utf16_input.size(),
2792 utf32_output.data());
2793 } else
2794 #endif
2795 {
2796 return convert_valid_utf16be_to_utf32(valid_utf16_input.data(),
2797 valid_utf16_input.size(),
2798 utf32_output.data());
2799 }
2800}
2801 #endif // SIMDUTF_SPAN
2802#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2803
2804#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
/**
 * utf8_length_from_utf16 -- pointer overload (declaration only).
 * NOTE(review): presumably returns the number of UTF-8 bytes needed for the
 * native-endian UTF-16 input; confirm against the implementation.
 */
2816simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t *input,
2817 size_t length) noexcept;
2818 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation uses the scalar routine instantiated for
 * endianness::NATIVE; at run time data()/size() go to the pointer overload.
 */
2819simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2820utf8_length_from_utf16(std::span<const char16_t> valid_utf16_input) noexcept {
2821 #if SIMDUTF_CPLUSPLUS23
2822 if consteval {
2823 return scalar::utf16::utf8_length_from_utf16<endianness::NATIVE>(
2824 valid_utf16_input.data(), valid_utf16_input.size());
2825 } else
2826 #endif
2827 {
2828 return utf8_length_from_utf16(valid_utf16_input.data(),
2829 valid_utf16_input.size());
2830 }
2831}
2832 #endif // SIMDUTF_SPAN
2833
/**
 * utf8_length_from_utf16_with_replacement -- pointer overload (declaration
 * only). Returns a `result` rather than a plain size_t.
 * NOTE(review): the meaning of the `result` fields for this function is not
 * visible here; confirm against the implementation.
 */
2852simdutf_warn_unused result utf8_length_from_utf16_with_replacement(
2853 const char16_t *input, size_t length) noexcept;
2854 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation uses the scalar routine instantiated for
 * endianness::NATIVE; at run time data()/size() go to the pointer overload.
 */
2855simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2856utf8_length_from_utf16_with_replacement(
2857 std::span<const char16_t> valid_utf16_input) noexcept {
2858 #if SIMDUTF_CPLUSPLUS23
2859 if consteval {
2860 return scalar::utf16::utf8_length_from_utf16_with_replacement<
2861 endianness::NATIVE>(valid_utf16_input.data(), valid_utf16_input.size());
2862 } else
2863 #endif
2864 {
2865 return utf8_length_from_utf16_with_replacement(valid_utf16_input.data(),
2866 valid_utf16_input.size());
2867 }
2868}
2869 #endif // SIMDUTF_SPAN
2870
/**
 * utf8_length_from_utf16le -- pointer overload (declaration only).
 * NOTE(review): presumably the UTF-8 byte count for UTF-16LE input; confirm.
 */
2882simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input,
2883 size_t length) noexcept;
2884 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation uses the scalar routine instantiated for
 * endianness::LITTLE; at run time data()/size() go to the pointer overload.
 */
2885simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused size_t
2886utf8_length_from_utf16le(std::span<const char16_t> valid_utf16_input) noexcept {
2887 #if SIMDUTF_CPLUSPLUS23
2888 if consteval {
2889 return scalar::utf16::utf8_length_from_utf16<endianness::LITTLE>(
2890 valid_utf16_input.data(), valid_utf16_input.size());
2891 } else
2892 #endif
2893 {
2894 return utf8_length_from_utf16le(valid_utf16_input.data(),
2895 valid_utf16_input.size());
2896 }
2897}
2898 #endif // SIMDUTF_SPAN
2899
/**
 * utf8_length_from_utf16be -- pointer overload (declaration only).
 * NOTE(review): presumably the UTF-8 byte count for UTF-16BE input; confirm.
 */
2911simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input,
2912 size_t length) noexcept;
2913 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation uses the scalar routine instantiated for
 * endianness::BIG; at run time data()/size() go to the pointer overload.
 */
2914simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2915utf8_length_from_utf16be(std::span<const char16_t> valid_utf16_input) noexcept {
2916 #if SIMDUTF_CPLUSPLUS23
2917 if consteval {
2918 return scalar::utf16::utf8_length_from_utf16<endianness::BIG>(
2919 valid_utf16_input.data(), valid_utf16_input.size());
2920 } else
2921 #endif
2922 {
2923 return utf8_length_from_utf16be(valid_utf16_input.data(),
2924 valid_utf16_input.size());
2925 }
2926}
2927 #endif // SIMDUTF_SPAN
2928#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2929
2930#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
/**
 * convert_utf32_to_utf8 -- pointer overload (declaration only; defined
 * elsewhere).
 * NOTE(review): presumably returns the number of bytes written; confirm.
 */
2944simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input,
2945 size_t length,
2946 char *utf8_buffer) noexcept;
2947 #if SIMDUTF_SPAN
/**
 * Span overload accepting any byte-like output span. Constant evaluation
 * hands the output pointer to the scalar routine unchanged; the run-time
 * path reinterpret_casts it to char* for the pointer overload. The output
 * span's size() is never read here.
 */
2948simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
2949convert_utf32_to_utf8(
2950 std::span<const char32_t> utf32_input,
2951 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2952 #if SIMDUTF_CPLUSPLUS23
2953 if consteval {
2954 return scalar::utf32_to_utf8::convert(
2955 utf32_input.data(), utf32_input.size(), utf8_output.data());
2956 } else
2957 #endif
2958 {
2959 return convert_utf32_to_utf8(utf32_input.data(), utf32_input.size(),
2960 reinterpret_cast<char *>(utf8_output.data()));
2961 }
2962}
2963 #endif // SIMDUTF_SPAN
2964
/**
 * convert_utf32_to_utf8_with_errors -- pointer overload (declaration only).
 * Returns a `result`; its field semantics are not visible in this section.
 */
2981simdutf_warn_unused result convert_utf32_to_utf8_with_errors(
2982 const char32_t *input, size_t length, char *utf8_buffer) noexcept;
2983 #if SIMDUTF_SPAN
/**
 * Span overload accepting any byte-like output span. Constant evaluation
 * calls scalar::utf32_to_utf8::convert_with_errors; the run-time path
 * reinterpret_casts the output pointer to char* for the pointer overload.
 */
2984simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2985convert_utf32_to_utf8_with_errors(
2986 std::span<const char32_t> utf32_input,
2987 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2988 #if SIMDUTF_CPLUSPLUS23
2989 if consteval {
2990 return scalar::utf32_to_utf8::convert_with_errors(
2991 utf32_input.data(), utf32_input.size(), utf8_output.data());
2992 } else
2993 #endif
2994 {
2995 return convert_utf32_to_utf8_with_errors(
2996 utf32_input.data(), utf32_input.size(),
2997 reinterpret_cast<char *>(utf8_output.data()));
2998 }
2999}
3000 #endif // SIMDUTF_SPAN
3001
/**
 * convert_valid_utf32_to_utf8 -- pointer overload (declaration only).
 * NOTE(review): per the name, the input is presumably already-validated
 * UTF-32; confirm against the implementation.
 */
3015simdutf_warn_unused size_t convert_valid_utf32_to_utf8(
3016 const char32_t *input, size_t length, char *utf8_buffer) noexcept;
3017 #if SIMDUTF_SPAN
/**
 * Span overload accepting any byte-like output span. Constant evaluation
 * calls scalar::utf32_to_utf8::convert_valid; the run-time path
 * reinterpret_casts the output pointer to char* for the pointer overload.
 */
3018simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3019convert_valid_utf32_to_utf8(
3020 std::span<const char32_t> valid_utf32_input,
3021 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
3022 #if SIMDUTF_CPLUSPLUS23
3023 if consteval {
3024 return scalar::utf32_to_utf8::convert_valid(
3025 valid_utf32_input.data(), valid_utf32_input.size(), utf8_output.data());
3026 } else
3027 #endif
3028 {
3029 return convert_valid_utf32_to_utf8(
3030 valid_utf32_input.data(), valid_utf32_input.size(),
3031 reinterpret_cast<char *>(utf8_output.data()));
3032 }
3033}
3034 #endif // SIMDUTF_SPAN
3035#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
3036
3037#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
/**
 * convert_utf32_to_utf16 -- pointer overload (declaration only).
 * NOTE(review): presumably returns the number of char16_t units written.
 */
3052simdutf_warn_unused size_t convert_utf32_to_utf16(
3053 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3054 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation uses the scalar converter instantiated
 * for endianness::NATIVE; at run time it forwards to the pointer overload.
 * The output span's size() is never read here.
 */
3055simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3056convert_utf32_to_utf16(std::span<const char32_t> utf32_input,
3057 std::span<char16_t> utf16_output) noexcept {
3058 #if SIMDUTF_CPLUSPLUS23
3059 if consteval {
3060 return scalar::utf32_to_utf16::convert<endianness::NATIVE>(
3061 utf32_input.data(), utf32_input.size(), utf16_output.data());
3062 } else
3063 #endif
3064 {
3065 return convert_utf32_to_utf16(utf32_input.data(), utf32_input.size(),
3066 utf16_output.data());
3067 }
3068}
3069 #endif // SIMDUTF_SPAN
3070
/**
 * convert_utf32_to_utf16le -- pointer overload (declaration only).
 * NOTE(review): presumably returns the number of char16_t units written.
 */
3084simdutf_warn_unused size_t convert_utf32_to_utf16le(
3085 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3086 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation uses the scalar converter instantiated
 * for endianness::LITTLE; at run time it forwards to the pointer overload.
 */
3087simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3088convert_utf32_to_utf16le(std::span<const char32_t> utf32_input,
3089 std::span<char16_t> utf16_output) noexcept {
3090 #if SIMDUTF_CPLUSPLUS23
3091 if consteval {
3092 return scalar::utf32_to_utf16::convert<endianness::LITTLE>(
3093 utf32_input.data(), utf32_input.size(), utf16_output.data());
3094 } else
3095 #endif
3096 {
3097 return convert_utf32_to_utf16le(utf32_input.data(), utf32_input.size(),
3098 utf16_output.data());
3099 }
3100}
3101 #endif // SIMDUTF_SPAN
3102#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
3103
3104#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
/**
 * convert_utf32_to_latin1 -- pointer overload (declaration only).
 * NOTE(review): behaviour for code points outside Latin-1 is not visible in
 * this section; confirm against the implementation.
 */
3119simdutf_warn_unused size_t convert_utf32_to_latin1(
3120 const char32_t *input, size_t length, char *latin1_buffer) noexcept;
3121 #if SIMDUTF_SPAN
/**
 * Span overload accepting any byte-like output span. Constant evaluation
 * calls scalar::utf32_to_latin1::convert; the run-time path
 * reinterpret_casts the output pointer to char* for the pointer overload.
 */
3122simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3123convert_utf32_to_latin1(
3124 std::span<const char32_t> utf32_input,
3125 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
3126 #if SIMDUTF_CPLUSPLUS23
3127 if consteval {
3128 return scalar::utf32_to_latin1::convert(
3129 utf32_input.data(), utf32_input.size(), latin1_output.data());
3130 } else
3131 #endif
3132 {
3133 return convert_utf32_to_latin1(
3134 utf32_input.data(), utf32_input.size(),
3135 reinterpret_cast<char *>(latin1_output.data()));
3136 }
3137}
3138 #endif // SIMDUTF_SPAN
3139
/**
 * convert_utf32_to_latin1_with_errors -- pointer overload (declaration only).
 * Returns a `result`; its field semantics are not visible in this section.
 */
3157simdutf_warn_unused result convert_utf32_to_latin1_with_errors(
3158 const char32_t *input, size_t length, char *latin1_buffer) noexcept;
3159 #if SIMDUTF_SPAN
/**
 * Span overload accepting any byte-like output span. Constant evaluation
 * calls scalar::utf32_to_latin1::convert_with_errors; the run-time path
 * reinterpret_casts the output pointer to char* for the pointer overload.
 */
3160simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3161convert_utf32_to_latin1_with_errors(
3162 std::span<const char32_t> utf32_input,
3163 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
3164 #if SIMDUTF_CPLUSPLUS23
3165 if consteval {
3166 return scalar::utf32_to_latin1::convert_with_errors(
3167 utf32_input.data(), utf32_input.size(), latin1_output.data());
3168 } else
3169 #endif
3170 {
3171 return convert_utf32_to_latin1_with_errors(
3172 utf32_input.data(), utf32_input.size(),
3173 reinterpret_cast<char *>(latin1_output.data()));
3174 }
3175}
3176 #endif // SIMDUTF_SPAN
3177
/**
 * convert_valid_utf32_to_latin1 -- pointer overload (declaration only).
 * NOTE(review): per the name, the input is presumably already-validated
 * UTF-32 that fits in Latin-1; confirm against the implementation.
 */
3198simdutf_warn_unused size_t convert_valid_utf32_to_latin1(
3199 const char32_t *input, size_t length, char *latin1_buffer) noexcept;
3200 #if SIMDUTF_SPAN
/**
 * Span overload accepting any byte-like output span. Constant evaluation
 * calls scalar::utf32_to_latin1::convert_valid, adapting both pointers with
 * the detail::constexpr_cast_* helpers; the run-time path reinterpret_casts
 * the output pointer to char* for the pointer overload. The output span's
 * size() is never read here.
 */
3201simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused size_t
3202convert_valid_utf32_to_latin1(
3203 std::span<const char32_t> valid_utf32_input,
3204 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
3205 #if SIMDUTF_CPLUSPLUS23
3206 if consteval {
3207 return scalar::utf32_to_latin1::convert_valid(
3208 detail::constexpr_cast_ptr<uint32_t>(valid_utf32_input.data()),
3209 valid_utf32_input.size(),
3210 detail::constexpr_cast_writeptr<char>(latin1_output.data()));
// `else` ties the run-time block to the consteval test, matching every
// sibling wrapper in this header instead of leaving a free-standing block.
3211 } else
3212 #endif
3213 {
3214 return convert_valid_utf32_to_latin1(
3215 valid_utf32_input.data(), valid_utf32_input.size(),
3216 reinterpret_cast<char *>(latin1_output.data()));
3217 }
3218}
3219 #endif // SIMDUTF_SPAN
3220
/**
 * Returns `length` unchanged: the Latin-1 length is taken to be the same as
 * the UTF-32 length passed in.
 */
3233simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 size_t
3234latin1_length_from_utf32(size_t length) noexcept {
3235 return length;
3236}
3237
/**
 * Returns `length` unchanged: the UTF-32 length is taken to be the same as
 * the Latin-1 length passed in.
 */
3246simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 size_t
3247utf32_length_from_latin1(size_t length) noexcept {
3248 return length;
3249}
3250#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
3251
3252#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
/**
 * convert_utf32_to_utf16be -- pointer overload (declaration only).
 * NOTE(review): presumably returns the number of char16_t units written.
 */
3266simdutf_warn_unused size_t convert_utf32_to_utf16be(
3267 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3268 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation uses the scalar converter instantiated
 * for endianness::BIG; at run time it forwards to the pointer overload.
 */
3269simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3270convert_utf32_to_utf16be(std::span<const char32_t> utf32_input,
3271 std::span<char16_t> utf16_output) noexcept {
3272 #if SIMDUTF_CPLUSPLUS23
3273 if consteval {
3274 return scalar::utf32_to_utf16::convert<endianness::BIG>(
3275 utf32_input.data(), utf32_input.size(), utf16_output.data());
3276 } else
3277 #endif
3278 {
3279 return convert_utf32_to_utf16be(utf32_input.data(), utf32_input.size(),
3280 utf16_output.data());
3281 }
3282}
3283 #endif // SIMDUTF_SPAN
3284
/**
 * convert_utf32_to_utf16_with_errors -- pointer overload (declaration only).
 * Returns a `result`; its field semantics are not visible in this section.
 */
3302simdutf_warn_unused result convert_utf32_to_utf16_with_errors(
3303 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3304 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation uses scalar convert_with_errors
 * instantiated for endianness::NATIVE; at run time it forwards to the
 * pointer overload. The output span's size() is never read here.
 */
3305simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3306convert_utf32_to_utf16_with_errors(std::span<const char32_t> utf32_input,
3307 std::span<char16_t> utf16_output) noexcept {
3308 #if SIMDUTF_CPLUSPLUS23
3309 if consteval {
3310 return scalar::utf32_to_utf16::convert_with_errors<endianness::NATIVE>(
3311 utf32_input.data(), utf32_input.size(), utf16_output.data());
3312 } else
3313 #endif
3314 {
3315 return convert_utf32_to_utf16_with_errors(
3316 utf32_input.data(), utf32_input.size(), utf16_output.data());
3317 }
3318}
3319 #endif // SIMDUTF_SPAN
3320
/**
 * convert_utf32_to_utf16le_with_errors -- pointer overload (declaration
 * only). Returns a `result`; its field semantics are not visible here.
 */
3337simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(
3338 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3339 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation uses scalar convert_with_errors
 * instantiated for endianness::LITTLE; at run time it forwards to the
 * pointer overload.
 */
3340simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3341convert_utf32_to_utf16le_with_errors(
3342 std::span<const char32_t> utf32_input,
3343 std::span<char16_t> utf16_output) noexcept {
3344 #if SIMDUTF_CPLUSPLUS23
3345 if consteval {
3346 return scalar::utf32_to_utf16::convert_with_errors<endianness::LITTLE>(
3347 utf32_input.data(), utf32_input.size(), utf16_output.data());
3348 } else
3349 #endif
3350 {
3351 return convert_utf32_to_utf16le_with_errors(
3352 utf32_input.data(), utf32_input.size(), utf16_output.data());
3353 }
3354}
3355 #endif // SIMDUTF_SPAN
3356
/**
 * convert_utf32_to_utf16be_with_errors -- pointer overload (declaration
 * only). Returns a `result`; its field semantics are not visible here.
 */
3373simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(
3374 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3375 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation uses scalar convert_with_errors
 * instantiated for endianness::BIG; at run time it forwards to the pointer
 * overload.
 */
3376simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3377convert_utf32_to_utf16be_with_errors(
3378 std::span<const char32_t> utf32_input,
3379 std::span<char16_t> utf16_output) noexcept {
3380 #if SIMDUTF_CPLUSPLUS23
3381 if consteval {
3382 return scalar::utf32_to_utf16::convert_with_errors<endianness::BIG>(
3383 utf32_input.data(), utf32_input.size(), utf16_output.data());
3384 } else
3385 #endif
3386 {
3387 return convert_utf32_to_utf16be_with_errors(
3388 utf32_input.data(), utf32_input.size(), utf16_output.data());
3389 }
3390}
3391 #endif // SIMDUTF_SPAN
3392
/**
 * convert_valid_utf32_to_utf16 -- pointer overload (declaration only).
 * NOTE(review): per the name, the input is presumably already-validated
 * UTF-32; confirm against the implementation.
 */
3406simdutf_warn_unused size_t convert_valid_utf32_to_utf16(
3407 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3408 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation uses scalar convert_valid instantiated
 * for endianness::NATIVE; at run time it forwards to the pointer overload.
 * The output span's size() is never read here.
 */
3409simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3410convert_valid_utf32_to_utf16(std::span<const char32_t> valid_utf32_input,
3411 std::span<char16_t> utf16_output) noexcept {
3412
3413 #if SIMDUTF_CPLUSPLUS23
3414 if consteval {
3415 return scalar::utf32_to_utf16::convert_valid<endianness::NATIVE>(
3416 valid_utf32_input.data(), valid_utf32_input.size(),
3417 utf16_output.data());
3418 } else
3419 #endif
3420 {
3421 return convert_valid_utf32_to_utf16(valid_utf32_input.data(),
3422 valid_utf32_input.size(),
3423 utf16_output.data());
3424 }
3425}
3426 #endif // SIMDUTF_SPAN
3427
/**
 * convert_valid_utf32_to_utf16le -- pointer overload (declaration only).
 * NOTE(review): per the name, the input is presumably already-validated
 * UTF-32; confirm against the implementation.
 */
3441simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(
3442 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3443 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation uses scalar convert_valid instantiated
 * for endianness::LITTLE; at run time it forwards to the pointer overload.
 */
3444simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3445convert_valid_utf32_to_utf16le(std::span<const char32_t> valid_utf32_input,
3446 std::span<char16_t> utf16_output) noexcept {
3447 #if SIMDUTF_CPLUSPLUS23
3448 if consteval {
3449 return scalar::utf32_to_utf16::convert_valid<endianness::LITTLE>(
3450 valid_utf32_input.data(), valid_utf32_input.size(),
3451 utf16_output.data());
3452 } else
3453 #endif
3454 {
3455 return convert_valid_utf32_to_utf16le(valid_utf32_input.data(),
3456 valid_utf32_input.size(),
3457 utf16_output.data());
3458 }
3459}
3460 #endif // SIMDUTF_SPAN
3461
/**
 * convert_valid_utf32_to_utf16be -- pointer overload (declaration only).
 * NOTE(review): per the name, the input is presumably already-validated
 * UTF-32; confirm against the implementation.
 */
3475simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(
3476 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
3477 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation uses scalar convert_valid instantiated
 * for endianness::BIG; at run time it forwards to the pointer overload.
 */
3478simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3479convert_valid_utf32_to_utf16be(std::span<const char32_t> valid_utf32_input,
3480 std::span<char16_t> utf16_output) noexcept {
3481 #if SIMDUTF_CPLUSPLUS23
3482 if consteval {
3483 return scalar::utf32_to_utf16::convert_valid<endianness::BIG>(
3484 valid_utf32_input.data(), valid_utf32_input.size(),
3485 utf16_output.data());
3486 } else
3487 #endif
3488 {
3489 return convert_valid_utf32_to_utf16be(valid_utf32_input.data(),
3490 valid_utf32_input.size(),
3491 utf16_output.data());
3492 }
3493}
3494 #endif // SIMDUTF_SPAN
3495#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
3496
3497#if SIMDUTF_FEATURE_UTF16
/**
 * change_endianness_utf16 -- pointer overload (declaration only). Takes
 * `length` input units and an output pointer; returns nothing.
 * NOTE(review): presumably byte-swaps each 16-bit unit into `output`;
 * whether input and output may alias is not visible here.
 */
3511void change_endianness_utf16(const char16_t *input, size_t length,
3512 char16_t *output) noexcept;
3513 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation calls the scalar routine; at run time
 * it forwards to the pointer overload. The element count comes from
 * utf16_input.size(); utf16_output.size() is never read here.
 */
3514simdutf_really_inline simdutf_constexpr23 void
3515change_endianness_utf16(std::span<const char16_t> utf16_input,
3516 std::span<char16_t> utf16_output) noexcept {
3517 #if SIMDUTF_CPLUSPLUS23
3518 if consteval {
3519 return scalar::utf16::change_endianness_utf16(
3520 utf16_input.data(), utf16_input.size(), utf16_output.data());
3521 } else
3522 #endif
3523 {
3524 return change_endianness_utf16(utf16_input.data(), utf16_input.size(),
3525 utf16_output.data());
3526 }
3527}
3528 #endif // SIMDUTF_SPAN
3529#endif // SIMDUTF_FEATURE_UTF16
3530
3531#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
/**
 * utf8_length_from_utf32 -- pointer overload (declaration only).
 * NOTE(review): presumably the UTF-8 byte count for the UTF-32 input.
 */
3543simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input,
3544 size_t length) noexcept;
3545 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation calls the scalar routine; at run time
 * data()/size() go to the pointer overload.
 */
3546simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3547utf8_length_from_utf32(std::span<const char32_t> valid_utf32_input) noexcept {
3548 #if SIMDUTF_CPLUSPLUS23
3549 if consteval {
3550 return scalar::utf32::utf8_length_from_utf32(valid_utf32_input.data(),
3551 valid_utf32_input.size());
3552 } else
3553 #endif
3554 {
3555 return utf8_length_from_utf32(valid_utf32_input.data(),
3556 valid_utf32_input.size());
3557 }
3558}
3559 #endif // SIMDUTF_SPAN
3560#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
3561
3562#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
/**
 * utf16_length_from_utf32 -- pointer overload (declaration only).
 * NOTE(review): presumably the char16_t unit count for the UTF-32 input.
 */
3574simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input,
3575 size_t length) noexcept;
3576 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation calls the scalar routine; at run time
 * data()/size() go to the pointer overload.
 */
3577simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3578utf16_length_from_utf32(std::span<const char32_t> valid_utf32_input) noexcept {
3579 #if SIMDUTF_CPLUSPLUS23
3580 if consteval {
3581 return scalar::utf32::utf16_length_from_utf32(valid_utf32_input.data(),
3582 valid_utf32_input.size());
3583 } else
3584 #endif
3585 {
3586 return utf16_length_from_utf32(valid_utf32_input.data(),
3587 valid_utf32_input.size());
3588 }
3589}
3590 #endif // SIMDUTF_SPAN
3591
/**
 * utf32_length_from_utf16 -- pointer overload (declaration only).
 * NOTE(review): presumably the char32_t count for native-endian UTF-16.
 */
3607simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t *input,
3608 size_t length) noexcept;
3609 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation uses the scalar routine instantiated for
 * endianness::NATIVE; at run time data()/size() go to the pointer overload.
 */
3610simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3611utf32_length_from_utf16(std::span<const char16_t> valid_utf16_input) noexcept {
3612 #if SIMDUTF_CPLUSPLUS23
3613 if consteval {
3614 return scalar::utf16::utf32_length_from_utf16<endianness::NATIVE>(
3615 valid_utf16_input.data(), valid_utf16_input.size());
3616 } else
3617 #endif
3618 {
3619 return utf32_length_from_utf16(valid_utf16_input.data(),
3620 valid_utf16_input.size());
3621 }
3622}
3623 #endif // SIMDUTF_SPAN
3624
/**
 * utf32_length_from_utf16le -- pointer overload (declaration only).
 * NOTE(review): presumably the char32_t count for UTF-16LE input.
 */
3640simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input,
3641 size_t length) noexcept;
3642 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation uses the scalar routine instantiated for
 * endianness::LITTLE; at run time data()/size() go to the pointer overload.
 */
3643simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3644utf32_length_from_utf16le(
3645 std::span<const char16_t> valid_utf16_input) noexcept {
3646 #if SIMDUTF_CPLUSPLUS23
3647 if consteval {
3648 return scalar::utf16::utf32_length_from_utf16<endianness::LITTLE>(
3649 valid_utf16_input.data(), valid_utf16_input.size());
3650 } else
3651 #endif
3652 {
3653 return utf32_length_from_utf16le(valid_utf16_input.data(),
3654 valid_utf16_input.size());
3655 }
3656}
3657 #endif // SIMDUTF_SPAN
3658
/**
 * utf32_length_from_utf16be -- pointer overload (declaration only).
 * NOTE(review): presumably the char32_t count for UTF-16BE input.
 */
3674simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input,
3675 size_t length) noexcept;
3676 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation uses the scalar routine instantiated for
 * endianness::BIG; at run time data()/size() go to the pointer overload.
 */
3677simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3678utf32_length_from_utf16be(
3679 std::span<const char16_t> valid_utf16_input) noexcept {
3680 #if SIMDUTF_CPLUSPLUS23
3681 if consteval {
3682 return scalar::utf16::utf32_length_from_utf16<endianness::BIG>(
3683 valid_utf16_input.data(), valid_utf16_input.size());
3684 } else
3685 #endif
3686 {
3687 return utf32_length_from_utf16be(valid_utf16_input.data(),
3688 valid_utf16_input.size());
3689 }
3690}
3691 #endif // SIMDUTF_SPAN
3692#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
3693
3694#if SIMDUTF_FEATURE_UTF16
/**
 * count_utf16 -- pointer overload (declaration only).
 * NOTE(review): the scalar counterpart is named count_code_points, so this
 * presumably counts code points in native-endian UTF-16; confirm.
 */
3709simdutf_warn_unused size_t count_utf16(const char16_t *input,
3710 size_t length) noexcept;
3711 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation calls scalar count_code_points
 * instantiated for endianness::NATIVE; at run time data()/size() go to the
 * pointer overload.
 */
3712simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3713count_utf16(std::span<const char16_t> valid_utf16_input) noexcept {
3714 #if SIMDUTF_CPLUSPLUS23
3715 if consteval {
3716 return scalar::utf16::count_code_points<endianness::NATIVE>(
3717 valid_utf16_input.data(), valid_utf16_input.size());
3718 } else
3719 #endif
3720 {
3721 return count_utf16(valid_utf16_input.data(), valid_utf16_input.size());
3722 }
3723}
3724 #endif // SIMDUTF_SPAN
3725
/**
 * count_utf16le -- pointer overload (declaration only).
 * NOTE(review): presumably counts code points in UTF-16LE input; confirm.
 */
3740simdutf_warn_unused size_t count_utf16le(const char16_t *input,
3741 size_t length) noexcept;
3742 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation calls scalar count_code_points
 * instantiated for endianness::LITTLE; at run time data()/size() go to the
 * pointer overload.
 */
3743simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3744count_utf16le(std::span<const char16_t> valid_utf16_input) noexcept {
3745 #if SIMDUTF_CPLUSPLUS23
3746 if consteval {
3747 return scalar::utf16::count_code_points<endianness::LITTLE>(
3748 valid_utf16_input.data(), valid_utf16_input.size());
3749 } else
3750 #endif
3751 {
3752 return count_utf16le(valid_utf16_input.data(), valid_utf16_input.size());
3753 }
3754}
3755 #endif // SIMDUTF_SPAN
3756
/**
 * count_utf16be -- pointer overload (declaration only).
 * NOTE(review): presumably counts code points in UTF-16BE input; confirm.
 */
3771simdutf_warn_unused size_t count_utf16be(const char16_t *input,
3772 size_t length) noexcept;
3773 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation calls scalar count_code_points
 * instantiated for endianness::BIG; at run time data()/size() go to the
 * pointer overload.
 */
3774simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3775count_utf16be(std::span<const char16_t> valid_utf16_input) noexcept {
3776 #if SIMDUTF_CPLUSPLUS23
3777 if consteval {
3778 return scalar::utf16::count_code_points<endianness::BIG>(
3779 valid_utf16_input.data(), valid_utf16_input.size());
3780 } else
3781 #endif
3782 {
3783 return count_utf16be(valid_utf16_input.data(), valid_utf16_input.size());
3784 }
3785}
3786 #endif // SIMDUTF_SPAN
3787#endif // SIMDUTF_FEATURE_UTF16
3788
3789#if SIMDUTF_FEATURE_UTF8
/**
 * count_utf8 -- pointer overload (declaration only).
 * NOTE(review): the scalar counterpart is named count_code_points, so this
 * presumably counts code points in the UTF-8 input; confirm.
 */
3802simdutf_warn_unused size_t count_utf8(const char *input,
3803 size_t length) noexcept;
3804 #if SIMDUTF_SPAN
/**
 * Span overload accepting any byte-like input span. Constant evaluation
 * passes the span's pointer to the scalar routine unchanged; the run-time
 * path reinterpret_casts it to const char* for the pointer overload.
 */
3805simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t count_utf8(
3806 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
3807 #if SIMDUTF_CPLUSPLUS23
3808 if consteval {
3809 return scalar::utf8::count_code_points(valid_utf8_input.data(),
3810 valid_utf8_input.size());
3811 } else
3812 #endif
3813 {
3814 return count_utf8(reinterpret_cast<const char *>(valid_utf8_input.data()),
3815 valid_utf8_input.size());
3816 }
3817}
3818 #endif // SIMDUTF_SPAN
3819
/**
 * trim_partial_utf8 -- pointer overload (declaration only).
 * NOTE(review): unlike the span overload below, this declaration is not
 * marked noexcept. Presumably returns a length no greater than `length` that
 * excludes a trailing incomplete sequence; confirm against the implementation.
 */
3834simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length);
3835 #if SIMDUTF_SPAN
/**
 * Span overload accepting any byte-like input span. Constant evaluation
 * calls the scalar routine; the run-time path reinterpret_casts the pointer
 * to const char* for the pointer overload.
 */
3836simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3837trim_partial_utf8(
3838 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
3839 #if SIMDUTF_CPLUSPLUS23
3840 if consteval {
3841 return scalar::utf8::trim_partial_utf8(valid_utf8_input.data(),
3842 valid_utf8_input.size());
3843 } else
3844 #endif
3845 {
3846 return trim_partial_utf8(
3847 reinterpret_cast<const char *>(valid_utf8_input.data()),
3848 valid_utf8_input.size());
3849 }
3850}
3851 #endif // SIMDUTF_SPAN
3852#endif // SIMDUTF_FEATURE_UTF8
3853
3854#if SIMDUTF_FEATURE_UTF16
/**
 * trim_partial_utf16be -- pointer overload (declaration only).
 * NOTE(review): unlike the span overload below, this declaration is not
 * marked noexcept. Presumably returns a length that excludes a trailing
 * incomplete surrogate pair; confirm against the implementation.
 */
3869simdutf_warn_unused size_t trim_partial_utf16be(const char16_t *input,
3870 size_t length);
3871 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation calls the scalar routine instantiated
 * for endianness::BIG; at run time data()/size() go to the pointer overload.
 */
3872simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3873trim_partial_utf16be(std::span<const char16_t> valid_utf16_input) noexcept {
3874 #if SIMDUTF_CPLUSPLUS23
3875 if consteval {
3876 return scalar::utf16::trim_partial_utf16<endianness::BIG>(
3877 valid_utf16_input.data(), valid_utf16_input.size());
3878 } else
3879 #endif
3880 {
3881 return trim_partial_utf16be(valid_utf16_input.data(),
3882 valid_utf16_input.size());
3883 }
3884}
3885 #endif // SIMDUTF_SPAN
3886
/**
 * trim_partial_utf16le -- pointer overload (declaration only).
 * NOTE(review): unlike the span overload below, this declaration is not
 * marked noexcept. Presumably returns a length that excludes a trailing
 * incomplete surrogate pair; confirm against the implementation.
 */
3901simdutf_warn_unused size_t trim_partial_utf16le(const char16_t *input,
3902 size_t length);
3903 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation calls the scalar routine instantiated
 * for endianness::LITTLE; at run time data()/size() go to the pointer
 * overload.
 */
3904simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3905trim_partial_utf16le(std::span<const char16_t> valid_utf16_input) noexcept {
3906 #if SIMDUTF_CPLUSPLUS23
3907 if consteval {
3908 return scalar::utf16::trim_partial_utf16<endianness::LITTLE>(
3909 valid_utf16_input.data(), valid_utf16_input.size());
3910 } else
3911 #endif
3912 {
3913 return trim_partial_utf16le(valid_utf16_input.data(),
3914 valid_utf16_input.size());
3915 }
3916}
3917 #endif // SIMDUTF_SPAN
3918
/**
 * trim_partial_utf16 -- pointer overload (declaration only).
 * NOTE(review): unlike the span overload below, this declaration is not
 * marked noexcept. Presumably returns a length that excludes a trailing
 * incomplete surrogate pair; confirm against the implementation.
 */
3933simdutf_warn_unused size_t trim_partial_utf16(const char16_t *input,
3934 size_t length);
3935 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation calls the scalar routine instantiated
 * for endianness::NATIVE; at run time data()/size() go to the pointer
 * overload.
 */
3936simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
3937trim_partial_utf16(std::span<const char16_t> valid_utf16_input) noexcept {
3938 #if SIMDUTF_CPLUSPLUS23
3939 if consteval {
3940 return scalar::utf16::trim_partial_utf16<endianness::NATIVE>(
3941 valid_utf16_input.data(), valid_utf16_input.size());
3942 } else
3943 #endif
3944 {
3945 return trim_partial_utf16(valid_utf16_input.data(),
3946 valid_utf16_input.size());
3947 }
3948}
3949 #endif // SIMDUTF_SPAN
3950#endif // SIMDUTF_FEATURE_UTF16
3951
// Defines SIMDUTF_NEED_TRAILING_ZEROES as 1 when any of the base64, UTF-16 or
// encoding-detection features is enabled, unless it is already defined.
3952#if SIMDUTF_FEATURE_BASE64 || SIMDUTF_FEATURE_UTF16 || \
3953 SIMDUTF_FEATURE_DETECT_ENCODING
3954 #ifndef SIMDUTF_NEED_TRAILING_ZEROES
3955 #define SIMDUTF_NEED_TRAILING_ZEROES 1
3956 #endif
3957#endif // SIMDUTF_FEATURE_BASE64 || SIMDUTF_FEATURE_UTF16 ||
3958 // SIMDUTF_FEATURE_DETECT_ENCODING
3959
3960#if SIMDUTF_FEATURE_BASE64
3961// base64_options are used to specify the base64 encoding options.
3962// ASCII spaces are ' ', '\t', '\n', '\r', '\f'
3963// garbage characters are characters that are not part of the base64 alphabet
3964// nor ASCII spaces.
// Numeric layout of the enumerators below: 1 selects the url alphabet, 2
// (base64_reverse_padding) is OR-ed in to flip the padding default, 4 is the
// accept-garbage value (5 == 4 | 1), and 8 is the default-or-url hybrid
// (12 == 8 | 4).
3965constexpr uint64_t base64_reverse_padding =
3966 2; /* modifier for base64_default and base64_url */
3967enum base64_options : uint64_t {
3968 base64_default = 0, /* standard base64 format (with padding) */
3969 base64_url = 1, /* base64url format (no padding) */
3970 base64_default_no_padding =
3971 base64_default |
3972 base64_reverse_padding, /* standard base64 format without padding */
3973 base64_url_with_padding =
3974 base64_url | base64_reverse_padding, /* base64url with padding */
3975 base64_default_accept_garbage =
3976 4, /* standard base64 format accepting garbage characters, the input stops
3977 with the first '=' if any */
3978 base64_url_accept_garbage =
3979 5, /* base64url format accepting garbage characters, the input stops with
3980 the first '=' if any */
3981 base64_default_or_url =
3982 8, /* standard/base64url hybrid format (only meaningful for decoding!) */
3983 base64_default_or_url_accept_garbage =
3984 12, /* standard/base64url hybrid format accepting garbage characters
3985 (only meaningful for decoding!), the input stops with the first '='
3986 if any */
3987};
3988
3989// last_chunk_handling_options are used to specify the handling of the last
3990// chunk in base64 decoding.
3991// https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64
// The two highest values (stop_before_partial, only_full_chunks) are the ones
// is_partial() reports as "partial" modes.
3992enum last_chunk_handling_options : uint64_t {
3993 loose = 0, /* standard base64 format, decode partial final chunk */
3994 strict = 1, /* error when the last chunk is partial, 2 or 3 chars, and
3995 unpadded, or non-zero bit padding */
3996 stop_before_partial =
3997 2, /* if the last chunk is partial, ignore it (no error) */
3998 only_full_chunks =
3999 3 /* only decode full blocks (4 base64 characters, no padding) */
4000};
4001
/**
 * Returns true when `options` is stop_before_partial or only_full_chunks,
 * and false for every other value.
 */
4002inline simdutf_constexpr23 bool
4003is_partial(last_chunk_handling_options options) {
4004 return (options == stop_before_partial) || (options == only_full_chunks);
4005}
4006
// Run-time back ends for the public find() overloads declared after this
// namespace; only declared here, defined elsewhere.
4007namespace detail {
4008simdutf_warn_unused const char *find(const char *start, const char *end,
4009 char character) noexcept;
4010simdutf_warn_unused const char16_t *
4011find(const char16_t *start, const char16_t *end, char16_t character) noexcept;
4012} // namespace detail
4013
/**
 * Searches [start, end) for `character`.
 * Under C++23 constant evaluation a linear scan returns a pointer to the
 * first match, or `end` when there is none. At run time the call is
 * delegated to detail::find.
 * NOTE(review): detail::find is presumably equivalent to the scan; its
 * definition is not visible here.
 */
4024simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 const char *
4025find(const char *start, const char *end, char character) noexcept {
4026 #if SIMDUTF_CPLUSPLUS23
4027 if consteval {
4028 for (; start != end; ++start)
4029 if (*start == character)
4030 return start;
4031 return end;
4032 } else
4033 #endif
4034 {
4035 return detail::find(start, end, character);
4036 }
4037}
/**
 * char16_t counterpart of find(): searches [start, end) for `character`.
 * Under C++23 constant evaluation a linear scan returns a pointer to the
 * first match, or `end` when there is none. At run time the call is
 * delegated to detail::find.
 */
4038simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 const char16_t *
4039find(const char16_t *start, const char16_t *end, char16_t character) noexcept {
4040 // implementation note: this is repeated instead of a template, to ensure
4041 // the api is still a function and compiles without concepts
4042 #if SIMDUTF_CPLUSPLUS23
4043 if consteval {
4044 for (; start != end; ++start)
4045 if (*start == character)
4046 return start;
4047 return end;
4048 } else
4049 #endif
4050 {
4051 return detail::find(start, end, character);
4052 }
4053}
4054}
4055 // We include base64_tables once.
4056 #include <simdutf/base64_tables.h>
4057 #include <simdutf/scalar/base64.h>
4058
4059namespace simdutf {
4060
4061 #if SIMDUTF_CPLUSPLUS17
/**
 * Returns the enumerator name of `options` as a string_view, or "<unknown>"
 * when the value matches none of the listed enumerators (for example
 * base64_url_with_padding, which has no case here).
 */
4062inline std::string_view to_string(base64_options options) {
4063 switch (options) {
4064 case base64_default:
4065 return "base64_default";
4066 case base64_url:
4067 return "base64_url";
// Value 2 is the enumerator base64_default_no_padding; report that name
// rather than the name of the base64_reverse_padding modifier constant.
4068 case base64_default_no_padding:
4069 return "base64_default_no_padding";
4070 case base64_url_with_padding:
4071 return "base64_url_with_padding";
4072 case base64_default_accept_garbage:
4073 return "base64_default_accept_garbage";
4074 case base64_url_accept_garbage:
4075 return "base64_url_accept_garbage";
4076 case base64_default_or_url:
4077 return "base64_default_or_url";
4078 case base64_default_or_url_accept_garbage:
4079 return "base64_default_or_url_accept_garbage";
4080 }
4081 return "<unknown>";
4082}
4083 #endif // SIMDUTF_CPLUSPLUS17
4084
4085 #if SIMDUTF_CPLUSPLUS17
/**
 * Returns the enumerator name of `options` as a string_view, or "<unknown>"
 * for a value outside the four enumerators.
 */
4086inline std::string_view to_string(last_chunk_handling_options options) {
4087 switch (options) {
4088 case loose:
4089 return "loose";
4090 case strict:
4091 return "strict";
4092 case stop_before_partial:
4093 return "stop_before_partial";
4094 case only_full_chunks:
4095 return "only_full_chunks";
4096 }
4097 return "<unknown>";
4098}
4099 #endif
4100
/**
 * maximal_binary_length_from_base64 -- char pointer overload (declaration
 * only).
 * NOTE(review): presumably an upper bound on the decoded size of the base64
 * input; confirm against the implementation.
 */
4114simdutf_warn_unused size_t
4115maximal_binary_length_from_base64(const char *input, size_t length) noexcept;
4116 #if SIMDUTF_SPAN
/**
 * Span overload accepting any byte-like input span. Constant evaluation
 * adapts the pointer with detail::constexpr_cast_ptr<uint8_t> for the scalar
 * routine; the run-time path reinterpret_casts it to const char* for the
 * pointer overload.
 */
4117simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
4118maximal_binary_length_from_base64(
4119 const detail::input_span_of_byte_like auto &input) noexcept {
4120 #if SIMDUTF_CPLUSPLUS23
4121 if consteval {
4122 return scalar::base64::maximal_binary_length_from_base64(
4123 detail::constexpr_cast_ptr<uint8_t>(input.data()), input.size());
4124 } else
4125 #endif
4126 {
4127 return maximal_binary_length_from_base64(
4128 reinterpret_cast<const char *>(input.data()), input.size());
4129 }
4130}
4131 #endif // SIMDUTF_SPAN
4132
/**
 * maximal_binary_length_from_base64 -- char16_t pointer overload
 * (declaration only).
 * NOTE(review): presumably an upper bound on the decoded size of the base64
 * input; confirm against the implementation.
 */
4147simdutf_warn_unused size_t maximal_binary_length_from_base64(
4148 const char16_t *input, size_t length) noexcept;
4149 #if SIMDUTF_SPAN
/**
 * Span overload. Constant evaluation calls the scalar routine; at run time
 * data()/size() go to the char16_t pointer overload.
 */
4150simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
4151maximal_binary_length_from_base64(std::span<const char16_t> input) noexcept {
4152 #if SIMDUTF_CPLUSPLUS23
4153 if consteval {
4154 return scalar::base64::maximal_binary_length_from_base64(input.data(),
4155 input.size());
4156 } else
4157 #endif
4158 {
4159 return maximal_binary_length_from_base64(input.data(), input.size());
4160 }
4161}
4162 #endif // SIMDUTF_SPAN
4163
/**
 * base64_to_binary -- char pointer overload (declaration only). `options`
 * defaults to base64_default and `last_chunk_options` to loose. Returns a
 * `result`; its field semantics are not visible in this section.
 */
4218simdutf_warn_unused result base64_to_binary(
4219 const char *input, size_t length, char *output,
4220 base64_options options = base64_default,
4221 last_chunk_handling_options last_chunk_options = loose) noexcept;
4222 #if SIMDUTF_SPAN
/**
 * Span overload accepting byte-like input and output spans, with the same
 * defaults. Constant evaluation calls
 * scalar::base64::base64_to_binary_details_impl with the span pointers
 * unchanged; the run-time path reinterpret_casts both pointers to char for
 * the pointer overload. binary_output.size() is never read here.
 */
4223simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
4224base64_to_binary(
4225 const detail::input_span_of_byte_like auto &input,
4226 detail::output_span_of_byte_like auto &&binary_output,
4227 base64_options options = base64_default,
4228 last_chunk_handling_options last_chunk_options = loose) noexcept {
4229 #if SIMDUTF_CPLUSPLUS23
4230 if consteval {
4231 return scalar::base64::base64_to_binary_details_impl(
4232 input.data(), input.size(), binary_output.data(), options,
4233 last_chunk_options);
4234 } else
4235 #endif
4236 {
4237 return base64_to_binary(reinterpret_cast<const char *>(input.data()),
4238 input.size(),
4239 reinterpret_cast<char *>(binary_output.data()),
4240 options, last_chunk_options);
4241 }
4242}
4243 #endif // SIMDUTF_SPAN
4244
/**
 * Forwards `length` and `options` (default base64_default) to
 * scalar::base64::base64_length_from_binary and returns its result.
 */
4251inline simdutf_warn_unused simdutf_constexpr23 size_t base64_length_from_binary(
4252 size_t length, base64_options options = base64_default) noexcept {
4253 return scalar::base64::base64_length_from_binary(length, options);
4254}
4255
/**
 * Forwards `length`, `options` (default base64_default) and `line_length`
 * (default default_line_length, defined outside this section) to
 * scalar::base64::base64_length_from_binary_with_lines and returns its result.
 */
4265inline simdutf_warn_unused simdutf_constexpr23 size_t
4266base64_length_from_binary_with_lines(
4267 size_t length, base64_options options = base64_default,
4268 size_t line_length = default_line_length) noexcept {
4269 return scalar::base64::base64_length_from_binary_with_lines(length, options,
4270 line_length);
4271}
4272
/**
 * binary_to_base64 -- char pointer overload (declaration only). `options`
 * defaults to base64_default.
 * NOTE(review): unlike the span overload below, this declaration carries no
 * simdutf_warn_unused. Presumably returns the number of base64 characters
 * written; confirm against the implementation.
 */
4294size_t binary_to_base64(const char *input, size_t length, char *output,
4295 base64_options options = base64_default) noexcept;
4296 #if SIMDUTF_SPAN
/**
 * Span overload accepting byte-like spans. Despite its name, `binary_output`
 * is the destination buffer: it is passed as `output` to the pointer
 * overload. Constant evaluation calls scalar::base64::tail_encode_base64,
 * which takes the destination pointer first. binary_output.size() is never
 * read here.
 */
4297simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
4298binary_to_base64(const detail::input_span_of_byte_like auto &input,
4299 detail::output_span_of_byte_like auto &&binary_output,
4300 base64_options options = base64_default) noexcept {
4301 #if SIMDUTF_CPLUSPLUS23
4302 if consteval {
4303 return scalar::base64::tail_encode_base64(
4304 binary_output.data(), input.data(), input.size(), options);
4305 } else
4306 #endif
4307 {
4308 return binary_to_base64(
4309 reinterpret_cast<const char *>(input.data()), input.size(),
4310 reinterpret_cast<char *>(binary_output.data()), options);
4311 }
4312}
4313 #endif // SIMDUTF_SPAN
4314
4339size_t
4340binary_to_base64_with_lines(const char *input, size_t length, char *output,
4341 size_t line_length = simdutf::default_line_length,
4342 base64_options options = base64_default) noexcept;
4343 #if SIMDUTF_SPAN
4344simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
4345binary_to_base64_with_lines(
4346 const detail::input_span_of_byte_like auto &input,
4347 detail::output_span_of_byte_like auto &&binary_output,
4348 size_t line_length = simdutf::default_line_length,
4349 base64_options options = base64_default) noexcept {
4350 #if SIMDUTF_CPLUSPLUS23
4351 if consteval {
4352 return scalar::base64::tail_encode_base64_impl<true>(
4353 binary_output.data(), input.data(), input.size(), options, line_length);
4354 } else
4355 #endif
4356 {
4357 return binary_to_base64_with_lines(
4358 reinterpret_cast<const char *>(input.data()), input.size(),
4359 reinterpret_cast<char *>(binary_output.data()), line_length, options);
4360 }
4361}
4362 #endif // SIMDUTF_SPAN
4363
4364 #if SIMDUTF_ATOMIC_REF
4406size_t
4407atomic_binary_to_base64(const char *input, size_t length, char *output,
4408 base64_options options = base64_default) noexcept;
4409 #if SIMDUTF_SPAN
4410simdutf_really_inline simdutf_warn_unused size_t
4411atomic_binary_to_base64(const detail::input_span_of_byte_like auto &input,
4412 detail::output_span_of_byte_like auto &&binary_output,
4413 base64_options options = base64_default) noexcept {
4414 return atomic_binary_to_base64(
4415 reinterpret_cast<const char *>(input.data()), input.size(),
4416 reinterpret_cast<char *>(binary_output.data()), options);
4417}
4418 #endif // SIMDUTF_SPAN
4419 #endif // SIMDUTF_ATOMIC_REF
4420
4477simdutf_warn_unused result
4478base64_to_binary(const char16_t *input, size_t length, char *output,
4479 base64_options options = base64_default,
4480 last_chunk_handling_options last_chunk_options =
4481 last_chunk_handling_options::loose) noexcept;
4482 #if SIMDUTF_SPAN
4483simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
4484base64_to_binary(
4485 std::span<const char16_t> input,
4486 detail::output_span_of_byte_like auto &&binary_output,
4487 base64_options options = base64_default,
4488 last_chunk_handling_options last_chunk_options = loose) noexcept {
4489 #if SIMDUTF_CPLUSPLUS23
4490 if consteval {
4491 return scalar::base64::base64_to_binary_details_impl(
4492 input.data(), input.size(), binary_output.data(), options,
4493 last_chunk_options);
4494 } else
4495 #endif
4496 {
4497 return base64_to_binary(input.data(), input.size(),
4498 reinterpret_cast<char *>(binary_output.data()),
4499 options, last_chunk_options);
4500 }
4501}
4502 #endif // SIMDUTF_SPAN
4503
4514simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool
4515base64_ignorable(char input, base64_options options = base64_default) noexcept {
4516 return scalar::base64::is_ignorable(input, options);
4517}
4518simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool
4519base64_ignorable(char16_t input,
4520 base64_options options = base64_default) noexcept {
4521 return scalar::base64::is_ignorable(input, options);
4522}
4523
4535simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool
4536base64_valid(char input, base64_options options = base64_default) noexcept {
4537 return scalar::base64::is_base64(input, options);
4538}
4539simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool
4540base64_valid(char16_t input, base64_options options = base64_default) noexcept {
4541 return scalar::base64::is_base64(input, options);
4542}
4543
4553simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool
4554base64_valid_or_padding(char input,
4555 base64_options options = base64_default) noexcept {
4556 return scalar::base64::is_base64_or_padding(input, options);
4557}
4558simdutf_warn_unused simdutf_really_inline simdutf_constexpr23 bool
4559base64_valid_or_padding(char16_t input,
4560 base64_options options = base64_default) noexcept {
4561 return scalar::base64::is_base64_or_padding(input, options);
4562}
4563
4631simdutf_warn_unused result
4632base64_to_binary_safe(const char *input, size_t length, char *output,
4633 size_t &outlen, base64_options options = base64_default,
4634 last_chunk_handling_options last_chunk_options =
4635 last_chunk_handling_options::loose,
4636 bool decode_up_to_bad_char = false) noexcept;
4637// the span overload has moved to the bottom of the file
4638
4639simdutf_warn_unused result
4640base64_to_binary_safe(const char16_t *input, size_t length, char *output,
4641 size_t &outlen, base64_options options = base64_default,
4642 last_chunk_handling_options last_chunk_options =
4643 last_chunk_handling_options::loose,
4644 bool decode_up_to_bad_char = false) noexcept;
4645 // span overload moved to bottom of file
4646
4647 #if SIMDUTF_ATOMIC_REF
4687simdutf_warn_unused result atomic_base64_to_binary_safe(
4688 const char *input, size_t length, char *output, size_t &outlen,
4689 base64_options options = base64_default,
4690 last_chunk_handling_options last_chunk_options =
4691 last_chunk_handling_options::loose,
4692 bool decode_up_to_bad_char = false) noexcept;
4693simdutf_warn_unused result atomic_base64_to_binary_safe(
4694 const char16_t *input, size_t length, char *output, size_t &outlen,
4695 base64_options options = base64_default,
4696 last_chunk_handling_options last_chunk_options = loose,
4697 bool decode_up_to_bad_char = false) noexcept;
4698 #if SIMDUTF_SPAN
4703simdutf_really_inline simdutf_warn_unused std::tuple<result, std::size_t>
4704atomic_base64_to_binary_safe(
4705 const detail::input_span_of_byte_like auto &binary_input,
4706 detail::output_span_of_byte_like auto &&output,
4707 base64_options options = base64_default,
4708 last_chunk_handling_options last_chunk_options =
4709 last_chunk_handling_options::loose,
4710 bool decode_up_to_bad_char = false) noexcept {
4711 size_t outlen = output.size();
4712 auto ret = atomic_base64_to_binary_safe(
4713 reinterpret_cast<const char *>(binary_input.data()), binary_input.size(),
4714 reinterpret_cast<char *>(output.data()), outlen, options,
4715 last_chunk_options, decode_up_to_bad_char);
4716 return {ret, outlen};
4717}
4722simdutf_warn_unused std::tuple<result, std::size_t>
4723atomic_base64_to_binary_safe(
4724 std::span<const char16_t> base64_input,
4725 detail::output_span_of_byte_like auto &&binary_output,
4726 base64_options options = base64_default,
4727 last_chunk_handling_options last_chunk_options = loose,
4728 bool decode_up_to_bad_char = false) noexcept {
4729 size_t outlen = binary_output.size();
4730 auto ret = atomic_base64_to_binary_safe(
4731 base64_input.data(), base64_input.size(),
4732 reinterpret_cast<char *>(binary_output.data()), outlen, options,
4733 last_chunk_options, decode_up_to_bad_char);
4734 return {ret, outlen};
4735}
4736 #endif // SIMDUTF_SPAN
4737 #endif // SIMDUTF_ATOMIC_REF
4738
4739#endif // SIMDUTF_FEATURE_BASE64
4740
4749public:
4759 virtual std::string name() const { return std::string(_name); }
4760
4770 virtual std::string description() const { return std::string(_description); }
4771
4782
4783#if SIMDUTF_FEATURE_DETECT_ENCODING
4790 virtual encoding_type autodetect_encoding(const char *input,
4791 size_t length) const noexcept;
4792
4799 virtual int detect_encodings(const char *input,
4800 size_t length) const noexcept = 0;
4801#endif // SIMDUTF_FEATURE_DETECT_ENCODING
4802
4810 virtual uint32_t required_instruction_sets() const {
4811 return _required_instruction_sets;
4812 }
4813
4814#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
4824 simdutf_warn_unused virtual bool validate_utf8(const char *buf,
4825 size_t len) const noexcept = 0;
4826#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
4827
4828#if SIMDUTF_FEATURE_UTF8
4841 simdutf_warn_unused virtual result
4842 validate_utf8_with_errors(const char *buf, size_t len) const noexcept = 0;
4843#endif // SIMDUTF_FEATURE_UTF8
4844
4845#if SIMDUTF_FEATURE_ASCII
4855 simdutf_warn_unused virtual bool
4856 validate_ascii(const char *buf, size_t len) const noexcept = 0;
4857
4870 simdutf_warn_unused virtual result
4871 validate_ascii_with_errors(const char *buf, size_t len) const noexcept = 0;
4872
4873#endif // SIMDUTF_FEATURE_ASCII
4874
4875#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
4887 simdutf_warn_unused virtual bool
4888 validate_utf16be_as_ascii(const char16_t *buf, size_t len) const noexcept = 0;
4889
4901 simdutf_warn_unused virtual bool
4902 validate_utf16le_as_ascii(const char16_t *buf, size_t len) const noexcept = 0;
4903#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
4904
4905#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
4920 simdutf_warn_unused virtual bool
4921 validate_utf16le(const char16_t *buf, size_t len) const noexcept = 0;
4922#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
4923
4924#if SIMDUTF_FEATURE_UTF16
4939 simdutf_warn_unused virtual bool
4940 validate_utf16be(const char16_t *buf, size_t len) const noexcept = 0;
4941
4958 simdutf_warn_unused virtual result
4959 validate_utf16le_with_errors(const char16_t *buf,
4960 size_t len) const noexcept = 0;
4961
4978 simdutf_warn_unused virtual result
4979 validate_utf16be_with_errors(const char16_t *buf,
4980 size_t len) const noexcept = 0;
4993 virtual void to_well_formed_utf16le(const char16_t *input, size_t len,
4994 char16_t *output) const noexcept = 0;
5007 virtual void to_well_formed_utf16be(const char16_t *input, size_t len,
5008 char16_t *output) const noexcept = 0;
5009#endif // SIMDUTF_FEATURE_UTF16
5010
5011#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
5024 simdutf_warn_unused virtual bool
5025 validate_utf32(const char32_t *buf, size_t len) const noexcept = 0;
5026#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
5027
5028#if SIMDUTF_FEATURE_UTF32
5044 simdutf_warn_unused virtual result
5045 validate_utf32_with_errors(const char32_t *buf,
5046 size_t len) const noexcept = 0;
5047#endif // SIMDUTF_FEATURE_UTF32
5048
5049#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
5060 simdutf_warn_unused virtual size_t
5061 convert_latin1_to_utf8(const char *input, size_t length,
5062 char *utf8_output) const noexcept = 0;
5063#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
5064
5065#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
5076 simdutf_warn_unused virtual size_t
5077 convert_latin1_to_utf16le(const char *input, size_t length,
5078 char16_t *utf16_output) const noexcept = 0;
5079
5090 simdutf_warn_unused virtual size_t
5091 convert_latin1_to_utf16be(const char *input, size_t length,
5092 char16_t *utf16_output) const noexcept = 0;
5093#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
5094
5095#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
5106 simdutf_warn_unused virtual size_t
5107 convert_latin1_to_utf32(const char *input, size_t length,
5108 char32_t *utf32_buffer) const noexcept = 0;
5109#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
5110
5111#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
5124 simdutf_warn_unused virtual size_t
5125 convert_utf8_to_latin1(const char *input, size_t length,
5126 char *latin1_output) const noexcept = 0;
5127
5144 simdutf_warn_unused virtual result
5145 convert_utf8_to_latin1_with_errors(const char *input, size_t length,
5146 char *latin1_output) const noexcept = 0;
5147
5167 simdutf_warn_unused virtual size_t
5168 convert_valid_utf8_to_latin1(const char *input, size_t length,
5169 char *latin1_output) const noexcept = 0;
5170#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
5171
5172#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5185 simdutf_warn_unused virtual size_t
5186 convert_utf8_to_utf16le(const char *input, size_t length,
5187 char16_t *utf16_output) const noexcept = 0;
5188
5201 simdutf_warn_unused virtual size_t
5202 convert_utf8_to_utf16be(const char *input, size_t length,
5203 char16_t *utf16_output) const noexcept = 0;
5204
5220 simdutf_warn_unused virtual result convert_utf8_to_utf16le_with_errors(
5221 const char *input, size_t length,
5222 char16_t *utf16_output) const noexcept = 0;
5223
5239 simdutf_warn_unused virtual result convert_utf8_to_utf16be_with_errors(
5240 const char *input, size_t length,
5241 char16_t *utf16_output) const noexcept = 0;
5262 const char16_t *input, size_t length) const noexcept = 0;
5263
5284 const char16_t *input, size_t length) const noexcept = 0;
5285
5286#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5287
5288#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5301 simdutf_warn_unused virtual size_t
5302 convert_utf8_to_utf32(const char *input, size_t length,
5303 char32_t *utf32_output) const noexcept = 0;
5304
5319 simdutf_warn_unused virtual result
5320 convert_utf8_to_utf32_with_errors(const char *input, size_t length,
5321 char32_t *utf32_output) const noexcept = 0;
5322#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5323
5324#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5335 simdutf_warn_unused virtual size_t
5336 convert_valid_utf8_to_utf16le(const char *input, size_t length,
5337 char16_t *utf16_buffer) const noexcept = 0;
5338
5349 simdutf_warn_unused virtual size_t
5350 convert_valid_utf8_to_utf16be(const char *input, size_t length,
5351 char16_t *utf16_buffer) const noexcept = 0;
5352#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5353
5354#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5365 simdutf_warn_unused virtual size_t
5366 convert_valid_utf8_to_utf32(const char *input, size_t length,
5367 char32_t *utf32_buffer) const noexcept = 0;
5368#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5369
5370#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5383 simdutf_warn_unused virtual size_t
5384 utf16_length_from_utf8(const char *input, size_t length) const noexcept = 0;
5385#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5386
5387#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5402 simdutf_warn_unused virtual size_t
5403 utf32_length_from_utf8(const char *input, size_t length) const noexcept = 0;
5404#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5405
5406#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
5423 simdutf_warn_unused virtual size_t
5424 convert_utf16le_to_latin1(const char16_t *input, size_t length,
5425 char *latin1_buffer) const noexcept = 0;
5426
5443 simdutf_warn_unused virtual size_t
5444 convert_utf16be_to_latin1(const char16_t *input, size_t length,
5445 char *latin1_buffer) const noexcept = 0;
5446
5466 simdutf_warn_unused virtual result
5467 convert_utf16le_to_latin1_with_errors(const char16_t *input, size_t length,
5468 char *latin1_buffer) const noexcept = 0;
5469
5489 simdutf_warn_unused virtual result
5490 convert_utf16be_to_latin1_with_errors(const char16_t *input, size_t length,
5491 char *latin1_buffer) const noexcept = 0;
5492
5513 simdutf_warn_unused virtual size_t
5514 convert_valid_utf16le_to_latin1(const char16_t *input, size_t length,
5515 char *latin1_buffer) const noexcept = 0;
5516
5537 simdutf_warn_unused virtual size_t
5538 convert_valid_utf16be_to_latin1(const char16_t *input, size_t length,
5539 char *latin1_buffer) const noexcept = 0;
5540#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
5541
5542#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5558 simdutf_warn_unused virtual size_t
5559 convert_utf16le_to_utf8(const char16_t *input, size_t length,
5560 char *utf8_buffer) const noexcept = 0;
5561
5577 simdutf_warn_unused virtual size_t
5578 convert_utf16be_to_utf8(const char16_t *input, size_t length,
5579 char *utf8_buffer) const noexcept = 0;
5580
5599 simdutf_warn_unused virtual result
5600 convert_utf16le_to_utf8_with_errors(const char16_t *input, size_t length,
5601 char *utf8_buffer) const noexcept = 0;
5602
5621 simdutf_warn_unused virtual result
5622 convert_utf16be_to_utf8_with_errors(const char16_t *input, size_t length,
5623 char *utf8_buffer) const noexcept = 0;
5624
5639 simdutf_warn_unused virtual size_t
5640 convert_valid_utf16le_to_utf8(const char16_t *input, size_t length,
5641 char *utf8_buffer) const noexcept = 0;
5642
5657 simdutf_warn_unused virtual size_t
5658 convert_valid_utf16be_to_utf8(const char16_t *input, size_t length,
5659 char *utf8_buffer) const noexcept = 0;
5660#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5661
5662#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
5678 simdutf_warn_unused virtual size_t
5679 convert_utf16le_to_utf32(const char16_t *input, size_t length,
5680 char32_t *utf32_buffer) const noexcept = 0;
5681
5697 simdutf_warn_unused virtual size_t
5698 convert_utf16be_to_utf32(const char16_t *input, size_t length,
5699 char32_t *utf32_buffer) const noexcept = 0;
5700
5720 const char16_t *input, size_t length,
5721 char32_t *utf32_buffer) const noexcept = 0;
5722
5742 const char16_t *input, size_t length,
5743 char32_t *utf32_buffer) const noexcept = 0;
5744
5759 simdutf_warn_unused virtual size_t
5760 convert_valid_utf16le_to_utf32(const char16_t *input, size_t length,
5761 char32_t *utf32_buffer) const noexcept = 0;
5762
5777 simdutf_warn_unused virtual size_t
5778 convert_valid_utf16be_to_utf32(const char16_t *input, size_t length,
5779 char32_t *utf32_buffer) const noexcept = 0;
5780#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
5781
5782#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5797 simdutf_warn_unused virtual size_t
5798 utf8_length_from_utf16le(const char16_t *input,
5799 size_t length) const noexcept = 0;
5800
5815 simdutf_warn_unused virtual size_t
5816 utf8_length_from_utf16be(const char16_t *input,
5817 size_t length) const noexcept = 0;
5818#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5819
5820#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
5837 simdutf_warn_unused virtual size_t
5838 convert_utf32_to_latin1(const char32_t *input, size_t length,
5839 char *latin1_buffer) const noexcept = 0;
5840#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
5841
5842#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
5862 simdutf_warn_unused virtual result
5863 convert_utf32_to_latin1_with_errors(const char32_t *input, size_t length,
5864 char *latin1_buffer) const noexcept = 0;
5865
5886 simdutf_warn_unused virtual size_t
5887 convert_valid_utf32_to_latin1(const char32_t *input, size_t length,
5888 char *latin1_buffer) const noexcept = 0;
5889#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
5890
5891#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5907 simdutf_warn_unused virtual size_t
5908 convert_utf32_to_utf8(const char32_t *input, size_t length,
5909 char *utf8_buffer) const noexcept = 0;
5910
5928 simdutf_warn_unused virtual result
5929 convert_utf32_to_utf8_with_errors(const char32_t *input, size_t length,
5930 char *utf8_buffer) const noexcept = 0;
5931
5946 simdutf_warn_unused virtual size_t
5947 convert_valid_utf32_to_utf8(const char32_t *input, size_t length,
5948 char *utf8_buffer) const noexcept = 0;
5949#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5950
5951#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
5962 simdutf_warn_unused virtual size_t
5963 utf16_length_from_latin1(size_t length) const noexcept {
5964 return length;
5965 }
5966#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
5967
5968#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
5984 simdutf_warn_unused virtual size_t
5985 convert_utf32_to_utf16le(const char32_t *input, size_t length,
5986 char16_t *utf16_buffer) const noexcept = 0;
5987
6003 simdutf_warn_unused virtual size_t
6004 convert_utf32_to_utf16be(const char32_t *input, size_t length,
6005 char16_t *utf16_buffer) const noexcept = 0;
6006
6026 const char32_t *input, size_t length,
6027 char16_t *utf16_buffer) const noexcept = 0;
6028
6048 const char32_t *input, size_t length,
6049 char16_t *utf16_buffer) const noexcept = 0;
6050
6065 simdutf_warn_unused virtual size_t
6066 convert_valid_utf32_to_utf16le(const char32_t *input, size_t length,
6067 char16_t *utf16_buffer) const noexcept = 0;
6068
6083 simdutf_warn_unused virtual size_t
6084 convert_valid_utf32_to_utf16be(const char32_t *input, size_t length,
6085 char16_t *utf16_buffer) const noexcept = 0;
6086#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
6087
6088#if SIMDUTF_FEATURE_UTF16
6103 virtual void change_endianness_utf16(const char16_t *input, size_t length,
6104 char16_t *output) const noexcept = 0;
6105#endif // SIMDUTF_FEATURE_UTF16
6106
6107#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
6116 simdutf_warn_unused virtual size_t
6117 utf8_length_from_latin1(const char *input, size_t length) const noexcept = 0;
6118#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
6119
6120#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
6133 simdutf_warn_unused virtual size_t
6134 utf8_length_from_utf32(const char32_t *input,
6135 size_t length) const noexcept = 0;
6136#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
6137
6138#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6150 simdutf_warn_unused virtual size_t
6151 latin1_length_from_utf32(size_t length) const noexcept {
6152 return length;
6153 }
6154#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6155
6156#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
6168 simdutf_warn_unused virtual size_t
6169 latin1_length_from_utf8(const char *input, size_t length) const noexcept = 0;
6170#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
6171
6172#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
6188 simdutf_warn_unused virtual size_t
6189 latin1_length_from_utf16(size_t length) const noexcept {
6190 return length;
6191 }
6192#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
6193
6194#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
6207 simdutf_warn_unused virtual size_t
6208 utf16_length_from_utf32(const char32_t *input,
6209 size_t length) const noexcept = 0;
6210#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
6211
6212#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6221 simdutf_warn_unused virtual size_t
6222 utf32_length_from_latin1(size_t length) const noexcept {
6223 return length;
6224 }
6225#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6226
6227#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
6245 simdutf_warn_unused virtual size_t
6246 utf32_length_from_utf16le(const char16_t *input,
6247 size_t length) const noexcept = 0;
6248
6266 simdutf_warn_unused virtual size_t
6267 utf32_length_from_utf16be(const char16_t *input,
6268 size_t length) const noexcept = 0;
6269#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
6270
6271#if SIMDUTF_FEATURE_UTF16
6287 simdutf_warn_unused virtual size_t
6288 count_utf16le(const char16_t *input, size_t length) const noexcept = 0;
6289
6305 simdutf_warn_unused virtual size_t
6306 count_utf16be(const char16_t *input, size_t length) const noexcept = 0;
6307#endif // SIMDUTF_FEATURE_UTF16
6308
6309#if SIMDUTF_FEATURE_UTF8
6322 simdutf_warn_unused virtual size_t
6323 count_utf8(const char *input, size_t length) const noexcept = 0;
6324#endif // SIMDUTF_FEATURE_UTF8
6325
6326#if SIMDUTF_FEATURE_BASE64
6340 simdutf_warn_unused size_t maximal_binary_length_from_base64(
6341 const char *input, size_t length) const noexcept;
6342
6357 simdutf_warn_unused size_t maximal_binary_length_from_base64(
6358 const char16_t *input, size_t length) const noexcept;
6359
6392 simdutf_warn_unused virtual result
6393 base64_to_binary(const char *input, size_t length, char *output,
6394 base64_options options = base64_default,
6395 last_chunk_handling_options last_chunk_options =
6396 last_chunk_handling_options::loose) const noexcept = 0;
6397
6429 simdutf_warn_unused virtual full_result base64_to_binary_details(
6430 const char *input, size_t length, char *output,
6431 base64_options options = base64_default,
6432 last_chunk_handling_options last_chunk_options =
6433 last_chunk_handling_options::loose) const noexcept = 0;
6434
6468 simdutf_warn_unused virtual result
6469 base64_to_binary(const char16_t *input, size_t length, char *output,
6470 base64_options options = base64_default,
6471 last_chunk_handling_options last_chunk_options =
6472 last_chunk_handling_options::loose) const noexcept = 0;
6473
6505 simdutf_warn_unused virtual full_result base64_to_binary_details(
6506 const char16_t *input, size_t length, char *output,
6507 base64_options options = base64_default,
6508 last_chunk_handling_options last_chunk_options =
6509 last_chunk_handling_options::loose) const noexcept = 0;
6510
6519 simdutf_warn_unused size_t base64_length_from_binary(
6520 size_t length, base64_options options = base64_default) const noexcept;
6521
6543 virtual size_t
6544 binary_to_base64(const char *input, size_t length, char *output,
6545 base64_options options = base64_default) const noexcept = 0;
6546
6573 const char *input, size_t length, char *output,
6574 size_t line_length = simdutf::default_line_length,
6575 base64_options options = base64_default) const noexcept = 0;
6576
6587 virtual const char *find(const char *start, const char *end,
6588 char character) const noexcept = 0;
6589 virtual const char16_t *find(const char16_t *start, const char16_t *end,
6590 char16_t character) const noexcept = 0;
6591#endif // SIMDUTF_FEATURE_BASE64
6592
6593#ifdef SIMDUTF_INTERNAL_TESTS
6594 // This method is exported only in developer mode, its purpose
6595 // is to expose some internal test procedures from the given
6596 // implementation and then use them through our standard test
6597 // framework.
6598 //
6599 // Regular users should not use it, the tests of the public
6600 // API are enough.
6601
6602 struct TestProcedure {
6603 // display name
6604 std::string name;
6605
6606 // test procedure to run against an implementation; it returns void, so
     // pass/fail is not reported through the return value
6607 void (*procedure)(const implementation &);
6608 };
6609
6610 virtual std::vector<TestProcedure> internal_tests() const;
6611#endif
6612
6613protected:
6616 simdutf_really_inline implementation(const char *name,
6617 const char *description,
6618 uint32_t required_instruction_sets)
6619 : _name(name), _description(description),
6620 _required_instruction_sets(required_instruction_sets) {}
6621
6622protected:
6623 ~implementation() = default;
6624
6625private:
6629 const char *_name;
6630
6634 const char *_description;
6635
6639 const uint32_t _required_instruction_sets;
6640};
6641
6643namespace internal {
6644
6648class available_implementation_list {
6649public:
6651 simdutf_really_inline available_implementation_list() {}
6653 size_t size() const noexcept;
6655 const implementation *const *begin() const noexcept;
6657 const implementation *const *end() const noexcept;
6658
6672 const implementation *operator[](const std::string &name) const noexcept {
6673 for (const implementation *impl : *this) {
6674 if (impl->name() == name) {
6675 return impl;
6676 }
6677 }
6678 return nullptr;
6679 }
6680
6694 const implementation *detect_best_supported() const noexcept;
6695};
6696
/**
 * Minimal pointer wrapper with pointer-like access operators.
 *
 * The pointer is stored in a std::atomic<T *> unless SIMDUTF_NO_THREADS is
 * defined, in which case a plain T * is used. All reads go through get(), and
 * assignment stores the new pointer.
 */
template <typename T> class atomic_ptr {
public:
  atomic_ptr(T *_ptr) : ptr{_ptr} {}

  // Read-only access.
  operator const T *() const { return get(); }
  const T &operator*() const { return *get(); }
  const T *operator->() const { return get(); }

  // Mutable access.
  operator T *() { return get(); }
  T &operator*() { return *get(); }
  T *operator->() { return get(); }

  /// Replaces the stored pointer with `_ptr`.
  atomic_ptr &operator=(T *_ptr) {
    ptr = _ptr;
    return *this;
  }

private:
  /// Reads the current pointer value (an atomic load in threaded builds).
  T *get() const {
#if defined(SIMDUTF_NO_THREADS)
    return ptr;
#else
    return ptr.load();
#endif
  }

#if defined(SIMDUTF_NO_THREADS)
  T *ptr;
#else
  std::atomic<T *> ptr;
#endif
};
6736
6737class detect_best_supported_implementation_on_first_use;
6738
6739} // namespace internal
6740
6744extern SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list &
6745get_available_implementations();
6746
6753extern SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation> &
6754get_active_implementation();
6755
6756} // namespace simdutf
6757
6758#if SIMDUTF_FEATURE_BASE64
6759 // this header is not part of the public api
6760 #include <simdutf/base64_implementation.h>
6761
6762namespace simdutf {
6763 #if SIMDUTF_SPAN
6768simdutf_really_inline
6769 simdutf_constexpr23 simdutf_warn_unused std::tuple<result, std::size_t>
6770 base64_to_binary_safe(
6771 const detail::input_span_of_byte_like auto &input,
6772 detail::output_span_of_byte_like auto &&binary_output,
6773 base64_options options = base64_default,
6774 last_chunk_handling_options last_chunk_options = loose,
6775 bool decode_up_to_bad_char = false) noexcept {
6776 size_t outlen = binary_output.size();
6777 #if SIMDUTF_CPLUSPLUS23
6778 if consteval {
6779 using CInput = std::decay_t<decltype(*input.data())>;
6780 static_assert(std::is_same_v<CInput, char>,
6781 "sorry, the constexpr implementation is for now limited to "
6782 "input of type char");
6783 using COutput = std::decay_t<decltype(*binary_output.data())>;
6784 static_assert(std::is_same_v<COutput, char>,
6785 "sorry, the constexpr implementation is for now limited to "
6786 "output of type char");
6787 auto r = base64_to_binary_safe_impl(
6788 input.data(), input.size(), binary_output.data(), outlen, options,
6789 last_chunk_options, decode_up_to_bad_char);
6790 return {r, outlen};
6791 } else
6792 #endif
6793 {
6794 auto r = base64_to_binary_safe_impl<char>(
6795 reinterpret_cast<const char *>(input.data()), input.size(),
6796 reinterpret_cast<char *>(binary_output.data()), outlen, options,
6797 last_chunk_options, decode_up_to_bad_char);
6798 return {r, outlen};
6799 }
6800}
6801
6802 #if SIMDUTF_SPAN
6807simdutf_really_inline
6808 simdutf_warn_unused simdutf_constexpr23 std::tuple<result, std::size_t>
6809 base64_to_binary_safe(
6810 std::span<const char16_t> input,
6811 detail::output_span_of_byte_like auto &&binary_output,
6812 base64_options options = base64_default,
6813 last_chunk_handling_options last_chunk_options = loose,
6814 bool decode_up_to_bad_char = false) noexcept {
6815 size_t outlen = binary_output.size();
6816 #if SIMDUTF_CPLUSPLUS23
6817 if consteval {
6818 auto r = base64_to_binary_safe_impl(
6819 input.data(), input.size(), binary_output.data(), outlen, options,
6820 last_chunk_options, decode_up_to_bad_char);
6821 return {r, outlen};
6822 } else
6823 #endif
6824 {
6825 auto r = base64_to_binary_safe(
6826 input.data(), input.size(),
6827 reinterpret_cast<char *>(binary_output.data()), outlen, options,
6828 last_chunk_options, decode_up_to_bad_char);
6829 return {r, outlen};
6830 }
6831}
6832 #endif // SIMDUTF_SPAN
6833
6834 #endif // SIMDUTF_SPAN
6835} // namespace simdutf
6836
6837#endif // SIMDUTF_FEATURE_BASE64
6838
6839#endif // SIMDUTF_IMPLEMENTATION_H
An implementation of simdutf for a particular CPU architecture.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-32 string into Latin1 string.
virtual const char * find(const char *start, const char *end, char character) const noexcept=0
Find the first occurrence of a character in a string.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-16LE string.
virtual simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept
Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
virtual simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16BE string would require in UTF-32 format.
virtual simdutf_warn_unused size_t convert_utf8_to_utf16be(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16BE string.
virtual simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-16BE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-16BE string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_latin1_to_utf16le(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert Latin1 string into UTF-16LE string.
virtual simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16BE string.
virtual simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into Latin1 string and stop on error.
virtual simdutf_warn_unused full_result base64_to_binary_details(const char *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output while returning more details than base64_to_binary.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-16LE string into UTF-32 string.
virtual simdutf_warn_unused size_t latin1_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-8 string would require in Latin1 format.
virtual simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-32 string into Latin1 string and stop on error.
virtual simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
virtual simdutf_warn_unused size_t convert_latin1_to_utf16be(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert Latin1 string into UTF-16BE string.
virtual simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept
Compute the number of bytes that this UTF-32 string would require in Latin1 format.
virtual simdutf_warn_unused full_result base64_to_binary_details(const char16_t *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output while returning more details than base64_to_binary.
virtual simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16LE string.
virtual simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept=0
Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
virtual simdutf_warn_unused size_t count_utf8(const char *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-16BE string.
virtual simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of 4-byte code units that this UTF-8 string would require in UTF-32 format.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-16LE string into UTF-8 string.
virtual void to_well_formed_utf16le(const char16_t *input, size_t len, char16_t *output) const noexcept=0
Copies the UTF-16LE string while replacing mismatched surrogates with the Unicode replacement charact...
virtual simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16BE string and stop on error.
virtual simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept=0
Validate the UTF-8 string and stop on errors.
virtual simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
virtual size_t binary_to_base64_with_lines(const char *input, size_t length, char *output, size_t line_length=simdutf::default_line_length, base64_options options=base64_default) const noexcept=0
Convert a binary input to a base64 output with lines of given length.
virtual simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf8_to_utf16le(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16LE string.
virtual simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into Latin1 string and stop on error.
virtual simdutf_warn_unused result utf8_length_from_utf16le_with_replacement(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16LE string would require in UTF-8 format even when the UTF...
virtual simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16LE string and stop on error.
virtual simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-8 string and stop on error.
virtual size_t binary_to_base64(const char *input, size_t length, char *output, base64_options options=base64_default) const noexcept=0
Convert a binary input to a base64 output.
virtual simdutf_warn_unused result utf8_length_from_utf16be_with_replacement(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16BE string would require in UTF-8 format even when the UTF...
virtual simdutf_warn_unused size_t convert_latin1_to_utf32(const char *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert Latin1 string into UTF-32 string.
virtual simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char *input, size_t length, char32_t *utf32_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-8 string.
virtual simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept
Return the number of 4-byte code units that this Latin1 string would require in UTF-32 format.
virtual simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16LE string. This function may be best when you expect the input to be almost always ...
virtual simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16LE string and stop on error.
virtual simdutf_warn_unused size_t count_utf16le(const char16_t *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-32 string into Latin1 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-16BE string into Latin1 string.
bool supported_by_runtime_system() const
Whether the instruction sets this implementation is compiled against are supported by the current CPU.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16BE string and stop on error.
virtual simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-8 string and stop on error.
virtual simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-32 string and stop on error.
virtual simdutf_warn_unused result base64_to_binary(const char *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output.
virtual simdutf_warn_unused bool validate_utf16be_as_ascii(const char16_t *buf, size_t len) const noexcept=0
Validate the ASCII string as a UTF-16BE sequence.
virtual simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16BE string.
virtual simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept=0
Validate the UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16LE string would require in UTF-32 format.
virtual void change_endianness_utf16(const char16_t *input, size_t length, char16_t *output) const noexcept=0
Change the endianness of the input.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-16BE string.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert valid UTF-8 string into Latin1 string.
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char *input, size_t length) const noexcept
Provide the maximal binary length in bytes given the base64 input.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-16LE string.
virtual std::string name() const
The name of this implementation.
virtual simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept=0
Validate the ASCII string and stop on error.
virtual simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept=0
Validate the UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf8_to_utf32(const char *input, size_t length, char32_t *utf32_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-32 string.
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char16_t *input, size_t length) const noexcept
Provide the maximal binary length in bytes given the base64 input.
virtual simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept=0
Validate the UTF-32 string.
virtual simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-8 string.
virtual simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
virtual simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into Latin1 string.
virtual simdutf_warn_unused size_t convert_utf8_to_latin1(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert possibly broken UTF-8 string into Latin1 string.
virtual simdutf_warn_unused result base64_to_binary(const char16_t *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output.
simdutf_warn_unused size_t base64_length_from_binary(size_t length, base64_options options=base64_default) const noexcept
Provide the base64 length in bytes given the length of a binary input.
virtual int detect_encodings(const char *input, size_t length) const noexcept=0
This function will try to detect the possible encodings in one pass.
virtual simdutf_warn_unused size_t count_utf16be(const char16_t *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-8 string and stop on error.
virtual simdutf_warn_unused bool validate_utf16le_as_ascii(const char16_t *buf, size_t len) const noexcept=0
Validate the ASCII string as a UTF-16LE sequence.
virtual void to_well_formed_utf16be(const char16_t *input, size_t len, char16_t *output) const noexcept=0
Copies the UTF-16BE string while replacing mismatched surrogates with the Unicode replacement charact...
virtual simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept=0
Validate the ASCII string.
virtual simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
virtual simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16BE string and stop on error.
virtual simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert possibly broken UTF-8 string into Latin1 string and stop on error.
virtual std::string description() const
The description of this implementation.
virtual simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept
Return the number of 2-byte code units that this Latin1 string would require in UTF-16 format.
virtual simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16LE string and stop on error.
virtual simdutf_warn_unused size_t convert_latin1_to_utf8(const char *input, size_t length, char *utf8_output) const noexcept=0
Convert Latin1 string into UTF-8 string.
virtual encoding_type autodetect_encoding(const char *input, size_t length) const noexcept
This function will try to detect the encoding.
virtual simdutf_warn_unused size_t utf8_length_from_latin1(const char *input, size_t length) const noexcept=0
Return the number of bytes that this Latin1 string would require in UTF-8 format.