simdutf 6.4.0
Unicode at GB/s.
Loading...
Searching...
No Matches
implementation.h
1#ifndef SIMDUTF_IMPLEMENTATION_H
2#define SIMDUTF_IMPLEMENTATION_H
3#if !defined(SIMDUTF_NO_THREADS)
4 #include <atomic>
5#endif
6#include <string>
7#include <vector>
8#include "simdutf/common_defs.h"
9#include "simdutf/compiler_check.h"
10#include "simdutf/encoding_types.h"
11#include "simdutf/error.h"
12#include "simdutf/internal/isadetection.h"
13
14#if SIMDUTF_SPAN
15 #include <concepts>
16 #include <type_traits>
17 #include <span>
18#endif
19
20// The following defines are conditionally enabled/disabled during amalgamation.
21// By default all features are enabled, regular code shouldn't check them. Only
22// when user code really relies on a selected subset, it's good to verify these
23// flags, like:
24//
25// #if !SIMDUTF_FEATURE_UTF16
26// # error("Please amalgamate simdutf with UTF-16 support")
27// #endif
28//
29#define SIMDUTF_FEATURE_DETECT_ENCODING 1
30#define SIMDUTF_FEATURE_ASCII 1
31#define SIMDUTF_FEATURE_LATIN1 1
32#define SIMDUTF_FEATURE_UTF8 1
33#define SIMDUTF_FEATURE_UTF16 1
34#define SIMDUTF_FEATURE_UTF32 1
35#define SIMDUTF_FEATURE_BASE64 1
36
37namespace simdutf {
38
39#if SIMDUTF_SPAN
41namespace detail {
46template <typename T>
47concept byte_like = std::is_same_v<T, std::byte> || //
48 std::is_same_v<T, char> || //
49 std::is_same_v<T, signed char> || //
50 std::is_same_v<T, unsigned char>;
51
52template <typename T>
53concept is_byte_like = byte_like<std::remove_cvref_t<T>>;
54
55template <typename T>
56concept is_pointer = std::is_pointer_v<T>;
57
63template <typename T>
64concept input_span_of_byte_like = requires(const T &t) {
65 { t.size() } noexcept -> std::convertible_to<std::size_t>;
66 { t.data() } noexcept -> is_pointer;
67 { *t.data() } noexcept -> is_byte_like;
68};
69
70template <typename T>
71concept is_mutable = !std::is_const_v<std::remove_reference_t<T>>;
72
76template <typename T>
77concept output_span_of_byte_like = requires(T &t) {
78 { t.size() } noexcept -> std::convertible_to<std::size_t>;
79 { t.data() } noexcept -> is_pointer;
80 { *t.data() } noexcept -> is_byte_like;
81 { *t.data() } noexcept -> is_mutable;
82};
83} // namespace detail
84#endif
85
86#if SIMDUTF_FEATURE_DETECT_ENCODING
97simdutf_warn_unused simdutf::encoding_type
98autodetect_encoding(const char *input, size_t length) noexcept;
99simdutf_really_inline simdutf_warn_unused simdutf::encoding_type
100autodetect_encoding(const uint8_t *input, size_t length) noexcept {
101 return autodetect_encoding(reinterpret_cast<const char *>(input), length);
102}
103 #if SIMDUTF_SPAN
115simdutf_really_inline simdutf_warn_unused simdutf::encoding_type
116autodetect_encoding(
117 const detail::input_span_of_byte_like auto &input) noexcept {
118 return autodetect_encoding(reinterpret_cast<const char *>(input.data()),
119 input.size());
120}
121 #endif // SIMDUTF_SPAN
122
134simdutf_warn_unused int detect_encodings(const char *input,
135 size_t length) noexcept;
136simdutf_really_inline simdutf_warn_unused int
137detect_encodings(const uint8_t *input, size_t length) noexcept {
138 return detect_encodings(reinterpret_cast<const char *>(input), length);
139}
140 #if SIMDUTF_SPAN
141simdutf_really_inline simdutf_warn_unused int
142detect_encodings(const detail::input_span_of_byte_like auto &input) noexcept {
143 return detect_encodings(reinterpret_cast<const char *>(input.data()),
144 input.size());
145}
146 #endif // SIMDUTF_SPAN
147#endif // SIMDUTF_FEATURE_DETECT_ENCODING
148
149#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
161simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept;
162 #if SIMDUTF_SPAN
163simdutf_really_inline simdutf_warn_unused bool
164validate_utf8(const detail::input_span_of_byte_like auto &input) noexcept {
165 return validate_utf8(reinterpret_cast<const char *>(input.data()),
166 input.size());
167}
168 #endif // SIMDUTF_SPAN
169#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
170
171#if SIMDUTF_FEATURE_UTF8
184simdutf_warn_unused result validate_utf8_with_errors(const char *buf,
185 size_t len) noexcept;
186 #if SIMDUTF_SPAN
187simdutf_really_inline simdutf_warn_unused result validate_utf8_with_errors(
188 const detail::input_span_of_byte_like auto &input) noexcept {
189 return validate_utf8_with_errors(reinterpret_cast<const char *>(input.data()),
190 input.size());
191}
192 #endif // SIMDUTF_SPAN
193#endif // SIMDUTF_FEATURE_UTF8
194
195#if SIMDUTF_FEATURE_ASCII
205simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept;
206 #if SIMDUTF_SPAN
207simdutf_really_inline simdutf_warn_unused bool
208validate_ascii(const detail::input_span_of_byte_like auto &input) noexcept {
209 return validate_ascii(reinterpret_cast<const char *>(input.data()),
210 input.size());
211}
212 #endif // SIMDUTF_SPAN
213
227simdutf_warn_unused result validate_ascii_with_errors(const char *buf,
228 size_t len) noexcept;
229 #if SIMDUTF_SPAN
230simdutf_really_inline simdutf_warn_unused result validate_ascii_with_errors(
231 const detail::input_span_of_byte_like auto &input) noexcept {
232 return validate_ascii_with_errors(
233 reinterpret_cast<const char *>(input.data()), input.size());
234}
235 #endif // SIMDUTF_SPAN
236#endif // SIMDUTF_FEATURE_ASCII
237
238#if SIMDUTF_FEATURE_UTF16
253simdutf_warn_unused bool validate_utf16(const char16_t *buf,
254 size_t len) noexcept;
255 #if SIMDUTF_SPAN
256simdutf_really_inline simdutf_warn_unused bool
257validate_utf16(std::span<const char16_t> input) noexcept {
258 return validate_utf16(input.data(), input.size());
259}
260 #endif // SIMDUTF_SPAN
261#endif // SIMDUTF_FEATURE_UTF16
262
263#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
278simdutf_warn_unused bool validate_utf16le(const char16_t *buf,
279 size_t len) noexcept;
280 #if SIMDUTF_SPAN
281simdutf_really_inline simdutf_warn_unused bool
282validate_utf16le(std::span<const char16_t> input) noexcept {
283 return validate_utf16le(input.data(), input.size());
284}
285 #endif // SIMDUTF_SPAN
286#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
287
288#if SIMDUTF_FEATURE_UTF16
303simdutf_warn_unused bool validate_utf16be(const char16_t *buf,
304 size_t len) noexcept;
305 #if SIMDUTF_SPAN
306simdutf_really_inline simdutf_warn_unused bool
307validate_utf16be(std::span<const char16_t> input) noexcept {
308 return validate_utf16be(input.data(), input.size());
309}
310 #endif // SIMDUTF_SPAN
311
329simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf,
330 size_t len) noexcept;
331 #if SIMDUTF_SPAN
332simdutf_really_inline simdutf_warn_unused result
333validate_utf16_with_errors(std::span<const char16_t> input) noexcept {
334 return validate_utf16_with_errors(input.data(), input.size());
335}
336 #endif // SIMDUTF_SPAN
337
354simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf,
355 size_t len) noexcept;
356 #if SIMDUTF_SPAN
357simdutf_really_inline simdutf_warn_unused result
358validate_utf16le_with_errors(std::span<const char16_t> input) noexcept {
359 return validate_utf16le_with_errors(input.data(), input.size());
360}
361 #endif // SIMDUTF_SPAN
362
379simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf,
380 size_t len) noexcept;
381 #if SIMDUTF_SPAN
382simdutf_really_inline simdutf_warn_unused result
383validate_utf16be_with_errors(std::span<const char16_t> input) noexcept {
384 return validate_utf16be_with_errors(input.data(), input.size());
385}
386 #endif // SIMDUTF_SPAN
387#endif // SIMDUTF_FEATURE_UTF16
388
389#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
404simdutf_warn_unused bool validate_utf32(const char32_t *buf,
405 size_t len) noexcept;
406 #if SIMDUTF_SPAN
407simdutf_really_inline simdutf_warn_unused bool
408validate_utf32(std::span<const char32_t> input) noexcept {
409 return validate_utf32(input.data(), input.size());
410}
411 #endif // SIMDUTF_SPAN
412#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
413
414#if SIMDUTF_FEATURE_UTF32
431simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf,
432 size_t len) noexcept;
433 #if SIMDUTF_SPAN
434simdutf_really_inline simdutf_warn_unused result
435validate_utf32_with_errors(std::span<const char32_t> input) noexcept {
436 return validate_utf32_with_errors(input.data(), input.size());
437}
438 #endif // SIMDUTF_SPAN
439#endif // SIMDUTF_FEATURE_UTF32
440
441#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
452simdutf_warn_unused size_t convert_latin1_to_utf8(const char *input,
453 size_t length,
454 char *utf8_output) noexcept;
455 #if SIMDUTF_SPAN
456simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf8(
457 const detail::input_span_of_byte_like auto &latin1_input,
458 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
459 return convert_latin1_to_utf8(
460 reinterpret_cast<const char *>(latin1_input.data()), latin1_input.size(),
461 utf8_output.data());
462}
463 #endif // SIMDUTF_SPAN
464
476simdutf_warn_unused size_t
477convert_latin1_to_utf8_safe(const char *input, size_t length, char *utf8_output,
478 size_t utf8_len) noexcept;
479 #if SIMDUTF_SPAN
480simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf8_safe(
481 const detail::input_span_of_byte_like auto &input,
482 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
483 // implementation note: outputspan is a forwarding ref to avoid copying and
484 // allow both lvalues and rvalues. std::span can be copied without problems,
485 // but std::vector should not, and this function should accept both. it will
486 // allow using an owning rvalue ref (example: passing a temporary std::string)
487 // as output, but the user will quickly find out that he has no way of getting
488 // the data out of the object in that case.
489 return convert_latin1_to_utf8_safe(
490 input.data(), input.size(), reinterpret_cast<char *>(utf8_output.data()),
491 utf8_output.size());
492}
493 #endif // SIMDUTF_SPAN
494#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
495
496#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
507simdutf_warn_unused size_t convert_latin1_to_utf16le(
508 const char *input, size_t length, char16_t *utf16_output) noexcept;
509 #if SIMDUTF_SPAN
510simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf16le(
511 const detail::input_span_of_byte_like auto &latin1_input,
512 std::span<char16_t> utf16_output) noexcept {
513 return convert_latin1_to_utf16le(
514 reinterpret_cast<const char *>(latin1_input.data()), latin1_input.size(),
515 utf16_output.data());
516}
517 #endif // SIMDUTF_SPAN
518
529simdutf_warn_unused size_t convert_latin1_to_utf16be(
530 const char *input, size_t length, char16_t *utf16_output) noexcept;
531 #if SIMDUTF_SPAN
532simdutf_really_inline simdutf_warn_unused size_t
533convert_latin1_to_utf16be(const detail::input_span_of_byte_like auto &input,
534 std::span<char16_t> output) noexcept {
535 return convert_latin1_to_utf16be(reinterpret_cast<const char *>(input.data()),
536 input.size(), output.data());
537}
538 #endif // SIMDUTF_SPAN
/**
 * Length helper that takes only a count; no buffer is inspected. Only the
 * declaration is visible here.
 * NOTE(review): presumably maps a UTF-16 code-unit count to the Latin-1 size
 * it would need — confirm against the implementation.
 */
547simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept;
548
/**
 * Length helper that takes only a count; no buffer is inspected. Only the
 * declaration is visible here.
 * NOTE(review): presumably maps a Latin-1 byte count to the number of UTF-16
 * code units it would need — confirm against the implementation.
 */
557simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) noexcept;
558#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
559
560#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
571simdutf_warn_unused size_t convert_latin1_to_utf32(
572 const char *input, size_t length, char32_t *utf32_buffer) noexcept;
573 #if SIMDUTF_SPAN
574simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf32(
575 const detail::input_span_of_byte_like auto &latin1_input,
576 std::span<char32_t> utf32_output) noexcept {
577 return convert_latin1_to_utf32(
578 reinterpret_cast<const char *>(latin1_input.data()), latin1_input.size(),
579 utf32_output.data());
580}
581 #endif // SIMDUTF_SPAN
582#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
583
584#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
597simdutf_warn_unused size_t convert_utf8_to_latin1(const char *input,
598 size_t length,
599 char *latin1_output) noexcept;
600 #if SIMDUTF_SPAN
601simdutf_really_inline simdutf_warn_unused size_t convert_utf8_to_latin1(
602 const detail::input_span_of_byte_like auto &input,
603 detail::output_span_of_byte_like auto &&output) noexcept {
604 return convert_utf8_to_latin1(reinterpret_cast<const char *>(input.data()),
605 input.size(),
606 reinterpret_cast<char *>(output.data()));
607}
608 #endif // SIMDUTF_SPAN
609#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
610
611#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
625simdutf_warn_unused size_t convert_utf8_to_utf16(
626 const char *input, size_t length, char16_t *utf16_output) noexcept;
627 #if SIMDUTF_SPAN
628simdutf_really_inline simdutf_warn_unused size_t
629convert_utf8_to_utf16(const detail::input_span_of_byte_like auto &input,
630 std::span<char16_t> output) noexcept {
631 return convert_utf8_to_utf16(reinterpret_cast<const char *>(input.data()),
632 input.size(), output.data());
633}
634 #endif // SIMDUTF_SPAN
635#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
636
637#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
646simdutf_warn_unused size_t convert_latin1_to_utf16(
647 const char *input, size_t length, char16_t *utf16_output) noexcept;
648 #if SIMDUTF_SPAN
649simdutf_really_inline simdutf_warn_unused size_t
650convert_latin1_to_utf16(const detail::input_span_of_byte_like auto &input,
651 std::span<char16_t> output) noexcept {
652 return convert_latin1_to_utf16(reinterpret_cast<const char *>(input.data()),
653 input.size(), output.data());
654}
655 #endif // SIMDUTF_SPAN
656#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
657
658#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
671simdutf_warn_unused size_t convert_utf8_to_utf16le(
672 const char *input, size_t length, char16_t *utf16_output) noexcept;
673 #if SIMDUTF_SPAN
674simdutf_really_inline simdutf_warn_unused size_t
675convert_utf8_to_utf16le(const detail::input_span_of_byte_like auto &utf8_input,
676 std::span<char16_t> utf16_output) noexcept {
677 return convert_utf8_to_utf16le(
678 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
679 utf16_output.data());
680}
681 #endif // SIMDUTF_SPAN
682
695simdutf_warn_unused size_t convert_utf8_to_utf16be(
696 const char *input, size_t length, char16_t *utf16_output) noexcept;
697 #if SIMDUTF_SPAN
698simdutf_really_inline simdutf_warn_unused size_t
699convert_utf8_to_utf16be(const detail::input_span_of_byte_like auto &utf8_input,
700 std::span<char16_t> utf16_output) noexcept {
701 return convert_utf8_to_utf16be(
702 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
703 utf16_output.data());
704}
705 #endif // SIMDUTF_SPAN
706#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
707
708#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
725simdutf_warn_unused result convert_utf8_to_latin1_with_errors(
726 const char *input, size_t length, char *latin1_output) noexcept;
727 #if SIMDUTF_SPAN
728simdutf_really_inline simdutf_warn_unused result
729convert_utf8_to_latin1_with_errors(
730 const detail::input_span_of_byte_like auto &utf8_input,
731 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
732 return convert_utf8_to_latin1_with_errors(
733 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
734 reinterpret_cast<char *>(latin1_output.data()));
735}
736 #endif // SIMDUTF_SPAN
737#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
738
739#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
755simdutf_warn_unused result convert_utf8_to_utf16_with_errors(
756 const char *input, size_t length, char16_t *utf16_output) noexcept;
757 #if SIMDUTF_SPAN
758simdutf_really_inline simdutf_warn_unused result
759convert_utf8_to_utf16_with_errors(
760 const detail::input_span_of_byte_like auto &utf8_input,
761 std::span<char16_t> utf16_output) noexcept {
762 return convert_utf8_to_utf16_with_errors(
763 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
764 utf16_output.data());
765}
766 #endif // SIMDUTF_SPAN
767
782simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(
783 const char *input, size_t length, char16_t *utf16_output) noexcept;
784 #if SIMDUTF_SPAN
785simdutf_really_inline simdutf_warn_unused result
786convert_utf8_to_utf16le_with_errors(
787 const detail::input_span_of_byte_like auto &utf8_input,
788 std::span<char16_t> utf16_output) noexcept {
789 return convert_utf8_to_utf16le_with_errors(
790 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
791 utf16_output.data());
792}
793 #endif // SIMDUTF_SPAN
794
809simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(
810 const char *input, size_t length, char16_t *utf16_output) noexcept;
811 #if SIMDUTF_SPAN
812simdutf_really_inline simdutf_warn_unused result
813convert_utf8_to_utf16be_with_errors(
814 const detail::input_span_of_byte_like auto &utf8_input,
815 std::span<char16_t> utf16_output) noexcept {
816 return convert_utf8_to_utf16be_with_errors(
817 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
818 utf16_output.data());
819}
820 #endif // SIMDUTF_SPAN
821#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
822
823#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
836simdutf_warn_unused size_t convert_utf8_to_utf32(
837 const char *input, size_t length, char32_t *utf32_output) noexcept;
838 #if SIMDUTF_SPAN
839simdutf_really_inline simdutf_warn_unused size_t
840convert_utf8_to_utf32(const detail::input_span_of_byte_like auto &utf8_input,
841 std::span<char32_t> utf32_output) noexcept {
842 return convert_utf8_to_utf32(
843 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
844 utf32_output.data());
845}
846 #endif // SIMDUTF_SPAN
847
862simdutf_warn_unused result convert_utf8_to_utf32_with_errors(
863 const char *input, size_t length, char32_t *utf32_output) noexcept;
864 #if SIMDUTF_SPAN
865simdutf_really_inline simdutf_warn_unused result
866convert_utf8_to_utf32_with_errors(
867 const detail::input_span_of_byte_like auto &utf8_input,
868 std::span<char32_t> utf32_output) noexcept {
869 return convert_utf8_to_utf32_with_errors(
870 reinterpret_cast<const char *>(utf8_input.data()), utf8_input.size(),
871 utf32_output.data());
872}
873 #endif // SIMDUTF_SPAN
874#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
875
876#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
896simdutf_warn_unused size_t convert_valid_utf8_to_latin1(
897 const char *input, size_t length, char *latin1_output) noexcept;
898 #if SIMDUTF_SPAN
899simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_latin1(
900 const detail::input_span_of_byte_like auto &valid_utf8_input,
901 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
902 return convert_valid_utf8_to_latin1(
903 reinterpret_cast<const char *>(valid_utf8_input.data()),
904 valid_utf8_input.size(), latin1_output.data());
905}
906 #endif // SIMDUTF_SPAN
907#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
908
909#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
920simdutf_warn_unused size_t convert_valid_utf8_to_utf16(
921 const char *input, size_t length, char16_t *utf16_buffer) noexcept;
922 #if SIMDUTF_SPAN
923simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16(
924 const detail::input_span_of_byte_like auto &valid_utf8_input,
925 std::span<char16_t> utf16_output) noexcept {
926 return convert_valid_utf8_to_utf16(
927 reinterpret_cast<const char *>(valid_utf8_input.data()),
928 valid_utf8_input.size(), utf16_output.data());
929}
930 #endif // SIMDUTF_SPAN
931
942simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(
943 const char *input, size_t length, char16_t *utf16_buffer) noexcept;
944 #if SIMDUTF_SPAN
945simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(
946 const detail::input_span_of_byte_like auto &valid_utf8_input,
947 std::span<char16_t> utf16_output) noexcept {
948 return convert_valid_utf8_to_utf16le(
949 reinterpret_cast<const char *>(valid_utf8_input.data()),
950 valid_utf8_input.size(), utf16_output.data());
951}
952 #endif // SIMDUTF_SPAN
953
964simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(
965 const char *input, size_t length, char16_t *utf16_buffer) noexcept;
966 #if SIMDUTF_SPAN
967simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(
968 const detail::input_span_of_byte_like auto &valid_utf8_input,
969 std::span<char16_t> utf16_output) noexcept {
970 return convert_valid_utf8_to_utf16be(
971 reinterpret_cast<const char *>(valid_utf8_input.data()),
972 valid_utf8_input.size(), utf16_output.data());
973}
974 #endif // SIMDUTF_SPAN
975#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
976
977#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
988simdutf_warn_unused size_t convert_valid_utf8_to_utf32(
989 const char *input, size_t length, char32_t *utf32_buffer) noexcept;
990 #if SIMDUTF_SPAN
991simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf32(
992 const detail::input_span_of_byte_like auto &valid_utf8_input,
993 std::span<char32_t> utf32_output) noexcept {
994 return convert_valid_utf8_to_utf32(
995 reinterpret_cast<const char *>(valid_utf8_input.data()),
996 valid_utf8_input.size(), utf32_output.data());
997}
998 #endif // SIMDUTF_SPAN
999#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1000
1001#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1010simdutf_warn_unused size_t utf8_length_from_latin1(const char *input,
1011 size_t length) noexcept;
1012 #if SIMDUTF_SPAN
1013simdutf_really_inline simdutf_warn_unused size_t utf8_length_from_latin1(
1014 const detail::input_span_of_byte_like auto &latin1_input) noexcept {
1015 return utf8_length_from_latin1(
1016 reinterpret_cast<const char *>(latin1_input.data()), latin1_input.size());
1017}
1018 #endif // SIMDUTF_SPAN
1019
1033simdutf_warn_unused size_t latin1_length_from_utf8(const char *input,
1034 size_t length) noexcept;
1035 #if SIMDUTF_SPAN
1036simdutf_really_inline simdutf_warn_unused size_t latin1_length_from_utf8(
1037 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
1038 return latin1_length_from_utf8(
1039 reinterpret_cast<const char *>(valid_utf8_input.data()),
1040 valid_utf8_input.size());
1041}
1042 #endif // SIMDUTF_SPAN
1043#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1044
1045#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1060simdutf_warn_unused size_t utf16_length_from_utf8(const char *input,
1061 size_t length) noexcept;
1062 #if SIMDUTF_SPAN
1063simdutf_really_inline simdutf_warn_unused size_t utf16_length_from_utf8(
1064 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
1065 return utf16_length_from_utf8(
1066 reinterpret_cast<const char *>(valid_utf8_input.data()),
1067 valid_utf8_input.size());
1068}
1069 #endif // SIMDUTF_SPAN
1070#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1071
1072#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1089simdutf_warn_unused size_t utf32_length_from_utf8(const char *input,
1090 size_t length) noexcept;
1091 #if SIMDUTF_SPAN
1092simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf8(
1093 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
1094 return utf32_length_from_utf8(
1095 reinterpret_cast<const char *>(valid_utf8_input.data()),
1096 valid_utf8_input.size());
1097}
1098 #endif // SIMDUTF_SPAN
1099#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1100
1101#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1117simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t *input,
1118 size_t length,
1119 char *utf8_buffer) noexcept;
1120 #if SIMDUTF_SPAN
1121simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_utf8(
1122 std::span<char16_t> utf16_input,
1123 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1124 return convert_utf16_to_utf8(utf16_input.data(), utf16_input.size(),
1125 reinterpret_cast<char *>(utf8_output.data()));
1126}
1127 #endif // SIMDUTF_SPAN
1128#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1129
1130#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1146simdutf_warn_unused size_t convert_utf16_to_latin1(
1147 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1148 #if SIMDUTF_SPAN
1149simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_latin1(
1150 std::span<char16_t> utf16_input,
1151 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1152 return convert_utf16_to_latin1(
1153 utf16_input.data(), utf16_input.size(),
1154 reinterpret_cast<char *>(latin1_output.data()));
1155}
1156 #endif // SIMDUTF_SPAN
1157
1174simdutf_warn_unused size_t convert_utf16le_to_latin1(
1175 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1176 #if SIMDUTF_SPAN
1177simdutf_really_inline simdutf_warn_unused size_t convert_utf16le_to_latin1(
1178 std::span<char16_t> utf16_input,
1179 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1180 return convert_utf16le_to_latin1(
1181 utf16_input.data(), utf16_input.size(),
1182 reinterpret_cast<char *>(latin1_output.data()));
1183}
1184 #endif // SIMDUTF_SPAN
1185
1200simdutf_warn_unused size_t convert_utf16be_to_latin1(
1201 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1202 #if SIMDUTF_SPAN
1203simdutf_really_inline simdutf_warn_unused size_t convert_utf16be_to_latin1(
1204 std::span<char16_t> utf16_input,
1205 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1206 return convert_utf16be_to_latin1(
1207 utf16_input.data(), utf16_input.size(),
1208 reinterpret_cast<char *>(latin1_output.data()));
1209}
1210 #endif // SIMDUTF_SPAN
1211#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1212
1213#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1228simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *input,
1229 size_t length,
1230 char *utf8_buffer) noexcept;
1231 #if SIMDUTF_SPAN
1232simdutf_really_inline simdutf_warn_unused size_t convert_utf16le_to_utf8(
1233 std::span<char16_t> utf16_input,
1234 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1235 return convert_utf16le_to_utf8(utf16_input.data(), utf16_input.size(),
1236 reinterpret_cast<char *>(utf8_output.data()));
1237}
1238 #endif // SIMDUTF_SPAN
1239
1254simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input,
1255 size_t length,
1256 char *utf8_buffer) noexcept;
1257 #if SIMDUTF_SPAN
1258simdutf_really_inline simdutf_warn_unused size_t convert_utf16be_to_utf8(
1259 std::span<char16_t> utf16_input,
1260 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1261 return convert_utf16be_to_utf8(utf16_input.data(), utf16_input.size(),
1262 reinterpret_cast<char *>(utf8_output.data()));
1263}
1264 #endif // SIMDUTF_SPAN
1265#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1266
1267#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1284simdutf_warn_unused result convert_utf16_to_latin1_with_errors(
1285 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1286 #if SIMDUTF_SPAN
1287simdutf_really_inline simdutf_warn_unused result
1288convert_utf16_to_latin1_with_errors(
1289 std::span<char16_t> utf16_input,
1290 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1291 return convert_utf16_to_latin1_with_errors(
1292 utf16_input.data(), utf16_input.size(),
1293 reinterpret_cast<char *>(latin1_output.data()));
1294}
1295 #endif // SIMDUTF_SPAN
1296
1312simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(
1313 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1314 #if SIMDUTF_SPAN
1315simdutf_really_inline simdutf_warn_unused result
1316convert_utf16le_to_latin1_with_errors(
1317 std::span<char16_t> utf16_input,
1318 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1319 return convert_utf16le_to_latin1_with_errors(
1320 utf16_input.data(), utf16_input.size(),
1321 reinterpret_cast<char *>(latin1_output.data()));
1322}
1323 #endif // SIMDUTF_SPAN
1324
1342simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(
1343 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1344 #if SIMDUTF_SPAN
1345simdutf_really_inline simdutf_warn_unused result
1346convert_utf16be_to_latin1_with_errors(
1347 std::span<char16_t> utf16_input,
1348 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1349 return convert_utf16be_to_latin1_with_errors(
1350 utf16_input.data(), utf16_input.size(),
1351 reinterpret_cast<char *>(latin1_output.data()));
1352}
1353 #endif // SIMDUTF_SPAN
1354#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1355
1356#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1374simdutf_warn_unused result convert_utf16_to_utf8_with_errors(
1375 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
1376 #if SIMDUTF_SPAN
1377simdutf_really_inline simdutf_warn_unused result
1378convert_utf16_to_utf8_with_errors(
1379 std::span<char16_t> utf16_input,
1380 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1381 return convert_utf16_to_utf8_with_errors(
1382 utf16_input.data(), utf16_input.size(),
1383 reinterpret_cast<char *>(utf8_output.data()));
1384}
1385 #endif // SIMDUTF_SPAN
1386
1403simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(
1404 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
1405 #if SIMDUTF_SPAN
1406simdutf_really_inline simdutf_warn_unused result
1407convert_utf16le_to_utf8_with_errors(
1408 std::span<char16_t> utf16_input,
1409 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1410 return convert_utf16le_to_utf8_with_errors(
1411 utf16_input.data(), utf16_input.size(),
1412 reinterpret_cast<char *>(utf8_output.data()));
1413}
1414 #endif // SIMDUTF_SPAN
1415
1432simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(
1433 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
1434 #if SIMDUTF_SPAN
1435simdutf_really_inline simdutf_warn_unused result
1436convert_utf16be_to_utf8_with_errors(
1437 std::span<char16_t> utf16_input,
1438 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1439 return convert_utf16be_to_utf8_with_errors(
1440 utf16_input.data(), utf16_input.size(),
1441 reinterpret_cast<char *>(utf8_output.data()));
1442}
1443 #endif // SIMDUTF_SPAN
1444
1458simdutf_warn_unused size_t convert_valid_utf16_to_utf8(
1459 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
1460 #if SIMDUTF_SPAN
1461simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_utf8(
1462 std::span<char16_t> valid_utf16_input,
1463 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1464 return convert_valid_utf16_to_utf8(
1465 valid_utf16_input.data(), valid_utf16_input.size(),
1466 reinterpret_cast<char *>(utf8_output.data()));
1467}
1468 #endif // SIMDUTF_SPAN
1469#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1470
1471#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1491simdutf_warn_unused size_t convert_valid_utf16_to_latin1(
1492 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1493 #if SIMDUTF_SPAN
1494simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_latin1(
1495 std::span<char16_t> valid_utf16_input,
1496 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1497 return convert_valid_utf16_to_latin1(
1498 valid_utf16_input.data(), valid_utf16_input.size(),
1499 reinterpret_cast<char *>(latin1_output.data()));
1500}
1501 #endif // SIMDUTF_SPAN
1502
1522simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(
1523 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1524 #if SIMDUTF_SPAN
1525simdutf_really_inline simdutf_warn_unused size_t
1526convert_valid_utf16le_to_latin1(
1527 std::span<char16_t> valid_utf16_input,
1528 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1529 return convert_valid_utf16le_to_latin1(
1530 valid_utf16_input.data(), valid_utf16_input.size(),
1531 reinterpret_cast<char *>(latin1_output.data()));
1532}
1533 #endif // SIMDUTF_SPAN
1534
1554simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(
1555 const char16_t *input, size_t length, char *latin1_buffer) noexcept;
1556 #if SIMDUTF_SPAN
1557simdutf_really_inline simdutf_warn_unused size_t
1558convert_valid_utf16be_to_latin1(
1559 std::span<char16_t> valid_utf16_input,
1560 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
1561 return convert_valid_utf16be_to_latin1(
1562 valid_utf16_input.data(), valid_utf16_input.size(),
1563 reinterpret_cast<char *>(latin1_output.data()));
1564}
1565 #endif // SIMDUTF_SPAN
1566#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1567
1568#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1583simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(
1584 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
1585 #if SIMDUTF_SPAN
1586simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(
1587 std::span<char16_t> valid_utf16_input,
1588 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1589 return convert_valid_utf16le_to_utf8(
1590 valid_utf16_input.data(), valid_utf16_input.size(),
1591 reinterpret_cast<char *>(utf8_output.data()));
1592}
1593 #endif // SIMDUTF_SPAN
1594
1608simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(
1609 const char16_t *input, size_t length, char *utf8_buffer) noexcept;
1610 #if SIMDUTF_SPAN
1611simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(
1612 std::span<char16_t> valid_utf16_input,
1613 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1614 return convert_valid_utf16be_to_utf8(
1615 valid_utf16_input.data(), valid_utf16_input.size(),
1616 reinterpret_cast<char *>(utf8_output.data()));
1617}
1618 #endif // SIMDUTF_SPAN
1619#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1620
1621#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1637simdutf_warn_unused size_t convert_utf16_to_utf32(
1638 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
1639 #if SIMDUTF_SPAN
1640simdutf_really_inline simdutf_warn_unused size_t
1641convert_utf16_to_utf32(std::span<const char16_t> utf16_input,
1642 std::span<char32_t> utf32_output) noexcept {
1643 return convert_utf16_to_utf32(utf16_input.data(), utf16_input.size(),
1644 utf32_output.data());
1645}
1646 #endif // SIMDUTF_SPAN
1647
1662simdutf_warn_unused size_t convert_utf16le_to_utf32(
1663 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
1664 #if SIMDUTF_SPAN
1665simdutf_really_inline simdutf_warn_unused size_t
1666convert_utf16le_to_utf32(std::span<const char16_t> utf16_input,
1667 std::span<char32_t> utf32_output) noexcept {
1668 return convert_utf16le_to_utf32(utf16_input.data(), utf16_input.size(),
1669 utf32_output.data());
1670}
1671 #endif // SIMDUTF_SPAN
1672
1687simdutf_warn_unused size_t convert_utf16be_to_utf32(
1688 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
1689 #if SIMDUTF_SPAN
1690simdutf_really_inline simdutf_warn_unused size_t
1691convert_utf16be_to_utf32(std::span<const char16_t> utf16_input,
1692 std::span<char32_t> utf32_output) noexcept {
1693 return convert_utf16be_to_utf32(utf16_input.data(), utf16_input.size(),
1694 utf32_output.data());
1695}
1696 #endif // SIMDUTF_SPAN
1697
1715simdutf_warn_unused result convert_utf16_to_utf32_with_errors(
1716 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
1717 #if SIMDUTF_SPAN
1718simdutf_really_inline simdutf_warn_unused result
1719convert_utf16_to_utf32_with_errors(std::span<const char16_t> utf16_input,
1720 std::span<char32_t> utf32_output) noexcept {
1721 return convert_utf16_to_utf32_with_errors(
1722 utf16_input.data(), utf16_input.size(), utf32_output.data());
1723}
1724 #endif // SIMDUTF_SPAN
1725
1742simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(
1743 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
1744 #if SIMDUTF_SPAN
1745simdutf_really_inline simdutf_warn_unused result
1746convert_utf16le_to_utf32_with_errors(
1747 std::span<const char16_t> utf16_input,
1748 std::span<char32_t> utf32_output) noexcept {
1749 return convert_utf16le_to_utf32_with_errors(
1750 utf16_input.data(), utf16_input.size(), utf32_output.data());
1751}
1752 #endif // SIMDUTF_SPAN
1753
1770simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(
1771 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
1772 #if SIMDUTF_SPAN
1773simdutf_really_inline simdutf_warn_unused result
1774convert_utf16be_to_utf32_with_errors(
1775 std::span<const char16_t> utf16_input,
1776 std::span<char32_t> utf32_output) noexcept {
1777 return convert_utf16be_to_utf32_with_errors(
1778 utf16_input.data(), utf16_input.size(), utf32_output.data());
1779}
1780 #endif // SIMDUTF_SPAN
1781
1796simdutf_warn_unused size_t convert_valid_utf16_to_utf32(
1797 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
1798 #if SIMDUTF_SPAN
1799simdutf_really_inline simdutf_warn_unused size_t
1800convert_valid_utf16_to_utf32(std::span<const char16_t> valid_utf16_input,
1801 std::span<char32_t> utf32_output) noexcept {
1802 return convert_valid_utf16_to_utf32(
1803 valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data());
1804}
1805 #endif // SIMDUTF_SPAN
1806
1820simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(
1821 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
1822 #if SIMDUTF_SPAN
1823simdutf_really_inline simdutf_warn_unused size_t
1824convert_valid_utf16le_to_utf32(std::span<const char16_t> valid_utf16_input,
1825 std::span<char32_t> utf32_output) noexcept {
1826 return convert_valid_utf16le_to_utf32(
1827 valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data());
1828}
1829 #endif // SIMDUTF_SPAN
1830
1844simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(
1845 const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept;
1846 #if SIMDUTF_SPAN
1847simdutf_really_inline simdutf_warn_unused size_t
1848convert_valid_utf16be_to_utf32(std::span<const char16_t> valid_utf16_input,
1849 std::span<char32_t> utf32_output) noexcept {
1850 return convert_valid_utf16be_to_utf32(
1851 valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data());
1852}
1853 #endif // SIMDUTF_SPAN
1854#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1855
1856#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
// Length helper that takes only a count (no input pointer); declared here,
// defined elsewhere. Presumably maps a UTF-16 code-unit count to the Latin-1
// byte count — confirm against the definition.
1869simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept;
1870
1882simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t *input,
1883 size_t length) noexcept;
1884 #if SIMDUTF_SPAN
1885simdutf_really_inline simdutf_warn_unused size_t
1886utf8_length_from_utf16(std::span<const char16_t> valid_utf16_input) noexcept {
1887 return utf8_length_from_utf16(valid_utf16_input.data(),
1888 valid_utf16_input.size());
1889}
1890 #endif // SIMDUTF_SPAN
1891#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1892
1893#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1905simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input,
1906 size_t length) noexcept;
1907 #if SIMDUTF_SPAN
1908simdutf_really_inline simdutf_warn_unused size_t
1909utf8_length_from_utf16le(std::span<const char16_t> valid_utf16_input) noexcept {
1910 return utf8_length_from_utf16le(valid_utf16_input.data(),
1911 valid_utf16_input.size());
1912}
1913 #endif // SIMDUTF_SPAN
1914
1926simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input,
1927 size_t length) noexcept;
1928 #if SIMDUTF_SPAN
1929simdutf_really_inline simdutf_warn_unused size_t
1930utf8_length_from_utf16be(std::span<const char16_t> valid_utf16_input) noexcept {
1931 return utf8_length_from_utf16be(valid_utf16_input.data(),
1932 valid_utf16_input.size());
1933}
1934 #endif // SIMDUTF_SPAN
1935#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1936
1937#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1951simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input,
1952 size_t length,
1953 char *utf8_buffer) noexcept;
1954 #if SIMDUTF_SPAN
1955simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_utf8(
1956 std::span<const char32_t> utf32_input,
1957 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1958 return convert_utf32_to_utf8(utf32_input.data(), utf32_input.size(),
1959 reinterpret_cast<char *>(utf8_output.data()));
1960}
1961 #endif // SIMDUTF_SPAN
1962
1979simdutf_warn_unused result convert_utf32_to_utf8_with_errors(
1980 const char32_t *input, size_t length, char *utf8_buffer) noexcept;
1981 #if SIMDUTF_SPAN
1982simdutf_really_inline simdutf_warn_unused result
1983convert_utf32_to_utf8_with_errors(
1984 std::span<const char32_t> utf32_input,
1985 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
1986 return convert_utf32_to_utf8_with_errors(
1987 utf32_input.data(), utf32_input.size(),
1988 reinterpret_cast<char *>(utf8_output.data()));
1989}
1990 #endif // SIMDUTF_SPAN
1991
2005simdutf_warn_unused size_t convert_valid_utf32_to_utf8(
2006 const char32_t *input, size_t length, char *utf8_buffer) noexcept;
2007 #if SIMDUTF_SPAN
2008simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_utf8(
2009 std::span<const char32_t> valid_utf32_input,
2010 detail::output_span_of_byte_like auto &&utf8_output) noexcept {
2011 return convert_valid_utf32_to_utf8(
2012 valid_utf32_input.data(), valid_utf32_input.size(),
2013 reinterpret_cast<char *>(utf8_output.data()));
2014}
2015 #endif // SIMDUTF_SPAN
2016#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
2017
2018#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2033simdutf_warn_unused size_t convert_utf32_to_utf16(
2034 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
2035 #if SIMDUTF_SPAN
2036simdutf_really_inline simdutf_warn_unused size_t
2037convert_utf32_to_utf16(std::span<const char32_t> utf32_input,
2038 std::span<char16_t> utf16_output) noexcept {
2039 return convert_utf32_to_utf16(utf32_input.data(), utf32_input.size(),
2040 utf16_output.data());
2041}
2042 #endif // SIMDUTF_SPAN
2043
2057simdutf_warn_unused size_t convert_utf32_to_utf16le(
2058 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
2059 #if SIMDUTF_SPAN
2060simdutf_really_inline simdutf_warn_unused size_t
2061convert_utf32_to_utf16le(std::span<const char32_t> utf32_input,
2062 std::span<char16_t> utf16_output) noexcept {
2063 return convert_utf32_to_utf16le(utf32_input.data(), utf32_input.size(),
2064 utf16_output.data());
2065}
2066 #endif // SIMDUTF_SPAN
2067#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2068
2069#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
2084simdutf_warn_unused size_t convert_utf32_to_latin1(
2085 const char32_t *input, size_t length, char *latin1_buffer) noexcept;
2086 #if SIMDUTF_SPAN
2087simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_latin1(
2088 std::span<char32_t> utf32_input,
2089 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2090 return convert_utf32_to_latin1(
2091 utf32_input.data(), utf32_input.size(),
2092 reinterpret_cast<char *>(latin1_output.data()));
2093}
2094 #endif // SIMDUTF_SPAN
2095
2113simdutf_warn_unused result convert_utf32_to_latin1_with_errors(
2114 const char32_t *input, size_t length, char *latin1_buffer) noexcept;
2115 #if SIMDUTF_SPAN
2116simdutf_really_inline simdutf_warn_unused result
2117convert_utf32_to_latin1_with_errors(
2118 std::span<char32_t> utf32_input,
2119 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2120 return convert_utf32_to_latin1_with_errors(
2121 utf32_input.data(), utf32_input.size(),
2122 reinterpret_cast<char *>(latin1_output.data()));
2123}
2124 #endif // SIMDUTF_SPAN
2125
2146simdutf_warn_unused size_t convert_valid_utf32_to_latin1(
2147 const char32_t *input, size_t length, char *latin1_buffer) noexcept;
2148 #if SIMDUTF_SPAN
2149simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_latin1(
2150 std::span<char32_t> valid_utf32_input,
2151 detail::output_span_of_byte_like auto &&latin1_output) noexcept {
2152 return convert_valid_utf32_to_latin1(
2153 valid_utf32_input.data(), valid_utf32_input.size(),
2154 reinterpret_cast<char *>(latin1_output.data()));
2155}
2156 #endif // SIMDUTF_SPAN
2157
// Length helper that takes only a count (no input pointer); declared here,
// defined elsewhere. Presumably maps a UTF-32 code-unit count to the Latin-1
// byte count — confirm against the definition.
2170simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) noexcept;
2171
// Length helper that takes only a count (no input pointer); declared here,
// defined elsewhere. Presumably maps a Latin-1 byte count to the UTF-32
// code-unit count — confirm against the definition.
2180simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) noexcept;
2181#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
2182
2183#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2197simdutf_warn_unused size_t convert_utf32_to_utf16be(
2198 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
2199 #if SIMDUTF_SPAN
2200simdutf_really_inline simdutf_warn_unused size_t
2201convert_utf32_to_utf16be(std::span<const char32_t> utf32_input,
2202 std::span<char16_t> utf16_output) noexcept {
2203 return convert_utf32_to_utf16be(utf32_input.data(), utf32_input.size(),
2204 utf16_output.data());
2205}
2206 #endif // SIMDUTF_SPAN
2207
2225simdutf_warn_unused result convert_utf32_to_utf16_with_errors(
2226 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
2227 #if SIMDUTF_SPAN
2228simdutf_really_inline simdutf_warn_unused result
2229convert_utf32_to_utf16_with_errors(std::span<const char32_t> utf32_input,
2230 std::span<char16_t> utf16_output) noexcept {
2231 return convert_utf32_to_utf16_with_errors(
2232 utf32_input.data(), utf32_input.size(), utf16_output.data());
2233}
2234 #endif // SIMDUTF_SPAN
2235
2252simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(
2253 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
2254 #if SIMDUTF_SPAN
2255simdutf_really_inline simdutf_warn_unused result
2256convert_utf32_to_utf16le_with_errors(
2257 std::span<const char32_t> utf32_input,
2258 std::span<char16_t> utf16_output) noexcept {
2259 return convert_utf32_to_utf16le_with_errors(
2260 utf32_input.data(), utf32_input.size(), utf16_output.data());
2261}
2262 #endif // SIMDUTF_SPAN
2263
2280simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(
2281 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
2282 #if SIMDUTF_SPAN
2283simdutf_really_inline simdutf_warn_unused result
2284convert_utf32_to_utf16be_with_errors(
2285 std::span<const char32_t> utf32_input,
2286 std::span<char16_t> utf16_output) noexcept {
2287 return convert_utf32_to_utf16be_with_errors(
2288 utf32_input.data(), utf32_input.size(), utf16_output.data());
2289}
2290 #endif // SIMDUTF_SPAN
2291
2305simdutf_warn_unused size_t convert_valid_utf32_to_utf16(
2306 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
2307 #if SIMDUTF_SPAN
2308simdutf_really_inline simdutf_warn_unused size_t
2309convert_valid_utf32_to_utf16(std::span<const char32_t> valid_utf32_input,
2310 std::span<char16_t> utf16_output) noexcept {
2311 return convert_valid_utf32_to_utf16(
2312 valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data());
2313}
2314 #endif // SIMDUTF_SPAN
2315
2329simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(
2330 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
2331 #if SIMDUTF_SPAN
2332simdutf_really_inline simdutf_warn_unused size_t
2333convert_valid_utf32_to_utf16le(std::span<const char32_t> valid_utf32_input,
2334 std::span<char16_t> utf16_output) noexcept {
2335 return convert_valid_utf32_to_utf16le(
2336 valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data());
2337}
2338 #endif // SIMDUTF_SPAN
2339
2353simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(
2354 const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept;
2355 #if SIMDUTF_SPAN
2356simdutf_really_inline simdutf_warn_unused size_t
2357convert_valid_utf32_to_utf16be(std::span<const char32_t> valid_utf32_input,
2358 std::span<char16_t> utf16_output) noexcept {
2359 return convert_valid_utf32_to_utf16be(
2360 valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data());
2361}
2362 #endif // SIMDUTF_SPAN
2363#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2364
2365#if SIMDUTF_FEATURE_UTF16
// Pointer/length entry point; only declared here (definition elsewhere).
// Reads `length` units from `input` and takes a mutable `output` pointer with
// no capacity argument. Returns nothing.
void change_endianness_utf16(const char16_t *input, size_t length,
                             char16_t *output) noexcept;
  #if SIMDUTF_SPAN
// Span convenience overload: forwards the input span's pointer/size and the
// output span's pointer to the overload above. The size of `utf16_output` is
// not consulted.
simdutf_really_inline void
change_endianness_utf16(std::span<const char16_t> utf16_input,
                        std::span<char16_t> utf16_output) noexcept {
  const char16_t *const src = utf16_input.data();
  const size_t src_len = utf16_input.size();
  char16_t *const dst = utf16_output.data();
  change_endianness_utf16(src, src_len, dst);
}
  #endif // SIMDUTF_SPAN
2389#endif // SIMDUTF_FEATURE_UTF16
2390
2391#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
2403simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input,
2404 size_t length) noexcept;
2405 #if SIMDUTF_SPAN
2406simdutf_really_inline simdutf_warn_unused size_t
2407utf8_length_from_utf32(std::span<const char32_t> valid_utf32_input) noexcept {
2408 return utf8_length_from_utf32(valid_utf32_input.data(),
2409 valid_utf32_input.size());
2410}
2411 #endif // SIMDUTF_SPAN
2412#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
2413
2414#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2426simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input,
2427 size_t length) noexcept;
2428 #if SIMDUTF_SPAN
2429simdutf_really_inline simdutf_warn_unused size_t
2430utf16_length_from_utf32(std::span<const char32_t> valid_utf32_input) noexcept {
2431 return utf16_length_from_utf32(valid_utf32_input.data(),
2432 valid_utf32_input.size());
2433}
2434 #endif // SIMDUTF_SPAN
2435
2451simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t *input,
2452 size_t length) noexcept;
2453 #if SIMDUTF_SPAN
2454simdutf_really_inline simdutf_warn_unused size_t
2455utf32_length_from_utf16(std::span<const char16_t> valid_utf16_input) noexcept {
2456 return utf32_length_from_utf16(valid_utf16_input.data(),
2457 valid_utf16_input.size());
2458}
2459 #endif // SIMDUTF_SPAN
2460
2476simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input,
2477 size_t length) noexcept;
2478 #if SIMDUTF_SPAN
2479simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf16le(
2480 std::span<const char16_t> valid_utf16_input) noexcept {
2481 return utf32_length_from_utf16le(valid_utf16_input.data(),
2482 valid_utf16_input.size());
2483}
2484 #endif // SIMDUTF_SPAN
2485
2501simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input,
2502 size_t length) noexcept;
2503 #if SIMDUTF_SPAN
2504simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf16be(
2505 std::span<const char16_t> valid_utf16_input) noexcept {
2506 return utf32_length_from_utf16be(valid_utf16_input.data(),
2507 valid_utf16_input.size());
2508}
2509 #endif // SIMDUTF_SPAN
2510#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2511
2512#if SIMDUTF_FEATURE_UTF16
2527simdutf_warn_unused size_t count_utf16(const char16_t *input,
2528 size_t length) noexcept;
2529 #if SIMDUTF_SPAN
2530simdutf_really_inline simdutf_warn_unused size_t
2531count_utf16(std::span<const char16_t> valid_utf16_input) noexcept {
2532 return count_utf16(valid_utf16_input.data(), valid_utf16_input.size());
2533}
2534 #endif // SIMDUTF_SPAN
2535
2550simdutf_warn_unused size_t count_utf16le(const char16_t *input,
2551 size_t length) noexcept;
2552 #if SIMDUTF_SPAN
2553simdutf_really_inline simdutf_warn_unused size_t
2554count_utf16le(std::span<const char16_t> valid_utf16_input) noexcept {
2555 return count_utf16le(valid_utf16_input.data(), valid_utf16_input.size());
2556}
2557 #endif // SIMDUTF_SPAN
2558
2573simdutf_warn_unused size_t count_utf16be(const char16_t *input,
2574 size_t length) noexcept;
2575 #if SIMDUTF_SPAN
2576simdutf_really_inline simdutf_warn_unused size_t
2577count_utf16be(std::span<const char16_t> valid_utf16_input) noexcept {
2578 return count_utf16be(valid_utf16_input.data(), valid_utf16_input.size());
2579}
2580 #endif // SIMDUTF_SPAN
2581#endif // SIMDUTF_FEATURE_UTF16
2582
2583#if SIMDUTF_FEATURE_UTF8
2596simdutf_warn_unused size_t count_utf8(const char *input,
2597 size_t length) noexcept;
2598 #if SIMDUTF_SPAN
2599simdutf_really_inline simdutf_warn_unused size_t count_utf8(
2600 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
2601 return count_utf8(reinterpret_cast<const char *>(valid_utf8_input.data()),
2602 valid_utf8_input.size());
2603}
2604 #endif // SIMDUTF_SPAN
2605
2620simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length);
2621 #if SIMDUTF_SPAN
2622simdutf_really_inline simdutf_warn_unused size_t trim_partial_utf8(
2623 const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept {
2624 return trim_partial_utf8(
2625 reinterpret_cast<const char *>(valid_utf8_input.data()),
2626 valid_utf8_input.size());
2627}
2628 #endif // SIMDUTF_SPAN
2629#endif // SIMDUTF_FEATURE_UTF8
2630
2631#if SIMDUTF_FEATURE_UTF16
2646simdutf_warn_unused size_t trim_partial_utf16be(const char16_t *input,
2647 size_t length);
2648 #if SIMDUTF_SPAN
2649simdutf_really_inline simdutf_warn_unused size_t
2650trim_partial_utf16be(std::span<const char16_t> valid_utf16_input) noexcept {
2651 return trim_partial_utf16be(valid_utf16_input.data(),
2652 valid_utf16_input.size());
2653}
2654 #endif // SIMDUTF_SPAN
2655
2670simdutf_warn_unused size_t trim_partial_utf16le(const char16_t *input,
2671 size_t length);
2672 #if SIMDUTF_SPAN
2673simdutf_really_inline simdutf_warn_unused size_t
2674trim_partial_utf16le(std::span<const char16_t> valid_utf16_input) noexcept {
2675 return trim_partial_utf16le(valid_utf16_input.data(),
2676 valid_utf16_input.size());
2677}
2678 #endif // SIMDUTF_SPAN
2679
2694simdutf_warn_unused size_t trim_partial_utf16(const char16_t *input,
2695 size_t length);
2696 #if SIMDUTF_SPAN
2697simdutf_really_inline simdutf_warn_unused size_t
2698trim_partial_utf16(std::span<const char16_t> valid_utf16_input) noexcept {
2699 return trim_partial_utf16(valid_utf16_input.data(), valid_utf16_input.size());
2700}
2701 #endif // SIMDUTF_SPAN
2702#endif // SIMDUTF_FEATURE_UTF16
2703
2704#if SIMDUTF_FEATURE_BASE64
// Defaults SIMDUTF_NEED_TRAILING_ZEROES to 1 unless the build has already
// defined it. What the flag controls is not visible in this part of the
// header.
2705 #ifndef SIMDUTF_NEED_TRAILING_ZEROES
2706 #define SIMDUTF_NEED_TRAILING_ZEROES 1
2707 #endif
// Options selecting the base64 variant to encode or decode.
//
// ASCII spaces are ' ', '\t', '\n', '\r' and '\f'. "Garbage" characters are
// those that are neither part of the base64 alphabet nor ASCII spaces.
enum base64_options : uint64_t {
  // Standard base64 format (with padding).
  base64_default = 0,
  // base64url format (no padding).
  base64_url = 1,
  // Modifier for base64_default and base64_url.
  base64_reverse_padding = 2,
  // Standard base64 format without padding.
  base64_default_no_padding = base64_default | base64_reverse_padding,
  // base64url with padding.
  base64_url_with_padding = base64_url | base64_reverse_padding,
  // Standard base64 format accepting garbage characters.
  base64_default_accept_garbage = 4,
  // base64url format accepting garbage characters.
  base64_url_accept_garbage = 5,
};
2726
// Options describing how the final chunk is treated when decoding base64.
// See https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64
enum last_chunk_handling_options : uint64_t {
  // Standard base64 format, decode partial final chunk.
  loose = 0,
  // Error when the last chunk is partial, 2 or 3 chars, and unpadded, or
  // non-zero bit padding.
  strict = 1,
  // If the last chunk is partial (2 or 3 chars), ignore it (no error).
  stop_before_partial = 2,
};
2737
2747simdutf_warn_unused size_t
2748maximal_binary_length_from_base64(const char *input, size_t length) noexcept;
2749 #if SIMDUTF_SPAN
2750simdutf_really_inline simdutf_warn_unused size_t
2751maximal_binary_length_from_base64(
2752 const detail::input_span_of_byte_like auto &input) noexcept {
2753 return maximal_binary_length_from_base64(
2754 reinterpret_cast<const char *>(input.data()), input.size());
2755}
2756 #endif // SIMDUTF_SPAN
2757
2768simdutf_warn_unused size_t maximal_binary_length_from_base64(
2769 const char16_t *input, size_t length) noexcept;
2770 #if SIMDUTF_SPAN
2771simdutf_really_inline simdutf_warn_unused size_t
2772maximal_binary_length_from_base64(std::span<const char16_t> input) noexcept {
2773 return maximal_binary_length_from_base64(input.data(), input.size());
2774}
2775 #endif // SIMDUTF_SPAN
2776
2831simdutf_warn_unused result base64_to_binary(
2832 const char *input, size_t length, char *output,
2833 base64_options options = base64_default,
2834 last_chunk_handling_options last_chunk_options = loose) noexcept;
2835 #if SIMDUTF_SPAN
2836simdutf_really_inline simdutf_warn_unused result base64_to_binary(
2837 const detail::input_span_of_byte_like auto &input,
2838 detail::output_span_of_byte_like auto &&binary_output,
2839 base64_options options = base64_default,
2840 last_chunk_handling_options last_chunk_options = loose) noexcept {
2841 return base64_to_binary(reinterpret_cast<const char *>(input.data()),
2842 input.size(),
2843 reinterpret_cast<char *>(binary_output.data()),
2844 options, last_chunk_options);
2845}
2846 #endif // SIMDUTF_SPAN
2847
// Length helper taking only a byte count and the base64 variant
// (`options`, default `base64_default`); declared here, defined elsewhere.
// Presumably returns the encoded length for `length` input bytes — confirm
// against the definition.
2854simdutf_warn_unused size_t base64_length_from_binary(
2855 size_t length, base64_options options = base64_default) noexcept;
2856
2878size_t binary_to_base64(const char *input, size_t length, char *output,
2879 base64_options options = base64_default) noexcept;
2880 #if SIMDUTF_SPAN
2881simdutf_really_inline simdutf_warn_unused size_t
2882binary_to_base64(const detail::input_span_of_byte_like auto &input,
2883 detail::output_span_of_byte_like auto &&binary_output,
2884 base64_options options = base64_default) noexcept {
2885 return binary_to_base64(
2886 reinterpret_cast<const char *>(input.data()), input.size(),
2887 reinterpret_cast<char *>(binary_output.data()), options);
2888}
2889 #endif // SIMDUTF_SPAN
2890
2891 #if SIMDUTF_ATOMIC_REF
2927size_t
2928atomic_binary_to_base64(const char *input, size_t length, char *output,
2929 base64_options options = base64_default) noexcept;
2930 #if SIMDUTF_SPAN
2931simdutf_really_inline simdutf_warn_unused size_t
2932atomic_binary_to_base64(const detail::input_span_of_byte_like auto &input,
2933 detail::output_span_of_byte_like auto &&binary_output,
2934 base64_options options = base64_default) noexcept {
2935 return atomic_binary_to_base64(
2936 reinterpret_cast<const char *>(input.data()), input.size(),
2937 reinterpret_cast<char *>(binary_output.data()), options);
2938}
2939 #endif // SIMDUTF_SPAN
2940 #endif // SIMDUTF_ATOMIC_REF
2941
2998simdutf_warn_unused result
2999base64_to_binary(const char16_t *input, size_t length, char *output,
3000 base64_options options = base64_default,
3001 last_chunk_handling_options last_chunk_options =
3002 last_chunk_handling_options::loose) noexcept;
3003 #if SIMDUTF_SPAN
3004simdutf_really_inline simdutf_warn_unused result base64_to_binary(
3005 std::span<const char16_t> input,
3006 detail::output_span_of_byte_like auto &&binary_output,
3007 base64_options options = base64_default,
3008 last_chunk_handling_options last_chunk_options = loose) noexcept {
3009 return base64_to_binary(input.data(), input.size(),
3010 reinterpret_cast<char *>(binary_output.data()),
3011 options, last_chunk_options);
3012}
3013 #endif // SIMDUTF_SPAN
3014
3076simdutf_warn_unused result
3077base64_to_binary_safe(const char *input, size_t length, char *output,
3078 size_t &outlen, base64_options options = base64_default,
3079 last_chunk_handling_options last_chunk_options =
3080 last_chunk_handling_options::loose) noexcept;
3081 #if SIMDUTF_SPAN
3082simdutf_really_inline simdutf_warn_unused result base64_to_binary_safe(
3083 const detail::input_span_of_byte_like auto &input,
3084 detail::output_span_of_byte_like auto &&binary_output,
3085 base64_options options = base64_default,
3086 last_chunk_handling_options last_chunk_options = loose) noexcept {
3087 // we can't write the outlen to the provided output span, the user will have
3088 // to pick it up from the returned value instead (assuming success). we still
3089 // get the benefit of providing info of how long the output buffer is.
3090 size_t outlen = binary_output.size();
3091 return base64_to_binary_safe(reinterpret_cast<const char *>(input.data()),
3092 input.size(),
3093 reinterpret_cast<char *>(binary_output.data()),
3094 outlen, options, last_chunk_options);
3095}
3096 #endif // SIMDUTF_SPAN
3097
3098simdutf_warn_unused result
3099base64_to_binary_safe(const char16_t *input, size_t length, char *output,
3100 size_t &outlen, base64_options options = base64_default,
3101 last_chunk_handling_options last_chunk_options =
3102 last_chunk_handling_options::loose) noexcept;
3103 #if SIMDUTF_SPAN
3104simdutf_really_inline simdutf_warn_unused result base64_to_binary_safe(
3105 std::span<const char16_t> input,
3106 detail::output_span_of_byte_like auto &&binary_output,
3107 base64_options options = base64_default,
3108 last_chunk_handling_options last_chunk_options = loose) noexcept {
3109 // we can't write the outlen to the provided output span, the user will have
3110 // to pick it up from the returned value instead (assuming success). we still
3111 // get the benefit of providing info of how long the output buffer is.
3112 size_t outlen = binary_output.size();
3113 return base64_to_binary_safe(input.data(), input.size(),
3114 reinterpret_cast<char *>(binary_output.data()),
3115 outlen, options, last_chunk_options);
3116}
3117 #endif // SIMDUTF_SPAN
3118#endif // SIMDUTF_FEATURE_BASE64
3119
3128public:
3138 virtual std::string name() const { return std::string(_name); }
3139
3149 virtual std::string description() const { return std::string(_description); }
3150
3161
3162#if SIMDUTF_FEATURE_DETECT_ENCODING
// Virtual but not pure: a default definition exists outside this chunk.
// Takes a byte buffer (`input`, `length`) and returns a single
// `encoding_type`; how the encoding is chosen is not visible here.
3169 virtual encoding_type autodetect_encoding(const char *input,
3170 size_t length) const noexcept;
3171
// Pure virtual: each concrete implementation must provide it.
// Takes a byte buffer (`input`, `length`) and returns an `int`; presumably a
// bit set of candidate encodings — confirm against the implementations.
3178 virtual int detect_encodings(const char *input,
3179 size_t length) const noexcept = 0;
3180#endif // SIMDUTF_FEATURE_DETECT_ENCODING
3181
3189 virtual uint32_t required_instruction_sets() const {
3190 return _required_instruction_sets;
3191 }
3192
3193#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
// Pure virtual: each concrete implementation must provide it.
// Takes `len` bytes at `buf`; presumably returns true when they are valid
// UTF-8 — confirm against the implementations.
3203 simdutf_warn_unused virtual bool validate_utf8(const char *buf,
3204 size_t len) const noexcept = 0;
3205#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
3206
3207#if SIMDUTF_FEATURE_UTF8
// Pure virtual: each concrete implementation must provide it.
// Same inputs as validate_utf8 but returns a `result` (declared in
// simdutf/error.h) instead of a bool; its field semantics are not shown here.
3220 simdutf_warn_unused virtual result
3221 validate_utf8_with_errors(const char *buf, size_t len) const noexcept = 0;
3222#endif // SIMDUTF_FEATURE_UTF8
3223
3224#if SIMDUTF_FEATURE_ASCII
// Pure virtual: each concrete implementation must provide it.
// Takes `len` bytes at `buf`; presumably returns true when they are all
// ASCII — confirm against the implementations.
3234 simdutf_warn_unused virtual bool
3235 validate_ascii(const char *buf, size_t len) const noexcept = 0;
3236
// Pure virtual: each concrete implementation must provide it.
// Same inputs as validate_ascii but returns a `result` instead of a bool;
// its field semantics are not shown here.
3249 simdutf_warn_unused virtual result
3250 validate_ascii_with_errors(const char *buf, size_t len) const noexcept = 0;
3251#endif // SIMDUTF_FEATURE_ASCII
3252
3253#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
// Pure virtual: each concrete implementation must provide it.
// Takes `len` char16_t units at `buf` (presumably a unit count, not bytes —
// confirm); presumably returns true when they are valid UTF-16LE.
3268 simdutf_warn_unused virtual bool
3269 validate_utf16le(const char16_t *buf, size_t len) const noexcept = 0;
3270#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
3271
3272#if SIMDUTF_FEATURE_UTF16
// Pure virtual: each concrete implementation must provide it.
// Takes `len` char16_t units at `buf` (presumably a unit count, not bytes —
// confirm); presumably returns true when they are valid UTF-16BE.
3287 simdutf_warn_unused virtual bool
3288 validate_utf16be(const char16_t *buf, size_t len) const noexcept = 0;
3289
// Pure virtual: each concrete implementation must provide it.
// Same inputs as validate_utf16le but returns a `result` instead of a bool;
// its field semantics are not shown here.
3306 simdutf_warn_unused virtual result
3307 validate_utf16le_with_errors(const char16_t *buf,
3308 size_t len) const noexcept = 0;
3309
// Pure virtual: each concrete implementation must provide it.
// Same inputs as validate_utf16be but returns a `result` instead of a bool;
// its field semantics are not shown here.
3326 simdutf_warn_unused virtual result
3327 validate_utf16be_with_errors(const char16_t *buf,
3328 size_t len) const noexcept = 0;
3329#endif // SIMDUTF_FEATURE_UTF16
3330
3331#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
3344 simdutf_warn_unused virtual bool
3345 validate_utf32(const char32_t *buf, size_t len) const noexcept = 0;
3346#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
3347
3348#if SIMDUTF_FEATURE_UTF32
3364 simdutf_warn_unused virtual result
3365 validate_utf32_with_errors(const char32_t *buf,
3366 size_t len) const noexcept = 0;
3367#endif // SIMDUTF_FEATURE_UTF32
3368
3369#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
3380 simdutf_warn_unused virtual size_t
3381 convert_latin1_to_utf8(const char *input, size_t length,
3382 char *utf8_output) const noexcept = 0;
3383#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
3384
3385#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
3396 simdutf_warn_unused virtual size_t
3397 convert_latin1_to_utf16le(const char *input, size_t length,
3398 char16_t *utf16_output) const noexcept = 0;
3399
3410 simdutf_warn_unused virtual size_t
3411 convert_latin1_to_utf16be(const char *input, size_t length,
3412 char16_t *utf16_output) const noexcept = 0;
3413#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
3414
3415#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
3426 simdutf_warn_unused virtual size_t
3427 convert_latin1_to_utf32(const char *input, size_t length,
3428 char32_t *utf32_buffer) const noexcept = 0;
3429#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
3430
3431#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
3444 simdutf_warn_unused virtual size_t
3445 convert_utf8_to_latin1(const char *input, size_t length,
3446 char *latin1_output) const noexcept = 0;
3447
3464 simdutf_warn_unused virtual result
3465 convert_utf8_to_latin1_with_errors(const char *input, size_t length,
3466 char *latin1_output) const noexcept = 0;
3467
3487 simdutf_warn_unused virtual size_t
3488 convert_valid_utf8_to_latin1(const char *input, size_t length,
3489 char *latin1_output) const noexcept = 0;
3490#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
3491
3492#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
3505 simdutf_warn_unused virtual size_t
3506 convert_utf8_to_utf16le(const char *input, size_t length,
3507 char16_t *utf16_output) const noexcept = 0;
3508
3521 simdutf_warn_unused virtual size_t
3522 convert_utf8_to_utf16be(const char *input, size_t length,
3523 char16_t *utf16_output) const noexcept = 0;
3524
3540 simdutf_warn_unused virtual result convert_utf8_to_utf16le_with_errors(
3541 const char *input, size_t length,
3542 char16_t *utf16_output) const noexcept = 0;
3543
3559 simdutf_warn_unused virtual result convert_utf8_to_utf16be_with_errors(
3560 const char *input, size_t length,
3561 char16_t *utf16_output) const noexcept = 0;
3562#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
3563
3564#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
3577 simdutf_warn_unused virtual size_t
3578 convert_utf8_to_utf32(const char *input, size_t length,
3579 char32_t *utf32_output) const noexcept = 0;
3580
3595 simdutf_warn_unused virtual result
3596 convert_utf8_to_utf32_with_errors(const char *input, size_t length,
3597 char32_t *utf32_output) const noexcept = 0;
3598#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
3599
3600#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
3611 simdutf_warn_unused virtual size_t
3612 convert_valid_utf8_to_utf16le(const char *input, size_t length,
3613 char16_t *utf16_buffer) const noexcept = 0;
3614
3625 simdutf_warn_unused virtual size_t
3626 convert_valid_utf8_to_utf16be(const char *input, size_t length,
3627 char16_t *utf16_buffer) const noexcept = 0;
3628#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
3629
3630#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
3641 simdutf_warn_unused virtual size_t
3642 convert_valid_utf8_to_utf32(const char *input, size_t length,
3643 char32_t *utf32_buffer) const noexcept = 0;
3644#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
3645
3646#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
3659 simdutf_warn_unused virtual size_t
3660 utf16_length_from_utf8(const char *input, size_t length) const noexcept = 0;
3661#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
3662
3663#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
3678 simdutf_warn_unused virtual size_t
3679 utf32_length_from_utf8(const char *input, size_t length) const noexcept = 0;
3680#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
3681
3682#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
3699 simdutf_warn_unused virtual size_t
3700 convert_utf16le_to_latin1(const char16_t *input, size_t length,
3701 char *latin1_buffer) const noexcept = 0;
3702
3719 simdutf_warn_unused virtual size_t
3720 convert_utf16be_to_latin1(const char16_t *input, size_t length,
3721 char *latin1_buffer) const noexcept = 0;
3722
3742 simdutf_warn_unused virtual result
3743 convert_utf16le_to_latin1_with_errors(const char16_t *input, size_t length,
3744 char *latin1_buffer) const noexcept = 0;
3745
3765 simdutf_warn_unused virtual result
3766 convert_utf16be_to_latin1_with_errors(const char16_t *input, size_t length,
3767 char *latin1_buffer) const noexcept = 0;
3768
3789 simdutf_warn_unused virtual size_t
3790 convert_valid_utf16le_to_latin1(const char16_t *input, size_t length,
3791 char *latin1_buffer) const noexcept = 0;
3792
3813 simdutf_warn_unused virtual size_t
3814 convert_valid_utf16be_to_latin1(const char16_t *input, size_t length,
3815 char *latin1_buffer) const noexcept = 0;
3816#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
3817
3818#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
3834 simdutf_warn_unused virtual size_t
3835 convert_utf16le_to_utf8(const char16_t *input, size_t length,
3836 char *utf8_buffer) const noexcept = 0;
3837
3853 simdutf_warn_unused virtual size_t
3854 convert_utf16be_to_utf8(const char16_t *input, size_t length,
3855 char *utf8_buffer) const noexcept = 0;
3856
3875 simdutf_warn_unused virtual result
3876 convert_utf16le_to_utf8_with_errors(const char16_t *input, size_t length,
3877 char *utf8_buffer) const noexcept = 0;
3878
3897 simdutf_warn_unused virtual result
3898 convert_utf16be_to_utf8_with_errors(const char16_t *input, size_t length,
3899 char *utf8_buffer) const noexcept = 0;
3900
3915 simdutf_warn_unused virtual size_t
3916 convert_valid_utf16le_to_utf8(const char16_t *input, size_t length,
3917 char *utf8_buffer) const noexcept = 0;
3918
3933 simdutf_warn_unused virtual size_t
3934 convert_valid_utf16be_to_utf8(const char16_t *input, size_t length,
3935 char *utf8_buffer) const noexcept = 0;
3936#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
3937
3938#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
3954 simdutf_warn_unused virtual size_t
3955 convert_utf16le_to_utf32(const char16_t *input, size_t length,
3956 char32_t *utf32_buffer) const noexcept = 0;
3957
3973 simdutf_warn_unused virtual size_t
3974 convert_utf16be_to_utf32(const char16_t *input, size_t length,
3975 char32_t *utf32_buffer) const noexcept = 0;
3976
3996 const char16_t *input, size_t length,
3997 char32_t *utf32_buffer) const noexcept = 0;
3998
4018 const char16_t *input, size_t length,
4019 char32_t *utf32_buffer) const noexcept = 0;
4020
4035 simdutf_warn_unused virtual size_t
4036 convert_valid_utf16le_to_utf32(const char16_t *input, size_t length,
4037 char32_t *utf32_buffer) const noexcept = 0;
4038
4053 simdutf_warn_unused virtual size_t
4054 convert_valid_utf16be_to_utf32(const char16_t *input, size_t length,
4055 char32_t *utf32_buffer) const noexcept = 0;
4056#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
4057
4058#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
4073 simdutf_warn_unused virtual size_t
4074 utf8_length_from_utf16le(const char16_t *input,
4075 size_t length) const noexcept = 0;
4076
4091 simdutf_warn_unused virtual size_t
4092 utf8_length_from_utf16be(const char16_t *input,
4093 size_t length) const noexcept = 0;
4094#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
4095
4096#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
4113 simdutf_warn_unused virtual size_t
4114 convert_utf32_to_latin1(const char32_t *input, size_t length,
4115 char *latin1_buffer) const noexcept = 0;
4116#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
4117
4118#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
4138 simdutf_warn_unused virtual result
4139 convert_utf32_to_latin1_with_errors(const char32_t *input, size_t length,
4140 char *latin1_buffer) const noexcept = 0;
4141
4162 simdutf_warn_unused virtual size_t
4163 convert_valid_utf32_to_latin1(const char32_t *input, size_t length,
4164 char *latin1_buffer) const noexcept = 0;
4165#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
4166
4167#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
4183 simdutf_warn_unused virtual size_t
4184 convert_utf32_to_utf8(const char32_t *input, size_t length,
4185 char *utf8_buffer) const noexcept = 0;
4186
4204 simdutf_warn_unused virtual result
4205 convert_utf32_to_utf8_with_errors(const char32_t *input, size_t length,
4206 char *utf8_buffer) const noexcept = 0;
4207
4222 simdutf_warn_unused virtual size_t
4223 convert_valid_utf32_to_utf8(const char32_t *input, size_t length,
4224 char *utf8_buffer) const noexcept = 0;
4225#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
4226
4227#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
/// Length a Latin1 string of `length` bytes takes once converted to UTF-16.
/// This default returns `length` unchanged, i.e. one output unit per input
/// byte. Virtual but not pure, so overriding is optional.
4238 simdutf_warn_unused virtual size_t
4239 utf16_length_from_latin1(size_t length) const noexcept {
4240 return length;
4241 }
4242#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
4243
4244#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
4260 simdutf_warn_unused virtual size_t
4261 convert_utf32_to_utf16le(const char32_t *input, size_t length,
4262 char16_t *utf16_buffer) const noexcept = 0;
4263
4279 simdutf_warn_unused virtual size_t
4280 convert_utf32_to_utf16be(const char32_t *input, size_t length,
4281 char16_t *utf16_buffer) const noexcept = 0;
4282
4302 const char32_t *input, size_t length,
4303 char16_t *utf16_buffer) const noexcept = 0;
4304
4324 const char32_t *input, size_t length,
4325 char16_t *utf16_buffer) const noexcept = 0;
4326
4341 simdutf_warn_unused virtual size_t
4342 convert_valid_utf32_to_utf16le(const char32_t *input, size_t length,
4343 char16_t *utf16_buffer) const noexcept = 0;
4344
4359 simdutf_warn_unused virtual size_t
4360 convert_valid_utf32_to_utf16be(const char32_t *input, size_t length,
4361 char16_t *utf16_buffer) const noexcept = 0;
4362#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
4363
4364#if SIMDUTF_FEATURE_UTF16
4379 virtual void change_endianness_utf16(const char16_t *input, size_t length,
4380 char16_t *output) const noexcept = 0;
4381#endif // SIMDUTF_FEATURE_UTF16
4382
4383#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
4392 simdutf_warn_unused virtual size_t
4393 utf8_length_from_latin1(const char *input, size_t length) const noexcept = 0;
4394#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
4395
4396#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
4409 simdutf_warn_unused virtual size_t
4410 utf8_length_from_utf32(const char32_t *input,
4411 size_t length) const noexcept = 0;
4412#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
4413
4414#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
/// Length a UTF-32 string of `length` code units takes in Latin1 format.
/// This default returns `length` unchanged, i.e. one Latin1 byte per UTF-32
/// code unit. Virtual but not pure, so overriding is optional.
4426 simdutf_warn_unused virtual size_t
4427 latin1_length_from_utf32(size_t length) const noexcept {
4428 return length;
4429 }
4430#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
4431
4432#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
4444 simdutf_warn_unused virtual size_t
4445 latin1_length_from_utf8(const char *input, size_t length) const noexcept = 0;
4446#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
4447
4448#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
/// Length a UTF-16 (LE or BE) string of `length` code units takes in Latin1
/// format. This default returns `length` unchanged, i.e. one Latin1 byte per
/// UTF-16 code unit. Virtual but not pure, so overriding is optional.
4464 simdutf_warn_unused virtual size_t
4465 latin1_length_from_utf16(size_t length) const noexcept {
4466 return length;
4467 }
4468#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
4469
4470#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
4483 simdutf_warn_unused virtual size_t
4484 utf16_length_from_utf32(const char32_t *input,
4485 size_t length) const noexcept = 0;
4486#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
4487
4488#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
/// Length a Latin1 string of `length` bytes takes once converted to UTF-32.
/// This default returns `length` unchanged, so the result counts UTF-32 code
/// units (one per Latin1 byte), not bytes. Virtual but not pure, so overriding
/// is optional.
4497 simdutf_warn_unused virtual size_t
4498 utf32_length_from_latin1(size_t length) const noexcept {
4499 return length;
4500 }
4501#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
4502
4503#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
4521 simdutf_warn_unused virtual size_t
4522 utf32_length_from_utf16le(const char16_t *input,
4523 size_t length) const noexcept = 0;
4524
4542 simdutf_warn_unused virtual size_t
4543 utf32_length_from_utf16be(const char16_t *input,
4544 size_t length) const noexcept = 0;
4545#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
4546
4547#if SIMDUTF_FEATURE_UTF16
4563 simdutf_warn_unused virtual size_t
4564 count_utf16le(const char16_t *input, size_t length) const noexcept = 0;
4565
4581 simdutf_warn_unused virtual size_t
4582 count_utf16be(const char16_t *input, size_t length) const noexcept = 0;
4583#endif // SIMDUTF_FEATURE_UTF16
4584
4585#if SIMDUTF_FEATURE_UTF8
4598 simdutf_warn_unused virtual size_t
4599 count_utf8(const char *input, size_t length) const noexcept = 0;
4600#endif // SIMDUTF_FEATURE_UTF8
4601
4602#if SIMDUTF_FEATURE_BASE64
4613 simdutf_warn_unused size_t maximal_binary_length_from_base64(
4614 const char *input, size_t length) const noexcept;
4615
4627 simdutf_warn_unused size_t maximal_binary_length_from_base64(
4628 const char16_t *input, size_t length) const noexcept;
4629
4662 simdutf_warn_unused virtual result
4663 base64_to_binary(const char *input, size_t length, char *output,
4664 base64_options options = base64_default,
4665 last_chunk_handling_options last_chunk_options =
4666 last_chunk_handling_options::loose) const noexcept = 0;
4667
4699 simdutf_warn_unused virtual full_result base64_to_binary_details(
4700 const char *input, size_t length, char *output,
4701 base64_options options = base64_default,
4702 last_chunk_handling_options last_chunk_options =
4703 last_chunk_handling_options::loose) const noexcept = 0;
4737 simdutf_warn_unused virtual result
4738 base64_to_binary(const char16_t *input, size_t length, char *output,
4739 base64_options options = base64_default,
4740 last_chunk_handling_options last_chunk_options =
4741 last_chunk_handling_options::loose) const noexcept = 0;
4742
4774 simdutf_warn_unused virtual full_result base64_to_binary_details(
4775 const char16_t *input, size_t length, char *output,
4776 base64_options options = base64_default,
4777 last_chunk_handling_options last_chunk_options =
4778 last_chunk_handling_options::loose) const noexcept = 0;
4787 simdutf_warn_unused size_t base64_length_from_binary(
4788 size_t length, base64_options options = base64_default) const noexcept;
4789
4811 virtual size_t
4812 binary_to_base64(const char *input, size_t length, char *output,
4813 base64_options options = base64_default) const noexcept = 0;
4814#endif // SIMDUTF_FEATURE_BASE64
4815
4816#ifdef SIMDUTF_INTERNAL_TESTS
4817 // This method is exported only in developer mode, its purpose
4818 // is to expose some internal test procedures from the given
4819 // implementation and then use them through our standard test
4820 // framework.
4821 //
4822 // Regular users should not use it, the tests of the public
4823 // API are enough.
4824
// Pairs a display name with the function pointer that runs one internal test.
4825 struct TestProcedure {
4826 // display name of the test
4827 std::string name;
4828
4829 // test entry point; it receives a const reference to an implementation
// and returns void, so pass/fail is not conveyed through a return value.
// NOTE(review): how a failure is reported is not visible here — confirm
// against the test framework.
4830 void (*procedure)(const implementation &);
4831 };
4832
4833 virtual std::vector<TestProcedure> internal_tests() const;
4834#endif
4835
4836protected:
/// Protected constructor, reachable only from derived classes.
///
/// Stores the three arguments in `_name`, `_description` and
/// `_required_instruction_sets`. The two strings are kept as raw
/// `const char *` pointers and are not copied, so the pointed-to characters
/// must stay valid for as long as name()/description() may be called.
///
/// @param name                       pointer stored in `_name`
/// @param description                pointer stored in `_description`
/// @param required_instruction_sets  value stored in
///                                   `_required_instruction_sets`
4839 simdutf_really_inline implementation(const char *name,
4840 const char *description,
4841 uint32_t required_instruction_sets)
4842 : _name(name), _description(description),
4843 _required_instruction_sets(required_instruction_sets) {}
4844
4845protected:
/// Defaulted destructor, declared under `protected:` and without `virtual`
/// on this declaration: code outside the class hierarchy therefore cannot
/// destroy an object through an `implementation` pointer.
4846 ~implementation() = default;
4847
4848private:
4852 const char *_name;
4853
4857 const char *_description;
4858
4862 const uint32_t _required_instruction_sets;
4863};
4864
4866namespace internal {
4867
4871class available_implementation_list {
4872public:
4874 simdutf_really_inline available_implementation_list() {}
4876 size_t size() const noexcept;
4878 const implementation *const *begin() const noexcept;
4880 const implementation *const *end() const noexcept;
4881
4895 const implementation *operator[](const std::string &name) const noexcept {
4896 for (const implementation *impl : *this) {
4897 if (impl->name() == name) {
4898 return impl;
4899 }
4900 }
4901 return nullptr;
4902 }
4903
4917 const implementation *detect_best_supported() const noexcept;
4918};
4919
/**
 * Thin wrapper around a `T *` that can be read and reassigned like a plain
 * pointer.
 *
 * When SIMDUTF_NO_THREADS is defined the pointer is stored as a raw `T *`;
 * otherwise it is stored in a `std::atomic<T *>`, every read goes through
 * `load()` and every assignment through the atomic's assignment operator
 * (both with the standard library's default memory order).
 *
 * The wrapper never owns or deletes the pointee.
 */
template <typename T> class atomic_ptr {
public:
  /** Wraps `_ptr`. Intentionally implicit so a `T *` converts directly. */
  atomic_ptr(T *_ptr) : ptr{_ptr} {}

  /** Read access through a const wrapper. */
  operator const T *() const { return get(); }
  const T &operator*() const { return *get(); }
  const T *operator->() const { return get(); }

  /** Read/write access to the pointee through a non-const wrapper. */
  operator T *() { return get(); }
  T &operator*() { return *get(); }
  T *operator->() { return get(); }

  /** Replaces the stored pointer with `_ptr` and returns this wrapper. */
  atomic_ptr &operator=(T *_ptr) {
    ptr = _ptr;
    return *this;
  }

private:
  /** Single point where the stored pointer is read, for both build modes. */
  T *get() const {
#if defined(SIMDUTF_NO_THREADS)
    return ptr;
#else
    return ptr.load();
#endif
  }

#if defined(SIMDUTF_NO_THREADS)
  T *ptr;
#else
  std::atomic<T *> ptr;
#endif
};
4959
4960class detect_best_supported_implementation_on_first_use;
4961
4962} // namespace internal
4963
/// Declaration only (defined elsewhere): returns a const reference to the
/// internal::available_implementation_list, which can be iterated with
/// begin()/end() or searched by name with operator[].
4967extern SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list &
4968get_available_implementations();
4969
/// Declaration only (defined elsewhere): returns a non-const reference to an
/// internal::atomic_ptr<const implementation>. Because atomic_ptr provides
/// operator=(T *), a caller can assign a different implementation pointer
/// through the returned reference as well as read the current one.
4976extern SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation> &
4977get_active_implementation();
4978
4979} // namespace simdutf
4980
4981#endif // SIMDUTF_IMPLEMENTATION_H
An implementation of simdutf for a particular CPU architecture.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-32 string into Latin1 string.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-16LE string.
virtual simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept
Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
virtual simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, size_t length) const noexcept=0
Compute the number of 4-byte code units that this UTF-16BE string would require in UTF-32 format.
virtual simdutf_warn_unused size_t convert_utf8_to_utf16be(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16BE string.
virtual simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-16BE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-16BE string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_latin1_to_utf16le(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert Latin1 string into UTF-16LE string.
virtual simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16BE string.
virtual simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into Latin1 string.
virtual simdutf_warn_unused full_result base64_to_binary_details(const char *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output while returning more details than base64_to_binary.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-16LE string into UTF-32 string.
virtual simdutf_warn_unused size_t latin1_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-8 string would require in Latin1 format.
virtual simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-32 string into Latin1 string and stop on error.
virtual simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
virtual simdutf_warn_unused size_t convert_latin1_to_utf16be(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert Latin1 string into UTF-16BE string.
virtual simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept
Compute the number of bytes that this UTF-32 string would require in Latin1 format.
virtual simdutf_warn_unused full_result base64_to_binary_details(const char16_t *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output while returning more details than base64_to_binary.
virtual simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16LE string.
virtual simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept=0
Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
virtual simdutf_warn_unused size_t count_utf8(const char *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-16BE string.
virtual simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of 4-byte code units that this UTF-8 string would require in UTF-32 format.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-16LE string into UTF-8 string.
virtual simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16BE string and stop on error.
virtual simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept=0
Validate the UTF-8 string and stop on errors.
virtual simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
virtual simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf8_to_utf16le(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16LE string.
virtual simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16LE string and stop on error.
virtual simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-8 string and stop on error.
virtual size_t binary_to_base64(const char *input, size_t length, char *output, base64_options options=base64_default) const noexcept=0
Convert a binary input to a base64 output.
virtual simdutf_warn_unused size_t convert_latin1_to_utf32(const char *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert Latin1 string into UTF-32 string.
virtual simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char *input, size_t length, char32_t *utf32_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-8 string.
virtual simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept
Return the number of 4-byte code units that this Latin1 string would require in UTF-32 format.
virtual simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16LE string. This function may be best when you expect the input to be almost always ...
virtual simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16LE string and stop on error.
virtual simdutf_warn_unused size_t count_utf16le(const char16_t *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-32 string into Latin1 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-16BE string into Latin1 string.
bool supported_by_runtime_system() const
Check whether the instruction sets this implementation is compiled against are supported by the current CPU.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16BE string and stop on error.
virtual simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-8 string and stop on error.
virtual simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-32 string and stop on error.
virtual simdutf_warn_unused result base64_to_binary(const char *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output.
virtual simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16BE string.
virtual simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept=0
Validate the UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16LE string would require in UTF-32 format.
virtual void change_endianness_utf16(const char16_t *input, size_t length, char16_t *output) const noexcept=0
Change the endianness of the input.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-16BE string.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert valid UTF-8 string into Latin1 string.
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char *input, size_t length) const noexcept
Provide the maximal binary length in bytes given the base64 input.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-16LE string.
virtual std::string name() const
The name of this implementation.
virtual simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept=0
Validate the ASCII string and stop on error.
virtual simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept=0
Validate the UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf8_to_utf32(const char *input, size_t length, char32_t *utf32_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-32 string.
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char16_t *input, size_t length) const noexcept
Provide the maximal binary length in bytes given the base64 input.
virtual simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept=0
Validate the UTF-32 string.
virtual simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-8 string.
virtual simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
virtual simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into Latin1 string.
virtual simdutf_warn_unused size_t convert_utf8_to_latin1(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert possibly broken UTF-8 string into Latin1 string.
virtual simdutf_warn_unused result base64_to_binary(const char16_t *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output.
simdutf_warn_unused size_t base64_length_from_binary(size_t length, base64_options options=base64_default) const noexcept
Provide the base64 length in bytes given the length of a binary input.
virtual int detect_encodings(const char *input, size_t length) const noexcept=0
This function will try to detect the possible encodings in one pass.
virtual simdutf_warn_unused size_t count_utf16be(const char16_t *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-8 string and stop on error.
virtual simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept=0
Validate the ASCII string.
virtual simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
virtual simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16BE string and stop on error.
virtual simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert possibly broken UTF-8 string into Latin1 string and stop on error.
virtual std::string description() const
The description of this implementation.
virtual simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept
Return the number of 2-byte code units that this Latin1 string would require in UTF-16 format.
virtual simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16LE string and stop on error.
virtual simdutf_warn_unused size_t convert_latin1_to_utf8(const char *input, size_t length, char *utf8_output) const noexcept=0
Convert Latin1 string into UTF-8 string.
virtual encoding_type autodetect_encoding(const char *input, size_t length) const noexcept
This function will try to detect the encoding.
virtual simdutf_warn_unused size_t utf8_length_from_latin1(const char *input, size_t length) const noexcept=0
Return the number of bytes that this Latin1 string would require in UTF-8 format.