1#ifndef SIMDUTF_IMPLEMENTATION_H
2#define SIMDUTF_IMPLEMENTATION_H
3#if !defined(SIMDUTF_NO_THREADS)
7#ifdef SIMDUTF_INTERNAL_TESTS
10#include "simdutf/common_defs.h"
11#include "simdutf/compiler_check.h"
12#include "simdutf/encoding_types.h"
13#include "simdutf/error.h"
14#include "simdutf/internal/isadetection.h"
18 #include <type_traits>
22#if SIMDUTF_CPLUSPLUS17
23 #include <string_view>
34#define SIMDUTF_FEATURE_DETECT_ENCODING 1
35#define SIMDUTF_FEATURE_ASCII 1
36#define SIMDUTF_FEATURE_LATIN1 1
37#define SIMDUTF_FEATURE_UTF8 1
38#define SIMDUTF_FEATURE_UTF16 1
39#define SIMDUTF_FEATURE_UTF32 1
40#define SIMDUTF_FEATURE_BASE64 1
52concept byte_like = std::is_same_v<T, std::byte> ||
53 std::is_same_v<T, char> ||
54 std::is_same_v<T, signed char> ||
55 std::is_same_v<T, unsigned char>;
58concept is_byte_like = byte_like<std::remove_cvref_t<T>>;
61concept is_pointer = std::is_pointer_v<T>;
69concept input_span_of_byte_like =
requires(
const T &t) {
70 { t.size() }
noexcept -> std::convertible_to<std::size_t>;
71 { t.data() }
noexcept -> is_pointer;
72 { *t.data() }
noexcept -> is_byte_like;
76concept is_mutable = !std::is_const_v<std::remove_reference_t<T>>;
82concept output_span_of_byte_like =
requires(T &t) {
83 { t.size() }
noexcept -> std::convertible_to<std::size_t>;
84 { t.data() }
noexcept -> is_pointer;
85 { *t.data() }
noexcept -> is_byte_like;
86 { *t.data() }
noexcept -> is_mutable;
91#if SIMDUTF_FEATURE_DETECT_ENCODING
102simdutf_warn_unused simdutf::encoding_type
103autodetect_encoding(
const char *input,
size_t length)
noexcept;
104simdutf_really_inline simdutf_warn_unused simdutf::encoding_type
105autodetect_encoding(
const uint8_t *input,
size_t length)
noexcept {
106 return autodetect_encoding(
reinterpret_cast<const char *
>(input), length);
120simdutf_really_inline simdutf_warn_unused simdutf::encoding_type
122 const detail::input_span_of_byte_like
auto &input)
noexcept {
123 return autodetect_encoding(
reinterpret_cast<const char *
>(input.data()),
139simdutf_warn_unused
int detect_encodings(
const char *input,
140 size_t length)
noexcept;
141simdutf_really_inline simdutf_warn_unused
int
142detect_encodings(
const uint8_t *input,
size_t length)
noexcept {
143 return detect_encodings(
reinterpret_cast<const char *
>(input), length);
146simdutf_really_inline simdutf_warn_unused
int
147detect_encodings(
const detail::input_span_of_byte_like
auto &input)
noexcept {
148 return detect_encodings(
reinterpret_cast<const char *
>(input.data()),
154#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
166simdutf_warn_unused
bool validate_utf8(
const char *buf,
size_t len)
noexcept;
168simdutf_really_inline simdutf_warn_unused
bool
169validate_utf8(
const detail::input_span_of_byte_like
auto &input)
noexcept {
170 return validate_utf8(
reinterpret_cast<const char *
>(input.data()),
176#if SIMDUTF_FEATURE_UTF8
189simdutf_warn_unused result validate_utf8_with_errors(
const char *buf,
190 size_t len)
noexcept;
192simdutf_really_inline simdutf_warn_unused result validate_utf8_with_errors(
193 const detail::input_span_of_byte_like
auto &input)
noexcept {
194 return validate_utf8_with_errors(
reinterpret_cast<const char *
>(input.data()),
200#if SIMDUTF_FEATURE_ASCII
210simdutf_warn_unused
bool validate_ascii(
const char *buf,
size_t len)
noexcept;
212simdutf_really_inline simdutf_warn_unused
bool
213validate_ascii(
const detail::input_span_of_byte_like
auto &input)
noexcept {
214 return validate_ascii(
reinterpret_cast<const char *
>(input.data()),
232simdutf_warn_unused result validate_ascii_with_errors(
const char *buf,
233 size_t len)
noexcept;
235simdutf_really_inline simdutf_warn_unused result validate_ascii_with_errors(
236 const detail::input_span_of_byte_like
auto &input)
noexcept {
237 return validate_ascii_with_errors(
238 reinterpret_cast<const char *
>(input.data()), input.size());
243#if SIMDUTF_FEATURE_UTF16
258simdutf_warn_unused
bool validate_utf16(
const char16_t *buf,
259 size_t len)
noexcept;
261simdutf_really_inline simdutf_warn_unused
bool
262validate_utf16(std::span<const char16_t> input)
noexcept {
263 return validate_utf16(input.data(), input.size());
268#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
283simdutf_warn_unused
bool validate_utf16le(
const char16_t *buf,
284 size_t len)
noexcept;
286simdutf_really_inline simdutf_warn_unused
bool
287validate_utf16le(std::span<const char16_t> input)
noexcept {
288 return validate_utf16le(input.data(), input.size());
293#if SIMDUTF_FEATURE_UTF16
308simdutf_warn_unused
bool validate_utf16be(
const char16_t *buf,
309 size_t len)
noexcept;
311simdutf_really_inline simdutf_warn_unused
bool
312validate_utf16be(std::span<const char16_t> input)
noexcept {
313 return validate_utf16be(input.data(), input.size());
334simdutf_warn_unused result validate_utf16_with_errors(
const char16_t *buf,
335 size_t len)
noexcept;
337simdutf_really_inline simdutf_warn_unused result
338validate_utf16_with_errors(std::span<const char16_t> input)
noexcept {
339 return validate_utf16_with_errors(input.data(), input.size());
359simdutf_warn_unused result validate_utf16le_with_errors(
const char16_t *buf,
360 size_t len)
noexcept;
362simdutf_really_inline simdutf_warn_unused result
363validate_utf16le_with_errors(std::span<const char16_t> input)
noexcept {
364 return validate_utf16le_with_errors(input.data(), input.size());
384simdutf_warn_unused result validate_utf16be_with_errors(
const char16_t *buf,
385 size_t len)
noexcept;
387simdutf_really_inline simdutf_warn_unused result
388validate_utf16be_with_errors(std::span<const char16_t> input)
noexcept {
389 return validate_utf16be_with_errors(input.data(), input.size());
405void to_well_formed_utf16le(
const char16_t *input,
size_t len,
406 char16_t *output)
noexcept;
408simdutf_really_inline
void
409to_well_formed_utf16le(std::span<const char16_t> input,
410 std::span<char16_t> output)
noexcept {
411 to_well_formed_utf16le(input.data(), input.size(), output.data());
427void to_well_formed_utf16be(
const char16_t *input,
size_t len,
428 char16_t *output)
noexcept;
430simdutf_really_inline
void
431to_well_formed_utf16be(std::span<const char16_t> input,
432 std::span<char16_t> output)
noexcept {
433 to_well_formed_utf16be(input.data(), input.size(), output.data());
449void to_well_formed_utf16(
const char16_t *input,
size_t len,
450 char16_t *output)
noexcept;
452simdutf_really_inline
void
453to_well_formed_utf16(std::span<const char16_t> input,
454 std::span<char16_t> output)
noexcept {
455 to_well_formed_utf16(input.data(), input.size(), output.data());
461#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
476simdutf_warn_unused
bool validate_utf32(
const char32_t *buf,
477 size_t len)
noexcept;
479simdutf_really_inline simdutf_warn_unused
bool
480validate_utf32(std::span<const char32_t> input)
noexcept {
481 return validate_utf32(input.data(), input.size());
486#if SIMDUTF_FEATURE_UTF32
503simdutf_warn_unused result validate_utf32_with_errors(
const char32_t *buf,
504 size_t len)
noexcept;
506simdutf_really_inline simdutf_warn_unused result
507validate_utf32_with_errors(std::span<const char32_t> input)
noexcept {
508 return validate_utf32_with_errors(input.data(), input.size());
513#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
524simdutf_warn_unused
size_t convert_latin1_to_utf8(
const char *input,
526 char *utf8_output)
noexcept;
528simdutf_really_inline simdutf_warn_unused
size_t convert_latin1_to_utf8(
529 const detail::input_span_of_byte_like
auto &latin1_input,
530 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
531 return convert_latin1_to_utf8(
532 reinterpret_cast<const char *
>(latin1_input.data()), latin1_input.size(),
550simdutf_warn_unused
size_t
551convert_latin1_to_utf8_safe(
const char *input,
size_t length,
char *utf8_output,
552 size_t utf8_len)
noexcept;
554simdutf_really_inline simdutf_warn_unused
size_t convert_latin1_to_utf8_safe(
555 const detail::input_span_of_byte_like
auto &input,
556 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
563 return convert_latin1_to_utf8_safe(
564 input.data(), input.size(),
reinterpret_cast<char *
>(utf8_output.data()),
570#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
581simdutf_warn_unused
size_t convert_latin1_to_utf16le(
582 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
584simdutf_really_inline simdutf_warn_unused
size_t convert_latin1_to_utf16le(
585 const detail::input_span_of_byte_like
auto &latin1_input,
586 std::span<char16_t> utf16_output)
noexcept {
587 return convert_latin1_to_utf16le(
588 reinterpret_cast<const char *
>(latin1_input.data()), latin1_input.size(),
589 utf16_output.data());
603simdutf_warn_unused
size_t convert_latin1_to_utf16be(
604 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
606simdutf_really_inline simdutf_warn_unused
size_t
607convert_latin1_to_utf16be(
const detail::input_span_of_byte_like
auto &input,
608 std::span<char16_t> output)
noexcept {
609 return convert_latin1_to_utf16be(
reinterpret_cast<const char *
>(input.data()),
610 input.size(), output.data());
// Declaration only: maps a length to a size_t; the definition is outside this
// view. NOTE(review): judging by the name, presumably the Latin-1 length for
// a UTF-16 input of `length` code units -- confirm against the definition.
621simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept;
// Declaration only: maps a length to a size_t; the definition is outside this
// view. NOTE(review): judging by the name, presumably the UTF-16 length for a
// Latin-1 input of `length` bytes -- confirm against the definition.
631simdutf_warn_unused
size_t utf16_length_from_latin1(
size_t length)
noexcept;
634#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
645simdutf_warn_unused
size_t convert_latin1_to_utf32(
646 const char *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
648simdutf_really_inline simdutf_warn_unused
size_t convert_latin1_to_utf32(
649 const detail::input_span_of_byte_like
auto &latin1_input,
650 std::span<char32_t> utf32_output)
noexcept {
651 return convert_latin1_to_utf32(
652 reinterpret_cast<const char *
>(latin1_input.data()), latin1_input.size(),
653 utf32_output.data());
658#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
671simdutf_warn_unused
size_t convert_utf8_to_latin1(
const char *input,
673 char *latin1_output)
noexcept;
675simdutf_really_inline simdutf_warn_unused
size_t convert_utf8_to_latin1(
676 const detail::input_span_of_byte_like
auto &input,
677 detail::output_span_of_byte_like
auto &&output)
noexcept {
678 return convert_utf8_to_latin1(
reinterpret_cast<const char *
>(input.data()),
680 reinterpret_cast<char *
>(output.data()));
685#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
699simdutf_warn_unused
size_t convert_utf8_to_utf16(
700 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
702simdutf_really_inline simdutf_warn_unused
size_t
703convert_utf8_to_utf16(
const detail::input_span_of_byte_like
auto &input,
704 std::span<char16_t> output)
noexcept {
705 return convert_utf8_to_utf16(
reinterpret_cast<const char *
>(input.data()),
706 input.size(), output.data());
711#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
720simdutf_warn_unused
size_t convert_latin1_to_utf16(
721 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
723simdutf_really_inline simdutf_warn_unused
size_t
724convert_latin1_to_utf16(
const detail::input_span_of_byte_like
auto &input,
725 std::span<char16_t> output)
noexcept {
726 return convert_latin1_to_utf16(
reinterpret_cast<const char *
>(input.data()),
727 input.size(), output.data());
732#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
745simdutf_warn_unused
size_t convert_utf8_to_utf16le(
746 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
748simdutf_really_inline simdutf_warn_unused
size_t
749convert_utf8_to_utf16le(
const detail::input_span_of_byte_like
auto &utf8_input,
750 std::span<char16_t> utf16_output)
noexcept {
751 return convert_utf8_to_utf16le(
752 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
753 utf16_output.data());
769simdutf_warn_unused
size_t convert_utf8_to_utf16be(
770 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
772simdutf_really_inline simdutf_warn_unused
size_t
773convert_utf8_to_utf16be(
const detail::input_span_of_byte_like
auto &utf8_input,
774 std::span<char16_t> utf16_output)
noexcept {
775 return convert_utf8_to_utf16be(
776 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
777 utf16_output.data());
782#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
799simdutf_warn_unused result convert_utf8_to_latin1_with_errors(
800 const char *input,
size_t length,
char *latin1_output)
noexcept;
802simdutf_really_inline simdutf_warn_unused result
803convert_utf8_to_latin1_with_errors(
804 const detail::input_span_of_byte_like
auto &utf8_input,
805 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
806 return convert_utf8_to_latin1_with_errors(
807 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
808 reinterpret_cast<char *
>(latin1_output.data()));
813#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
829simdutf_warn_unused result convert_utf8_to_utf16_with_errors(
830 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
832simdutf_really_inline simdutf_warn_unused result
833convert_utf8_to_utf16_with_errors(
834 const detail::input_span_of_byte_like
auto &utf8_input,
835 std::span<char16_t> utf16_output)
noexcept {
836 return convert_utf8_to_utf16_with_errors(
837 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
838 utf16_output.data());
856simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(
857 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
859simdutf_really_inline simdutf_warn_unused result
860convert_utf8_to_utf16le_with_errors(
861 const detail::input_span_of_byte_like
auto &utf8_input,
862 std::span<char16_t> utf16_output)
noexcept {
863 return convert_utf8_to_utf16le_with_errors(
864 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
865 utf16_output.data());
883simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(
884 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
886simdutf_really_inline simdutf_warn_unused result
887convert_utf8_to_utf16be_with_errors(
888 const detail::input_span_of_byte_like
auto &utf8_input,
889 std::span<char16_t> utf16_output)
noexcept {
890 return convert_utf8_to_utf16be_with_errors(
891 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
892 utf16_output.data());
897#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
910simdutf_warn_unused
size_t convert_utf8_to_utf32(
911 const char *input,
size_t length,
char32_t *utf32_output)
noexcept;
913simdutf_really_inline simdutf_warn_unused
size_t
914convert_utf8_to_utf32(
const detail::input_span_of_byte_like
auto &utf8_input,
915 std::span<char32_t> utf32_output)
noexcept {
916 return convert_utf8_to_utf32(
917 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
918 utf32_output.data());
936simdutf_warn_unused result convert_utf8_to_utf32_with_errors(
937 const char *input,
size_t length,
char32_t *utf32_output)
noexcept;
939simdutf_really_inline simdutf_warn_unused result
940convert_utf8_to_utf32_with_errors(
941 const detail::input_span_of_byte_like
auto &utf8_input,
942 std::span<char32_t> utf32_output)
noexcept {
943 return convert_utf8_to_utf32_with_errors(
944 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
945 utf32_output.data());
950#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
970simdutf_warn_unused
size_t convert_valid_utf8_to_latin1(
971 const char *input,
size_t length,
char *latin1_output)
noexcept;
973simdutf_really_inline simdutf_warn_unused
size_t convert_valid_utf8_to_latin1(
974 const detail::input_span_of_byte_like
auto &valid_utf8_input,
975 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
976 return convert_valid_utf8_to_latin1(
977 reinterpret_cast<const char *
>(valid_utf8_input.data()),
978 valid_utf8_input.size(), latin1_output.data());
983#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
994simdutf_warn_unused
size_t convert_valid_utf8_to_utf16(
995 const char *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
997simdutf_really_inline simdutf_warn_unused
size_t convert_valid_utf8_to_utf16(
998 const detail::input_span_of_byte_like
auto &valid_utf8_input,
999 std::span<char16_t> utf16_output)
noexcept {
1000 return convert_valid_utf8_to_utf16(
1001 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1002 valid_utf8_input.size(), utf16_output.data());
1016simdutf_warn_unused
size_t convert_valid_utf8_to_utf16le(
1017 const char *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
1019simdutf_really_inline simdutf_warn_unused
size_t convert_valid_utf8_to_utf16le(
1020 const detail::input_span_of_byte_like
auto &valid_utf8_input,
1021 std::span<char16_t> utf16_output)
noexcept {
1022 return convert_valid_utf8_to_utf16le(
1023 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1024 valid_utf8_input.size(), utf16_output.data());
1038simdutf_warn_unused
size_t convert_valid_utf8_to_utf16be(
1039 const char *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
1041simdutf_really_inline simdutf_warn_unused
size_t convert_valid_utf8_to_utf16be(
1042 const detail::input_span_of_byte_like
auto &valid_utf8_input,
1043 std::span<char16_t> utf16_output)
noexcept {
1044 return convert_valid_utf8_to_utf16be(
1045 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1046 valid_utf8_input.size(), utf16_output.data());
1051#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1062simdutf_warn_unused
size_t convert_valid_utf8_to_utf32(
1063 const char *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
1065simdutf_really_inline simdutf_warn_unused
size_t convert_valid_utf8_to_utf32(
1066 const detail::input_span_of_byte_like
auto &valid_utf8_input,
1067 std::span<char32_t> utf32_output)
noexcept {
1068 return convert_valid_utf8_to_utf32(
1069 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1070 valid_utf8_input.size(), utf32_output.data());
1075#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1084simdutf_warn_unused
size_t utf8_length_from_latin1(
const char *input,
1085 size_t length)
noexcept;
1087simdutf_really_inline simdutf_warn_unused
size_t utf8_length_from_latin1(
1088 const detail::input_span_of_byte_like
auto &latin1_input)
noexcept {
1089 return utf8_length_from_latin1(
1090 reinterpret_cast<const char *
>(latin1_input.data()), latin1_input.size());
1107simdutf_warn_unused
size_t latin1_length_from_utf8(
const char *input,
1108 size_t length)
noexcept;
1110simdutf_really_inline simdutf_warn_unused
size_t latin1_length_from_utf8(
1111 const detail::input_span_of_byte_like
auto &valid_utf8_input)
noexcept {
1112 return latin1_length_from_utf8(
1113 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1114 valid_utf8_input.size());
1119#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1134simdutf_warn_unused
size_t utf16_length_from_utf8(
const char *input,
1135 size_t length)
noexcept;
1137simdutf_really_inline simdutf_warn_unused
size_t utf16_length_from_utf8(
1138 const detail::input_span_of_byte_like
auto &valid_utf8_input)
noexcept {
1139 return utf16_length_from_utf8(
1140 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1141 valid_utf8_input.size());
1146#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1163simdutf_warn_unused
size_t utf32_length_from_utf8(
const char *input,
1164 size_t length)
noexcept;
1166simdutf_really_inline simdutf_warn_unused
size_t utf32_length_from_utf8(
1167 const detail::input_span_of_byte_like
auto &valid_utf8_input)
noexcept {
1168 return utf32_length_from_utf8(
1169 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1170 valid_utf8_input.size());
1175#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1191simdutf_warn_unused
size_t convert_utf16_to_utf8(
const char16_t *input,
1193 char *utf8_buffer)
noexcept;
1195simdutf_really_inline simdutf_warn_unused
size_t convert_utf16_to_utf8(
1196 std::span<const char16_t> utf16_input,
1197 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
1198 return convert_utf16_to_utf8(utf16_input.data(), utf16_input.size(),
1199 reinterpret_cast<char *
>(utf8_output.data()));
1221simdutf_warn_unused
size_t convert_utf16_to_utf8_safe(
const char16_t *input,
1224 size_t utf8_len)
noexcept;
1226simdutf_really_inline simdutf_warn_unused
size_t convert_utf16_to_utf8_safe(
1227 std::span<const char16_t> utf16_input,
1228 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
1235 return convert_utf16_to_utf8_safe(
1236 utf16_input.data(), utf16_input.size(),
1237 reinterpret_cast<char *
>(utf8_output.data()), utf8_output.size());
1242#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1258simdutf_warn_unused
size_t convert_utf16_to_latin1(
1259 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
1261simdutf_really_inline simdutf_warn_unused
size_t convert_utf16_to_latin1(
1262 std::span<const char16_t> utf16_input,
1263 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
1264 return convert_utf16_to_latin1(
1265 utf16_input.data(), utf16_input.size(),
1266 reinterpret_cast<char *
>(latin1_output.data()));
1286simdutf_warn_unused
size_t convert_utf16le_to_latin1(
1287 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
1289simdutf_really_inline simdutf_warn_unused
size_t convert_utf16le_to_latin1(
1290 std::span<const char16_t> utf16_input,
1291 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
1292 return convert_utf16le_to_latin1(
1293 utf16_input.data(), utf16_input.size(),
1294 reinterpret_cast<char *
>(latin1_output.data()));
1312simdutf_warn_unused
size_t convert_utf16be_to_latin1(
1313 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
1315simdutf_really_inline simdutf_warn_unused
size_t convert_utf16be_to_latin1(
1316 std::span<const char16_t> utf16_input,
1317 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
1318 return convert_utf16be_to_latin1(
1319 utf16_input.data(), utf16_input.size(),
1320 reinterpret_cast<char *
>(latin1_output.data()));
1325#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1340simdutf_warn_unused
size_t convert_utf16le_to_utf8(
const char16_t *input,
1342 char *utf8_buffer)
noexcept;
1344simdutf_really_inline simdutf_warn_unused
size_t convert_utf16le_to_utf8(
1345 std::span<const char16_t> utf16_input,
1346 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
1347 return convert_utf16le_to_utf8(utf16_input.data(), utf16_input.size(),
1348 reinterpret_cast<char *
>(utf8_output.data()));
1366simdutf_warn_unused
size_t convert_utf16be_to_utf8(
const char16_t *input,
1368 char *utf8_buffer)
noexcept;
1370simdutf_really_inline simdutf_warn_unused
size_t convert_utf16be_to_utf8(
1371 std::span<const char16_t> utf16_input,
1372 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
1373 return convert_utf16be_to_utf8(utf16_input.data(), utf16_input.size(),
1374 reinterpret_cast<char *
>(utf8_output.data()));
1379#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1396simdutf_warn_unused result convert_utf16_to_latin1_with_errors(
1397 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
1399simdutf_really_inline simdutf_warn_unused result
1400convert_utf16_to_latin1_with_errors(
1401 std::span<const char16_t> utf16_input,
1402 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
1403 return convert_utf16_to_latin1_with_errors(
1404 utf16_input.data(), utf16_input.size(),
1405 reinterpret_cast<char *
>(latin1_output.data()));
1424simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(
1425 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
1427simdutf_really_inline simdutf_warn_unused result
1428convert_utf16le_to_latin1_with_errors(
1429 std::span<const char16_t> utf16_input,
1430 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
1431 return convert_utf16le_to_latin1_with_errors(
1432 utf16_input.data(), utf16_input.size(),
1433 reinterpret_cast<char *
>(latin1_output.data()));
1454simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(
1455 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
1457simdutf_really_inline simdutf_warn_unused result
1458convert_utf16be_to_latin1_with_errors(
1459 std::span<const char16_t> utf16_input,
1460 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
1461 return convert_utf16be_to_latin1_with_errors(
1462 utf16_input.data(), utf16_input.size(),
1463 reinterpret_cast<char *
>(latin1_output.data()));
1468#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1486simdutf_warn_unused result convert_utf16_to_utf8_with_errors(
1487 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
1489simdutf_really_inline simdutf_warn_unused result
1490convert_utf16_to_utf8_with_errors(
1491 std::span<const char16_t> utf16_input,
1492 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
1493 return convert_utf16_to_utf8_with_errors(
1494 utf16_input.data(), utf16_input.size(),
1495 reinterpret_cast<char *
>(utf8_output.data()));
1515simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(
1516 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
1518simdutf_really_inline simdutf_warn_unused result
1519convert_utf16le_to_utf8_with_errors(
1520 std::span<const char16_t> utf16_input,
1521 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
1522 return convert_utf16le_to_utf8_with_errors(
1523 utf16_input.data(), utf16_input.size(),
1524 reinterpret_cast<char *
>(utf8_output.data()));
1544simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(
1545 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
1547simdutf_really_inline simdutf_warn_unused result
1548convert_utf16be_to_utf8_with_errors(
1549 std::span<const char16_t> utf16_input,
1550 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
1551 return convert_utf16be_to_utf8_with_errors(
1552 utf16_input.data(), utf16_input.size(),
1553 reinterpret_cast<char *
>(utf8_output.data()));
1570simdutf_warn_unused
size_t convert_valid_utf16_to_utf8(
1571 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
1573simdutf_really_inline simdutf_warn_unused
size_t convert_valid_utf16_to_utf8(
1574 std::span<const char16_t> valid_utf16_input,
1575 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
1576 return convert_valid_utf16_to_utf8(
1577 valid_utf16_input.data(), valid_utf16_input.size(),
1578 reinterpret_cast<char *
>(utf8_output.data()));
1583#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1603simdutf_warn_unused
size_t convert_valid_utf16_to_latin1(
1604 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
1606simdutf_really_inline simdutf_warn_unused
size_t convert_valid_utf16_to_latin1(
1607 std::span<const char16_t> valid_utf16_input,
1608 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
1609 return convert_valid_utf16_to_latin1(
1610 valid_utf16_input.data(), valid_utf16_input.size(),
1611 reinterpret_cast<char *
>(latin1_output.data()));
1634simdutf_warn_unused
size_t convert_valid_utf16le_to_latin1(
1635 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
1637simdutf_really_inline simdutf_warn_unused
size_t
1638convert_valid_utf16le_to_latin1(
1639 std::span<const char16_t> valid_utf16_input,
1640 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
1641 return convert_valid_utf16le_to_latin1(
1642 valid_utf16_input.data(), valid_utf16_input.size(),
1643 reinterpret_cast<char *
>(latin1_output.data()));
1666simdutf_warn_unused
size_t convert_valid_utf16be_to_latin1(
1667 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
1669simdutf_really_inline simdutf_warn_unused
size_t
1670convert_valid_utf16be_to_latin1(
1671 std::span<const char16_t> valid_utf16_input,
1672 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
1673 return convert_valid_utf16be_to_latin1(
1674 valid_utf16_input.data(), valid_utf16_input.size(),
1675 reinterpret_cast<char *
>(latin1_output.data()));
1680#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1695simdutf_warn_unused
size_t convert_valid_utf16le_to_utf8(
1696 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
1698simdutf_really_inline simdutf_warn_unused
size_t convert_valid_utf16le_to_utf8(
1699 std::span<const char16_t> valid_utf16_input,
1700 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
1701 return convert_valid_utf16le_to_utf8(
1702 valid_utf16_input.data(), valid_utf16_input.size(),
1703 reinterpret_cast<char *
>(utf8_output.data()));
1720simdutf_warn_unused
size_t convert_valid_utf16be_to_utf8(
1721 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
1723simdutf_really_inline simdutf_warn_unused
size_t convert_valid_utf16be_to_utf8(
1724 std::span<const char16_t> valid_utf16_input,
1725 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
1726 return convert_valid_utf16be_to_utf8(
1727 valid_utf16_input.data(), valid_utf16_input.size(),
1728 reinterpret_cast<char *
>(utf8_output.data()));
1733#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1749simdutf_warn_unused
size_t convert_utf16_to_utf32(
1750 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
1752simdutf_really_inline simdutf_warn_unused
size_t
1753convert_utf16_to_utf32(std::span<const char16_t> utf16_input,
1754 std::span<char32_t> utf32_output)
noexcept {
1755 return convert_utf16_to_utf32(utf16_input.data(), utf16_input.size(),
1756 utf32_output.data());
1774simdutf_warn_unused
size_t convert_utf16le_to_utf32(
1775 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
1777simdutf_really_inline simdutf_warn_unused
size_t
1778convert_utf16le_to_utf32(std::span<const char16_t> utf16_input,
1779 std::span<char32_t> utf32_output)
noexcept {
1780 return convert_utf16le_to_utf32(utf16_input.data(), utf16_input.size(),
1781 utf32_output.data());
1799simdutf_warn_unused
size_t convert_utf16be_to_utf32(
1800 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
1802simdutf_really_inline simdutf_warn_unused
size_t
1803convert_utf16be_to_utf32(std::span<const char16_t> utf16_input,
1804 std::span<char32_t> utf32_output)
noexcept {
1805 return convert_utf16be_to_utf32(utf16_input.data(), utf16_input.size(),
1806 utf32_output.data());
1827simdutf_warn_unused result convert_utf16_to_utf32_with_errors(
1828 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
1830simdutf_really_inline simdutf_warn_unused result
1831convert_utf16_to_utf32_with_errors(std::span<const char16_t> utf16_input,
1832 std::span<char32_t> utf32_output)
noexcept {
1833 return convert_utf16_to_utf32_with_errors(
1834 utf16_input.data(), utf16_input.size(), utf32_output.data());
1854simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(
1855 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
1857simdutf_really_inline simdutf_warn_unused result
1858convert_utf16le_to_utf32_with_errors(
1859 std::span<const char16_t> utf16_input,
1860 std::span<char32_t> utf32_output)
noexcept {
1861 return convert_utf16le_to_utf32_with_errors(
1862 utf16_input.data(), utf16_input.size(), utf32_output.data());
1882simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(
1883 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
1885simdutf_really_inline simdutf_warn_unused result
1886convert_utf16be_to_utf32_with_errors(
1887 std::span<const char16_t> utf16_input,
1888 std::span<char32_t> utf32_output)
noexcept {
1889 return convert_utf16be_to_utf32_with_errors(
1890 utf16_input.data(), utf16_input.size(), utf32_output.data());
1908simdutf_warn_unused
size_t convert_valid_utf16_to_utf32(
1909 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
1911simdutf_really_inline simdutf_warn_unused
size_t
1912convert_valid_utf16_to_utf32(std::span<const char16_t> valid_utf16_input,
1913 std::span<char32_t> utf32_output)
noexcept {
1914 return convert_valid_utf16_to_utf32(
1915 valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data());
1932simdutf_warn_unused
size_t convert_valid_utf16le_to_utf32(
1933 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
1935simdutf_really_inline simdutf_warn_unused
size_t
1936convert_valid_utf16le_to_utf32(std::span<const char16_t> valid_utf16_input,
1937 std::span<char32_t> utf32_output)
noexcept {
1938 return convert_valid_utf16le_to_utf32(
1939 valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data());
1956simdutf_warn_unused
size_t convert_valid_utf16be_to_utf32(
1957 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
1959simdutf_really_inline simdutf_warn_unused
size_t
1960convert_valid_utf16be_to_utf32(std::span<const char16_t> valid_utf16_input,
1961 std::span<char32_t> utf32_output)
noexcept {
1962 return convert_valid_utf16be_to_utf32(
1963 valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data());
1968#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
// Declaration only: maps a length to a size_t; the definition is outside this
// view. NOTE(review): a declaration with this exact signature also appears
// earlier in this file; the repetition is redundant but harmless -- confirm
// whether both are meant to stay.
1981simdutf_warn_unused
size_t latin1_length_from_utf16(
size_t length)
noexcept;
1994simdutf_warn_unused
size_t utf8_length_from_utf16(
const char16_t *input,
1995 size_t length)
noexcept;
1997simdutf_really_inline simdutf_warn_unused
size_t
1998utf8_length_from_utf16(std::span<const char16_t> valid_utf16_input)
noexcept {
1999 return utf8_length_from_utf16(valid_utf16_input.data(),
2000 valid_utf16_input.size());
2005#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
2017simdutf_warn_unused
size_t utf8_length_from_utf16le(
const char16_t *input,
2018 size_t length)
noexcept;
2020simdutf_really_inline simdutf_warn_unused
size_t
2021utf8_length_from_utf16le(std::span<const char16_t> valid_utf16_input)
noexcept {
2022 return utf8_length_from_utf16le(valid_utf16_input.data(),
2023 valid_utf16_input.size());
2038simdutf_warn_unused
size_t utf8_length_from_utf16be(
const char16_t *input,
2039 size_t length)
noexcept;
2041simdutf_really_inline simdutf_warn_unused
size_t
2042utf8_length_from_utf16be(std::span<const char16_t> valid_utf16_input)
noexcept {
2043 return utf8_length_from_utf16be(valid_utf16_input.data(),
2044 valid_utf16_input.size());
2049#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
2063simdutf_warn_unused
size_t convert_utf32_to_utf8(
const char32_t *input,
2065 char *utf8_buffer)
noexcept;
2067simdutf_really_inline simdutf_warn_unused
size_t convert_utf32_to_utf8(
2068 std::span<const char32_t> utf32_input,
2069 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2070 return convert_utf32_to_utf8(utf32_input.data(), utf32_input.size(),
2071 reinterpret_cast<char *
>(utf8_output.data()));
2091simdutf_warn_unused result convert_utf32_to_utf8_with_errors(
2092 const char32_t *input,
size_t length,
char *utf8_buffer)
noexcept;
2094simdutf_really_inline simdutf_warn_unused result
2095convert_utf32_to_utf8_with_errors(
2096 std::span<const char32_t> utf32_input,
2097 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2098 return convert_utf32_to_utf8_with_errors(
2099 utf32_input.data(), utf32_input.size(),
2100 reinterpret_cast<char *
>(utf8_output.data()));
2117simdutf_warn_unused
size_t convert_valid_utf32_to_utf8(
2118 const char32_t *input,
size_t length,
char *utf8_buffer)
noexcept;
2120simdutf_really_inline simdutf_warn_unused
size_t convert_valid_utf32_to_utf8(
2121 std::span<const char32_t> valid_utf32_input,
2122 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2123 return convert_valid_utf32_to_utf8(
2124 valid_utf32_input.data(), valid_utf32_input.size(),
2125 reinterpret_cast<char *
>(utf8_output.data()));
2130#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2145simdutf_warn_unused
size_t convert_utf32_to_utf16(
2146 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
2148simdutf_really_inline simdutf_warn_unused
size_t
2149convert_utf32_to_utf16(std::span<const char32_t> utf32_input,
2150 std::span<char16_t> utf16_output)
noexcept {
2151 return convert_utf32_to_utf16(utf32_input.data(), utf32_input.size(),
2152 utf16_output.data());
2169simdutf_warn_unused
size_t convert_utf32_to_utf16le(
2170 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
2172simdutf_really_inline simdutf_warn_unused
size_t
2173convert_utf32_to_utf16le(std::span<const char32_t> utf32_input,
2174 std::span<char16_t> utf16_output)
noexcept {
2175 return convert_utf32_to_utf16le(utf32_input.data(), utf32_input.size(),
2176 utf16_output.data());
2181#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
2196simdutf_warn_unused
size_t convert_utf32_to_latin1(
2197 const char32_t *input,
size_t length,
char *latin1_buffer)
noexcept;
2199simdutf_really_inline simdutf_warn_unused
size_t convert_utf32_to_latin1(
2200 std::span<const char32_t> utf32_input,
2201 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
2202 return convert_utf32_to_latin1(
2203 utf32_input.data(), utf32_input.size(),
2204 reinterpret_cast<char *
>(latin1_output.data()));
2225simdutf_warn_unused result convert_utf32_to_latin1_with_errors(
2226 const char32_t *input,
size_t length,
char *latin1_buffer)
noexcept;
2228simdutf_really_inline simdutf_warn_unused result
2229convert_utf32_to_latin1_with_errors(
2230 std::span<const char32_t> utf32_input,
2231 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
2232 return convert_utf32_to_latin1_with_errors(
2233 utf32_input.data(), utf32_input.size(),
2234 reinterpret_cast<char *
>(latin1_output.data()));
2258simdutf_warn_unused
size_t convert_valid_utf32_to_latin1(
2259 const char32_t *input,
size_t length,
char *latin1_buffer)
noexcept;
2261simdutf_really_inline simdutf_warn_unused
size_t convert_valid_utf32_to_latin1(
2262 std::span<const char32_t> valid_utf32_input,
2263 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
2264 return convert_valid_utf32_to_latin1(
2265 valid_utf32_input.data(), valid_utf32_input.size(),
2266 reinterpret_cast<char *
>(latin1_output.data()));
2282simdutf_warn_unused
size_t latin1_length_from_utf32(
size_t length)
noexcept;
2292simdutf_warn_unused
size_t utf32_length_from_latin1(
size_t length)
noexcept;
2295#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2309simdutf_warn_unused
size_t convert_utf32_to_utf16be(
2310 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
2312simdutf_really_inline simdutf_warn_unused
size_t
2313convert_utf32_to_utf16be(std::span<const char32_t> utf32_input,
2314 std::span<char16_t> utf16_output)
noexcept {
2315 return convert_utf32_to_utf16be(utf32_input.data(), utf32_input.size(),
2316 utf16_output.data());
2337simdutf_warn_unused result convert_utf32_to_utf16_with_errors(
2338 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
2340simdutf_really_inline simdutf_warn_unused result
2341convert_utf32_to_utf16_with_errors(std::span<const char32_t> utf32_input,
2342 std::span<char16_t> utf16_output)
noexcept {
2343 return convert_utf32_to_utf16_with_errors(
2344 utf32_input.data(), utf32_input.size(), utf16_output.data());
2364simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(
2365 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
2367simdutf_really_inline simdutf_warn_unused result
2368convert_utf32_to_utf16le_with_errors(
2369 std::span<const char32_t> utf32_input,
2370 std::span<char16_t> utf16_output)
noexcept {
2371 return convert_utf32_to_utf16le_with_errors(
2372 utf32_input.data(), utf32_input.size(), utf16_output.data());
2392simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(
2393 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
2395simdutf_really_inline simdutf_warn_unused result
2396convert_utf32_to_utf16be_with_errors(
2397 std::span<const char32_t> utf32_input,
2398 std::span<char16_t> utf16_output)
noexcept {
2399 return convert_utf32_to_utf16be_with_errors(
2400 utf32_input.data(), utf32_input.size(), utf16_output.data());
2417simdutf_warn_unused
size_t convert_valid_utf32_to_utf16(
2418 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
2420simdutf_really_inline simdutf_warn_unused
size_t
2421convert_valid_utf32_to_utf16(std::span<const char32_t> valid_utf32_input,
2422 std::span<char16_t> utf16_output)
noexcept {
2423 return convert_valid_utf32_to_utf16(
2424 valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data());
2441simdutf_warn_unused
size_t convert_valid_utf32_to_utf16le(
2442 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
2444simdutf_really_inline simdutf_warn_unused
size_t
2445convert_valid_utf32_to_utf16le(std::span<const char32_t> valid_utf32_input,
2446 std::span<char16_t> utf16_output)
noexcept {
2447 return convert_valid_utf32_to_utf16le(
2448 valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data());
2465simdutf_warn_unused
size_t convert_valid_utf32_to_utf16be(
2466 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
2468simdutf_really_inline simdutf_warn_unused
size_t
2469convert_valid_utf32_to_utf16be(std::span<const char32_t> valid_utf32_input,
2470 std::span<char16_t> utf16_output)
noexcept {
2471 return convert_valid_utf32_to_utf16be(
2472 valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data());
2477#if SIMDUTF_FEATURE_UTF16
2491void change_endianness_utf16(
const char16_t *input,
size_t length,
2492 char16_t *output)
noexcept;
2494simdutf_really_inline
void
2495change_endianness_utf16(std::span<const char16_t> utf16_input,
2496 std::span<char16_t> utf16_output)
noexcept {
2497 return change_endianness_utf16(utf16_input.data(), utf16_input.size(),
2498 utf16_output.data());
2503#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
2515simdutf_warn_unused
size_t utf8_length_from_utf32(
const char32_t *input,
2516 size_t length)
noexcept;
2518simdutf_really_inline simdutf_warn_unused
size_t
2519utf8_length_from_utf32(std::span<const char32_t> valid_utf32_input)
noexcept {
2520 return utf8_length_from_utf32(valid_utf32_input.data(),
2521 valid_utf32_input.size());
2526#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
2538simdutf_warn_unused
size_t utf16_length_from_utf32(
const char32_t *input,
2539 size_t length)
noexcept;
2541simdutf_really_inline simdutf_warn_unused
size_t
2542utf16_length_from_utf32(std::span<const char32_t> valid_utf32_input)
noexcept {
2543 return utf16_length_from_utf32(valid_utf32_input.data(),
2544 valid_utf32_input.size());
2563simdutf_warn_unused
size_t utf32_length_from_utf16(
const char16_t *input,
2564 size_t length)
noexcept;
2566simdutf_really_inline simdutf_warn_unused
size_t
2567utf32_length_from_utf16(std::span<const char16_t> valid_utf16_input)
noexcept {
2568 return utf32_length_from_utf16(valid_utf16_input.data(),
2569 valid_utf16_input.size());
2588simdutf_warn_unused
size_t utf32_length_from_utf16le(
const char16_t *input,
2589 size_t length)
noexcept;
2591simdutf_really_inline simdutf_warn_unused
size_t utf32_length_from_utf16le(
2592 std::span<const char16_t> valid_utf16_input)
noexcept {
2593 return utf32_length_from_utf16le(valid_utf16_input.data(),
2594 valid_utf16_input.size());
2613simdutf_warn_unused
size_t utf32_length_from_utf16be(
const char16_t *input,
2614 size_t length)
noexcept;
2616simdutf_really_inline simdutf_warn_unused
size_t utf32_length_from_utf16be(
2617 std::span<const char16_t> valid_utf16_input)
noexcept {
2618 return utf32_length_from_utf16be(valid_utf16_input.data(),
2619 valid_utf16_input.size());
2624#if SIMDUTF_FEATURE_UTF16
2639simdutf_warn_unused
size_t count_utf16(
const char16_t *input,
2640 size_t length)
noexcept;
2642simdutf_really_inline simdutf_warn_unused
size_t
2643count_utf16(std::span<const char16_t> valid_utf16_input)
noexcept {
2644 return count_utf16(valid_utf16_input.data(), valid_utf16_input.size());
2662simdutf_warn_unused
size_t count_utf16le(
const char16_t *input,
2663 size_t length)
noexcept;
2665simdutf_really_inline simdutf_warn_unused
size_t
2666count_utf16le(std::span<const char16_t> valid_utf16_input)
noexcept {
2667 return count_utf16le(valid_utf16_input.data(), valid_utf16_input.size());
2685simdutf_warn_unused
size_t count_utf16be(
const char16_t *input,
2686 size_t length)
noexcept;
2688simdutf_really_inline simdutf_warn_unused
size_t
2689count_utf16be(std::span<const char16_t> valid_utf16_input)
noexcept {
2690 return count_utf16be(valid_utf16_input.data(), valid_utf16_input.size());
2695#if SIMDUTF_FEATURE_UTF8
2708simdutf_warn_unused
size_t count_utf8(
const char *input,
2709 size_t length)
noexcept;
2711simdutf_really_inline simdutf_warn_unused
size_t count_utf8(
2712 const detail::input_span_of_byte_like
auto &valid_utf8_input)
noexcept {
2713 return count_utf8(
reinterpret_cast<const char *
>(valid_utf8_input.data()),
2714 valid_utf8_input.size());
2732simdutf_warn_unused
size_t trim_partial_utf8(
const char *input,
size_t length);
2734simdutf_really_inline simdutf_warn_unused
size_t trim_partial_utf8(
2735 const detail::input_span_of_byte_like
auto &valid_utf8_input)
noexcept {
2736 return trim_partial_utf8(
2737 reinterpret_cast<const char *
>(valid_utf8_input.data()),
2738 valid_utf8_input.size());
2743#if SIMDUTF_FEATURE_UTF16
2758simdutf_warn_unused
size_t trim_partial_utf16be(
const char16_t *input,
2761simdutf_really_inline simdutf_warn_unused
size_t
2762trim_partial_utf16be(std::span<const char16_t> valid_utf16_input)
noexcept {
2763 return trim_partial_utf16be(valid_utf16_input.data(),
2764 valid_utf16_input.size());
2782simdutf_warn_unused
size_t trim_partial_utf16le(
const char16_t *input,
2785simdutf_really_inline simdutf_warn_unused
size_t
2786trim_partial_utf16le(std::span<const char16_t> valid_utf16_input)
noexcept {
2787 return trim_partial_utf16le(valid_utf16_input.data(),
2788 valid_utf16_input.size());
2806simdutf_warn_unused
size_t trim_partial_utf16(
const char16_t *input,
2809simdutf_really_inline simdutf_warn_unused
size_t
2810trim_partial_utf16(std::span<const char16_t> valid_utf16_input)
noexcept {
2811 return trim_partial_utf16(valid_utf16_input.data(), valid_utf16_input.size());
2816#if SIMDUTF_FEATURE_BASE64
2817 #ifndef SIMDUTF_NEED_TRAILING_ZEROES
2818 #define SIMDUTF_NEED_TRAILING_ZEROES 1
2824constexpr uint64_t base64_reverse_padding =
2826enum base64_options : uint64_t {
2829 base64_default_no_padding =
2831 base64_reverse_padding,
2832 base64_url_with_padding =
2833 base64_url | base64_reverse_padding,
2834 base64_default_accept_garbage =
2837 base64_url_accept_garbage =
2840 base64_default_or_url =
2842 base64_default_or_url_accept_garbage =
2848 #if SIMDUTF_CPLUSPLUS17
// Maps a base64_options value to the spelling of its enumerator, returned as
// a std::string_view over a string literal. Each visible case label returns
// the text identical to the enumerator's own name.
2849inline std::string_view to_string(base64_options options) {
2851 case base64_default:
2852 return "base64_default";
2854 return "base64_url";
2855 case base64_reverse_padding:
2856 return "base64_reverse_padding";
2857 case base64_url_with_padding:
2858 return "base64_url_with_padding";
2859 case base64_default_accept_garbage:
2860 return "base64_default_accept_garbage";
2861 case base64_url_accept_garbage:
2862 return "base64_url_accept_garbage";
2863 case base64_default_or_url:
2864 return "base64_default_or_url";
2865 case base64_default_or_url_accept_garbage:
2866 return "base64_default_or_url_accept_garbage";
2875enum last_chunk_handling_options : uint64_t {
2879 stop_before_partial =
2885inline bool is_partial(last_chunk_handling_options options) {
2886 return (options == stop_before_partial) || (options == only_full_chunks);
2889 #if SIMDUTF_CPLUSPLUS17
// Maps a last_chunk_handling_options value to the spelling of its
// enumerator, returned as a std::string_view over a string literal.
2890inline std::string_view to_string(last_chunk_handling_options options) {
2896 case stop_before_partial:
2897 return "stop_before_partial";
2898 case only_full_chunks:
2899 return "only_full_chunks";
2914simdutf_warn_unused
size_t
2915maximal_binary_length_from_base64(
const char *input,
size_t length)
noexcept;
2917simdutf_really_inline simdutf_warn_unused
size_t
2918maximal_binary_length_from_base64(
2919 const detail::input_span_of_byte_like
auto &input)
noexcept {
2920 return maximal_binary_length_from_base64(
2921 reinterpret_cast<const char *
>(input.data()), input.size());
2935simdutf_warn_unused
size_t maximal_binary_length_from_base64(
2936 const char16_t *input,
size_t length)
noexcept;
2938simdutf_really_inline simdutf_warn_unused
size_t
2939maximal_binary_length_from_base64(std::span<const char16_t> input)
noexcept {
2940 return maximal_binary_length_from_base64(input.data(), input.size());
2998simdutf_warn_unused result base64_to_binary(
2999 const char *input,
size_t length,
char *output,
3000 base64_options options = base64_default,
3001 last_chunk_handling_options last_chunk_options = loose)
noexcept;
3003simdutf_really_inline simdutf_warn_unused result base64_to_binary(
3004 const detail::input_span_of_byte_like
auto &input,
3005 detail::output_span_of_byte_like
auto &&binary_output,
3006 base64_options options = base64_default,
3007 last_chunk_handling_options last_chunk_options = loose)
noexcept {
3008 return base64_to_binary(
reinterpret_cast<const char *
>(input.data()),
3010 reinterpret_cast<char *
>(binary_output.data()),
3011 options, last_chunk_options);
3021simdutf_warn_unused
size_t base64_length_from_binary(
3022 size_t length, base64_options options = base64_default)
noexcept;
3045size_t binary_to_base64(
const char *input,
size_t length,
char *output,
3046 base64_options options = base64_default)
noexcept;
3048simdutf_really_inline simdutf_warn_unused
size_t
3049binary_to_base64(
const detail::input_span_of_byte_like
auto &input,
3050 detail::output_span_of_byte_like
auto &&binary_output,
3051 base64_options options = base64_default)
noexcept {
3052 return binary_to_base64(
3053 reinterpret_cast<const char *
>(input.data()), input.size(),
3054 reinterpret_cast<char *
>(binary_output.data()), options);
3058 #if SIMDUTF_ATOMIC_REF
3101atomic_binary_to_base64(
const char *input,
size_t length,
char *output,
3102 base64_options options = base64_default)
noexcept;
3104simdutf_really_inline simdutf_warn_unused
size_t
3105atomic_binary_to_base64(
const detail::input_span_of_byte_like
auto &input,
3106 detail::output_span_of_byte_like
auto &&binary_output,
3107 base64_options options = base64_default)
noexcept {
3108 return atomic_binary_to_base64(
3109 reinterpret_cast<const char *
>(input.data()), input.size(),
3110 reinterpret_cast<char *
>(binary_output.data()), options);
3171simdutf_warn_unused result
3172base64_to_binary(
const char16_t *input,
size_t length,
char *output,
3173 base64_options options = base64_default,
3174 last_chunk_handling_options last_chunk_options =
3175 last_chunk_handling_options::loose)
noexcept;
3177simdutf_really_inline simdutf_warn_unused result base64_to_binary(
3178 std::span<const char16_t> input,
3179 detail::output_span_of_byte_like
auto &&binary_output,
3180 base64_options options = base64_default,
3181 last_chunk_handling_options last_chunk_options = loose)
noexcept {
3182 return base64_to_binary(input.data(), input.size(),
3183 reinterpret_cast<char *
>(binary_output.data()),
3184 options, last_chunk_options);
3198simdutf_warn_unused
bool
3199base64_ignorable(
char input, base64_options options = base64_default)
noexcept;
3200simdutf_warn_unused
bool
3201base64_ignorable(
char16_t input,
3202 base64_options options = base64_default)
noexcept;
3215simdutf_warn_unused
bool
3216base64_valid(
char input, base64_options options = base64_default)
noexcept;
3217simdutf_warn_unused
bool
3218base64_valid(
char16_t input, base64_options options = base64_default)
noexcept;
3229simdutf_warn_unused
bool
3230base64_valid_or_padding(
char input,
3231 base64_options options = base64_default)
noexcept;
3232simdutf_warn_unused
bool
3233base64_valid_or_padding(
char16_t input,
3234 base64_options options = base64_default)
noexcept;
3303simdutf_warn_unused result
3304base64_to_binary_safe(
const char *input,
size_t length,
char *output,
3305 size_t &outlen, base64_options options = base64_default,
3306 last_chunk_handling_options last_chunk_options =
3307 last_chunk_handling_options::loose,
3308 bool decode_up_to_bad_char =
false) noexcept;
3314simdutf_really_inline simdutf_warn_unused std::tuple<result, std::size_t>
3315base64_to_binary_safe(
const detail::input_span_of_byte_like
auto &input,
3316 detail::output_span_of_byte_like
auto &&binary_output,
3317 base64_options options = base64_default,
3318 last_chunk_handling_options last_chunk_options = loose,
3319 bool decode_up_to_bad_char =
false) noexcept {
3320 size_t outlen = binary_output.size();
3321 auto r = base64_to_binary_safe(
3322 reinterpret_cast<const char *
>(input.data()), input.size(),
3323 reinterpret_cast<char *
>(binary_output.data()), outlen, options,
3324 last_chunk_options, decode_up_to_bad_char);
3329simdutf_warn_unused result
3330base64_to_binary_safe(
const char16_t *input,
size_t length,
char *output,
3331 size_t &outlen, base64_options options = base64_default,
3332 last_chunk_handling_options last_chunk_options =
3333 last_chunk_handling_options::loose,
3334 bool decode_up_to_bad_char =
false) noexcept;
3340simdutf_really_inline simdutf_warn_unused std::tuple<result, std::size_t>
3341base64_to_binary_safe(std::span<const char16_t> input,
3342 detail::output_span_of_byte_like
auto &&binary_output,
3343 base64_options options = base64_default,
3344 last_chunk_handling_options last_chunk_options = loose,
3345 bool decode_up_to_bad_char =
false) noexcept {
3346 size_t outlen = binary_output.size();
3347 auto r = base64_to_binary_safe(input.data(), input.size(),
3348 reinterpret_cast<char *
>(binary_output.data()),
3349 outlen, options, last_chunk_options,
3350 decode_up_to_bad_char);
3355 #if SIMDUTF_ATOMIC_REF
3395simdutf_warn_unused result atomic_base64_to_binary_safe(
3396 const char *input,
size_t length,
char *output,
size_t &outlen,
3397 base64_options options = base64_default,
3398 last_chunk_handling_options last_chunk_options =
3399 last_chunk_handling_options::loose,
3400 bool decode_up_to_bad_char =
false) noexcept;
3401simdutf_warn_unused result atomic_base64_to_binary_safe(
3402 const
char16_t *input,
size_t length,
char *output,
size_t &outlen,
3403 base64_options options = base64_default,
3404 last_chunk_handling_options last_chunk_options = loose,
3405 bool decode_up_to_bad_char = false) noexcept;
3411simdutf_really_inline simdutf_warn_unused std::tuple<result, std::size_t>
3412atomic_base64_to_binary_safe(
3413 const detail::input_span_of_byte_like
auto &binary_input,
3414 detail::output_span_of_byte_like
auto &&output,
3415 base64_options options = base64_default,
3416 last_chunk_handling_options last_chunk_options =
3417 last_chunk_handling_options::loose,
3418 bool decode_up_to_bad_char =
false) noexcept {
3419 size_t outlen = output.size();
3420 auto ret = atomic_base64_to_binary_safe(
3421 reinterpret_cast<const char *
>(binary_input.data()), binary_input.size(),
3422 reinterpret_cast<char *
>(output.data()), outlen, options,
3423 last_chunk_options, decode_up_to_bad_char);
3424 return {ret, outlen};
3430simdutf_warn_unused std::tuple<result, std::size_t>
3431atomic_base64_to_binary_safe(
3432 std::span<const char16_t> base64_input,
3433 detail::output_span_of_byte_like
auto &&binary_output,
3434 base64_options options = base64_default,
3435 last_chunk_handling_options last_chunk_options = loose,
3436 bool decode_up_to_bad_char =
false) noexcept {
3437 size_t outlen = binary_output.size();
3438 auto ret = atomic_base64_to_binary_safe(
3439 base64_input.data(), base64_input.size(),
3440 reinterpret_cast<char *
>(binary_output.data()), outlen, options,
3441 last_chunk_options, decode_up_to_bad_char);
3442 return {ret, outlen};
3457simdutf_warn_unused
const char *find(
const char *start,
const char *end,
3458 char character)
noexcept;
3459simdutf_warn_unused
const char16_t *
3460find(
const char16_t *start,
const char16_t *end,
char16_t character)
noexcept;
3481 virtual std::string
name()
const {
return std::string(_name); }
3492 virtual std::string
description()
const {
return std::string(_description); }
3505#if SIMDUTF_FEATURE_DETECT_ENCODING
3513 size_t length)
const noexcept;
3522 size_t length)
const noexcept = 0;
3532 virtual uint32_t required_instruction_sets()
const {
3533 return _required_instruction_sets;
3536#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
3547 size_t len)
const noexcept = 0;
3550#if SIMDUTF_FEATURE_UTF8
3563 simdutf_warn_unused
virtual result
3567#if SIMDUTF_FEATURE_ASCII
3577 simdutf_warn_unused
virtual bool
3592 simdutf_warn_unused
virtual result
3596#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
3611 simdutf_warn_unused
virtual bool
3615#if SIMDUTF_FEATURE_UTF16
3630 simdutf_warn_unused
virtual bool
3649 simdutf_warn_unused
virtual result
3651 size_t len)
const noexcept = 0;
3669 simdutf_warn_unused
virtual result
3671 size_t len)
const noexcept = 0;
3685 char16_t *output)
const noexcept = 0;
3699 char16_t *output)
const noexcept = 0;
3702#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
3715 simdutf_warn_unused
virtual bool
3719#if SIMDUTF_FEATURE_UTF32
3735 simdutf_warn_unused
virtual result
3737 size_t len)
const noexcept = 0;
3740#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
3751 simdutf_warn_unused
virtual size_t
3753 char *utf8_output)
const noexcept = 0;
3756#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
3767 simdutf_warn_unused
virtual size_t
3769 char16_t *utf16_output)
const noexcept = 0;
3781 simdutf_warn_unused
virtual size_t
3783 char16_t *utf16_output)
const noexcept = 0;
3786#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
3797 simdutf_warn_unused
virtual size_t
3799 char32_t *utf32_buffer)
const noexcept = 0;
3802#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
3815 simdutf_warn_unused
virtual size_t
3817 char *latin1_output)
const noexcept = 0;
3835 simdutf_warn_unused
virtual result
3837 char *latin1_output)
const noexcept = 0;
3858 simdutf_warn_unused
virtual size_t
3860 char *latin1_output)
const noexcept = 0;
3863#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
3876 simdutf_warn_unused
virtual size_t
3878 char16_t *utf16_output)
const noexcept = 0;
3892 simdutf_warn_unused
virtual size_t
3894 char16_t *utf16_output)
const noexcept = 0;
3912 const char *input,
size_t length,
3913 char16_t *utf16_output)
const noexcept = 0;
3931 const char *input,
size_t length,
3932 char16_t *utf16_output)
const noexcept = 0;
3935#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
3948 simdutf_warn_unused
virtual size_t
3950 char32_t *utf32_output)
const noexcept = 0;
3966 simdutf_warn_unused
virtual result
3968 char32_t *utf32_output)
const noexcept = 0;
3971#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
3982 simdutf_warn_unused
virtual size_t
3984 char16_t *utf16_buffer)
const noexcept = 0;
3996 simdutf_warn_unused
virtual size_t
3998 char16_t *utf16_buffer)
const noexcept = 0;
4001#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
4012 simdutf_warn_unused
virtual size_t
4014 char32_t *utf32_buffer)
const noexcept = 0;
4017#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
4030 simdutf_warn_unused
virtual size_t
4034#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
4049 simdutf_warn_unused
virtual size_t
4053#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
4070 simdutf_warn_unused
virtual size_t
4072 char *latin1_buffer)
const noexcept = 0;
4090 simdutf_warn_unused
virtual size_t
4092 char *latin1_buffer)
const noexcept = 0;
4113 simdutf_warn_unused
virtual result
4115 char *latin1_buffer)
const noexcept = 0;
4136 simdutf_warn_unused
virtual result
4138 char *latin1_buffer)
const noexcept = 0;
4160 simdutf_warn_unused
virtual size_t
4162 char *latin1_buffer)
const noexcept = 0;
4184 simdutf_warn_unused
virtual size_t
4186 char *latin1_buffer)
const noexcept = 0;
4189#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
4205 simdutf_warn_unused
virtual size_t
4207 char *utf8_buffer)
const noexcept = 0;
4224 simdutf_warn_unused
virtual size_t
4226 char *utf8_buffer)
const noexcept = 0;
4246 simdutf_warn_unused
virtual result
4248 char *utf8_buffer)
const noexcept = 0;
4268 simdutf_warn_unused
virtual result
4270 char *utf8_buffer)
const noexcept = 0;
4286 simdutf_warn_unused
virtual size_t
4288 char *utf8_buffer)
const noexcept = 0;
4304 simdutf_warn_unused
virtual size_t
4306 char *utf8_buffer)
const noexcept = 0;
4309#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
4325 simdutf_warn_unused
virtual size_t
4327 char32_t *utf32_buffer)
const noexcept = 0;
4344 simdutf_warn_unused
virtual size_t
4346 char32_t *utf32_buffer)
const noexcept = 0;
4367 const char16_t *input,
size_t length,
4368 char32_t *utf32_buffer)
const noexcept = 0;
4389 const char16_t *input,
size_t length,
4390 char32_t *utf32_buffer)
const noexcept = 0;
4406 simdutf_warn_unused
virtual size_t
4408 char32_t *utf32_buffer)
const noexcept = 0;
4424 simdutf_warn_unused
virtual size_t
4426 char32_t *utf32_buffer)
const noexcept = 0;
4429#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
4444 simdutf_warn_unused
virtual size_t
4446 size_t length)
const noexcept = 0;
4462 simdutf_warn_unused
virtual size_t
4464 size_t length)
const noexcept = 0;
4467#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
4484 simdutf_warn_unused
virtual size_t
4486 char *latin1_buffer)
const noexcept = 0;
4489#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
4509 simdutf_warn_unused
virtual result
4511 char *latin1_buffer)
const noexcept = 0;
4533 simdutf_warn_unused
virtual size_t
4535 char *latin1_buffer)
const noexcept = 0;
4538#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
4554 simdutf_warn_unused
virtual size_t
4556 char *utf8_buffer)
const noexcept = 0;
4575 simdutf_warn_unused
virtual result
4577 char *utf8_buffer)
const noexcept = 0;
4593 simdutf_warn_unused
virtual size_t
4595 char *utf8_buffer)
const noexcept = 0;
4598#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
4609 simdutf_warn_unused
virtual size_t
4615#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
4631 simdutf_warn_unused
virtual size_t
4633 char16_t *utf16_buffer)
const noexcept = 0;
4650 simdutf_warn_unused
virtual size_t
4652 char16_t *utf16_buffer)
const noexcept = 0;
4673 const char32_t *input,
size_t length,
4674 char16_t *utf16_buffer)
const noexcept = 0;
4695 const char32_t *input,
size_t length,
4696 char16_t *utf16_buffer)
const noexcept = 0;
4712 simdutf_warn_unused
virtual size_t
4714 char16_t *utf16_buffer)
const noexcept = 0;
4730 simdutf_warn_unused
virtual size_t
4732 char16_t *utf16_buffer)
const noexcept = 0;
4735#if SIMDUTF_FEATURE_UTF16
4751 char16_t *output)
const noexcept = 0;
4754#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
4763 simdutf_warn_unused
virtual size_t
4767#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
4780 simdutf_warn_unused
virtual size_t
4782 size_t length)
const noexcept = 0;
4785#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
4797 simdutf_warn_unused
virtual size_t
4803#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
4815 simdutf_warn_unused
virtual size_t
4819#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
4835 simdutf_warn_unused
virtual size_t
4841#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
4854 simdutf_warn_unused
virtual size_t
4856 size_t length)
const noexcept = 0;
4859#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
4868 simdutf_warn_unused
virtual size_t
4874#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
4892 simdutf_warn_unused
virtual size_t
4894 size_t length)
const noexcept = 0;
4913 simdutf_warn_unused
virtual size_t
4915 size_t length)
const noexcept = 0;
4918#if SIMDUTF_FEATURE_UTF16
4934 simdutf_warn_unused
virtual size_t
4952 simdutf_warn_unused
virtual size_t
4956#if SIMDUTF_FEATURE_UTF8
4969 simdutf_warn_unused
virtual size_t
4973#if SIMDUTF_FEATURE_BASE64
4985 const char *input,
size_t length)
const noexcept;
4999 const char16_t *input,
size_t length)
const noexcept;
5033 simdutf_warn_unused
virtual result
5035 base64_options options = base64_default,
5036 last_chunk_handling_options last_chunk_options =
5037 last_chunk_handling_options::loose)
const noexcept = 0;
5071 const char *input,
size_t length,
char *output,
5072 base64_options options = base64_default,
5073 last_chunk_handling_options last_chunk_options =
5074 last_chunk_handling_options::loose)
const noexcept = 0;
5108 simdutf_warn_unused
virtual result
5110 base64_options options = base64_default,
5111 last_chunk_handling_options last_chunk_options =
5112 last_chunk_handling_options::loose)
const noexcept = 0;
5146 const char16_t *input,
size_t length,
char *output,
5147 base64_options options = base64_default,
5148 last_chunk_handling_options last_chunk_options =
5149 last_chunk_handling_options::loose)
const noexcept = 0;
5159 size_t length, base64_options options = base64_default)
const noexcept;
5184 base64_options options = base64_default)
const noexcept = 0;
5195 virtual const char *
find(
const char *start,
const char *end,
5196 char character)
const noexcept = 0;
5197 virtual const char16_t *
find(
const char16_t *start,
const char16_t *end,
5198 char16_t character)
const noexcept = 0;
5201#ifdef SIMDUTF_INTERNAL_TESTS
5210 struct TestProcedure {
5218 virtual std::vector<TestProcedure> internal_tests()
const;
5226 uint32_t required_instruction_sets)
5228 _required_instruction_sets(required_instruction_sets) {}
5231 ~implementation() =
default;
5242 const char *_description;
5247 const uint32_t _required_instruction_sets;
5256class available_implementation_list {
5259 simdutf_really_inline available_implementation_list() {}
5261 size_t size() const noexcept;
5263 const implementation *const *begin() const noexcept;
5265 const implementation *const *end() const noexcept;
5280 const implementation *operator[](const std::
string &name) const noexcept {
5281 for (
const implementation *impl : *this) {
5282 if (impl->name() == name) {
5302 const implementation *detect_best_supported() const noexcept;
5305template <typename T> class atomic_ptr {
5307 atomic_ptr(T *_ptr) : ptr{_ptr} {}
5309#if defined(SIMDUTF_NO_THREADS)
5310 operator const T *()
const {
return ptr; }
5311 const T &operator*()
const {
return *ptr; }
5312 const T *operator->()
const {
return ptr; }
5314 operator T *() {
return ptr; }
5315 T &operator*() {
return *ptr; }
5316 T *operator->() {
return ptr; }
5317 atomic_ptr &operator=(T *_ptr) {
5323 operator const T *()
const {
return ptr.load(); }
5324 const T &operator*()
const {
return *ptr; }
5325 const T *operator->()
const {
return ptr.load(); }
5327 operator T *() {
return ptr.load(); }
5328 T &operator*() {
return *ptr; }
5329 T *operator->() {
return ptr.load(); }
5330 atomic_ptr &operator=(T *_ptr) {
5338#if defined(SIMDUTF_NO_THREADS)
5341 std::atomic<T *> ptr;
5345class detect_best_supported_implementation_on_first_use;
5352extern SIMDUTF_DLLIMPORTEXPORT
const internal::available_implementation_list &
5353get_available_implementations();
5361extern SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation> &
5362get_active_implementation();
An implementation of simdutf for a particular CPU architecture.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-32 string into Latin1 string.
virtual const char * find(const char *start, const char *end, char character) const noexcept=0
Find the first occurrence of a character in a string.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-16LE string.
virtual simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept
Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
virtual simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16BE string would require in UTF-32 format.
virtual simdutf_warn_unused size_t convert_utf8_to_utf16be(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16BE string.
virtual simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-16BE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-16BE string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_latin1_to_utf16le(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert Latin1 string into UTF-16LE string.
virtual simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16BE string.
virtual simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into Latin1 string.
virtual simdutf_warn_unused full_result base64_to_binary_details(const char *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output while returning more details than base64_to_binary.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-16LE string into UTF-32 string.
virtual simdutf_warn_unused size_t latin1_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-8 string would require in Latin1 format.
virtual simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-32 string into Latin1 string and stop on error.
virtual simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
virtual simdutf_warn_unused size_t convert_latin1_to_utf16be(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert Latin1 string into UTF-16BE string.
virtual simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept
Compute the number of bytes that this UTF-32 string would require in Latin1 format.
virtual simdutf_warn_unused full_result base64_to_binary_details(const char16_t *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output while returning more details than base64_to_binary.
virtual simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16LE string.
virtual simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept=0
Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
virtual simdutf_warn_unused size_t count_utf8(const char *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-16BE string.
virtual simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of 4-byte code units that this UTF-8 string would require in UTF-32 format.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-16LE string into UTF-8 string.
virtual void to_well_formed_utf16le(const char16_t *input, size_t len, char16_t *output) const noexcept=0
Copies the UTF-16LE string while replacing mismatched surrogates with the Unicode replacement charact...
virtual simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16BE string and stop on error.
virtual simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept=0
Validate the UTF-8 string and stop on errors.
virtual simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
virtual simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf8_to_utf16le(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16LE string.
virtual simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16LE string and stop on error.
virtual simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-8 string and stop on error.
virtual size_t binary_to_base64(const char *input, size_t length, char *output, base64_options options=base64_default) const noexcept=0
Convert a binary input to a base64 output.
virtual simdutf_warn_unused size_t convert_latin1_to_utf32(const char *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert Latin1 string into UTF-32 string.
virtual simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char *input, size_t length, char32_t *utf32_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-8 string.
virtual simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept
Return the number of 4-byte code units that this Latin1 string would require in UTF-32 format.
virtual simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16LE string. This function may be best when you expect the input to be almost always ...
virtual simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16LE string and stop on error.
virtual simdutf_warn_unused size_t count_utf16le(const char16_t *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-32 string into Latin1 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-16BE string into Latin1 string.
bool supported_by_runtime_system() const
The instruction sets this implementation is compiled against and the current CPU match.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16BE string and stop on error.
virtual simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-8 string and stop on error.
virtual simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-32 string and stop on error.
virtual simdutf_warn_unused result base64_to_binary(const char *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output.
virtual simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16BE string.
virtual simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept=0
Validate the UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16LE string would require in UTF-32 format.
virtual void change_endianness_utf16(const char16_t *input, size_t length, char16_t *output) const noexcept=0
Change the endianness of the input.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-16BE string.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert valid UTF-8 string into latin1 string.
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char *input, size_t length) const noexcept
Provide the maximal binary length in bytes given the base64 input.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-16LE string.
virtual std::string name() const
The name of this implementation.
virtual simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept=0
Validate the ASCII string and stop on error.
virtual simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept=0
Validate the UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf8_to_utf32(const char *input, size_t length, char32_t *utf32_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-32 string.
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char16_t *input, size_t length) const noexcept
Provide the maximal binary length in bytes given the base64 input.
virtual simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept=0
Validate the UTF-32 string.
virtual simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-8 string.
virtual simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
virtual simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into Latin1 string.
virtual simdutf_warn_unused size_t convert_utf8_to_latin1(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert possibly broken UTF-8 string into latin1 string.
virtual simdutf_warn_unused result base64_to_binary(const char16_t *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output.
simdutf_warn_unused size_t base64_length_from_binary(size_t length, base64_options options=base64_default) const noexcept
Provide the base64 length in bytes given the length of a binary input.
virtual int detect_encodings(const char *input, size_t length) const noexcept=0
This function will try to detect the possible encodings in one pass.
virtual simdutf_warn_unused size_t count_utf16be(const char16_t *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-8 string and stop on error.
virtual void to_well_formed_utf16be(const char16_t *input, size_t len, char16_t *output) const noexcept=0
Copies the UTF-16BE string while replacing mismatched surrogates with the Unicode replacement charact...
virtual simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept=0
Validate the ASCII string.
virtual simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
virtual simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16BE string and stop on error.
virtual simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert possibly broken UTF-8 string into latin1 string with errors.
virtual std::string description() const
The description of this implementation.
virtual simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept
Return the number of 2-byte code units that this Latin1 string would require in UTF-16 format.
virtual simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16LE string and stop on error.
virtual simdutf_warn_unused size_t convert_latin1_to_utf8(const char *input, size_t length, char *utf8_output) const noexcept=0
Convert Latin1 string into UTF-8 string.
virtual encoding_type autodetect_encoding(const char *input, size_t length) const noexcept
This function will try to detect the encoding.
virtual simdutf_warn_unused size_t utf8_length_from_latin1(const char *input, size_t length) const noexcept=0
Return the number of bytes that this Latin1 string would require in UTF-8 format.