1#ifndef SIMDUTF_IMPLEMENTATION_H
2#define SIMDUTF_IMPLEMENTATION_H
3#if !defined(SIMDUTF_NO_THREADS)
6#ifdef SIMDUTF_INTERNAL_TESTS
9#include "simdutf/common_defs.h"
10#include "simdutf/compiler_check.h"
11#include "simdutf/encoding_types.h"
12#include "simdutf/error.h"
13#include "simdutf/internal/isadetection.h"
18 #include <type_traits>
32#ifndef SIMDUTF_FEATURE_DETECT_ENCODING
33 #define SIMDUTF_FEATURE_DETECT_ENCODING 1
35#ifndef SIMDUTF_FEATURE_ASCII
36 #define SIMDUTF_FEATURE_ASCII 1
38#ifndef SIMDUTF_FEATURE_LATIN1
39 #define SIMDUTF_FEATURE_LATIN1 1
41#ifndef SIMDUTF_FEATURE_UTF8
42 #define SIMDUTF_FEATURE_UTF8 1
44#ifndef SIMDUTF_FEATURE_UTF16
45 #define SIMDUTF_FEATURE_UTF16 1
47#ifndef SIMDUTF_FEATURE_UTF32
48 #define SIMDUTF_FEATURE_UTF32 1
50#ifndef SIMDUTF_FEATURE_BASE64
51 #define SIMDUTF_FEATURE_BASE64 1
54#if SIMDUTF_CPLUSPLUS23
55 #include <simdutf/constexpr_ptr.h>
67concept byte_like = std::is_same_v<T, std::byte> ||
68 std::is_same_v<T, char> ||
69 std::is_same_v<T, signed char> ||
70 std::is_same_v<T, unsigned char> ||
71 std::is_same_v<T, char8_t>;
74concept is_byte_like = byte_like<std::remove_cvref_t<T>>;
77concept is_pointer = std::is_pointer_v<T>;
85concept input_span_of_byte_like =
requires(
const T &t) {
86 { t.size() }
noexcept -> std::convertible_to<std::size_t>;
87 { t.data() }
noexcept -> is_pointer;
88 { *t.data() }
noexcept -> is_byte_like;
92concept is_mutable = !std::is_const_v<std::remove_reference_t<T>>;
98concept output_span_of_byte_like =
requires(T &t) {
99 { t.size() }
noexcept -> std::convertible_to<std::size_t>;
100 { t.data() }
noexcept -> is_pointer;
101 { *t.data() }
noexcept -> is_byte_like;
102 { *t.data() }
noexcept -> is_mutable;
110template <
class InputPtr>
111concept indexes_into_byte_like =
requires(InputPtr p) {
112 { std::decay_t<
decltype(p[0])>{} } -> simdutf::detail::byte_like;
114template <
class InputPtr>
115concept indexes_into_utf16 =
requires(InputPtr p) {
116 { std::decay_t<
decltype(p[0])>{} } -> std::same_as<char16_t>;
118template <
class InputPtr>
119concept indexes_into_utf32 =
requires(InputPtr p) {
120 { std::decay_t<
decltype(p[0])>{} } -> std::same_as<char32_t>;
123template <
class InputPtr>
124concept index_assignable_from_char =
requires(InputPtr p,
char s) {
132template <
class InputPtr>
133concept indexes_into_uint32 =
requires(InputPtr p) {
134 { std::decay_t<
decltype(p[0])>{} } -> std::same_as<std::uint32_t>;
142#include <simdutf/scalar/swap_bytes.h>
143#include <simdutf/scalar/ascii.h>
144#include <simdutf/scalar/atomic_util.h>
145#include <simdutf/scalar/latin1.h>
146#include <simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h>
147#include <simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h>
148#include <simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h>
149#include <simdutf/scalar/utf16.h>
150#include <simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h>
151#include <simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h>
152#include <simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h>
153#include <simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h>
154#include <simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h>
155#include <simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h>
156#include <simdutf/scalar/utf32.h>
157#include <simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h>
158#include <simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h>
159#include <simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h>
160#include <simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h>
161#include <simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h>
162#include <simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h>
163#include <simdutf/scalar/utf8.h>
164#include <simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h>
165#include <simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h>
166#include <simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h>
167#include <simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h>
168#include <simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h>
169#include <simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h>
173constexpr size_t default_line_length =
176#if SIMDUTF_FEATURE_DETECT_ENCODING
// Declaration only; the body is not part of this header section.
// Takes a const char pointer plus a length and returns a
// simdutf::encoding_type, noexcept. The uint8_t overload that follows casts
// its pointer to const char * and forwards here with the same length.
// NOTE(review): presumably reports the most likely encoding of the buffer --
// confirm against the implementation.
187simdutf_warn_unused simdutf::encoding_type
188autodetect_encoding(
const char *input,
size_t length)
noexcept;
189simdutf_really_inline simdutf_warn_unused simdutf::encoding_type
190autodetect_encoding(
const uint8_t *input,
size_t length)
noexcept {
191 return autodetect_encoding(
reinterpret_cast<const char *
>(input), length);
205simdutf_really_inline simdutf_warn_unused simdutf::encoding_type
207 const detail::input_span_of_byte_like
auto &input)
noexcept {
208 return autodetect_encoding(
reinterpret_cast<const char *
>(input.data()),
// Declaration only; the body is not part of this header section.
// Takes a const char pointer plus a length and returns an int, noexcept. The
// uint8_t overload that follows casts its pointer to const char * and
// forwards here with the same length.
// NOTE(review): the meaning of the int is not visible here; it is presumably
// a combination of encoding_type flags -- confirm against the implementation.
224simdutf_warn_unused
int detect_encodings(
const char *input,
225 size_t length)
noexcept;
226simdutf_really_inline simdutf_warn_unused
int
227detect_encodings(
const uint8_t *input,
size_t length)
noexcept {
228 return detect_encodings(
reinterpret_cast<const char *
>(input), length);
231simdutf_really_inline simdutf_warn_unused
int
232detect_encodings(
const detail::input_span_of_byte_like
auto &input)
noexcept {
233 return detect_encodings(
reinterpret_cast<const char *
>(input.data()),
239#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
// Declaration only; the body is not part of this header section.
// Takes a const char pointer plus a length and returns bool, noexcept. The
// byte-like span overload that follows casts input.data() to const char *
// before calling this function.
// NOTE(review): presumably true means the len bytes at buf are valid UTF-8 --
// confirm against the implementation.
251simdutf_warn_unused
bool validate_utf8(
const char *buf,
size_t len)
noexcept;
253simdutf_constexpr23 simdutf_really_inline simdutf_warn_unused
bool
254validate_utf8(
const detail::input_span_of_byte_like
auto &input)
noexcept {
255 #if SIMDUTF_CPLUSPLUS23
257 return scalar::utf8::validate(
258 detail::constexpr_cast_ptr<uint8_t>(input.data()), input.size());
262 return validate_utf8(
reinterpret_cast<const char *
>(input.data()),
269#if SIMDUTF_FEATURE_UTF8
// Declaration only; the body is not part of this header section.
// Takes a const char pointer plus a length and returns a `result` (type
// declared outside this section), noexcept. The byte-like span overload that
// follows forwards input.data() (cast to const char *) and input.size().
// NOTE(review): presumably the result carries an error code and a position --
// confirm against the definition of `result`.
282simdutf_warn_unused result validate_utf8_with_errors(
const char *buf,
283 size_t len)
noexcept;
285simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused result
286validate_utf8_with_errors(
287 const detail::input_span_of_byte_like
auto &input)
noexcept {
288 #if SIMDUTF_CPLUSPLUS23
290 return scalar::utf8::validate_with_errors(
291 detail::constexpr_cast_ptr<uint8_t>(input.data()), input.size());
295 return validate_utf8_with_errors(
296 reinterpret_cast<const char *
>(input.data()), input.size());
302#if SIMDUTF_FEATURE_ASCII
// Declaration only; the body is not part of this header section.
// Takes a const char pointer plus a length and returns bool, noexcept. The
// byte-like span overload that follows casts input.data() to const char *
// before calling this function.
// NOTE(review): presumably true means every byte is ASCII -- confirm against
// the implementation.
312simdutf_warn_unused
bool validate_ascii(
const char *buf,
size_t len)
noexcept;
314simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
bool
315validate_ascii(
const detail::input_span_of_byte_like
auto &input)
noexcept {
316 #if SIMDUTF_CPLUSPLUS23
318 return scalar::ascii::validate(
319 detail::constexpr_cast_ptr<std::uint8_t>(input.data()), input.size());
323 return validate_ascii(
reinterpret_cast<const char *
>(input.data()),
// Declaration only; the body is not part of this header section.
// Takes a const char pointer plus a length and returns a `result` (type
// declared outside this section), noexcept. The byte-like span overload that
// follows forwards input.data() (cast to const char *) and input.size().
// NOTE(review): presumably the result identifies the first non-ASCII byte on
// failure -- confirm against the implementation.
342simdutf_warn_unused result validate_ascii_with_errors(
const char *buf,
343 size_t len)
noexcept;
345simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
346validate_ascii_with_errors(
347 const detail::input_span_of_byte_like
auto &input)
noexcept {
348 #if SIMDUTF_CPLUSPLUS23
350 return scalar::ascii::validate_with_errors(
351 detail::constexpr_cast_ptr<std::uint8_t>(input.data()), input.size());
355 return validate_ascii_with_errors(
356 reinterpret_cast<const char *
>(input.data()), input.size());
362#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
// Declaration only; the body is not part of this header section.
// Takes a const char16_t pointer plus a length and returns bool, noexcept.
// The std::span<const char16_t> overload that follows forwards input.data()
// and input.size(), so len is a count of char16_t elements, not bytes. That
// overload's SIMDUTF_CPLUSPLUS23 path uses endianness::NATIVE.
// NOTE(review): presumably true means every code unit is in the ASCII range
// -- confirm against the implementation.
374simdutf_warn_unused
bool validate_utf16_as_ascii(
const char16_t *buf,
375 size_t len)
noexcept;
377simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
bool
378validate_utf16_as_ascii(std::span<const char16_t> input)
noexcept {
379 #if SIMDUTF_CPLUSPLUS23
381 return scalar::utf16::validate_as_ascii<endianness::NATIVE>(input.data(),
386 return validate_utf16_as_ascii(input.data(), input.size());
// Declaration only; the body is not part of this header section.
// Takes a const char16_t pointer plus a length and returns bool, noexcept.
// The std::span<const char16_t> overload that follows forwards input.data()
// and input.size(), so len is a count of char16_t elements, not bytes. That
// overload's SIMDUTF_CPLUSPLUS23 path uses endianness::BIG.
// NOTE(review): presumably true means every big-endian code unit is in the
// ASCII range -- confirm against the implementation.
402simdutf_warn_unused
bool validate_utf16be_as_ascii(
const char16_t *buf,
403 size_t len)
noexcept;
405simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
bool
406validate_utf16be_as_ascii(std::span<const char16_t> input)
noexcept {
407 #if SIMDUTF_CPLUSPLUS23
409 return scalar::utf16::validate_as_ascii<endianness::BIG>(input.data(),
414 return validate_utf16be_as_ascii(input.data(), input.size());
// Declaration only; the body is not part of this header section.
// Takes a const char16_t pointer plus a length and returns bool, noexcept.
// The std::span<const char16_t> overload that follows forwards input.data()
// and input.size(), so len is a count of char16_t elements, not bytes. That
// overload's SIMDUTF_CPLUSPLUS23 path uses endianness::LITTLE.
// NOTE(review): presumably true means every little-endian code unit is in the
// ASCII range -- confirm against the implementation.
430simdutf_warn_unused
bool validate_utf16le_as_ascii(
const char16_t *buf,
431 size_t len)
noexcept;
433simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
bool
434validate_utf16le_as_ascii(std::span<const char16_t> input)
noexcept {
435 #if SIMDUTF_CPLUSPLUS23
437 return scalar::utf16::validate_as_ascii<endianness::LITTLE>(input.data(),
442 return validate_utf16le_as_ascii(input.data(), input.size());
448#if SIMDUTF_FEATURE_UTF16
// Declaration only; the body is not part of this header section.
// Takes a const char16_t pointer plus a length and returns bool, noexcept.
// The std::span<const char16_t> overload that follows forwards input.data()
// and input.size(), so len is a count of char16_t elements, not bytes. That
// overload's SIMDUTF_CPLUSPLUS23 path uses endianness::NATIVE.
// NOTE(review): presumably true means the buffer is valid UTF-16 -- confirm
// against the implementation.
463simdutf_warn_unused
bool validate_utf16(
const char16_t *buf,
464 size_t len)
noexcept;
466simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
bool
467validate_utf16(std::span<const char16_t> input)
noexcept {
468 #if SIMDUTF_CPLUSPLUS23
470 return scalar::utf16::validate<endianness::NATIVE>(input.data(),
475 return validate_utf16(input.data(), input.size());
481#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
// Declaration only; the body is not part of this header section.
// Takes a const char16_t pointer plus a length and returns bool, noexcept.
// The std::span<const char16_t> overload that follows forwards input.data()
// and input.size(), so len is a count of char16_t elements, not bytes. That
// overload's SIMDUTF_CPLUSPLUS23 path uses endianness::LITTLE.
// NOTE(review): presumably true means the buffer is valid UTF-16LE -- confirm
// against the implementation.
496simdutf_warn_unused
bool validate_utf16le(
const char16_t *buf,
497 size_t len)
noexcept;
499simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused
bool
500validate_utf16le(std::span<const char16_t> input)
noexcept {
501 #if SIMDUTF_CPLUSPLUS23
503 return scalar::utf16::validate<endianness::LITTLE>(input.data(),
508 return validate_utf16le(input.data(), input.size());
514#if SIMDUTF_FEATURE_UTF16
// Declaration only; the body is not part of this header section.
// Takes a const char16_t pointer plus a length and returns bool, noexcept.
// The std::span<const char16_t> overload that follows forwards input.data()
// and input.size(), so len is a count of char16_t elements, not bytes. That
// overload's SIMDUTF_CPLUSPLUS23 path uses endianness::BIG.
// NOTE(review): presumably true means the buffer is valid UTF-16BE -- confirm
// against the implementation.
529simdutf_warn_unused
bool validate_utf16be(
const char16_t *buf,
530 size_t len)
noexcept;
532simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
bool
533validate_utf16be(std::span<const char16_t> input)
noexcept {
534 #if SIMDUTF_CPLUSPLUS23
536 return scalar::utf16::validate<endianness::BIG>(input.data(), input.size());
540 return validate_utf16be(input.data(), input.size());
// Declaration only; the body is not part of this header section.
// Takes a const char16_t pointer plus a length and returns a `result` (type
// declared outside this section), noexcept. The std::span<const char16_t>
// overload that follows forwards input.data() and input.size(), so len is a
// count of char16_t elements, not bytes; its SIMDUTF_CPLUSPLUS23 path uses
// endianness::NATIVE.
// NOTE(review): presumably the result reports where validation failed --
// confirm against the definition of `result`.
562simdutf_warn_unused result validate_utf16_with_errors(
const char16_t *buf,
563 size_t len)
noexcept;
565simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
566validate_utf16_with_errors(std::span<const char16_t> input)
noexcept {
567 #if SIMDUTF_CPLUSPLUS23
569 return scalar::utf16::validate_with_errors<endianness::NATIVE>(
570 input.data(), input.size());
574 return validate_utf16_with_errors(input.data(), input.size());
// Declaration only; the body is not part of this header section.
// Takes a const char16_t pointer plus a length and returns a `result` (type
// declared outside this section), noexcept. The std::span<const char16_t>
// overload that follows forwards input.data() and input.size(), so len is a
// count of char16_t elements, not bytes; its SIMDUTF_CPLUSPLUS23 path uses
// endianness::LITTLE.
// NOTE(review): presumably the result reports where validation failed --
// confirm against the definition of `result`.
595simdutf_warn_unused result validate_utf16le_with_errors(
const char16_t *buf,
596 size_t len)
noexcept;
598simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
599validate_utf16le_with_errors(std::span<const char16_t> input)
noexcept {
600 #if SIMDUTF_CPLUSPLUS23
602 return scalar::utf16::validate_with_errors<endianness::LITTLE>(
603 input.data(), input.size());
607 return validate_utf16le_with_errors(input.data(), input.size());
// Declaration only; the body is not part of this header section.
// Takes a const char16_t pointer plus a length and returns a `result` (type
// declared outside this section), noexcept. The std::span<const char16_t>
// overload that follows forwards input.data() and input.size(), so len is a
// count of char16_t elements, not bytes; its SIMDUTF_CPLUSPLUS23 path uses
// endianness::BIG.
// NOTE(review): presumably the result reports where validation failed --
// confirm against the definition of `result`.
628simdutf_warn_unused result validate_utf16be_with_errors(
const char16_t *buf,
629 size_t len)
noexcept;
631simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
632validate_utf16be_with_errors(std::span<const char16_t> input)
noexcept {
633 #if SIMDUTF_CPLUSPLUS23
635 return scalar::utf16::validate_with_errors<endianness::BIG>(input.data(),
640 return validate_utf16be_with_errors(input.data(), input.size());
// Declaration only; the body is not part of this header section.
// Reads from a const char16_t buffer of the given length and writes through
// the non-const output pointer; returns nothing, noexcept. The span overload
// that follows forwards input.data(), input.size() and output.data(). Only
// the output pointer is passed, so no output capacity reaches this function.
// NOTE(review): presumably output must hold at least len char16_t elements,
// and ill-formed sequences are replaced -- confirm against the implementation.
657void to_well_formed_utf16le(
const char16_t *input,
size_t len,
658 char16_t *output)
noexcept;
660simdutf_really_inline simdutf_constexpr23
void
661to_well_formed_utf16le(std::span<const char16_t> input,
662 std::span<char16_t> output)
noexcept {
663 #if SIMDUTF_CPLUSPLUS23
665 scalar::utf16::to_well_formed_utf16<endianness::LITTLE>(
666 input.data(), input.size(), output.data());
670 to_well_formed_utf16le(input.data(), input.size(), output.data());
// Declaration only; the body is not part of this header section.
// Reads from a const char16_t buffer of the given length and writes through
// the non-const output pointer; returns nothing, noexcept. The span overload
// that follows forwards input.data(), input.size() and output.data(). Only
// the output pointer is passed, so no output capacity reaches this function.
// NOTE(review): presumably output must hold at least len char16_t elements,
// and ill-formed sequences are replaced -- confirm against the implementation.
687void to_well_formed_utf16be(
const char16_t *input,
size_t len,
688 char16_t *output)
noexcept;
690simdutf_really_inline simdutf_constexpr23
void
691to_well_formed_utf16be(std::span<const char16_t> input,
692 std::span<char16_t> output)
noexcept {
693 #if SIMDUTF_CPLUSPLUS23
695 scalar::utf16::to_well_formed_utf16<endianness::BIG>(
696 input.data(), input.size(), output.data());
700 to_well_formed_utf16be(input.data(), input.size(), output.data());
// Declaration only; the body is not part of this header section.
// Reads from a const char16_t buffer of the given length and writes through
// the non-const output pointer; returns nothing, noexcept. The span overload
// that follows forwards input.data(), input.size() and output.data(), and its
// SIMDUTF_CPLUSPLUS23 path uses endianness::NATIVE. Only the output pointer
// is passed, so no output capacity reaches this function.
// NOTE(review): presumably output must hold at least len char16_t elements --
// confirm against the implementation.
717void to_well_formed_utf16(
const char16_t *input,
size_t len,
718 char16_t *output)
noexcept;
720simdutf_really_inline simdutf_constexpr23
void
721to_well_formed_utf16(std::span<const char16_t> input,
722 std::span<char16_t> output)
noexcept {
723 #if SIMDUTF_CPLUSPLUS23
725 scalar::utf16::to_well_formed_utf16<endianness::NATIVE>(
726 input.data(), input.size(), output.data());
730 to_well_formed_utf16(input.data(), input.size(), output.data());
737#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
// Declaration only; the body is not part of this header section.
// Takes a const char32_t pointer plus a length and returns bool, noexcept.
// The std::span<const char32_t> overload that follows forwards input.data()
// and input.size(), so len is a count of char32_t elements, not bytes.
// NOTE(review): presumably true means the buffer is valid UTF-32 -- confirm
// against the implementation.
752simdutf_warn_unused
bool validate_utf32(
const char32_t *buf,
753 size_t len)
noexcept;
755simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
bool
756validate_utf32(std::span<const char32_t> input)
noexcept {
757 #if SIMDUTF_CPLUSPLUS23
759 return scalar::utf32::validate(
760 detail::constexpr_cast_ptr<std::uint32_t>(input.data()), input.size());
764 return validate_utf32(input.data(), input.size());
770#if SIMDUTF_FEATURE_UTF32
// Declaration only; the body is not part of this header section.
// Takes a const char32_t pointer plus a length and returns a `result` (type
// declared outside this section), noexcept. The std::span<const char32_t>
// overload that follows forwards input.data() and input.size(), so len is a
// count of char32_t elements, not bytes.
// NOTE(review): presumably the result reports where validation failed --
// confirm against the definition of `result`.
787simdutf_warn_unused result validate_utf32_with_errors(
const char32_t *buf,
788 size_t len)
noexcept;
790simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
791validate_utf32_with_errors(std::span<const char32_t> input)
noexcept {
792 #if SIMDUTF_CPLUSPLUS23
794 return scalar::utf32::validate_with_errors(
795 detail::constexpr_cast_ptr<std::uint32_t>(input.data()), input.size());
799 return validate_utf32_with_errors(input.data(), input.size());
805#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
816simdutf_warn_unused
size_t convert_latin1_to_utf8(
const char *input,
818 char *utf8_output)
noexcept;
820simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
821convert_latin1_to_utf8(
822 const detail::input_span_of_byte_like
auto &latin1_input,
823 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
824 #if SIMDUTF_CPLUSPLUS23
826 return scalar::latin1_to_utf8::convert(
827 detail::constexpr_cast_ptr<char>(latin1_input.data()),
829 detail::constexpr_cast_writeptr<char>(utf8_output.data()));
833 return convert_latin1_to_utf8(
834 reinterpret_cast<const char *
>(latin1_input.data()),
835 latin1_input.size(),
reinterpret_cast<char *
>(utf8_output.data()));
// Declaration only; the body is not part of this header section.
// Takes an input pointer with its length and an output pointer with its own
// length (utf8_len); returns size_t, noexcept. The span overload that follows
// forwards input.data()/input.size() and utf8_output.data()/
// utf8_output.size(), so this function receives the output capacity too.
// NOTE(review): presumably the return value is the number of bytes written
// and writing stops at utf8_len -- confirm against the implementation.
853simdutf_warn_unused
size_t
854convert_latin1_to_utf8_safe(
const char *input,
size_t length,
char *utf8_output,
855 size_t utf8_len)
noexcept;
857simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
858convert_latin1_to_utf8_safe(
859 const detail::input_span_of_byte_like
auto &input,
860 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
867 #if SIMDUTF_CPLUSPLUS23
869 return scalar::latin1_to_utf8::convert_safe_constexpr(
870 input.data(), input.size(), utf8_output.data(), utf8_output.size());
874 return convert_latin1_to_utf8_safe(
875 reinterpret_cast<const char *
>(input.data()), input.size(),
876 reinterpret_cast<char *
>(utf8_output.data()), utf8_output.size());
882#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
// Declaration only; the body is not part of this header section.
// Takes a const char input pointer, a length and a char16_t output pointer;
// returns size_t, noexcept. The span overload that follows forwards
// latin1_input.data() (cast to const char *), latin1_input.size() and
// utf16_output.data(). No output capacity is passed.
// NOTE(review): presumably returns the number of char16_t written and the
// caller must size the output buffer -- confirm against the implementation.
893simdutf_warn_unused
size_t convert_latin1_to_utf16le(
894 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
896simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
897convert_latin1_to_utf16le(
898 const detail::input_span_of_byte_like
auto &latin1_input,
899 std::span<char16_t> utf16_output)
noexcept {
900 #if SIMDUTF_CPLUSPLUS23
902 return scalar::latin1_to_utf16::convert<endianness::LITTLE>(
903 latin1_input.data(), latin1_input.size(), utf16_output.data());
907 return convert_latin1_to_utf16le(
908 reinterpret_cast<const char *
>(latin1_input.data()),
909 latin1_input.size(), utf16_output.data());
// Declaration only; the body is not part of this header section.
// Takes a const char input pointer, a length and a char16_t output pointer;
// returns size_t, noexcept. The signature has no output-capacity parameter.
// NOTE(review): presumably returns the number of char16_t written and the
// caller must size the output buffer -- confirm against the implementation.
924simdutf_warn_unused
size_t convert_latin1_to_utf16be(
925 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
927simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
928convert_latin1_to_utf16be(
const detail::input_span_of_byte_like
auto &input,
929 std::span<char16_t> output)
noexcept {
930 #if SIMDUTF_CPLUSPLUS23
932 return scalar::latin1_to_utf16::convert<endianness::BIG>(
933 input.data(), input.size(), output.data());
937 return convert_latin1_to_utf16be(
938 reinterpret_cast<const char *
>(input.data()), input.size(),
951simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
952latin1_length_from_utf16(
size_t length)
noexcept {
964simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
965utf16_length_from_latin1(
size_t length)
noexcept {
970#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
// Declaration only; the body is not part of this header section.
// Takes a const char input pointer, a length and a char32_t output pointer;
// returns size_t, noexcept. The span overload that follows forwards
// latin1_input.data() (cast to const char *), latin1_input.size() and
// utf32_output.data(). No output capacity is passed.
// NOTE(review): presumably returns the number of char32_t written and the
// caller must size the output buffer -- confirm against the implementation.
981simdutf_warn_unused
size_t convert_latin1_to_utf32(
982 const char *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
984simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
985convert_latin1_to_utf32(
986 const detail::input_span_of_byte_like
auto &latin1_input,
987 std::span<char32_t> utf32_output)
noexcept {
988 #if SIMDUTF_CPLUSPLUS23
990 return scalar::latin1_to_utf32::convert(
991 latin1_input.data(), latin1_input.size(), utf32_output.data());
995 return convert_latin1_to_utf32(
996 reinterpret_cast<const char *
>(latin1_input.data()),
997 latin1_input.size(), utf32_output.data());
1003#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1016simdutf_warn_unused
size_t convert_utf8_to_latin1(
const char *input,
1018 char *latin1_output)
noexcept;
1020simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1021convert_utf8_to_latin1(
1022 const detail::input_span_of_byte_like
auto &input,
1023 detail::output_span_of_byte_like
auto &&output)
noexcept {
1024 #if SIMDUTF_CPLUSPLUS23
1026 return scalar::utf8_to_latin1::convert(input.data(), input.size(),
1031 return convert_utf8_to_latin1(
reinterpret_cast<const char *
>(input.data()),
1033 reinterpret_cast<char *
>(output.data()));
1039#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// Declaration only; the body is not part of this header section.
// Takes a const char input pointer, a length and a char16_t output pointer;
// returns size_t, noexcept. The span overload that follows forwards
// input.data() (cast to const char *), input.size() and output.data(), and
// its SIMDUTF_CPLUSPLUS23 path uses endianness::NATIVE. No output capacity is
// passed.
// NOTE(review): presumably returns the number of char16_t written, with some
// sentinel for invalid input -- confirm against the implementation.
1053simdutf_warn_unused
size_t convert_utf8_to_utf16(
1054 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
1056simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1057convert_utf8_to_utf16(
const detail::input_span_of_byte_like
auto &input,
1058 std::span<char16_t> output)
noexcept {
1059 #if SIMDUTF_CPLUSPLUS23
1061 return scalar::utf8_to_utf16::convert<endianness::NATIVE>(
1062 input.data(), input.size(), output.data());
1066 return convert_utf8_to_utf16(
reinterpret_cast<const char *
>(input.data()),
1067 input.size(), output.data());
// Declaration only; the body is not part of this header section.
// Takes a const char16_t pointer plus a length and returns a `result` (type
// declared outside this section), noexcept. The std::span<const char16_t>
// overload that follows forwards .data() and .size(), so length is a count of
// char16_t elements; its SIMDUTF_CPLUSPLUS23 path uses endianness::LITTLE.
// NOTE(review): how the length and any replacement information are encoded
// in `result` is not visible here -- confirm against the implementation.
1089simdutf_warn_unused result utf8_length_from_utf16le_with_replacement(
1090 const char16_t *input,
size_t length)
noexcept;
1092simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused result
1093utf8_length_from_utf16le_with_replacement(
1094 std::span<const char16_t> valid_utf16_input)
noexcept {
1095 #if SIMDUTF_CPLUSPLUS23
1097 return scalar::utf16::utf8_length_from_utf16_with_replacement<
1098 endianness::LITTLE>(valid_utf16_input.data(), valid_utf16_input.size());
1102 return utf8_length_from_utf16le_with_replacement(valid_utf16_input.data(),
1103 valid_utf16_input.size());
// Declaration only; the body is not part of this header section.
// Takes a const char16_t pointer plus a length and returns a `result` (type
// declared outside this section), noexcept. The std::span<const char16_t>
// overload that follows forwards .data() and .size(), so length is a count of
// char16_t elements; its SIMDUTF_CPLUSPLUS23 path uses endianness::BIG.
// NOTE(review): how the length and any replacement information are encoded
// in `result` is not visible here -- confirm against the implementation.
1125simdutf_warn_unused result utf8_length_from_utf16be_with_replacement(
1126 const char16_t *input,
size_t length)
noexcept;
1128simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1129utf8_length_from_utf16be_with_replacement(
1130 std::span<const char16_t> valid_utf16_input)
noexcept {
1131 #if SIMDUTF_CPLUSPLUS23
1133 return scalar::utf16::utf8_length_from_utf16_with_replacement<
1134 endianness::BIG>(valid_utf16_input.data(), valid_utf16_input.size());
1138 return utf8_length_from_utf16be_with_replacement(valid_utf16_input.data(),
1139 valid_utf16_input.size());
1146#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
// Declaration only; the body is not part of this header section.
// Takes a const char input pointer, a length and a char16_t output pointer;
// returns size_t, noexcept. The span overload that follows forwards
// input.data() (cast to const char *), input.size() and output.data(), and
// its SIMDUTF_CPLUSPLUS23 path uses endianness::NATIVE. No output capacity is
// passed.
// NOTE(review): presumably returns the number of char16_t written -- confirm
// against the implementation.
1155simdutf_warn_unused
size_t convert_latin1_to_utf16(
1156 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
1158simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1159convert_latin1_to_utf16(
const detail::input_span_of_byte_like
auto &input,
1160 std::span<char16_t> output)
noexcept {
1161 #if SIMDUTF_CPLUSPLUS23
1163 return scalar::latin1_to_utf16::convert<endianness::NATIVE>(
1164 input.data(), input.size(), output.data());
1168 return convert_latin1_to_utf16(
reinterpret_cast<const char *
>(input.data()),
1169 input.size(), output.data());
1175#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// Declaration only; the body is not part of this header section.
// Takes a const char input pointer, a length and a char16_t output pointer;
// returns size_t, noexcept. The span overload that follows forwards
// utf8_input.data() (cast to const char *), utf8_input.size() and
// utf16_output.data(); its SIMDUTF_CPLUSPLUS23 path uses endianness::LITTLE.
// No output capacity is passed.
// NOTE(review): presumably returns the number of char16_t written, with some
// sentinel for invalid input -- confirm against the implementation.
1188simdutf_warn_unused
size_t convert_utf8_to_utf16le(
1189 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
1191simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1192convert_utf8_to_utf16le(
const detail::input_span_of_byte_like
auto &utf8_input,
1193 std::span<char16_t> utf16_output)
noexcept {
1194 #if SIMDUTF_CPLUSPLUS23
1196 return scalar::utf8_to_utf16::convert<endianness::LITTLE>(
1197 utf8_input.data(), utf8_input.size(), utf16_output.data());
1201 return convert_utf8_to_utf16le(
1202 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
1203 utf16_output.data());
// Declaration only; the body is not part of this header section.
// Takes a const char input pointer, a length and a char16_t output pointer;
// returns size_t, noexcept. The span overload that follows forwards
// utf8_input.data() (cast to const char *), utf8_input.size() and
// utf16_output.data(); its SIMDUTF_CPLUSPLUS23 path uses endianness::BIG.
// No output capacity is passed.
// NOTE(review): presumably returns the number of char16_t written, with some
// sentinel for invalid input -- confirm against the implementation.
1220simdutf_warn_unused
size_t convert_utf8_to_utf16be(
1221 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
1223simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1224convert_utf8_to_utf16be(
const detail::input_span_of_byte_like
auto &utf8_input,
1225 std::span<char16_t> utf16_output)
noexcept {
1227 #if SIMDUTF_CPLUSPLUS23
1229 return scalar::utf8_to_utf16::convert<endianness::BIG>(
1230 utf8_input.data(), utf8_input.size(), utf16_output.data());
1234 return convert_utf8_to_utf16be(
1235 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
1236 utf16_output.data());
1242#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
// Declaration only; the body is not part of this header section.
// Takes a const char input pointer, a length and a char output pointer;
// returns a `result` (type declared outside this section), noexcept. The span
// overload that follows casts both utf8_input.data() and latin1_output.data()
// to char pointers and forwards utf8_input.size(). No output capacity is
// passed.
// NOTE(review): presumably `result` reports an error position or the number
// of bytes written -- confirm against the implementation.
1259simdutf_warn_unused result convert_utf8_to_latin1_with_errors(
1260 const char *input,
size_t length,
char *latin1_output)
noexcept;
1262simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1263convert_utf8_to_latin1_with_errors(
1264 const detail::input_span_of_byte_like
auto &utf8_input,
1265 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
1266 #if SIMDUTF_CPLUSPLUS23
1268 return scalar::utf8_to_latin1::convert_with_errors(
1269 utf8_input.data(), utf8_input.size(), latin1_output.data());
1273 return convert_utf8_to_latin1_with_errors(
1274 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
1275 reinterpret_cast<char *
>(latin1_output.data()));
1281#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// Declaration only; the body is not part of this header section.
// Takes a const char input pointer, a length and a char16_t output pointer;
// returns a `result` (type declared outside this section), noexcept. The span
// overload that follows forwards utf8_input.data() (cast to const char *),
// utf8_input.size() and utf16_output.data(); its SIMDUTF_CPLUSPLUS23 path
// uses endianness::NATIVE. No output capacity is passed.
// NOTE(review): presumably `result` reports an error position or the number
// of char16_t written -- confirm against the implementation.
1297simdutf_warn_unused result convert_utf8_to_utf16_with_errors(
1298 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
1300simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1301convert_utf8_to_utf16_with_errors(
1302 const detail::input_span_of_byte_like
auto &utf8_input,
1303 std::span<char16_t> utf16_output)
noexcept {
1304 #if SIMDUTF_CPLUSPLUS23
1306 return scalar::utf8_to_utf16::convert_with_errors<endianness::NATIVE>(
1307 utf8_input.data(), utf8_input.size(), utf16_output.data());
1311 return convert_utf8_to_utf16_with_errors(
1312 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
1313 utf16_output.data());
// Declaration only; the body is not part of this header section.
// Takes a const char input pointer, a length and a char16_t output pointer;
// returns a `result` (type declared outside this section), noexcept. The span
// overload that follows forwards utf8_input.data() (cast to const char *),
// utf8_input.size() and utf16_output.data(); its SIMDUTF_CPLUSPLUS23 path
// uses endianness::LITTLE. No output capacity is passed.
// NOTE(review): presumably `result` reports an error position or the number
// of char16_t written -- confirm against the implementation.
1332simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(
1333 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
1335simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1336convert_utf8_to_utf16le_with_errors(
1337 const detail::input_span_of_byte_like
auto &utf8_input,
1338 std::span<char16_t> utf16_output)
noexcept {
1339 #if SIMDUTF_CPLUSPLUS23
1341 return scalar::utf8_to_utf16::convert_with_errors<endianness::LITTLE>(
1342 utf8_input.data(), utf8_input.size(), utf16_output.data());
1346 return convert_utf8_to_utf16le_with_errors(
1347 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
1348 utf16_output.data());
// Declaration only; the body is not part of this header section.
// Takes a const char input pointer, a length and a char16_t output pointer;
// returns a `result` (type declared outside this section), noexcept. The span
// overload that follows forwards utf8_input.data() (cast to const char *),
// utf8_input.size() and utf16_output.data(); its SIMDUTF_CPLUSPLUS23 path
// uses endianness::BIG. No output capacity is passed.
// NOTE(review): presumably `result` reports an error position or the number
// of char16_t written -- confirm against the implementation.
1367simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(
1368 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
1370simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1371convert_utf8_to_utf16be_with_errors(
1372 const detail::input_span_of_byte_like
auto &utf8_input,
1373 std::span<char16_t> utf16_output)
noexcept {
1374 #if SIMDUTF_CPLUSPLUS23
1376 return scalar::utf8_to_utf16::convert_with_errors<endianness::BIG>(
1377 utf8_input.data(), utf8_input.size(), utf16_output.data());
1381 return convert_utf8_to_utf16be_with_errors(
1382 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
1383 utf16_output.data());
1389#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
// Declaration only; the body is not part of this header section.
// Takes a const char input pointer, a length and a char32_t output pointer;
// returns size_t, noexcept. The span overload that follows forwards
// utf8_input.data() (cast to const char *), utf8_input.size() and
// utf32_output.data(). No output capacity is passed.
// NOTE(review): presumably returns the number of char32_t written, with some
// sentinel for invalid input -- confirm against the implementation.
1402simdutf_warn_unused
size_t convert_utf8_to_utf32(
1403 const char *input,
size_t length,
char32_t *utf32_output)
noexcept;
1405simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1406convert_utf8_to_utf32(
const detail::input_span_of_byte_like
auto &utf8_input,
1407 std::span<char32_t> utf32_output)
noexcept {
1408 #if SIMDUTF_CPLUSPLUS23
1410 return scalar::utf8_to_utf32::convert(utf8_input.data(), utf8_input.size(),
1411 utf32_output.data());
1415 return convert_utf8_to_utf32(
1416 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
1417 utf32_output.data());
// Declaration only; the body is not part of this header section.
// Takes a const char input pointer, a length and a char32_t output pointer;
// returns a `result` (type declared outside this section), noexcept. The span
// overload that follows forwards utf8_input.data() (cast to const char *),
// utf8_input.size() and utf32_output.data(). No output capacity is passed.
// NOTE(review): presumably `result` reports an error position or the number
// of char32_t written -- confirm against the implementation.
1436simdutf_warn_unused result convert_utf8_to_utf32_with_errors(
1437 const char *input,
size_t length,
char32_t *utf32_output)
noexcept;
1439simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1440convert_utf8_to_utf32_with_errors(
1441 const detail::input_span_of_byte_like
auto &utf8_input,
1442 std::span<char32_t> utf32_output)
noexcept {
1443 #if SIMDUTF_CPLUSPLUS23
1445 return scalar::utf8_to_utf32::convert_with_errors(
1446 utf8_input.data(), utf8_input.size(), utf32_output.data());
1450 return convert_utf8_to_utf32_with_errors(
1451 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
1452 utf32_output.data());
1458#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
// Declaration only; the body is not part of this header section.
// Takes a const char input pointer, a length and a (non-const) char output
// pointer; returns size_t, noexcept. The signature has no output-capacity
// parameter.
// NOTE(review): the name suggests the input must already be valid UTF-8 and
// that no validation is done here -- confirm against the implementation.
1478simdutf_warn_unused
size_t convert_valid_utf8_to_latin1(
1479 const char *input,
size_t length,
char *latin1_output)
noexcept;
// Span overload of convert_valid_utf8_to_latin1.
//
// valid_utf8_input: any input span satisfying input_span_of_byte_like.
// latin1_output:    any output span satisfying output_span_of_byte_like; only
//                   its data pointer is used, its size is never consulted.
// Returns the size_t produced by whichever callee is selected below.
//
// Under SIMDUTF_CPLUSPLUS23 the scalar routine
// scalar::utf8_to_latin1::convert_valid is called with the spans' own element
// pointers. The other call forwards to the (const char *, size_t, char *)
// overload. The byte-like concepts also admit element types other than char
// (std::byte, signed/unsigned char, char8_t), so both data pointers must be
// reinterpret_cast to char pointers there; passing latin1_output.data()
// uncast only compiles when the output element type is exactly char.
// NOTE(review): the name implies the caller guarantees valid UTF-8 input and
// a sufficiently large output buffer -- confirm against the implementation.
1481simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1482convert_valid_utf8_to_latin1(
1483 const detail::input_span_of_byte_like
auto &valid_utf8_input,
1484 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
1485 #if SIMDUTF_CPLUSPLUS23
1487 return scalar::utf8_to_latin1::convert_valid(
1488 valid_utf8_input.data(), valid_utf8_input.size(), latin1_output.data());
1492 return convert_valid_utf8_to_latin1(
1493 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1494 valid_utf8_input.size(),
reinterpret_cast<char *
>(latin1_output.data()));
1500#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// Declaration only; the body is not part of this header section.
// Takes a const char input pointer, a length and a char16_t output pointer;
// returns size_t, noexcept. The span overload that follows forwards
// valid_utf8_input.data() (cast to const char *), valid_utf8_input.size() and
// utf16_output.data(); its SIMDUTF_CPLUSPLUS23 path uses endianness::NATIVE.
// No output capacity is passed.
// NOTE(review): the name suggests the input must already be valid UTF-8 --
// confirm against the implementation.
1511simdutf_warn_unused
size_t convert_valid_utf8_to_utf16(
1512 const char *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
1514simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1515convert_valid_utf8_to_utf16(
1516 const detail::input_span_of_byte_like
auto &valid_utf8_input,
1517 std::span<char16_t> utf16_output)
noexcept {
1518 #if SIMDUTF_CPLUSPLUS23
1520 return scalar::utf8_to_utf16::convert_valid<endianness::NATIVE>(
1521 valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data());
1525 return convert_valid_utf8_to_utf16(
1526 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1527 valid_utf8_input.size(), utf16_output.data());
// Declaration only; the body is not part of this header section.
// Takes a const char input pointer, a length and a char16_t output pointer;
// returns size_t, noexcept. The span overload that follows forwards
// valid_utf8_input.data() (cast to const char *), valid_utf8_input.size() and
// utf16_output.data(); its SIMDUTF_CPLUSPLUS23 path uses endianness::LITTLE.
// No output capacity is passed.
// NOTE(review): the name suggests the input must already be valid UTF-8 --
// confirm against the implementation.
1542simdutf_warn_unused
size_t convert_valid_utf8_to_utf16le(
1543 const char *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
1545simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1546convert_valid_utf8_to_utf16le(
1547 const detail::input_span_of_byte_like
auto &valid_utf8_input,
1548 std::span<char16_t> utf16_output)
noexcept {
1550 #if SIMDUTF_CPLUSPLUS23
1552 return scalar::utf8_to_utf16::convert_valid<endianness::LITTLE>(
1553 valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data());
1557 return convert_valid_utf8_to_utf16le(
1558 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1559 valid_utf8_input.size(), utf16_output.data());
// Declaration only; the body is not part of this header section.
// Takes a const char input pointer, a length and a char16_t output pointer;
// returns size_t, noexcept. The span overload that follows forwards
// valid_utf8_input.data() (cast to const char *), valid_utf8_input.size() and
// utf16_output.data(); its SIMDUTF_CPLUSPLUS23 path uses endianness::BIG.
// No output capacity is passed.
// NOTE(review): the name suggests the input must already be valid UTF-8 --
// confirm against the implementation.
1574simdutf_warn_unused
size_t convert_valid_utf8_to_utf16be(
1575 const char *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
1577simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1578convert_valid_utf8_to_utf16be(
1579 const detail::input_span_of_byte_like
auto &valid_utf8_input,
1580 std::span<char16_t> utf16_output)
noexcept {
1581 #if SIMDUTF_CPLUSPLUS23
1583 return scalar::utf8_to_utf16::convert_valid<endianness::BIG>(
1584 valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data());
1588 return convert_valid_utf8_to_utf16be(
1589 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1590 valid_utf8_input.size(), utf16_output.data());
1596#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
// Declaration only; the body is not part of this header section.
// Takes a const char input pointer, a length and a char32_t output pointer;
// returns size_t, noexcept. The span overload that follows forwards
// valid_utf8_input.data() (cast to const char *), valid_utf8_input.size() and
// utf32_output.data(). No output capacity is passed.
// NOTE(review): the name suggests the input must already be valid UTF-8 --
// confirm against the implementation.
1607simdutf_warn_unused
size_t convert_valid_utf8_to_utf32(
1608 const char *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
1610simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1611convert_valid_utf8_to_utf32(
1612 const detail::input_span_of_byte_like
auto &valid_utf8_input,
1613 std::span<char32_t> utf32_output)
noexcept {
1614 #if SIMDUTF_CPLUSPLUS23
1616 return scalar::utf8_to_utf32::convert_valid(
1617 valid_utf8_input.data(), valid_utf8_input.size(), utf32_output.data());
1621 return convert_valid_utf8_to_utf32(
1622 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1623 valid_utf8_input.size(), utf32_output.data());
1629#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
// Declaration only; the body is not part of this header section.
// Takes a const char pointer plus a length and returns size_t, noexcept. The
// byte-like span overload that follows forwards latin1_input.data() (cast to
// const char *) and latin1_input.size(); its SIMDUTF_CPLUSPLUS23 path calls
// scalar::latin1_to_utf8::utf8_length_from_latin1 instead.
// NOTE(review): presumably the number of UTF-8 bytes needed to encode the
// Latin-1 input -- confirm against the implementation.
1638simdutf_warn_unused
size_t utf8_length_from_latin1(
const char *input,
1639 size_t length)
noexcept;
1641simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1642utf8_length_from_latin1(
1643 const detail::input_span_of_byte_like
auto &latin1_input)
noexcept {
1644 #if SIMDUTF_CPLUSPLUS23
1646 return scalar::latin1_to_utf8::utf8_length_from_latin1(latin1_input.data(),
1647 latin1_input.size());
1651 return utf8_length_from_latin1(
1652 reinterpret_cast<const char *
>(latin1_input.data()),
1653 latin1_input.size());
// Declaration only; the body is not part of this header section.
// Takes a const char pointer plus a length and returns size_t, noexcept. The
// byte-like span overload that follows forwards valid_utf8_input.data() (cast
// to const char *) and valid_utf8_input.size(); its SIMDUTF_CPLUSPLUS23 path
// returns scalar::utf8::count_code_points(...) instead.
// NOTE(review): presumably assumes valid UTF-8 input, as the span overload's
// parameter name suggests -- confirm against the implementation.
1671simdutf_warn_unused
size_t latin1_length_from_utf8(
const char *input,
1672 size_t length)
noexcept;
1674simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1675latin1_length_from_utf8(
1676 const detail::input_span_of_byte_like
auto &valid_utf8_input)
noexcept {
1677 #if SIMDUTF_CPLUSPLUS23
1679 return scalar::utf8::count_code_points(valid_utf8_input.data(),
1680 valid_utf8_input.size());
1684 return latin1_length_from_utf8(
1685 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1686 valid_utf8_input.size());
1692#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// Declaration only; the body is not part of this header section.
// Takes a const char pointer plus a length and returns size_t, noexcept. The
// byte-like span overload that follows forwards valid_utf8_input.data() (cast
// to const char *) and valid_utf8_input.size(); its SIMDUTF_CPLUSPLUS23 path
// calls scalar::utf8::utf16_length_from_utf8 instead.
// NOTE(review): presumably the number of char16_t units needed for the UTF-8
// input -- confirm against the implementation.
1707simdutf_warn_unused
size_t utf16_length_from_utf8(
const char *input,
1708 size_t length)
noexcept;
1710simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1711utf16_length_from_utf8(
1712 const detail::input_span_of_byte_like
auto &valid_utf8_input)
noexcept {
1713 #if SIMDUTF_CPLUSPLUS23
1715 return scalar::utf8::utf16_length_from_utf8(valid_utf8_input.data(),
1716 valid_utf8_input.size());
1720 return utf16_length_from_utf8(
1721 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1722 valid_utf8_input.size());
1728#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
// Declaration only; the body is not part of this header section.
// Takes a const char pointer plus a length and returns size_t, noexcept. The
// byte-like span overload that follows forwards valid_utf8_input.data() (cast
// to const char *) and valid_utf8_input.size(); its SIMDUTF_CPLUSPLUS23 path
// returns scalar::utf8::count_code_points(...) instead.
// NOTE(review): presumably the number of char32_t units needed for the UTF-8
// input -- confirm against the implementation.
1745simdutf_warn_unused
size_t utf32_length_from_utf8(
const char *input,
1746 size_t length)
noexcept;
1748simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1749utf32_length_from_utf8(
1750 const detail::input_span_of_byte_like
auto &valid_utf8_input)
noexcept {
1752 #if SIMDUTF_CPLUSPLUS23
1754 return scalar::utf8::count_code_points(valid_utf8_input.data(),
1755 valid_utf8_input.size());
1759 return utf32_length_from_utf8(
1760 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1761 valid_utf8_input.size());
1767#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// convert_utf16_to_utf8, pointer overload: declaration only. Part of the
// parameter list is elided in this excerpt.
1783simdutf_warn_unused
size_t convert_utf16_to_utf8(
const char16_t *input,
1785 char *utf8_buffer)
noexcept;
// Span overload: char16_t input span, byte-like output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_utf8::convert<endianness::NATIVE>; the other visible return
// forwards to the pointer overload, casting utf8_output.data() to char *.
// Only utf8_output.data() is passed on; utf8_output.size() is not consulted in
// the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
1787simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1788convert_utf16_to_utf8(
1789 std::span<const char16_t> utf16_input,
1790 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
1791 #if SIMDUTF_CPLUSPLUS23
1793 return scalar::utf16_to_utf8::convert<endianness::NATIVE>(
1794 utf16_input.data(), utf16_input.size(), utf8_output.data());
1798 return convert_utf16_to_utf8(utf16_input.data(), utf16_input.size(),
1799 reinterpret_cast<char *
>(utf8_output.data()));
// convert_utf16_to_utf8_safe, pointer overload: declaration only. Part of the
// parameter list is elided in this excerpt; the last parameter is the output
// length utf8_len.
1822simdutf_warn_unused
size_t convert_utf16_to_utf8_safe(
const char16_t *input,
1825 size_t utf8_len)
noexcept;
// Span overload: char16_t input span, byte-like output span. Unlike the
// non-"safe" siblings, both visible paths pass utf8_output.size() along with
// utf8_output.data().
// Under `#if SIMDUTF_CPLUSPLUS23` the visible code calls
// scalar::utf16_to_utf8::convert_with_errors<endianness::NATIVE, true>, tests
// whether the error is neither SUCCESS nor OUTPUT_BUFFER_TOO_SMALL, and
// returns r.output_count.
// NOTE(review): the body of that `if` is not visible in this excerpt, nor are
// the lines choosing between this path and the forwarding return; confirm.
1827simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1828convert_utf16_to_utf8_safe(
1829 std::span<const char16_t> utf16_input,
1830 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
1837 #if SIMDUTF_CPLUSPLUS23
1839 const full_result r =
1840 scalar::utf16_to_utf8::convert_with_errors<endianness::NATIVE, true>(
1841 utf16_input.data(), utf16_input.size(), utf8_output.data(),
1842 utf8_output.size());
1843 if (r.error != error_code::SUCCESS &&
1844 r.error != error_code::OUTPUT_BUFFER_TOO_SMALL) {
1847 return r.output_count;
1851 return convert_utf16_to_utf8_safe(
1852 utf16_input.data(), utf16_input.size(),
1853 reinterpret_cast<char *
>(utf8_output.data()), utf8_output.size());
1859#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
// convert_utf16_to_latin1, pointer overload: declaration only (no body is
// visible in this excerpt).
1875simdutf_warn_unused
size_t convert_utf16_to_latin1(
1876 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// Span overload: char16_t input span, byte-like output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_latin1::convert<endianness::NATIVE>; the other visible
// return forwards to the pointer overload, casting latin1_output.data() to
// char *. latin1_output.size() is not consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
1878simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1879convert_utf16_to_latin1(
1880 std::span<const char16_t> utf16_input,
1881 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
1882 #if SIMDUTF_CPLUSPLUS23
1884 return scalar::utf16_to_latin1::convert<endianness::NATIVE>(
1885 utf16_input.data(), utf16_input.size(), latin1_output.data());
1889 return convert_utf16_to_latin1(
1890 utf16_input.data(), utf16_input.size(),
1891 reinterpret_cast<char *
>(latin1_output.data()));
// convert_utf16le_to_latin1, pointer overload: declaration only (no body is
// visible in this excerpt).
1912simdutf_warn_unused
size_t convert_utf16le_to_latin1(
1913 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// Span overload: char16_t input span, byte-like output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_latin1::convert<endianness::LITTLE>; the other visible
// return forwards to the pointer overload, casting latin1_output.data() to
// char *. latin1_output.size() is not consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
1915simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1916convert_utf16le_to_latin1(
1917 std::span<const char16_t> utf16_input,
1918 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
1919 #if SIMDUTF_CPLUSPLUS23
1921 return scalar::utf16_to_latin1::convert<endianness::LITTLE>(
1922 utf16_input.data(), utf16_input.size(), latin1_output.data());
1926 return convert_utf16le_to_latin1(
1927 utf16_input.data(), utf16_input.size(),
1928 reinterpret_cast<char *
>(latin1_output.data()));
// convert_utf16be_to_latin1, pointer overload: declaration only (no body is
// visible in this excerpt).
1947simdutf_warn_unused
size_t convert_utf16be_to_latin1(
1948 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// Span overload: char16_t input span, byte-like output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_latin1::convert<endianness::BIG>; the other visible return
// forwards to the pointer overload, casting latin1_output.data() to char *.
// latin1_output.size() is not consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
1950simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1951convert_utf16be_to_latin1(
1952 std::span<const char16_t> utf16_input,
1953 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
1954 #if SIMDUTF_CPLUSPLUS23
1956 return scalar::utf16_to_latin1::convert<endianness::BIG>(
1957 utf16_input.data(), utf16_input.size(), latin1_output.data());
1961 return convert_utf16be_to_latin1(
1962 utf16_input.data(), utf16_input.size(),
1963 reinterpret_cast<char *
>(latin1_output.data()));
1969#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// convert_utf16le_to_utf8, pointer overload: declaration only. Part of the
// parameter list is elided in this excerpt.
1984simdutf_warn_unused
size_t convert_utf16le_to_utf8(
const char16_t *input,
1986 char *utf8_buffer)
noexcept;
// Span overload: char16_t input span, byte-like output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_utf8::convert<endianness::LITTLE>; the other visible return
// forwards to the pointer overload, casting utf8_output.data() to char *.
// utf8_output.size() is not consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
1988simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1989convert_utf16le_to_utf8(
1990 std::span<const char16_t> utf16_input,
1991 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
1992 #if SIMDUTF_CPLUSPLUS23
1994 return scalar::utf16_to_utf8::convert<endianness::LITTLE>(
1995 utf16_input.data(), utf16_input.size(), utf8_output.data());
1999 return convert_utf16le_to_utf8(
2000 utf16_input.data(), utf16_input.size(),
2001 reinterpret_cast<char *
>(utf8_output.data()));
// convert_utf16be_to_utf8, pointer overload: declaration only. Part of the
// parameter list is elided in this excerpt.
2020simdutf_warn_unused
size_t convert_utf16be_to_utf8(
const char16_t *input,
2022 char *utf8_buffer)
noexcept;
// Span overload: char16_t input span, byte-like output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_utf8::convert<endianness::BIG>; the other visible return
// forwards to the pointer overload, casting utf8_output.data() to char *.
// utf8_output.size() is not consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2024simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2025convert_utf16be_to_utf8(
2026 std::span<const char16_t> utf16_input,
2027 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2028 #if SIMDUTF_CPLUSPLUS23
2030 return scalar::utf16_to_utf8::convert<endianness::BIG>(
2031 utf16_input.data(), utf16_input.size(), utf8_output.data());
2035 return convert_utf16be_to_utf8(
2036 utf16_input.data(), utf16_input.size(),
2037 reinterpret_cast<char *
>(utf8_output.data()));
2043#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
// convert_utf16_to_latin1_with_errors, pointer overload returning `result`:
// declaration only (no body is visible in this excerpt).
2060simdutf_warn_unused result convert_utf16_to_latin1_with_errors(
2061 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// Span overload returning `result`. Under `#if SIMDUTF_CPLUSPLUS23` the
// visible return calls
// scalar::utf16_to_latin1::convert_with_errors<endianness::NATIVE>; the other
// visible return forwards to the pointer overload, casting
// latin1_output.data() to char *. latin1_output.size() is not consulted in the
// visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2063simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2064convert_utf16_to_latin1_with_errors(
2065 std::span<const char16_t> utf16_input,
2066 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
2067 #if SIMDUTF_CPLUSPLUS23
2069 return scalar::utf16_to_latin1::convert_with_errors<endianness::NATIVE>(
2070 utf16_input.data(), utf16_input.size(), latin1_output.data());
2074 return convert_utf16_to_latin1_with_errors(
2075 utf16_input.data(), utf16_input.size(),
2076 reinterpret_cast<char *
>(latin1_output.data()));
// convert_utf16le_to_latin1_with_errors, pointer overload returning `result`:
// declaration only (no body is visible in this excerpt).
2096simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(
2097 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// Span overload returning `result`. Under `#if SIMDUTF_CPLUSPLUS23` the
// visible return calls
// scalar::utf16_to_latin1::convert_with_errors<endianness::LITTLE>; the other
// visible return forwards to the pointer overload, casting
// latin1_output.data() to char *. latin1_output.size() is not consulted in the
// visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2099simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2100convert_utf16le_to_latin1_with_errors(
2101 std::span<const char16_t> utf16_input,
2102 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
2103 #if SIMDUTF_CPLUSPLUS23
2105 return scalar::utf16_to_latin1::convert_with_errors<endianness::LITTLE>(
2106 utf16_input.data(), utf16_input.size(), latin1_output.data());
2110 return convert_utf16le_to_latin1_with_errors(
2111 utf16_input.data(), utf16_input.size(),
2112 reinterpret_cast<char *
>(latin1_output.data()));
// convert_utf16be_to_latin1_with_errors, pointer overload returning `result`:
// declaration only (no body is visible in this excerpt).
2134simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(
2135 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// Span overload returning `result`. Under `#if SIMDUTF_CPLUSPLUS23` the
// visible return calls
// scalar::utf16_to_latin1::convert_with_errors<endianness::BIG>; the other
// visible return forwards to the pointer overload, casting
// latin1_output.data() to char *. latin1_output.size() is not consulted in the
// visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2137simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2138convert_utf16be_to_latin1_with_errors(
2139 std::span<const char16_t> utf16_input,
2140 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
2141 #if SIMDUTF_CPLUSPLUS23
2143 return scalar::utf16_to_latin1::convert_with_errors<endianness::BIG>(
2144 utf16_input.data(), utf16_input.size(), latin1_output.data());
2148 return convert_utf16be_to_latin1_with_errors(
2149 utf16_input.data(), utf16_input.size(),
2150 reinterpret_cast<char *
>(latin1_output.data()));
2156#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// convert_utf16_to_utf8_with_errors, pointer overload returning `result`:
// declaration only (no body is visible in this excerpt).
2174simdutf_warn_unused result convert_utf16_to_utf8_with_errors(
2175 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
// Span overload returning `result`. Under `#if SIMDUTF_CPLUSPLUS23` the
// visible return calls
// scalar::utf16_to_utf8::convert_with_errors<endianness::NATIVE>; the other
// visible return forwards to the pointer overload, casting utf8_output.data()
// to char *. utf8_output.size() is not consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2177simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2178convert_utf16_to_utf8_with_errors(
2179 std::span<const char16_t> utf16_input,
2180 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2181 #if SIMDUTF_CPLUSPLUS23
2183 return scalar::utf16_to_utf8::convert_with_errors<endianness::NATIVE>(
2184 utf16_input.data(), utf16_input.size(), utf8_output.data());
2188 return convert_utf16_to_utf8_with_errors(
2189 utf16_input.data(), utf16_input.size(),
2190 reinterpret_cast<char *
>(utf8_output.data()));
// convert_utf16le_to_utf8_with_errors, pointer overload returning `result`:
// declaration only (no body is visible in this excerpt).
2211simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(
2212 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
// Span overload returning `result`. Under `#if SIMDUTF_CPLUSPLUS23` the
// visible return calls
// scalar::utf16_to_utf8::convert_with_errors<endianness::LITTLE>; the other
// visible return forwards to the pointer overload, casting utf8_output.data()
// to char *. utf8_output.size() is not consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2214simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2215convert_utf16le_to_utf8_with_errors(
2216 std::span<const char16_t> utf16_input,
2217 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2218 #if SIMDUTF_CPLUSPLUS23
2220 return scalar::utf16_to_utf8::convert_with_errors<endianness::LITTLE>(
2221 utf16_input.data(), utf16_input.size(), utf8_output.data());
2225 return convert_utf16le_to_utf8_with_errors(
2226 utf16_input.data(), utf16_input.size(),
2227 reinterpret_cast<char *
>(utf8_output.data()));
// convert_utf16be_to_utf8_with_errors, pointer overload returning `result`:
// declaration only (no body is visible in this excerpt).
2248simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(
2249 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
// Span overload returning `result`. Under `#if SIMDUTF_CPLUSPLUS23` the
// visible return calls
// scalar::utf16_to_utf8::convert_with_errors<endianness::BIG>; the other
// visible return forwards to the pointer overload, casting utf8_output.data()
// to char *. utf8_output.size() is not consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2251simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2252convert_utf16be_to_utf8_with_errors(
2253 std::span<const char16_t> utf16_input,
2254 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2255 #if SIMDUTF_CPLUSPLUS23
2257 return scalar::utf16_to_utf8::convert_with_errors<endianness::BIG>(
2258 utf16_input.data(), utf16_input.size(), utf8_output.data());
2262 return convert_utf16be_to_utf8_with_errors(
2263 utf16_input.data(), utf16_input.size(),
2264 reinterpret_cast<char *
>(utf8_output.data()));
// convert_utf16le_to_utf8_with_replacement, pointer overload: declaration only
// (no body is visible in this excerpt).
2283simdutf_warn_unused
size_t convert_utf16le_to_utf8_with_replacement(
2284 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
// Span overload. Under `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_utf8::convert_with_replacement<endianness::LITTLE>; the
// other visible return forwards to the pointer overload, casting
// utf8_output.data() to char *. utf8_output.size() is not consulted in the
// visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2286simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2287convert_utf16le_to_utf8_with_replacement(
2288 std::span<const char16_t> utf16_input,
2289 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2290 #if SIMDUTF_CPLUSPLUS23
2292 return scalar::utf16_to_utf8::convert_with_replacement<endianness::LITTLE>(
2293 utf16_input.data(), utf16_input.size(), utf8_output.data());
2297 return convert_utf16le_to_utf8_with_replacement(
2298 utf16_input.data(), utf16_input.size(),
2299 reinterpret_cast<char *
>(utf8_output.data()));
// convert_utf16be_to_utf8_with_replacement, pointer overload: declaration only
// (no body is visible in this excerpt).
2318simdutf_warn_unused
size_t convert_utf16be_to_utf8_with_replacement(
2319 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
// Span overload. Under `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_utf8::convert_with_replacement<endianness::BIG>; the other
// visible return forwards to the pointer overload, casting utf8_output.data()
// to char *. utf8_output.size() is not consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2321simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2322convert_utf16be_to_utf8_with_replacement(
2323 std::span<const char16_t> utf16_input,
2324 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2325 #if SIMDUTF_CPLUSPLUS23
2327 return scalar::utf16_to_utf8::convert_with_replacement<endianness::BIG>(
2328 utf16_input.data(), utf16_input.size(), utf8_output.data());
2332 return convert_utf16be_to_utf8_with_replacement(
2333 utf16_input.data(), utf16_input.size(),
2334 reinterpret_cast<char *
>(utf8_output.data()));
// convert_utf16_to_utf8_with_replacement, pointer overload: declaration only
// (no body is visible in this excerpt).
2353simdutf_warn_unused
size_t convert_utf16_to_utf8_with_replacement(
2354 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
// Span overload. Under `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_utf8::convert_with_replacement<endianness::NATIVE>; the
// other visible return forwards to the pointer overload, casting
// utf8_output.data() to char *. utf8_output.size() is not consulted in the
// visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2356simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2357convert_utf16_to_utf8_with_replacement(
2358 std::span<const char16_t> utf16_input,
2359 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2360 #if SIMDUTF_CPLUSPLUS23
2362 return scalar::utf16_to_utf8::convert_with_replacement<endianness::NATIVE>(
2363 utf16_input.data(), utf16_input.size(), utf8_output.data());
2367 return convert_utf16_to_utf8_with_replacement(
2368 utf16_input.data(), utf16_input.size(),
2369 reinterpret_cast<char *
>(utf8_output.data()));
2375#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// convert_valid_utf16_to_utf8, pointer overload: declaration only (no body is
// visible in this excerpt).
2389simdutf_warn_unused
size_t convert_valid_utf16_to_utf8(
2390 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
// Span overload. Under `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_utf8::convert_valid<endianness::NATIVE>; the other visible
// return forwards to the pointer overload, casting utf8_output.data() to
// char *. utf8_output.size() is not consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2392simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2393convert_valid_utf16_to_utf8(
2394 std::span<const char16_t> valid_utf16_input,
2395 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2396 #if SIMDUTF_CPLUSPLUS23
2398 return scalar::utf16_to_utf8::convert_valid<endianness::NATIVE>(
2399 valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data());
2403 return convert_valid_utf16_to_utf8(
2404 valid_utf16_input.data(), valid_utf16_input.size(),
2405 reinterpret_cast<char *
>(utf8_output.data()));
2411#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
// convert_valid_utf16_to_latin1, pointer overload: declaration only (no body
// is visible in this excerpt).
2431simdutf_warn_unused
size_t convert_valid_utf16_to_latin1(
2432 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// Span overload. Under `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_latin1::convert_valid_impl<endianness::NATIVE>, wrapping
// the input pointer with detail::constexpr_cast_ptr<uint16_t> and the output
// pointer with detail::constexpr_cast_writeptr<char>. The other visible return
// forwards to the pointer overload, casting latin1_output.data() to char *.
// latin1_output.size() is not consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2434simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2435convert_valid_utf16_to_latin1(
2436 std::span<const char16_t> valid_utf16_input,
2437 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
2438 #if SIMDUTF_CPLUSPLUS23
2440 return scalar::utf16_to_latin1::convert_valid_impl<endianness::NATIVE>(
2441 detail::constexpr_cast_ptr<uint16_t>(valid_utf16_input.data()),
2442 valid_utf16_input.size(),
2443 detail::constexpr_cast_writeptr<char>(latin1_output.data()));
2447 return convert_valid_utf16_to_latin1(
2448 valid_utf16_input.data(), valid_utf16_input.size(),
2449 reinterpret_cast<char *
>(latin1_output.data()));
// convert_valid_utf16le_to_latin1, pointer overload: declaration only (no body
// is visible in this excerpt).
2473simdutf_warn_unused
size_t convert_valid_utf16le_to_latin1(
2474 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// Span overload. Under `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_latin1::convert_valid_impl<endianness::LITTLE>, wrapping
// the input pointer with detail::constexpr_cast_ptr<uint16_t> and the output
// pointer with detail::constexpr_cast_writeptr<char>. The other visible return
// forwards to the pointer overload, casting latin1_output.data() to char *.
// latin1_output.size() is not consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2476simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused
size_t
2477convert_valid_utf16le_to_latin1(
2478 std::span<const char16_t> valid_utf16_input,
2479 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
2480 #if SIMDUTF_CPLUSPLUS23
2482 return scalar::utf16_to_latin1::convert_valid_impl<endianness::LITTLE>(
2483 detail::constexpr_cast_ptr<uint16_t>(valid_utf16_input.data()),
2484 valid_utf16_input.size(),
2485 detail::constexpr_cast_writeptr<char>(latin1_output.data()));
2489 return convert_valid_utf16le_to_latin1(
2490 valid_utf16_input.data(), valid_utf16_input.size(),
2491 reinterpret_cast<char *
>(latin1_output.data()));
// convert_valid_utf16be_to_latin1, pointer overload: declaration only (no body
// is visible in this excerpt).
2515simdutf_warn_unused
size_t convert_valid_utf16be_to_latin1(
2516 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// Span overload. Under `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_latin1::convert_valid_impl<endianness::BIG>, wrapping the
// input pointer with detail::constexpr_cast_ptr<uint16_t> and the output
// pointer with detail::constexpr_cast_writeptr<char>. The other visible return
// forwards to the pointer overload, casting latin1_output.data() to char *.
// latin1_output.size() is not consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2518simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused
size_t
2519convert_valid_utf16be_to_latin1(
2520 std::span<const char16_t> valid_utf16_input,
2521 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
2522 #if SIMDUTF_CPLUSPLUS23
2524 return scalar::utf16_to_latin1::convert_valid_impl<endianness::BIG>(
2525 detail::constexpr_cast_ptr<uint16_t>(valid_utf16_input.data()),
2526 valid_utf16_input.size(),
2527 detail::constexpr_cast_writeptr<char>(latin1_output.data()));
2531 return convert_valid_utf16be_to_latin1(
2532 valid_utf16_input.data(), valid_utf16_input.size(),
2533 reinterpret_cast<char *
>(latin1_output.data()));
2539#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// convert_valid_utf16le_to_utf8, pointer overload: declaration only (no body
// is visible in this excerpt).
2553simdutf_warn_unused
size_t convert_valid_utf16le_to_utf8(
2554 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
// Span overload. Under `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_utf8::convert_valid with endianness::LITTLE, matching the
// "le" in the function name and the sibling convert_utf16le_to_utf8 /
// convert_valid_utf16be_to_utf8 overloads. It previously used
// endianness::NATIVE, which would disagree with the forwarding path on a
// big-endian target.
// The other visible return forwards to the pointer overload, casting
// utf8_output.data() to char *. utf8_output.size() is not consulted in the
// visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2556simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2557convert_valid_utf16le_to_utf8(
2558 std::span<const char16_t> valid_utf16_input,
2559 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2560 #if SIMDUTF_CPLUSPLUS23
2562 return scalar::utf16_to_utf8::convert_valid<endianness::LITTLE>(
2563 valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data());
2567 return convert_valid_utf16le_to_utf8(
2568 valid_utf16_input.data(), valid_utf16_input.size(),
2569 reinterpret_cast<char *
>(utf8_output.data()));
// convert_valid_utf16be_to_utf8, pointer overload: declaration only (no body
// is visible in this excerpt).
2587simdutf_warn_unused
size_t convert_valid_utf16be_to_utf8(
2588 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
// Span overload. Under `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_utf8::convert_valid<endianness::BIG>; the other visible
// return forwards to the pointer overload, casting utf8_output.data() to
// char *. utf8_output.size() is not consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2590simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2591convert_valid_utf16be_to_utf8(
2592 std::span<const char16_t> valid_utf16_input,
2593 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2594 #if SIMDUTF_CPLUSPLUS23
2596 return scalar::utf16_to_utf8::convert_valid<endianness::BIG>(
2597 valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data());
2601 return convert_valid_utf16be_to_utf8(
2602 valid_utf16_input.data(), valid_utf16_input.size(),
2603 reinterpret_cast<char *
>(utf8_output.data()));
2609#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
// convert_utf16_to_utf32, pointer overload: declaration only (no body is
// visible in this excerpt).
2625simdutf_warn_unused
size_t convert_utf16_to_utf32(
2626 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
// Span overload: char16_t input span, char32_t output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_utf32::convert<endianness::NATIVE>; the other visible
// return forwards to the pointer overload. utf32_output.size() is not
// consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2628simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2629convert_utf16_to_utf32(std::span<const char16_t> utf16_input,
2630 std::span<char32_t> utf32_output)
noexcept {
2632 #if SIMDUTF_CPLUSPLUS23
2634 return scalar::utf16_to_utf32::convert<endianness::NATIVE>(
2635 utf16_input.data(), utf16_input.size(), utf32_output.data());
2639 return convert_utf16_to_utf32(utf16_input.data(), utf16_input.size(),
2640 utf32_output.data());
// convert_utf16le_to_utf32, pointer overload: declaration only (no body is
// visible in this excerpt).
2659simdutf_warn_unused
size_t convert_utf16le_to_utf32(
2660 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
// Span overload: char16_t input span, char32_t output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_utf32::convert<endianness::LITTLE>; the other visible
// return forwards to the pointer overload. utf32_output.size() is not
// consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2662simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2663convert_utf16le_to_utf32(std::span<const char16_t> utf16_input,
2664 std::span<char32_t> utf32_output)
noexcept {
2665 #if SIMDUTF_CPLUSPLUS23
2667 return scalar::utf16_to_utf32::convert<endianness::LITTLE>(
2668 utf16_input.data(), utf16_input.size(), utf32_output.data());
2672 return convert_utf16le_to_utf32(utf16_input.data(), utf16_input.size(),
2673 utf32_output.data());
// convert_utf16be_to_utf32, pointer overload: declaration only (no body is
// visible in this excerpt).
2692simdutf_warn_unused
size_t convert_utf16be_to_utf32(
2693 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
// Span overload: char16_t input span, char32_t output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_utf32::convert<endianness::BIG>; the other visible return
// forwards to the pointer overload. utf32_output.size() is not consulted in
// the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2695simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2696convert_utf16be_to_utf32(std::span<const char16_t> utf16_input,
2697 std::span<char32_t> utf32_output)
noexcept {
2698 #if SIMDUTF_CPLUSPLUS23
2700 return scalar::utf16_to_utf32::convert<endianness::BIG>(
2701 utf16_input.data(), utf16_input.size(), utf32_output.data());
2705 return convert_utf16be_to_utf32(utf16_input.data(), utf16_input.size(),
2706 utf32_output.data());
// convert_utf16_to_utf32_with_errors, pointer overload returning `result`:
// declaration only (no body is visible in this excerpt).
2728simdutf_warn_unused result convert_utf16_to_utf32_with_errors(
2729 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
// Span overload returning `result`. Under `#if SIMDUTF_CPLUSPLUS23` the
// visible return calls
// scalar::utf16_to_utf32::convert_with_errors<endianness::NATIVE>; the other
// visible return forwards to the pointer overload. utf32_output.size() is not
// consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2731simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2732convert_utf16_to_utf32_with_errors(std::span<const char16_t> utf16_input,
2733 std::span<char32_t> utf32_output)
noexcept {
2734 #if SIMDUTF_CPLUSPLUS23
2736 return scalar::utf16_to_utf32::convert_with_errors<endianness::NATIVE>(
2737 utf16_input.data(), utf16_input.size(), utf32_output.data());
2741 return convert_utf16_to_utf32_with_errors(
2742 utf16_input.data(), utf16_input.size(), utf32_output.data());
// convert_utf16le_to_utf32_with_errors, pointer overload returning `result`:
// declaration only (no body is visible in this excerpt).
2763simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(
2764 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
// Span overload returning `result`. Under `#if SIMDUTF_CPLUSPLUS23` the
// visible return calls
// scalar::utf16_to_utf32::convert_with_errors<endianness::LITTLE>; the other
// visible return forwards to the pointer overload. utf32_output.size() is not
// consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2766simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2767convert_utf16le_to_utf32_with_errors(
2768 std::span<const char16_t> utf16_input,
2769 std::span<char32_t> utf32_output)
noexcept {
2770 #if SIMDUTF_CPLUSPLUS23
2772 return scalar::utf16_to_utf32::convert_with_errors<endianness::LITTLE>(
2773 utf16_input.data(), utf16_input.size(), utf32_output.data());
2777 return convert_utf16le_to_utf32_with_errors(
2778 utf16_input.data(), utf16_input.size(), utf32_output.data());
// convert_utf16be_to_utf32_with_errors, pointer overload returning `result`:
// declaration only (no body is visible in this excerpt).
2799simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(
2800 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
// Span overload returning `result`. Under `#if SIMDUTF_CPLUSPLUS23` the
// visible return calls
// scalar::utf16_to_utf32::convert_with_errors<endianness::BIG>; the other
// visible return forwards to the pointer overload. utf32_output.size() is not
// consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2802simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2803convert_utf16be_to_utf32_with_errors(
2804 std::span<const char16_t> utf16_input,
2805 std::span<char32_t> utf32_output)
noexcept {
2806 #if SIMDUTF_CPLUSPLUS23
2808 return scalar::utf16_to_utf32::convert_with_errors<endianness::BIG>(
2809 utf16_input.data(), utf16_input.size(), utf32_output.data());
2813 return convert_utf16be_to_utf32_with_errors(
2814 utf16_input.data(), utf16_input.size(), utf32_output.data());
// convert_valid_utf16_to_utf32, pointer overload: declaration only (no body is
// visible in this excerpt).
2833simdutf_warn_unused
size_t convert_valid_utf16_to_utf32(
2834 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
// Span overload: char16_t input span, char32_t output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_utf32::convert_valid<endianness::NATIVE>; the other visible
// return forwards to the pointer overload. utf32_output.size() is not
// consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2836simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2837convert_valid_utf16_to_utf32(std::span<const char16_t> valid_utf16_input,
2838 std::span<char32_t> utf32_output)
noexcept {
2839 #if SIMDUTF_CPLUSPLUS23
2841 return scalar::utf16_to_utf32::convert_valid<endianness::NATIVE>(
2842 valid_utf16_input.data(), valid_utf16_input.size(),
2843 utf32_output.data());
2847 return convert_valid_utf16_to_utf32(valid_utf16_input.data(),
2848 valid_utf16_input.size(),
2849 utf32_output.data());
// convert_valid_utf16le_to_utf32, pointer overload: declaration only (no body
// is visible in this excerpt).
2867simdutf_warn_unused
size_t convert_valid_utf16le_to_utf32(
2868 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
// Span overload: char16_t input span, char32_t output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_utf32::convert_valid<endianness::LITTLE>; the other visible
// return forwards to the pointer overload. utf32_output.size() is not
// consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2870simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2871convert_valid_utf16le_to_utf32(std::span<const char16_t> valid_utf16_input,
2872 std::span<char32_t> utf32_output)
noexcept {
2873 #if SIMDUTF_CPLUSPLUS23
2875 return scalar::utf16_to_utf32::convert_valid<endianness::LITTLE>(
2876 valid_utf16_input.data(), valid_utf16_input.size(),
2877 utf32_output.data());
2881 return convert_valid_utf16le_to_utf32(valid_utf16_input.data(),
2882 valid_utf16_input.size(),
2883 utf32_output.data());
// convert_valid_utf16be_to_utf32, pointer overload: declaration only (no body
// is visible in this excerpt).
2901simdutf_warn_unused
size_t convert_valid_utf16be_to_utf32(
2902 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
// Span overload: char16_t input span, char32_t output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16_to_utf32::convert_valid<endianness::BIG>; the other visible
// return forwards to the pointer overload. utf32_output.size() is not
// consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2904simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2905convert_valid_utf16be_to_utf32(std::span<const char16_t> valid_utf16_input,
2906 std::span<char32_t> utf32_output)
noexcept {
2907 #if SIMDUTF_CPLUSPLUS23
2909 return scalar::utf16_to_utf32::convert_valid<endianness::BIG>(
2910 valid_utf16_input.data(), valid_utf16_input.size(),
2911 utf32_output.data());
2915 return convert_valid_utf16be_to_utf32(valid_utf16_input.data(),
2916 valid_utf16_input.size(),
2917 utf32_output.data());
2923#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// utf8_length_from_utf16, pointer/length overload: declaration only (no body
// is visible in this excerpt).
2935simdutf_warn_unused
size_t utf8_length_from_utf16(
const char16_t *input,
2936 size_t length)
noexcept;
// Span overload. Under `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16::utf8_length_from_utf16<endianness::NATIVE> on the span's
// data()/size(); the other visible return forwards to the pointer overload.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2938simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2939utf8_length_from_utf16(std::span<const char16_t> valid_utf16_input)
noexcept {
2940 #if SIMDUTF_CPLUSPLUS23
2942 return scalar::utf16::utf8_length_from_utf16<endianness::NATIVE>(
2943 valid_utf16_input.data(), valid_utf16_input.size());
2947 return utf8_length_from_utf16(valid_utf16_input.data(),
2948 valid_utf16_input.size());
// utf8_length_from_utf16_with_replacement, pointer/length overload returning
// `result`: declaration only (no body is visible in this excerpt).
2971simdutf_warn_unused result utf8_length_from_utf16_with_replacement(
2972 const char16_t *input,
size_t length)
noexcept;
// Span overload returning `result`. Under `#if SIMDUTF_CPLUSPLUS23` the
// visible return calls
// scalar::utf16::utf8_length_from_utf16_with_replacement<endianness::NATIVE>
// on the span's data()/size(); the other visible return forwards to the
// pointer overload.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
2974simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2975utf8_length_from_utf16_with_replacement(
2976 std::span<const char16_t> valid_utf16_input)
noexcept {
2977 #if SIMDUTF_CPLUSPLUS23
2979 return scalar::utf16::utf8_length_from_utf16_with_replacement<
2980 endianness::NATIVE>(valid_utf16_input.data(), valid_utf16_input.size());
2984 return utf8_length_from_utf16_with_replacement(valid_utf16_input.data(),
2985 valid_utf16_input.size());
// utf8_length_from_utf16le, pointer/length overload: declaration only (no body
// is visible in this excerpt).
3001simdutf_warn_unused
size_t utf8_length_from_utf16le(
const char16_t *input,
3002 size_t length)
noexcept;
// Span overload. Under `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16::utf8_length_from_utf16<endianness::LITTLE> on the span's
// data()/size(); the other visible return forwards to the pointer overload.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
3004simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused
size_t
3005utf8_length_from_utf16le(std::span<const char16_t> valid_utf16_input)
noexcept {
3006 #if SIMDUTF_CPLUSPLUS23
3008 return scalar::utf16::utf8_length_from_utf16<endianness::LITTLE>(
3009 valid_utf16_input.data(), valid_utf16_input.size());
3013 return utf8_length_from_utf16le(valid_utf16_input.data(),
3014 valid_utf16_input.size());
// utf8_length_from_utf16be, pointer/length overload: declaration only (no body
// is visible in this excerpt).
3030simdutf_warn_unused
size_t utf8_length_from_utf16be(
const char16_t *input,
3031 size_t length)
noexcept;
// Span overload. Under `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf16::utf8_length_from_utf16<endianness::BIG> on the span's
// data()/size(); the other visible return forwards to the pointer overload.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
3033simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3034utf8_length_from_utf16be(std::span<const char16_t> valid_utf16_input)
noexcept {
3035 #if SIMDUTF_CPLUSPLUS23
3037 return scalar::utf16::utf8_length_from_utf16<endianness::BIG>(
3038 valid_utf16_input.data(), valid_utf16_input.size());
3042 return utf8_length_from_utf16be(valid_utf16_input.data(),
3043 valid_utf16_input.size());
3049#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
// convert_utf32_to_utf8, pointer overload: declaration only. Part of the
// parameter list is elided in this excerpt.
3063simdutf_warn_unused
size_t convert_utf32_to_utf8(
const char32_t *input,
3065 char *utf8_buffer)
noexcept;
// Span overload: char32_t input span, byte-like output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf32_to_utf8::convert; the other visible return forwards to the
// pointer overload, casting utf8_output.data() to char *.
// utf8_output.size() is not consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
3067simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3068convert_utf32_to_utf8(
3069 std::span<const char32_t> utf32_input,
3070 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
3071 #if SIMDUTF_CPLUSPLUS23
3073 return scalar::utf32_to_utf8::convert(
3074 utf32_input.data(), utf32_input.size(), utf8_output.data());
3078 return convert_utf32_to_utf8(utf32_input.data(), utf32_input.size(),
3079 reinterpret_cast<char *
>(utf8_output.data()));
// convert_utf32_to_utf8_with_errors, pointer overload returning `result`:
// declaration only (no body is visible in this excerpt).
3100simdutf_warn_unused result convert_utf32_to_utf8_with_errors(
3101 const char32_t *input,
size_t length,
char *utf8_buffer)
noexcept;
// Span overload returning `result`. Under `#if SIMDUTF_CPLUSPLUS23` the
// visible return calls scalar::utf32_to_utf8::convert_with_errors; the other
// visible return forwards to the pointer overload, casting utf8_output.data()
// to char *. utf8_output.size() is not consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
3103simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3104convert_utf32_to_utf8_with_errors(
3105 std::span<const char32_t> utf32_input,
3106 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
3107 #if SIMDUTF_CPLUSPLUS23
3109 return scalar::utf32_to_utf8::convert_with_errors(
3110 utf32_input.data(), utf32_input.size(), utf8_output.data());
3114 return convert_utf32_to_utf8_with_errors(
3115 utf32_input.data(), utf32_input.size(),
3116 reinterpret_cast<char *
>(utf8_output.data()));
// convert_valid_utf32_to_utf8, pointer overload: declaration only (no body is
// visible in this excerpt).
3134simdutf_warn_unused
size_t convert_valid_utf32_to_utf8(
3135 const char32_t *input,
size_t length,
char *utf8_buffer)
noexcept;
// Span overload: char32_t input span, byte-like output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf32_to_utf8::convert_valid; the other visible return forwards to
// the pointer overload, casting utf8_output.data() to char *.
// utf8_output.size() is not consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
3137simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3138convert_valid_utf32_to_utf8(
3139 std::span<const char32_t> valid_utf32_input,
3140 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
3141 #if SIMDUTF_CPLUSPLUS23
3143 return scalar::utf32_to_utf8::convert_valid(
3144 valid_utf32_input.data(), valid_utf32_input.size(), utf8_output.data());
3148 return convert_valid_utf32_to_utf8(
3149 valid_utf32_input.data(), valid_utf32_input.size(),
3150 reinterpret_cast<char *
>(utf8_output.data()));
3156#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
// convert_utf32_to_utf16, pointer overload: declaration only (no body is
// visible in this excerpt).
3171simdutf_warn_unused
size_t convert_utf32_to_utf16(
3172 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
// Span overload: char32_t input span, char16_t output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf32_to_utf16::convert<endianness::NATIVE>; the other visible
// return forwards to the pointer overload. utf16_output.size() is not
// consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
3174simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3175convert_utf32_to_utf16(std::span<const char32_t> utf32_input,
3176 std::span<char16_t> utf16_output)
noexcept {
3177 #if SIMDUTF_CPLUSPLUS23
3179 return scalar::utf32_to_utf16::convert<endianness::NATIVE>(
3180 utf32_input.data(), utf32_input.size(), utf16_output.data());
3184 return convert_utf32_to_utf16(utf32_input.data(), utf32_input.size(),
3185 utf16_output.data());
// convert_utf32_to_utf16le, pointer overload: declaration only (no body is
// visible in this excerpt).
3203simdutf_warn_unused
size_t convert_utf32_to_utf16le(
3204 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
// Span overload: char32_t input span, char16_t output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf32_to_utf16::convert<endianness::LITTLE>; the other visible
// return forwards to the pointer overload. utf16_output.size() is not
// consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
3206simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3207convert_utf32_to_utf16le(std::span<const char32_t> utf32_input,
3208 std::span<char16_t> utf16_output)
noexcept {
3209 #if SIMDUTF_CPLUSPLUS23
3211 return scalar::utf32_to_utf16::convert<endianness::LITTLE>(
3212 utf32_input.data(), utf32_input.size(), utf16_output.data());
3216 return convert_utf32_to_utf16le(utf32_input.data(), utf32_input.size(),
3217 utf16_output.data());
3223#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
// convert_utf32_to_latin1, pointer overload: declaration only (no body is
// visible in this excerpt).
3238simdutf_warn_unused
size_t convert_utf32_to_latin1(
3239 const char32_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// Span overload: char32_t input span, byte-like output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf32_to_latin1::convert; the other visible return forwards to the
// pointer overload, casting latin1_output.data() to char *.
// latin1_output.size() is not consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
3241simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3242convert_utf32_to_latin1(
3243 std::span<const char32_t> utf32_input,
3244 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
3245 #if SIMDUTF_CPLUSPLUS23
3247 return scalar::utf32_to_latin1::convert(
3248 utf32_input.data(), utf32_input.size(), latin1_output.data());
3252 return convert_utf32_to_latin1(
3253 utf32_input.data(), utf32_input.size(),
3254 reinterpret_cast<char *
>(latin1_output.data()));
// convert_utf32_to_latin1_with_errors, pointer overload returning `result`:
// declaration only (no body is visible in this excerpt).
3276simdutf_warn_unused result convert_utf32_to_latin1_with_errors(
3277 const char32_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// Span overload returning `result`. Under `#if SIMDUTF_CPLUSPLUS23` the
// visible return calls scalar::utf32_to_latin1::convert_with_errors; the other
// visible return forwards to the pointer overload, casting
// latin1_output.data() to char *. latin1_output.size() is not consulted in the
// visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
3279simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3280convert_utf32_to_latin1_with_errors(
3281 std::span<const char32_t> utf32_input,
3282 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
3283 #if SIMDUTF_CPLUSPLUS23
3285 return scalar::utf32_to_latin1::convert_with_errors(
3286 utf32_input.data(), utf32_input.size(), latin1_output.data());
3290 return convert_utf32_to_latin1_with_errors(
3291 utf32_input.data(), utf32_input.size(),
3292 reinterpret_cast<char *
>(latin1_output.data()));
// convert_valid_utf32_to_latin1, pointer overload: declaration only (no body
// is visible in this excerpt).
3317simdutf_warn_unused
size_t convert_valid_utf32_to_latin1(
3318 const char32_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// Span overload. Under `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf32_to_latin1::convert_valid, wrapping the input pointer with
// detail::constexpr_cast_ptr<uint32_t> and the output pointer with
// detail::constexpr_cast_writeptr<char>. The other visible return forwards to
// the pointer overload, casting latin1_output.data() to char *.
// latin1_output.size() is not consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
3320simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused
size_t
3321convert_valid_utf32_to_latin1(
3322 std::span<const char32_t> valid_utf32_input,
3323 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
3324 #if SIMDUTF_CPLUSPLUS23
3326 return scalar::utf32_to_latin1::convert_valid(
3327 detail::constexpr_cast_ptr<uint32_t>(valid_utf32_input.data()),
3328 valid_utf32_input.size(),
3329 detail::constexpr_cast_writeptr<char>(latin1_output.data()));
3333 return convert_valid_utf32_to_latin1(
3334 valid_utf32_input.data(), valid_utf32_input.size(),
3335 reinterpret_cast<char *
>(latin1_output.data()));
// latin1_length_from_utf32: inline function taking only a length and returning
// size_t.
// NOTE(review): the function body is not visible in this excerpt, so the
// computation is not described here; confirm against the full file.
3352simdutf_warn_unused simdutf_really_inline simdutf_constexpr23
size_t
3353latin1_length_from_utf32(
size_t length)
noexcept {
// utf32_length_from_latin1: inline function taking only a length and returning
// size_t.
// NOTE(review): the function body is not visible in this excerpt, so the
// computation is not described here; confirm against the full file.
3365simdutf_warn_unused simdutf_really_inline simdutf_constexpr23
size_t
3366utf32_length_from_latin1(
size_t length)
noexcept {
3371#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
// convert_utf32_to_utf16be, pointer overload: declaration only (no body is
// visible in this excerpt).
3385simdutf_warn_unused
size_t convert_utf32_to_utf16be(
3386 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
// Span overload: char32_t input span, char16_t output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf32_to_utf16::convert<endianness::BIG>; the other visible return
// forwards to the pointer overload. utf16_output.size() is not consulted in
// the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
3388simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3389convert_utf32_to_utf16be(std::span<const char32_t> utf32_input,
3390 std::span<char16_t> utf16_output)
noexcept {
3391 #if SIMDUTF_CPLUSPLUS23
3393 return scalar::utf32_to_utf16::convert<endianness::BIG>(
3394 utf32_input.data(), utf32_input.size(), utf16_output.data());
3398 return convert_utf32_to_utf16be(utf32_input.data(), utf32_input.size(),
3399 utf16_output.data());
// convert_utf32_to_utf16_with_errors, pointer overload returning `result`:
// declaration only (no body is visible in this excerpt).
3421simdutf_warn_unused result convert_utf32_to_utf16_with_errors(
3422 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
// Span overload returning `result`. Under `#if SIMDUTF_CPLUSPLUS23` the
// visible return calls
// scalar::utf32_to_utf16::convert_with_errors<endianness::NATIVE>; the other
// visible return forwards to the pointer overload. utf16_output.size() is not
// consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
3424simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3425convert_utf32_to_utf16_with_errors(std::span<const char32_t> utf32_input,
3426 std::span<char16_t> utf16_output)
noexcept {
3427 #if SIMDUTF_CPLUSPLUS23
3429 return scalar::utf32_to_utf16::convert_with_errors<endianness::NATIVE>(
3430 utf32_input.data(), utf32_input.size(), utf16_output.data());
3434 return convert_utf32_to_utf16_with_errors(
3435 utf32_input.data(), utf32_input.size(), utf16_output.data());
// convert_utf32_to_utf16le_with_errors, pointer overload returning `result`:
// declaration only (no body is visible in this excerpt).
3456simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(
3457 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
// Span overload returning `result`. Under `#if SIMDUTF_CPLUSPLUS23` the
// visible return calls
// scalar::utf32_to_utf16::convert_with_errors<endianness::LITTLE>; the other
// visible return forwards to the pointer overload. utf16_output.size() is not
// consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
3459simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3460convert_utf32_to_utf16le_with_errors(
3461 std::span<const char32_t> utf32_input,
3462 std::span<char16_t> utf16_output)
noexcept {
3463 #if SIMDUTF_CPLUSPLUS23
3465 return scalar::utf32_to_utf16::convert_with_errors<endianness::LITTLE>(
3466 utf32_input.data(), utf32_input.size(), utf16_output.data());
3470 return convert_utf32_to_utf16le_with_errors(
3471 utf32_input.data(), utf32_input.size(), utf16_output.data());
// convert_utf32_to_utf16be_with_errors, pointer overload returning `result`:
// declaration only (no body is visible in this excerpt).
3492simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(
3493 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
// Span overload returning `result`. Under `#if SIMDUTF_CPLUSPLUS23` the
// visible return calls
// scalar::utf32_to_utf16::convert_with_errors<endianness::BIG>; the other
// visible return forwards to the pointer overload. utf16_output.size() is not
// consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
3495simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3496convert_utf32_to_utf16be_with_errors(
3497 std::span<const char32_t> utf32_input,
3498 std::span<char16_t> utf16_output)
noexcept {
3499 #if SIMDUTF_CPLUSPLUS23
3501 return scalar::utf32_to_utf16::convert_with_errors<endianness::BIG>(
3502 utf32_input.data(), utf32_input.size(), utf16_output.data());
3506 return convert_utf32_to_utf16be_with_errors(
3507 utf32_input.data(), utf32_input.size(), utf16_output.data());
// convert_valid_utf32_to_utf16, pointer overload: declaration only (no body is
// visible in this excerpt).
3525simdutf_warn_unused
size_t convert_valid_utf32_to_utf16(
3526 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
// Span overload: char32_t input span, char16_t output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf32_to_utf16::convert_valid<endianness::NATIVE>; the other visible
// return forwards to the pointer overload. utf16_output.size() is not
// consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
3528simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3529convert_valid_utf32_to_utf16(std::span<const char32_t> valid_utf32_input,
3530 std::span<char16_t> utf16_output)
noexcept {
3532 #if SIMDUTF_CPLUSPLUS23
3534 return scalar::utf32_to_utf16::convert_valid<endianness::NATIVE>(
3535 valid_utf32_input.data(), valid_utf32_input.size(),
3536 utf16_output.data());
3540 return convert_valid_utf32_to_utf16(valid_utf32_input.data(),
3541 valid_utf32_input.size(),
3542 utf16_output.data());
// convert_valid_utf32_to_utf16le, pointer overload: declaration only (no body
// is visible in this excerpt).
3560simdutf_warn_unused
size_t convert_valid_utf32_to_utf16le(
3561 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
// Span overload: char32_t input span, char16_t output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf32_to_utf16::convert_valid<endianness::LITTLE>; the other visible
// return forwards to the pointer overload. utf16_output.size() is not
// consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
3563simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3564convert_valid_utf32_to_utf16le(std::span<const char32_t> valid_utf32_input,
3565 std::span<char16_t> utf16_output)
noexcept {
3566 #if SIMDUTF_CPLUSPLUS23
3568 return scalar::utf32_to_utf16::convert_valid<endianness::LITTLE>(
3569 valid_utf32_input.data(), valid_utf32_input.size(),
3570 utf16_output.data());
3574 return convert_valid_utf32_to_utf16le(valid_utf32_input.data(),
3575 valid_utf32_input.size(),
3576 utf16_output.data());
// convert_valid_utf32_to_utf16be, pointer overload: declaration only (no body
// is visible in this excerpt).
3594simdutf_warn_unused
size_t convert_valid_utf32_to_utf16be(
3595 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
// Span overload: char32_t input span, char16_t output span. Under
// `#if SIMDUTF_CPLUSPLUS23` the visible return calls
// scalar::utf32_to_utf16::convert_valid<endianness::BIG>; the other visible
// return forwards to the pointer overload. utf16_output.size() is not
// consulted in the visible code.
// NOTE(review): the lines choosing between the two returns are not visible in
// this excerpt -- presumably a constant-evaluation check; confirm.
3597simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3598convert_valid_utf32_to_utf16be(std::span<const char32_t> valid_utf32_input,
3599 std::span<char16_t> utf16_output)
noexcept {
3600 #if SIMDUTF_CPLUSPLUS23
3602 return scalar::utf32_to_utf16::convert_valid<endianness::BIG>(
3603 valid_utf32_input.data(), valid_utf32_input.size(),
3604 utf16_output.data());
3608 return convert_valid_utf32_to_utf16be(valid_utf32_input.data(),
3609 valid_utf32_input.size(),
3610 utf16_output.data());
3616#if SIMDUTF_FEATURE_UTF16
// change_endianness_utf16, pointer overload returning void: declaration only
// (no body is visible in this excerpt).
3630void change_endianness_utf16(
const char16_t *input,
size_t length,
3631 char16_t *output)
noexcept;
// Span overload: char16_t input span, char16_t output span, returns void.
// Under `#if SIMDUTF_CPLUSPLUS23` the visible statement calls
// scalar::utf16::change_endianness_utf16; the other visible statement forwards
// to the pointer overload. utf16_output.size() is not consulted in the visible
// code.
// NOTE(review): the lines choosing between the two statements are not visible
// in this excerpt -- presumably a constant-evaluation check; confirm.
3633simdutf_really_inline simdutf_constexpr23
void
3634change_endianness_utf16(std::span<const char16_t> utf16_input,
3635 std::span<char16_t> utf16_output)
noexcept {
3636 #if SIMDUTF_CPLUSPLUS23
3638 return scalar::utf16::change_endianness_utf16(
3639 utf16_input.data(), utf16_input.size(), utf16_output.data());
3643 return change_endianness_utf16(utf16_input.data(), utf16_input.size(),
3644 utf16_output.data());
3650#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
// Pointer overload: takes `length` char32_t values at `input` and returns a
// size_t that must not be discarded; noexcept.
// NOTE(review): by its name the result is the UTF-8 byte count needed for the
// input -- confirm against the implementation.
3662simdutf_warn_unused
size_t utf8_length_from_utf32(
const char32_t *input,
3663 size_t length)
noexcept;
// Span overload: passes data()/size() of the span straight through.
3665simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3666utf8_length_from_utf32(std::span<const char32_t> valid_utf32_input)
noexcept {
3667 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: use the scalar routine of the same name.
3669 return scalar::utf32::utf8_length_from_utf32(valid_utf32_input.data(),
3670 valid_utf32_input.size());
// Forward to the pointer overload declared above.
3674 return utf8_length_from_utf32(valid_utf32_input.data(),
3675 valid_utf32_input.size());
3681#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
// Pointer overload: takes `length` char32_t values at `input`; noexcept,
// result must be used.
// NOTE(review): by its name the result is the number of UTF-16 code units
// needed for the input -- confirm against the implementation.
3693simdutf_warn_unused
size_t utf16_length_from_utf32(
const char32_t *input,
3694 size_t length)
noexcept;
// Span overload: passes data()/size() of the span straight through.
3696simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3697utf16_length_from_utf32(std::span<const char32_t> valid_utf32_input)
noexcept {
3698 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: use the scalar routine of the same name.
3700 return scalar::utf32::utf16_length_from_utf32(valid_utf32_input.data(),
3701 valid_utf32_input.size());
// Forward to the pointer overload declared above.
3705 return utf16_length_from_utf32(valid_utf32_input.data(),
3706 valid_utf32_input.size());
// Pointer overload: takes `length` char16_t values at `input`; noexcept,
// result must be used.
// NOTE(review): by its name the result is the number of char32_t values the
// input would convert to -- confirm against the implementation.
3726simdutf_warn_unused
size_t utf32_length_from_utf16(
const char16_t *input,
3727 size_t length)
noexcept;
// Span overload: passes data()/size() of the span straight through.
3729simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3730utf32_length_from_utf16(std::span<const char16_t> valid_utf16_input)
noexcept {
3731 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: scalar routine instantiated for
// endianness::NATIVE.
3733 return scalar::utf16::utf32_length_from_utf16<endianness::NATIVE>(
3734 valid_utf16_input.data(), valid_utf16_input.size());
// Forward to the pointer overload declared above.
3738 return utf32_length_from_utf16(valid_utf16_input.data(),
3739 valid_utf16_input.size());
// Pointer overload of the LE variant: `length` char16_t values at `input`;
// noexcept, result must be used.
3759simdutf_warn_unused
size_t utf32_length_from_utf16le(
const char16_t *input,
3760 size_t length)
noexcept;
// Span overload: passes data()/size() of the span straight through.
3762simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3763utf32_length_from_utf16le(
3764 std::span<const char16_t> valid_utf16_input)
noexcept {
3765 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: scalar routine instantiated for
// endianness::LITTLE.
3767 return scalar::utf16::utf32_length_from_utf16<endianness::LITTLE>(
3768 valid_utf16_input.data(), valid_utf16_input.size());
// Forward to the pointer overload declared above.
3772 return utf32_length_from_utf16le(valid_utf16_input.data(),
3773 valid_utf16_input.size());
// Pointer overload of the BE variant: `length` char16_t values at `input`;
// noexcept, result must be used.
3793simdutf_warn_unused
size_t utf32_length_from_utf16be(
const char16_t *input,
3794 size_t length)
noexcept;
// Span overload: passes data()/size() of the span straight through.
3796simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3797utf32_length_from_utf16be(
3798 std::span<const char16_t> valid_utf16_input)
noexcept {
3799 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: scalar routine instantiated for
// endianness::BIG.
3801 return scalar::utf16::utf32_length_from_utf16<endianness::BIG>(
3802 valid_utf16_input.data(), valid_utf16_input.size());
// Forward to the pointer overload declared above.
3806 return utf32_length_from_utf16be(valid_utf16_input.data(),
3807 valid_utf16_input.size());
3813#if SIMDUTF_FEATURE_UTF16
// Pointer overload: takes `length` char16_t values at `input`; noexcept,
// result must be used.
3828simdutf_warn_unused
size_t count_utf16(
const char16_t *input,
3829 size_t length)
noexcept;
// Span overload: passes data()/size() of the span straight through.
3831simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3832count_utf16(std::span<const char16_t> valid_utf16_input)
noexcept {
3833 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: scalar code-point counter instantiated for
// endianness::NATIVE.
3835 return scalar::utf16::count_code_points<endianness::NATIVE>(
3836 valid_utf16_input.data(), valid_utf16_input.size());
// Forward to the pointer overload declared above.
3840 return count_utf16(valid_utf16_input.data(), valid_utf16_input.size());
// Pointer overload of the LE variant: `length` char16_t values at `input`;
// noexcept, result must be used.
3859simdutf_warn_unused
size_t count_utf16le(
const char16_t *input,
3860 size_t length)
noexcept;
// Span overload: passes data()/size() of the span straight through.
3862simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3863count_utf16le(std::span<const char16_t> valid_utf16_input)
noexcept {
3864 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: scalar code-point counter instantiated for
// endianness::LITTLE.
3866 return scalar::utf16::count_code_points<endianness::LITTLE>(
3867 valid_utf16_input.data(), valid_utf16_input.size());
// Forward to the pointer overload declared above.
3871 return count_utf16le(valid_utf16_input.data(), valid_utf16_input.size());
// Pointer overload of the BE variant: `length` char16_t values at `input`;
// noexcept, result must be used.
3890simdutf_warn_unused
size_t count_utf16be(
const char16_t *input,
3891 size_t length)
noexcept;
// Span overload: passes data()/size() of the span straight through.
3893simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3894count_utf16be(std::span<const char16_t> valid_utf16_input)
noexcept {
3895 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: scalar code-point counter instantiated for
// endianness::BIG.
3897 return scalar::utf16::count_code_points<endianness::BIG>(
3898 valid_utf16_input.data(), valid_utf16_input.size());
// Forward to the pointer overload declared above.
3902 return count_utf16be(valid_utf16_input.data(), valid_utf16_input.size());
3908#if SIMDUTF_FEATURE_UTF8
// Pointer overload: takes `length` chars at `input`; noexcept, result must
// be used.
3921simdutf_warn_unused
size_t count_utf8(
const char *input,
3922 size_t length)
noexcept;
// Generic overload for any type satisfying detail::input_span_of_byte_like
// (size(), data() returning a pointer, byte-like element type).
3924simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t count_utf8(
3925 const detail::input_span_of_byte_like
auto &valid_utf8_input)
noexcept {
3926 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: the scalar counter receives data() as-is,
// without a cast.
3928 return scalar::utf8::count_code_points(valid_utf8_input.data(),
3929 valid_utf8_input.size());
// The pointer overload only accepts const char *, so the byte-like element
// pointer is reinterpreted before forwarding.
3933 return count_utf8(
reinterpret_cast<const char *
>(valid_utf8_input.data()),
3934 valid_utf8_input.size());
// Pointer overload: takes `length` chars at `input`; result must be used.
// Unlike the neighbouring declarations, this one carries no noexcept.
3953simdutf_warn_unused
size_t trim_partial_utf8(
const char *input,
size_t length);
// Generic overload for detail::input_span_of_byte_like arguments.
// NOTE(review): the line carrying this overload's name is not present here;
// judging by the forwarding call at the end it is trim_partial_utf8.
3955simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3957 const detail::input_span_of_byte_like
auto &valid_utf8_input)
noexcept {
3958 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: scalar routine of the same name, data() passed
// without a cast.
3960 return scalar::utf8::trim_partial_utf8(valid_utf8_input.data(),
3961 valid_utf8_input.size());
// Reinterpret the byte-like pointer as const char * and forward to the
// pointer overload declared above.
3965 return trim_partial_utf8(
3966 reinterpret_cast<const char *
>(valid_utf8_input.data()),
3967 valid_utf8_input.size());
3973#if SIMDUTF_FEATURE_UTF16
3988simdutf_warn_unused
size_t trim_partial_utf16be(
const char16_t *input,
// Span overload of trim_partial_utf16be: passes data()/size() through.
3991simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3992trim_partial_utf16be(std::span<const char16_t> valid_utf16_input)
noexcept {
3993 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: scalar trim routine instantiated for
// endianness::BIG.
3995 return scalar::utf16::trim_partial_utf16<endianness::BIG>(
3996 valid_utf16_input.data(), valid_utf16_input.size());
// Forward to the (pointer, length) overload of the same name.
4000 return trim_partial_utf16be(valid_utf16_input.data(),
4001 valid_utf16_input.size());
4020simdutf_warn_unused
size_t trim_partial_utf16le(
const char16_t *input,
// Span overload of trim_partial_utf16le: passes data()/size() through.
4023simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
4024trim_partial_utf16le(std::span<const char16_t> valid_utf16_input)
noexcept {
4025 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: scalar trim routine instantiated for
// endianness::LITTLE.
4027 return scalar::utf16::trim_partial_utf16<endianness::LITTLE>(
4028 valid_utf16_input.data(), valid_utf16_input.size());
// Forward to the (pointer, length) overload of the same name.
4032 return trim_partial_utf16le(valid_utf16_input.data(),
4033 valid_utf16_input.size());
4052simdutf_warn_unused
size_t trim_partial_utf16(
const char16_t *input,
// Span overload of trim_partial_utf16: passes data()/size() through.
4055simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
4056trim_partial_utf16(std::span<const char16_t> valid_utf16_input)
noexcept {
4057 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: scalar trim routine instantiated for
// endianness::NATIVE.
4059 return scalar::utf16::trim_partial_utf16<endianness::NATIVE>(
4060 valid_utf16_input.data(), valid_utf16_input.size());
// Forward to the (pointer, length) overload of the same name.
4064 return trim_partial_utf16(valid_utf16_input.data(),
4065 valid_utf16_input.size());
4071#if SIMDUTF_FEATURE_BASE64 || SIMDUTF_FEATURE_UTF16 || \
4072 SIMDUTF_FEATURE_DETECT_ENCODING
4073 #ifndef SIMDUTF_NEED_TRAILING_ZEROES
4074 #define SIMDUTF_NEED_TRAILING_ZEROES 1
4079#if SIMDUTF_FEATURE_BASE64
4084constexpr uint64_t base64_reverse_padding =
4086enum base64_options : uint64_t {
4089 base64_default_no_padding =
4091 base64_reverse_padding,
4092 base64_url_with_padding =
4093 base64_url | base64_reverse_padding,
4094 base64_default_accept_garbage =
4097 base64_url_accept_garbage =
4100 base64_default_or_url =
4102 base64_default_or_url_accept_garbage =
4111enum last_chunk_handling_options : uint64_t {
4115 stop_before_partial =
// True exactly when `options` is stop_before_partial or only_full_chunks;
// every other last_chunk_handling_options value yields false.
4121inline simdutf_constexpr23
bool
4122is_partial(last_chunk_handling_options options) {
4123 return (options == stop_before_partial) || (options == only_full_chunks);
// Declaration of the char search: takes the range bounds `start`/`end` and
// the `character` to look for, returns a const char *; noexcept, result must
// be used.
4127simdutf_warn_unused
const char *find(
const char *start,
const char *end,
4128 char character)
noexcept;
// Same signature shape for char16_t data.
4129simdutf_warn_unused
const char16_t *
4130find(
const char16_t *start,
const char16_t *end,
char16_t character)
noexcept;
// Inline char search over the range delimited by `start` and `end`.
4143simdutf_warn_unused simdutf_really_inline simdutf_constexpr23
const char *
4144find(
const char *start,
const char *end,
char character)
noexcept {
4145 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: plain linear scan, advancing `start` until it
// reaches `end` and comparing each element with `character`.
4147 for (; start != end; ++start)
4148 if (*start == character)
// Delegate to detail::find with the same three arguments.
4154 return detail::find(start, end, character);
// Inline char16_t search over the range delimited by `start` and `end`.
4157simdutf_warn_unused simdutf_really_inline simdutf_constexpr23
const char16_t *
4158find(
const char16_t *start,
const char16_t *end,
char16_t character)
noexcept {
4161 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: plain linear scan, advancing `start` until it
// reaches `end` and comparing each element with `character`.
4163 for (; start != end; ++start)
4164 if (*start == character)
// Delegate to detail::find with the same three arguments.
4170 return detail::find(start, end, character);
4175 #include <simdutf/base64_tables.h>
4176 #include <simdutf/scalar/base64.h>
// Returns a std::string_view naming a base64_options value. Each visible
// case returns the enumerator's own identifier spelled as a string literal.
4180inline std::string_view to_string(base64_options options) {
4182 case base64_default:
4183 return "base64_default";
4185 return "base64_url";
4186 case base64_reverse_padding:
4187 return "base64_reverse_padding";
4188 case base64_url_with_padding:
4189 return "base64_url_with_padding";
4190 case base64_default_accept_garbage:
4191 return "base64_default_accept_garbage";
4192 case base64_url_accept_garbage:
4193 return "base64_url_accept_garbage";
4194 case base64_default_or_url:
4195 return "base64_default_or_url";
4196 case base64_default_or_url_accept_garbage:
4197 return "base64_default_or_url_accept_garbage";
// Returns a std::string_view naming a last_chunk_handling_options value; the
// visible cases return the enumerator's identifier as a string literal.
4202inline std::string_view to_string(last_chunk_handling_options options) {
4208 case stop_before_partial:
4209 return "stop_before_partial";
4210 case only_full_chunks:
4211 return "only_full_chunks";
// Pointer overload for char input: takes `length` chars at `input`;
// noexcept, result must be used.
// NOTE(review): by its name the result is an upper bound on the decoded
// size -- confirm against the implementation.
4229simdutf_warn_unused
size_t
4230maximal_binary_length_from_base64(
const char *input,
size_t length)
noexcept;
// Generic overload for detail::input_span_of_byte_like arguments.
4232simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
4233maximal_binary_length_from_base64(
4234 const detail::input_span_of_byte_like
auto &input)
noexcept {
4235 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: the scalar routine is given the data pointer
// converted through detail::constexpr_cast_ptr<uint8_t>.
4237 return scalar::base64::maximal_binary_length_from_base64(
4238 detail::constexpr_cast_ptr<uint8_t>(input.data()), input.size());
// Reinterpret the byte-like pointer as const char * and forward to the
// pointer overload declared above.
4242 return maximal_binary_length_from_base64(
4243 reinterpret_cast<const char *
>(input.data()), input.size());
// Pointer overload for char16_t input: takes `length` code units at `input`;
// noexcept, result must be used.
4262simdutf_warn_unused
size_t maximal_binary_length_from_base64(
4263 const char16_t *input,
size_t length)
noexcept;
// Span overload for char16_t input.
4265simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
4266maximal_binary_length_from_base64(std::span<const char16_t> input)
noexcept {
4267 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: call the scalar routine of the same name.
4269 return scalar::base64::maximal_binary_length_from_base64(input.data(),
// Forward data()/size() to the pointer overload declared above.
4274 return maximal_binary_length_from_base64(input.data(), input.size());
// Pointer overload for char input: takes `length` chars at `input`;
// noexcept, result must be used.
4293simdutf_warn_unused
size_t binary_length_from_base64(
const char *input,
4294 size_t length)
noexcept;
// Generic overload for detail::input_span_of_byte_like arguments.
4296simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
4297binary_length_from_base64(
4298 const detail::input_span_of_byte_like
auto &input)
noexcept {
4299 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: call the scalar routine of the same name.
4301 return scalar::base64::binary_length_from_base64(input.data(),
// Reinterpret the byte-like pointer as const char * and forward to the
// pointer overload declared above.
4306 return binary_length_from_base64(
4307 reinterpret_cast<const char *
>(input.data()), input.size());
// Pointer overload for char16_t input: takes `length` code units at `input`;
// noexcept, result must be used.
4327simdutf_warn_unused
size_t binary_length_from_base64(
const char16_t *input,
4328 size_t length)
noexcept;
// Span overload for char16_t input.
4330simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
4331binary_length_from_base64(std::span<const char16_t> input)
noexcept {
4332 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: call the scalar routine of the same name.
4334 return scalar::base64::binary_length_from_base64(input.data(),
// Forward data()/size() to the pointer overload declared above.
4339 return binary_length_from_base64(input.data(), input.size());
// Pointer overload for char input. `options` defaults to base64_default and
// `last_chunk_options` to loose. Returns a `result` that must be used;
// noexcept.
4398simdutf_warn_unused result base64_to_binary(
4399 const char *input,
size_t length,
char *output,
4400 base64_options options = base64_default,
4401 last_chunk_handling_options last_chunk_options = loose)
noexcept;
// Generic overload: byte-like input span and byte-like, mutable output span
// (taken by forwarding reference). Same defaults as the pointer overload.
// NOTE(review): the line carrying this overload's name is not present here;
// the forwarding call below indicates base64_to_binary.
// binary_output.size() is not read in the visible code.
4403simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
4405 const detail::input_span_of_byte_like
auto &input,
4406 detail::output_span_of_byte_like
auto &&binary_output,
4407 base64_options options = base64_default,
4408 last_chunk_handling_options last_chunk_options = loose)
noexcept {
4409 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: use the scalar decoder
// base64_to_binary_details_impl.
4411 return scalar::base64::base64_to_binary_details_impl(
4412 input.data(), input.size(), binary_output.data(), options,
4413 last_chunk_options);
// Reinterpret both byte-like pointers as char pointers and forward to the
// pointer overload.
4417 return base64_to_binary(
reinterpret_cast<const char *
>(input.data()),
4419 reinterpret_cast<char *
>(binary_output.data()),
4420 options, last_chunk_options);
// Thin wrapper: returns scalar::base64::base64_length_from_binary(length,
// options). `options` defaults to base64_default.
4432inline simdutf_warn_unused simdutf_constexpr23
size_t base64_length_from_binary(
4433 size_t length, base64_options options = base64_default)
noexcept {
4434 return scalar::base64::base64_length_from_binary(length, options);
// Thin wrapper around scalar::base64::base64_length_from_binary_with_lines.
// Parameter order here is (length, options, line_length), i.e. options comes
// BEFORE line_length; line_length defaults to default_line_length.
4447inline simdutf_warn_unused simdutf_constexpr23
size_t
4448base64_length_from_binary_with_lines(
4449 size_t length, base64_options options = base64_default,
4450 size_t line_length = default_line_length)
noexcept {
4451 return scalar::base64::base64_length_from_binary_with_lines(length, options,
// Pointer overload: reads `length` chars from `input`, writes through
// `output`; `options` defaults to base64_default. noexcept.
4476size_t binary_to_base64(
const char *input,
size_t length,
char *output,
4477 base64_options options = base64_default)
noexcept;
// Generic overload: byte-like input span and byte-like, mutable output span.
// Despite its name, `binary_output` is the destination that is written to.
// Its size() is not read in the visible code.
4479simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
4480binary_to_base64(
const detail::input_span_of_byte_like
auto &input,
4481 detail::output_span_of_byte_like
auto &&binary_output,
4482 base64_options options = base64_default)
noexcept {
4483 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: scalar encoder; note the destination pointer is
// its FIRST argument, ahead of the input pointer and length.
4485 return scalar::base64::tail_encode_base64(
4486 binary_output.data(), input.data(), input.size(), options);
// Reinterpret both byte-like pointers as char pointers and forward to the
// pointer overload declared above.
4490 return binary_to_base64(
4491 reinterpret_cast<const char *
>(input.data()), input.size(),
4492 reinterpret_cast<char *
>(binary_output.data()), options);
// Pointer overload. Parameter order is (input, length, output, line_length,
// options): line_length comes BEFORE options here, the reverse of
// base64_length_from_binary_with_lines. line_length defaults to
// simdutf::default_line_length, options to base64_default. noexcept.
4522binary_to_base64_with_lines(
const char *input,
size_t length,
char *output,
4523 size_t line_length = simdutf::default_line_length,
4524 base64_options options = base64_default)
noexcept;
// Generic overload: byte-like input span and byte-like, mutable output span;
// same defaults and parameter order as the pointer overload.
4526simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
4527binary_to_base64_with_lines(
4528 const detail::input_span_of_byte_like
auto &input,
4529 detail::output_span_of_byte_like
auto &&binary_output,
4530 size_t line_length = simdutf::default_line_length,
4531 base64_options options = base64_default)
noexcept {
4532 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: scalar encoder tail_encode_base64_impl<true>;
// argument order is (destination, input, size, options, line_length).
4534 return scalar::base64::tail_encode_base64_impl<true>(
4535 binary_output.data(), input.data(), input.size(), options, line_length);
// Reinterpret both byte-like pointers as char pointers and forward to the
// pointer overload declared above.
4539 return binary_to_base64_with_lines(
4540 reinterpret_cast<const char *
>(input.data()), input.size(),
4541 reinterpret_cast<char *
>(binary_output.data()), line_length, options);
4546 #if SIMDUTF_ATOMIC_REF
// Pointer overload: (input, length, output, options); options defaults to
// base64_default. noexcept.
4589atomic_binary_to_base64(
const char *input,
size_t length,
char *output,
4590 base64_options options = base64_default)
noexcept;
// Generic overload for byte-like spans. Unlike the non-atomic wrappers it is
// not marked simdutf_constexpr23 and has no SIMDUTF_CPLUSPLUS23 branch: it
// always forwards to the pointer overload after reinterpreting both
// pointers as char pointers. binary_output.size() is not read.
4592simdutf_really_inline simdutf_warn_unused
size_t
4593atomic_binary_to_base64(
const detail::input_span_of_byte_like
auto &input,
4594 detail::output_span_of_byte_like
auto &&binary_output,
4595 base64_options options = base64_default)
noexcept {
4596 return atomic_binary_to_base64(
4597 reinterpret_cast<const char *
>(input.data()), input.size(),
4598 reinterpret_cast<char *
>(binary_output.data()), options);
// Pointer overload for char16_t input: (input, length, output, options,
// last_chunk_options). Defaults: base64_default and
// last_chunk_handling_options::loose. Returns a `result` that must be used;
// noexcept.
4659simdutf_warn_unused result
4660base64_to_binary(
const char16_t *input,
size_t length,
char *output,
4661 base64_options options = base64_default,
4662 last_chunk_handling_options last_chunk_options =
4663 last_chunk_handling_options::loose)
noexcept;
// Overload taking a char16_t span and a byte-like, mutable output span.
// NOTE(review): the line carrying this overload's name is not present here;
// the forwarding call below indicates base64_to_binary.
4665simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
4667 std::span<const char16_t> input,
4668 detail::output_span_of_byte_like
auto &&binary_output,
4669 base64_options options = base64_default,
4670 last_chunk_handling_options last_chunk_options = loose)
noexcept {
4671 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: use the scalar decoder
// base64_to_binary_details_impl.
4673 return scalar::base64::base64_to_binary_details_impl(
4674 input.data(), input.size(), binary_output.data(), options,
4675 last_chunk_options);
// Only the output pointer needs a cast; the char16_t input is passed as-is.
4679 return base64_to_binary(input.data(), input.size(),
4680 reinterpret_cast<char *
>(binary_output.data()),
4681 options, last_chunk_options);
// Pointer overload for char input, returning a `full_result` (must be used).
// Defaults: options = base64_default, last_chunk_options =
// last_chunk_handling_options::loose. noexcept.
4733simdutf_warn_unused full_result
4734base64_to_binary_details(
const char *input,
size_t length,
char *output,
4735 base64_options options = base64_default,
4736 last_chunk_handling_options last_chunk_options =
4737 last_chunk_handling_options::loose)
noexcept;
// Generic overload: byte-like input span and byte-like, mutable output span.
// binary_output.size() is not read in the visible code.
4739simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 full_result
4740base64_to_binary_details(
4741 const detail::input_span_of_byte_like
auto &input,
4742 detail::output_span_of_byte_like
auto &&binary_output,
4743 base64_options options = base64_default,
4744 last_chunk_handling_options last_chunk_options = loose)
noexcept {
4745 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: use the scalar decoder
// base64_to_binary_details_impl.
4747 return scalar::base64::base64_to_binary_details_impl(
4748 input.data(), input.size(), binary_output.data(), options,
4749 last_chunk_options);
// Reinterpret both byte-like pointers as char pointers and forward to the
// pointer overload declared above.
4753 return base64_to_binary_details(
4754 reinterpret_cast<const char *
>(input.data()), input.size(),
4755 reinterpret_cast<char *
>(binary_output.data()), options,
4756 last_chunk_options);
// Pointer overload for char16_t input, returning a `full_result` (must be
// used). Defaults: options = base64_default, last_chunk_options =
// last_chunk_handling_options::loose. noexcept.
4809simdutf_warn_unused full_result
4810base64_to_binary_details(
const char16_t *input,
size_t length,
char *output,
4811 base64_options options = base64_default,
4812 last_chunk_handling_options last_chunk_options =
4813 last_chunk_handling_options::loose)
noexcept;
// Overload taking a char16_t span and a byte-like, mutable output span.
4815simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 full_result
4816base64_to_binary_details(
4817 std::span<const char16_t> input,
4818 detail::output_span_of_byte_like
auto &&binary_output,
4819 base64_options options = base64_default,
4820 last_chunk_handling_options last_chunk_options = loose)
noexcept {
4821 #if SIMDUTF_CPLUSPLUS23
// With SIMDUTF_CPLUSPLUS23: use the scalar decoder
// base64_to_binary_details_impl.
4823 return scalar::base64::base64_to_binary_details_impl(
4824 input.data(), input.size(), binary_output.data(), options,
4825 last_chunk_options);
// Only the output pointer needs a cast; the char16_t input is passed as-is.
4829 return base64_to_binary_details(
4830 input.data(), input.size(),
4831 reinterpret_cast<char *
>(binary_output.data()), options,
4832 last_chunk_options);
// char overload: returns scalar::base64::is_ignorable(input, options);
// options defaults to base64_default.
4847simdutf_warn_unused simdutf_really_inline simdutf_constexpr23
bool
4848base64_ignorable(
char input, base64_options options = base64_default)
noexcept {
4849 return scalar::base64::is_ignorable(input, options);
// char16_t overload: same delegation for a UTF-16 code unit.
4851simdutf_warn_unused simdutf_really_inline simdutf_constexpr23
bool
4852base64_ignorable(
char16_t input,
4853 base64_options options = base64_default)
noexcept {
4854 return scalar::base64::is_ignorable(input, options);
// char overload: returns scalar::base64::is_base64(input, options); options
// defaults to base64_default.
4868simdutf_warn_unused simdutf_really_inline simdutf_constexpr23
bool
4869base64_valid(
char input, base64_options options = base64_default)
noexcept {
4870 return scalar::base64::is_base64(input, options);
// char16_t overload: same delegation for a UTF-16 code unit.
4872simdutf_warn_unused simdutf_really_inline simdutf_constexpr23
bool
4873base64_valid(
char16_t input, base64_options options = base64_default)
noexcept {
4874 return scalar::base64::is_base64(input, options);
// char overload: returns scalar::base64::is_base64_or_padding(input,
// options); options defaults to base64_default.
4886simdutf_warn_unused simdutf_really_inline simdutf_constexpr23
bool
4887base64_valid_or_padding(
char input,
4888 base64_options options = base64_default)
noexcept {
4889 return scalar::base64::is_base64_or_padding(input, options);
// char16_t overload: same delegation for a UTF-16 code unit.
4891simdutf_warn_unused simdutf_really_inline simdutf_constexpr23
bool
4892base64_valid_or_padding(
char16_t input,
4893 base64_options options = base64_default)
noexcept {
4894 return scalar::base64::is_base64_or_padding(input, options);
// char-input declaration. `outlen` is a non-const size_t reference, so the
// callee can both read and update it.
// NOTE(review): presumably capacity on entry and bytes written on exit --
// confirm against the implementation.
// Defaults: options = base64_default, last_chunk_options =
// last_chunk_handling_options::loose, decode_up_to_bad_char = false.
4964simdutf_warn_unused result
4965base64_to_binary_safe(
const char *input,
size_t length,
char *output,
4966 size_t &outlen, base64_options options = base64_default,
4967 last_chunk_handling_options last_chunk_options =
4968 last_chunk_handling_options::loose,
4969 bool decode_up_to_bad_char =
false) noexcept;
// char16_t-input declaration with the same parameters and defaults.
4972simdutf_warn_unused result
4973base64_to_binary_safe(const
char16_t *input,
size_t length,
char *output,
4974 size_t &outlen, base64_options options = base64_default,
4975 last_chunk_handling_options last_chunk_options =
4976 last_chunk_handling_options::loose,
4977 bool decode_up_to_bad_char = false) noexcept;
4980 #if SIMDUTF_ATOMIC_REF
// char-input declaration of the atomic variant. `outlen` is a non-const
// size_t reference. Defaults: options = base64_default, last_chunk_options =
// last_chunk_handling_options::loose, decode_up_to_bad_char = false.
5020simdutf_warn_unused result atomic_base64_to_binary_safe(
5021 const char *input,
size_t length,
char *output,
size_t &outlen,
5022 base64_options options = base64_default,
5023 last_chunk_handling_options last_chunk_options =
5024 last_chunk_handling_options::loose,
5025 bool decode_up_to_bad_char =
false) noexcept;
// char16_t-input declaration with the same parameters and defaults.
5026simdutf_warn_unused result atomic_base64_to_binary_safe(
5027 const
char16_t *input,
size_t length,
char *output,
size_t &outlen,
5028 base64_options options = base64_default,
5029 last_chunk_handling_options last_chunk_options = loose,
5030 bool decode_up_to_bad_char = false) noexcept;
// Span overload for byte-like input. Returns a tuple of the `result` from the
// pointer overload and the final value of `outlen`.
// Note the parameter naming: `binary_input` is the source passed as the
// `input` argument, `output` is the destination.
5036simdutf_really_inline simdutf_warn_unused std::tuple<result, std::size_t>
5037atomic_base64_to_binary_safe(
5038 const detail::input_span_of_byte_like
auto &binary_input,
5039 detail::output_span_of_byte_like
auto &&output,
5040 base64_options options = base64_default,
5041 last_chunk_handling_options last_chunk_options =
5042 last_chunk_handling_options::loose,
5043 bool decode_up_to_bad_char =
false) noexcept {
// Seed the in/out length with the destination span's size.
5044 size_t outlen = output.size();
// Both byte-like pointers are reinterpreted as char pointers for the
// pointer overload, which receives `outlen` by reference.
5045 auto ret = atomic_base64_to_binary_safe(
5046 reinterpret_cast<const char *
>(binary_input.data()), binary_input.size(),
5047 reinterpret_cast<char *
>(output.data()), outlen, options,
5048 last_chunk_options, decode_up_to_bad_char);
// Hand back the status together with outlen as left by the call above.
5049 return {ret, outlen};
// Span overload for char16_t input. Returns a tuple of the `result` from the
// pointer overload and the final value of `outlen`. Unlike the byte-like
// overload, this one is not marked simdutf_really_inline.
5055simdutf_warn_unused std::tuple<result, std::size_t>
5056atomic_base64_to_binary_safe(
5057 std::span<const char16_t> base64_input,
5058 detail::output_span_of_byte_like
auto &&binary_output,
5059 base64_options options = base64_default,
5060 last_chunk_handling_options last_chunk_options = loose,
5061 bool decode_up_to_bad_char =
false) noexcept {
// Seed the in/out length with the destination span's size.
5062 size_t outlen = binary_output.size();
// Only the output pointer needs a cast; the char16_t input is passed as-is.
5063 auto ret = atomic_base64_to_binary_safe(
5064 base64_input.data(), base64_input.size(),
5065 reinterpret_cast<char *
>(binary_output.data()), outlen, options,
5066 last_chunk_options, decode_up_to_bad_char);
// Hand back the status together with outlen as left by the call above.
5067 return {ret, outlen};
// Virtual accessor: returns the stored _name as a std::string_view.
5092 virtual std::string_view
name() const noexcept {
return _name; }
// Virtual accessor: returns the stored _description as a std::string_view.
5103 virtual std::string_view
description() const noexcept {
return _description; }
5116#if SIMDUTF_FEATURE_DETECT_ENCODING
5124 size_t length)
const noexcept;
5133 size_t length)
const noexcept = 0;
// Virtual accessor: returns the stored _required_instruction_sets value.
// Not declared noexcept, unlike name() and description().
5143 virtual uint32_t required_instruction_sets()
const {
5144 return _required_instruction_sets;
5147#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
5158 size_t len)
const noexcept = 0;
5161#if SIMDUTF_FEATURE_UTF8
5174 simdutf_warn_unused
virtual result
5178#if SIMDUTF_FEATURE_ASCII
5188 simdutf_warn_unused
virtual bool
5203 simdutf_warn_unused
virtual result
5208#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
5220 simdutf_warn_unused
virtual bool
5234 simdutf_warn_unused
virtual bool
5238#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
5253 simdutf_warn_unused
virtual bool
5257#if SIMDUTF_FEATURE_UTF16
5272 simdutf_warn_unused
virtual bool
5291 simdutf_warn_unused
virtual result
5293 size_t len)
const noexcept = 0;
5311 simdutf_warn_unused
virtual result
5313 size_t len)
const noexcept = 0;
5327 char16_t *output)
const noexcept = 0;
5341 char16_t *output)
const noexcept = 0;
5344#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
5357 simdutf_warn_unused
virtual bool
5361#if SIMDUTF_FEATURE_UTF32
5377 simdutf_warn_unused
virtual result
5379 size_t len)
const noexcept = 0;
5382#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
5393 simdutf_warn_unused
virtual size_t
5395 char *utf8_output)
const noexcept = 0;
5398#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
5409 simdutf_warn_unused
virtual size_t
5411 char16_t *utf16_output)
const noexcept = 0;
5423 simdutf_warn_unused
virtual size_t
5425 char16_t *utf16_output)
const noexcept = 0;
5428#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
5439 simdutf_warn_unused
virtual size_t
5441 char32_t *utf32_buffer)
const noexcept = 0;
5444#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
5457 simdutf_warn_unused
virtual size_t
5459 char *latin1_output)
const noexcept = 0;
5477 simdutf_warn_unused
virtual result
5479 char *latin1_output)
const noexcept = 0;
5500 simdutf_warn_unused
virtual size_t
5502 char *latin1_output)
const noexcept = 0;
5505#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5518 simdutf_warn_unused
virtual size_t
5520 char16_t *utf16_output)
const noexcept = 0;
5534 simdutf_warn_unused
virtual size_t
5536 char16_t *utf16_output)
const noexcept = 0;
5554 const char *input,
size_t length,
5555 char16_t *utf16_output)
const noexcept = 0;
5573 const char *input,
size_t length,
5574 char16_t *utf16_output)
const noexcept = 0;
5595 const char16_t *input,
size_t length)
const noexcept = 0;
5617 const char16_t *input,
size_t length)
const noexcept = 0;
5621#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5634 simdutf_warn_unused
virtual size_t
5636 char32_t *utf32_output)
const noexcept = 0;
5652 simdutf_warn_unused
virtual result
5654 char32_t *utf32_output)
const noexcept = 0;
5657#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5668 simdutf_warn_unused
virtual size_t
5670 char16_t *utf16_buffer)
const noexcept = 0;
5682 simdutf_warn_unused
virtual size_t
5684 char16_t *utf16_buffer)
const noexcept = 0;
5687#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5698 simdutf_warn_unused
virtual size_t
5700 char32_t *utf32_buffer)
const noexcept = 0;
5703#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5716 simdutf_warn_unused
virtual size_t
5720#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5735 simdutf_warn_unused
virtual size_t
5739#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
5756 simdutf_warn_unused
virtual size_t
5758 char *latin1_buffer)
const noexcept = 0;
5776 simdutf_warn_unused
virtual size_t
5778 char *latin1_buffer)
const noexcept = 0;
5799 simdutf_warn_unused
virtual result
5801 char *latin1_buffer)
const noexcept = 0;
5822 simdutf_warn_unused
virtual result
5824 char *latin1_buffer)
const noexcept = 0;
5846 simdutf_warn_unused
virtual size_t
5848 char *latin1_buffer)
const noexcept = 0;
5870 simdutf_warn_unused
virtual size_t
5872 char *latin1_buffer)
const noexcept = 0;
5875#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5891 simdutf_warn_unused
virtual size_t
5893 char *utf8_buffer)
const noexcept = 0;
5910 simdutf_warn_unused
virtual size_t
5912 char *utf8_buffer)
const noexcept = 0;
5932 simdutf_warn_unused
virtual result
5934 char *utf8_buffer)
const noexcept = 0;
5954 simdutf_warn_unused
virtual result
5956 char *utf8_buffer)
const noexcept = 0;
5974 const char16_t *input,
size_t length,
5975 char *utf8_buffer)
const noexcept = 0;
5993 const char16_t *input,
size_t length,
5994 char *utf8_buffer)
const noexcept = 0;
6010 simdutf_warn_unused
virtual size_t
6012 char *utf8_buffer)
const noexcept = 0;
6028 simdutf_warn_unused
virtual size_t
6030 char *utf8_buffer)
const noexcept = 0;
6033#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
6049 simdutf_warn_unused
virtual size_t
6051 char32_t *utf32_buffer)
const noexcept = 0;
6068 simdutf_warn_unused
virtual size_t
6070 char32_t *utf32_buffer)
const noexcept = 0;
6091 const char16_t *input,
size_t length,
6092 char32_t *utf32_buffer)
const noexcept = 0;
6113 const char16_t *input,
size_t length,
6114 char32_t *utf32_buffer)
const noexcept = 0;
6130 simdutf_warn_unused
virtual size_t
6132 char32_t *utf32_buffer)
const noexcept = 0;
6148 simdutf_warn_unused
virtual size_t
6150 char32_t *utf32_buffer)
const noexcept = 0;
6153#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
6168 simdutf_warn_unused
virtual size_t
6170 size_t length)
const noexcept = 0;
6186 simdutf_warn_unused
virtual size_t
6188 size_t length)
const noexcept = 0;
6191#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6208 simdutf_warn_unused
virtual size_t
6210 char *latin1_buffer)
const noexcept = 0;
6213#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6233 simdutf_warn_unused
virtual result
6235 char *latin1_buffer)
const noexcept = 0;
6257 simdutf_warn_unused
virtual size_t
6259 char *latin1_buffer)
const noexcept = 0;
6262#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
6278 simdutf_warn_unused
virtual size_t
6280 char *utf8_buffer)
const noexcept = 0;
6299 simdutf_warn_unused
virtual result
6301 char *utf8_buffer)
const noexcept = 0;
6317 simdutf_warn_unused
virtual size_t
6319 char *utf8_buffer)
const noexcept = 0;
6322#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
6332 simdutf_warn_unused
virtual size_t
6338#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
6354 simdutf_warn_unused
virtual size_t
6356 char16_t *utf16_buffer)
const noexcept = 0;
6373 simdutf_warn_unused
virtual size_t
6375 char16_t *utf16_buffer)
const noexcept = 0;
6396 const char32_t *input,
size_t length,
6397 char16_t *utf16_buffer)
const noexcept = 0;
6418 const char32_t *input,
size_t length,
6419 char16_t *utf16_buffer)
const noexcept = 0;
6435 simdutf_warn_unused
virtual size_t
6437 char16_t *utf16_buffer)
const noexcept = 0;
6453 simdutf_warn_unused
virtual size_t
6455 char16_t *utf16_buffer)
const noexcept = 0;
6458#if SIMDUTF_FEATURE_UTF16
6474 char16_t *output)
const noexcept = 0;
6477#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
6486 simdutf_warn_unused
virtual size_t
6490#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
6503 simdutf_warn_unused
virtual size_t
6505 size_t length)
const noexcept = 0;
6508#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6520 simdutf_warn_unused
virtual size_t
6526#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
6538 simdutf_warn_unused
virtual size_t
6542#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
6557 simdutf_warn_unused
virtual size_t
6563#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
6576 simdutf_warn_unused
virtual size_t
6578 size_t length)
const noexcept = 0;
6581#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6590 simdutf_warn_unused
virtual size_t
6596#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
6614 simdutf_warn_unused
virtual size_t
6616 size_t length)
const noexcept = 0;
6635 simdutf_warn_unused
virtual size_t
6637 size_t length)
const noexcept = 0;
6640#if SIMDUTF_FEATURE_UTF16
6656 simdutf_warn_unused
virtual size_t
6674 simdutf_warn_unused
virtual size_t
6678#if SIMDUTF_FEATURE_UTF8
6691 simdutf_warn_unused
virtual size_t
6695#if SIMDUTF_FEATURE_BASE64
6710 const char *input,
size_t length)
const noexcept;
6727 const char16_t *input,
size_t length)
const noexcept;
6741 simdutf_warn_unused
virtual size_t
6757 simdutf_warn_unused
virtual size_t
6759 size_t length)
const noexcept;
6794 simdutf_warn_unused
virtual result
6796 base64_options options = base64_default,
6797 last_chunk_handling_options last_chunk_options =
6798 last_chunk_handling_options::loose)
const noexcept = 0;
6833 const char *input,
size_t length,
char *output,
6834 base64_options options = base64_default,
6835 last_chunk_handling_options last_chunk_options =
6836 last_chunk_handling_options::loose)
const noexcept = 0;
6872 simdutf_warn_unused
virtual result
6874 base64_options options = base64_default,
6875 last_chunk_handling_options last_chunk_options =
6876 last_chunk_handling_options::loose)
const noexcept = 0;
6911 const char16_t *input,
size_t length,
char *output,
6912 base64_options options = base64_default,
6913 last_chunk_handling_options last_chunk_options =
6914 last_chunk_handling_options::loose)
const noexcept = 0;
6925 size_t length, base64_options options = base64_default)
const noexcept;
6950 base64_options options = base64_default)
const noexcept = 0;
6978 const char *input,
size_t length,
char *output,
6979 size_t line_length = simdutf::default_line_length,
6980 base64_options options = base64_default)
const noexcept = 0;
// Pure virtual char search: every concrete implementation must provide it.
// Takes the range bounds `start`/`end` and the `character` to look for.
6992 virtual const char *
find(
const char *start,
const char *end,
6993 char character)
const noexcept = 0;
// Pure virtual char16_t search with the same parameter shape.
6994 virtual const char16_t *
find(
const char16_t *start,
const char16_t *end,
6995 char16_t character)
const noexcept = 0;
6998#ifdef SIMDUTF_INTERNAL_TESTS
7007 struct TestProcedure {
7009 std::string_view name;
7015 virtual std::vector<TestProcedure> internal_tests()
const;
7028 uint32_t required_instruction_sets)
7030 _required_instruction_sets(required_instruction_sets) {}
7033 ~implementation() =
default;
7044 const char *_description;
7049 const uint32_t _required_instruction_sets;
7058class available_implementation_list {
7061 simdutf_really_inline available_implementation_list() {}
7063 size_t size() const noexcept;
7065 const implementation *const *begin() const noexcept;
7067 const implementation *const *end() const noexcept;
7082 const implementation *operator[](std::string_view name) const noexcept {
7083 for (
const implementation *impl : *this) {
7084 if (impl->name() == name) {
7104 const implementation *detect_best_supported() const noexcept;
7107template <typename T> class atomic_ptr {
7109 atomic_ptr(T *_ptr) : ptr{_ptr} {}
7111#if defined(SIMDUTF_NO_THREADS)
7112 operator const T *()
const {
return ptr; }
7113 const T &operator*()
const {
return *ptr; }
7114 const T *operator->()
const {
return ptr; }
7116 operator T *() {
return ptr; }
7117 T &operator*() {
return *ptr; }
7118 T *operator->() {
return ptr; }
7119 atomic_ptr &operator=(T *_ptr) {
7125 operator const T *()
const {
return ptr.load(); }
7126 const T &operator*()
const {
return *ptr; }
7127 const T *operator->()
const {
return ptr.load(); }
7129 operator T *() {
return ptr.load(); }
7130 T &operator*() {
return *ptr; }
7131 T *operator->() {
return ptr.load(); }
7132 atomic_ptr &operator=(T *_ptr) {
7140#if defined(SIMDUTF_NO_THREADS)
7143 std::atomic<T *> ptr;
7147class detect_best_supported_implementation_on_first_use;
7154extern SIMDUTF_DLLIMPORTEXPORT
const internal::available_implementation_list &
7155get_available_implementations();
7163extern SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation> &
7164get_active_implementation();
7168#if SIMDUTF_FEATURE_BASE64
7170 #include <simdutf/base64_implementation.h>
7178simdutf_really_inline
7179 simdutf_constexpr23 simdutf_warn_unused std::tuple<result, std::size_t>
7180 base64_to_binary_safe(
7181 const detail::input_span_of_byte_like
auto &input,
7182 detail::output_span_of_byte_like
auto &&binary_output,
7183 base64_options options = base64_default,
7184 last_chunk_handling_options last_chunk_options = loose,
7185 bool decode_up_to_bad_char =
false) noexcept {
7186 size_t outlen = binary_output.size();
7187 #if SIMDUTF_CPLUSPLUS23
7189 using CInput = std::decay_t<
decltype(*input.data())>;
7190 static_assert(std::is_same_v<CInput, char>,
7191 "sorry, the constexpr implementation is for now limited to "
7192 "input of type char");
7193 using COutput = std::decay_t<
decltype(*binary_output.data())>;
7194 static_assert(std::is_same_v<COutput, char>,
7195 "sorry, the constexpr implementation is for now limited to "
7196 "output of type char");
7197 auto r = base64_to_binary_safe_impl(
7198 input.data(), input.size(), binary_output.data(), outlen, options,
7199 last_chunk_options, decode_up_to_bad_char);
7204 auto r = base64_to_binary_safe_impl<char>(
7205 reinterpret_cast<const char *
>(input.data()), input.size(),
7206 reinterpret_cast<char *
>(binary_output.data()), outlen, options,
7207 last_chunk_options, decode_up_to_bad_char);
7217simdutf_really_inline
7218 simdutf_warn_unused simdutf_constexpr23 std::tuple<result, std::size_t>
7219 base64_to_binary_safe(
7220 std::span<const char16_t> input,
7221 detail::output_span_of_byte_like
auto &&binary_output,
7222 base64_options options = base64_default,
7223 last_chunk_handling_options last_chunk_options = loose,
7224 bool decode_up_to_bad_char =
false) noexcept {
7225 size_t outlen = binary_output.size();
7226 #if SIMDUTF_CPLUSPLUS23
7228 auto r = base64_to_binary_safe_impl(
7229 input.data(), input.size(), binary_output.data(), outlen, options,
7230 last_chunk_options, decode_up_to_bad_char);
7235 auto r = base64_to_binary_safe(
7236 input.data(), input.size(),
7237 reinterpret_cast<char *
>(binary_output.data()), outlen, options,
7238 last_chunk_options, decode_up_to_bad_char);
7249#if SIMDUTF_CPLUSPLUS23 && SIMDUTF_FEATURE_BASE64
7258template <std::
size_t N>
struct base64_literal_helper {
7259 std::array<char, N - 1> storage{};
7260 static constexpr std::size_t size() noexcept {
return N - 1; }
7261 consteval base64_literal_helper(
const char (&str)[N]) {
7262 for (std::size_t i = 0; i < size(); i++) {
7263 storage[i] = str[i];
7268template <std::
size_t InputLen>
struct base64_decode_result {
7269 static constexpr std::size_t max_out = (InputLen + 3) / 4 * 3;
7270 std::array<char, max_out> buffer{};
7271 std::size_t output_count{};
7274template <std::
size_t InputLen>
7275consteval auto base64_decode_literal(
const char *str) {
7276 base64_decode_result<InputLen> result{};
7277 auto r = scalar::base64::base64_to_binary_details_impl(
7278 str, InputLen, result.buffer.data(), base64_default, loose);
7279 if (r.error != error_code::SUCCESS) {
7280 #if __cpp_lib_unreachable >= 202202L
7284 throw "invalid base64 input in _base64 literal";
7287 result.output_count = r.output_count;
7291template <base64_literal_helper a>
consteval auto base64_make_array() {
7292 constexpr auto decoded = base64_decode_literal<a.size()>(a.storage.data());
7293 std::array<char, decoded.output_count> ret{};
7294 for (std::size_t i = 0; i < decoded.output_count; i++) {
7295 ret[i] = decoded.buffer[i];
7313template <detail::base64_literal_helper a>
consteval auto operator""_base64() {
7314 return detail::base64_make_array<a>();
An implementation of simdutf for a particular CPU architecture.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-32 string into Latin1 string.
virtual simdutf_warn_unused size_t binary_length_from_base64(const char *input, size_t length) const noexcept
Compute the binary length from a base64 input with ASCII spaces.
virtual const char * find(const char *start, const char *end, char character) const noexcept=0
Find the first occurrence of a character in a string.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-16LE string.
virtual simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept
Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
virtual simdutf_warn_unused size_t convert_utf16le_to_utf8_with_replacement(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-8 string, replacing unpaired surrogates with the Uni...
virtual simdutf_warn_unused size_t binary_length_from_base64(const char16_t *input, size_t length) const noexcept
Compute the binary length from a base64 input with ASCII spaces.
virtual simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16BE string would require in UTF-32 format.
virtual simdutf_warn_unused size_t convert_utf8_to_utf16be(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16BE string.
virtual simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-16BE string into UTF-32 string.
virtual std::string_view name() const noexcept
The name of this implementation.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-16BE string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_latin1_to_utf16le(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert Latin1 string into UTF-16LE string.
virtual simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16BE string.
virtual simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into Latin1 string.
virtual simdutf_warn_unused full_result base64_to_binary_details(const char *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output while returning more details than base64_to_binary.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-16LE string into UTF-32 string.
virtual simdutf_warn_unused size_t latin1_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-8 string would require in Latin1 format.
virtual simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-32 string into Latin1 string and stop on error.
virtual simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
virtual simdutf_warn_unused size_t convert_latin1_to_utf16be(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert Latin1 string into UTF-16BE string.
virtual simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept
Compute the number of bytes that this UTF-32 string would require in Latin1 format.
virtual simdutf_warn_unused full_result base64_to_binary_details(const char16_t *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output while returning more details than base64_to_binary.
virtual simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16LE string.
virtual simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept=0
Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
virtual simdutf_warn_unused size_t count_utf8(const char *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-16BE string.
virtual simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of 4-byte code units that this UTF-8 string would require in UTF-32 format.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-16LE string into UTF-8 string.
virtual void to_well_formed_utf16le(const char16_t *input, size_t len, char16_t *output) const noexcept=0
Copies the UTF-16LE string while replacing mismatched surrogates with the Unicode replacement charact...
virtual simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16BE string and stop on error.
virtual simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept=0
Validate the UTF-8 string and stop on errors.
virtual simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
virtual size_t binary_to_base64_with_lines(const char *input, size_t length, char *output, size_t line_length=simdutf::default_line_length, base64_options options=base64_default) const noexcept=0
Convert a binary input to a base64 output with lines of given length.
virtual simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf8_to_utf16le(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16LE string.
virtual simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused result utf8_length_from_utf16le_with_replacement(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16LE string would require in UTF-8 format even when the UTF...
virtual simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16LE string and stop on error.
virtual simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-8 string and stop on error.
virtual size_t binary_to_base64(const char *input, size_t length, char *output, base64_options options=base64_default) const noexcept=0
Convert a binary input to a base64 output.
virtual simdutf_warn_unused result utf8_length_from_utf16be_with_replacement(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16BE string would require in UTF-8 format even when the UTF...
virtual simdutf_warn_unused size_t convert_latin1_to_utf32(const char *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert Latin1 string into UTF-32 string.
virtual simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char *input, size_t length, char32_t *utf32_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-8 string.
virtual simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept
Return the number of bytes that this Latin1 string would require in UTF-32 format.
virtual std::string_view description() const noexcept
The description of this implementation.
virtual simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16LE string. This function may be best when you expect the input to be almost always ...
virtual simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16LE string and stop on error.
virtual simdutf_warn_unused size_t count_utf16le(const char16_t *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-32 string into Latin1 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-16BE string into Latin1 string.
bool supported_by_runtime_system() const
The instruction sets this implementation is compiled against and the current CPU match.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16BE string and stop on error.
virtual simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-8 string and stop on error.
virtual simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-32 string and stop on error.
virtual simdutf_warn_unused result base64_to_binary(const char *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output.
virtual simdutf_warn_unused bool validate_utf16be_as_ascii(const char16_t *buf, size_t len) const noexcept=0
Validate the ASCII string as a UTF-16BE sequence.
virtual simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16BE string.
virtual simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept=0
Validate the UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16LE string would require in UTF-32 format.
virtual void change_endianness_utf16(const char16_t *input, size_t length, char16_t *output) const noexcept=0
Change the endianness of the input.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-16BE string.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert valid UTF-8 string into Latin1 string.
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char *input, size_t length) const noexcept
Provide the maximal binary length in bytes given the base64 input.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-16LE string.
virtual simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept=0
Validate the ASCII string and stop on error.
virtual simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept=0
Validate the UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf8_to_utf32(const char *input, size_t length, char32_t *utf32_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-32 string.
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char16_t *input, size_t length) const noexcept
Provide the maximal binary length in bytes given the base64 input.
virtual simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept=0
Validate the UTF-32 string.
virtual simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t convert_utf16be_to_utf8_with_replacement(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-8 string, replacing unpaired surrogates with the Uni...
virtual simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-8 string.
virtual simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
virtual simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into Latin1 string.
virtual simdutf_warn_unused size_t convert_utf8_to_latin1(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert possibly broken UTF-8 string into Latin1 string.
virtual simdutf_warn_unused result base64_to_binary(const char16_t *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output.
simdutf_warn_unused size_t base64_length_from_binary(size_t length, base64_options options=base64_default) const noexcept
Provide the base64 length in bytes given the length of a binary input.
virtual int detect_encodings(const char *input, size_t length) const noexcept=0
This function will try to detect the possible encodings in one pass.
virtual simdutf_warn_unused size_t count_utf16be(const char16_t *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-8 string and stop on error.
virtual simdutf_warn_unused bool validate_utf16le_as_ascii(const char16_t *buf, size_t len) const noexcept=0
Validate the ASCII string as a UTF-16LE sequence.
virtual void to_well_formed_utf16be(const char16_t *input, size_t len, char16_t *output) const noexcept=0
Copies the UTF-16BE string while replacing mismatched surrogates with the Unicode replacement charact...
virtual simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept=0
Validate the ASCII string.
virtual simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
virtual simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16BE string and stop on error.
virtual simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert possibly broken UTF-8 string into Latin1 string with errors.
virtual simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept
Return the number of bytes that this Latin1 string would require in UTF-16 format.
virtual simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16LE string and stop on error.
virtual simdutf_warn_unused size_t convert_latin1_to_utf8(const char *input, size_t length, char *utf8_output) const noexcept=0
Convert Latin1 string into UTF-8 string.
virtual encoding_type autodetect_encoding(const char *input, size_t length) const noexcept
This function will try to detect the encoding.
virtual simdutf_warn_unused size_t utf8_length_from_latin1(const char *input, size_t length) const noexcept=0
Return the number of bytes that this Latin1 string would require in UTF-8 format.