1#ifndef SIMDUTF_IMPLEMENTATION_H
2#define SIMDUTF_IMPLEMENTATION_H
3#if !defined(SIMDUTF_NO_THREADS)
7#ifdef SIMDUTF_INTERNAL_TESTS
10#include "simdutf/common_defs.h"
11#include "simdutf/compiler_check.h"
12#include "simdutf/encoding_types.h"
13#include "simdutf/error.h"
14#include "simdutf/internal/isadetection.h"
18 #include <type_traits>
22#if SIMDUTF_CPLUSPLUS17
23 #include <string_view>
34#define SIMDUTF_FEATURE_DETECT_ENCODING 1
35#define SIMDUTF_FEATURE_ASCII 1
36#define SIMDUTF_FEATURE_LATIN1 1
37#define SIMDUTF_FEATURE_UTF8 1
38#define SIMDUTF_FEATURE_UTF16 1
39#define SIMDUTF_FEATURE_UTF32 1
40#define SIMDUTF_FEATURE_BASE64 1
42#if SIMDUTF_CPLUSPLUS23
43 #include <simdutf/constexpr_ptr.h>
// byte_like<T>: satisfied only when T is exactly std::byte, char, signed char,
// unsigned char or char8_t. The check uses std::is_same_v, so cv-qualified or
// reference types do not match this concept directly.
// NOTE(review): the template header for this concept is not visible in this
// extract -- confirm against the full header.
55concept byte_like = std::is_same_v<T, std::byte> ||
56 std::is_same_v<T, char> ||
57 std::is_same_v<T, signed char> ||
58 std::is_same_v<T, unsigned char> ||
59 std::is_same_v<T, char8_t>;
// is_byte_like<T>: byte_like applied to std::remove_cvref_t<T>, i.e. the same
// test after stripping references and const/volatile qualifiers.
62concept is_byte_like = byte_like<std::remove_cvref_t<T>>;
// is_pointer<T>: concept wrapper around std::is_pointer_v<T>.
65concept is_pointer = std::is_pointer_v<T>;
// input_span_of_byte_like<T>: for a `const T &t` the following must all be
// noexcept expressions:
//   t.size()   -> convertible to std::size_t
//   t.data()   -> a pointer type (is_pointer)
//   *t.data()  -> a byte-like element (is_byte_like, cv/ref stripped)
// NOTE(review): the template header and the closing `};` of the requires
// expression are not visible in this extract.
73concept input_span_of_byte_like =
requires(
const T &t) {
74 { t.size() }
noexcept -> std::convertible_to<std::size_t>;
75 { t.data() }
noexcept -> is_pointer;
76 { *t.data() }
noexcept -> is_byte_like;
// is_mutable<T>: true when T, after removing any reference, is not
// const-qualified.
80concept is_mutable = !std::is_const_v<std::remove_reference_t<T>>;
// output_span_of_byte_like<T>: for a non-const `T &t` the following must all
// be noexcept expressions:
//   t.size()   -> convertible to std::size_t
//   t.data()   -> a pointer type (is_pointer)
//   *t.data()  -> a byte-like element (is_byte_like) that is also not const
//                 (is_mutable), i.e. the elements can be written to.
// NOTE(review): the template header and the closing `};` of the requires
// expression are not visible in this extract.
86concept output_span_of_byte_like =
requires(T &t) {
87 { t.size() }
noexcept -> std::convertible_to<std::size_t>;
88 { t.data() }
noexcept -> is_pointer;
89 { *t.data() }
noexcept -> is_byte_like;
90 { *t.data() }
noexcept -> is_mutable;
// indexes_into_byte_like<InputPtr>: requires that a value-initialized object
// of the decayed type of `p[0]` satisfies simdutf::detail::byte_like.
// NOTE(review): the closing `};` is not visible in this extract.
98template <
class InputPtr>
99concept indexes_into_byte_like =
requires(InputPtr p) {
100 { std::decay_t<
decltype(p[0])>{} } -> simdutf::detail::byte_like;
// indexes_into_utf16<InputPtr>: requires that the decayed type of `p[0]` is
// exactly char16_t (std::same_as on a value-initialized object of that type).
// NOTE(review): the closing `};` is not visible in this extract.
102template <
class InputPtr>
103concept indexes_into_utf16 =
requires(InputPtr p) {
104 { std::decay_t<
decltype(p[0])>{} } -> std::same_as<char16_t>;
// indexes_into_utf32<InputPtr>: requires that the decayed type of `p[0]` is
// exactly char32_t (std::same_as on a value-initialized object of that type).
// NOTE(review): the closing `};` is not visible in this extract.
106template <
class InputPtr>
107concept indexes_into_utf32 =
requires(InputPtr p) {
108 { std::decay_t<
decltype(p[0])>{} } -> std::same_as<char32_t>;
// index_assignable_from_char<InputPtr>: a requires expression over an
// `InputPtr p` and a `char s`.
// NOTE(review): the actual requirement(s) inside the braces are not visible in
// this extract; presumably something like `p[0] = s` -- confirm against the
// full header.
111template <
class InputPtr>
112concept index_assignable_from_char =
requires(InputPtr p,
char s) {
// indexes_into_uint32<InputPtr>: requires that the decayed type of `p[0]` is
// exactly std::uint32_t (std::same_as on a value-initialized object).
// NOTE(review): the closing `};` is not visible in this extract.
120template <
class InputPtr>
121concept indexes_into_uint32 =
requires(InputPtr p) {
122 { std::decay_t<
decltype(p[0])>{} } -> std::same_as<std::uint32_t>;
130#include <simdutf/scalar/swap_bytes.h>
131#include <simdutf/scalar/ascii.h>
132#include <simdutf/scalar/atomic_util.h>
133#include <simdutf/scalar/latin1.h>
134#include <simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h>
135#include <simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h>
136#include <simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h>
137#include <simdutf/scalar/utf16.h>
138#include <simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h>
139#include <simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h>
140#include <simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h>
141#include <simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h>
142#include <simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h>
143#include <simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h>
144#include <simdutf/scalar/utf32.h>
145#include <simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h>
146#include <simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h>
147#include <simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h>
148#include <simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h>
149#include <simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h>
150#include <simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h>
151#include <simdutf/scalar/utf8.h>
152#include <simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h>
153#include <simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h>
154#include <simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h>
155#include <simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h>
156#include <simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h>
157#include <simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h>
// Compile-time size_t constant named default_line_length.
// NOTE(review): the initializer is not visible in this extract -- confirm the
// value and what it is used for against the full header.
161constexpr size_t default_line_length =
164#if SIMDUTF_FEATURE_DETECT_ENCODING
// autodetect_encoding: three overloads returning simdutf::encoding_type.
//  * (const char *input, size_t length): declaration only.
//  * (const uint8_t *input, size_t length): inline wrapper that
//    reinterpret_casts the pointer to const char * and calls the first
//    overload.
//  * overload taking a detail::input_span_of_byte_like: the visible return
//    also calls autodetect_encoding with input.data() cast to const char *.
// NOTE(review): this extract drops lines -- the function-name line of the
// span overload, the rest of its call arguments, and all closing braces are
// missing. Confirm against the full header.
175simdutf_warn_unused simdutf::encoding_type
176autodetect_encoding(
const char *input,
size_t length)
noexcept;
177simdutf_really_inline simdutf_warn_unused simdutf::encoding_type
178autodetect_encoding(
const uint8_t *input,
size_t length)
noexcept {
179 return autodetect_encoding(
reinterpret_cast<const char *
>(input), length);
193simdutf_really_inline simdutf_warn_unused simdutf::encoding_type
195 const detail::input_span_of_byte_like
auto &input)
noexcept {
196 return autodetect_encoding(
reinterpret_cast<const char *
>(input.data()),
// detect_encodings: three overloads returning int.
//  * (const char *input, size_t length): declaration only.
//  * (const uint8_t *input, size_t length): inline wrapper that
//    reinterpret_casts the pointer to const char * and calls the first
//    overload.
//  * overload taking a detail::input_span_of_byte_like: calls
//    detect_encodings with input.data() cast to const char *.
// NOTE(review): the meaning of the int result is not shown here (presumably a
// bit set of encoding_type values -- confirm). Closing braces and the tail of
// the last call are missing from this extract.
212simdutf_warn_unused
int detect_encodings(
const char *input,
213 size_t length)
noexcept;
214simdutf_really_inline simdutf_warn_unused
int
215detect_encodings(
const uint8_t *input,
size_t length)
noexcept {
216 return detect_encodings(
reinterpret_cast<const char *
>(input), length);
219simdutf_really_inline simdutf_warn_unused
int
220detect_encodings(
const detail::input_span_of_byte_like
auto &input)
noexcept {
221 return detect_encodings(
reinterpret_cast<const char *
>(input.data()),
227#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
// validate_utf8: two overloads returning bool.
//  * (const char *buf, size_t len): declaration only.
//  * overload taking a detail::input_span_of_byte_like, marked
//    simdutf_constexpr23. Two returns are visible: after
//    `#if SIMDUTF_CPLUSPLUS23` it calls scalar::utf8::validate on
//    detail::constexpr_cast_ptr<uint8_t>(input.data()) and input.size(); the
//    second forwards to the pointer overload via reinterpret_cast.
// NOTE(review): how the two returns are selected (and the #endif / closing
// braces / tail of the second call) is not visible in this extract.
239simdutf_warn_unused
bool validate_utf8(
const char *buf,
size_t len)
noexcept;
241simdutf_constexpr23 simdutf_really_inline simdutf_warn_unused
bool
242validate_utf8(
const detail::input_span_of_byte_like
auto &input)
noexcept {
243 #if SIMDUTF_CPLUSPLUS23
245 return scalar::utf8::validate(
246 detail::constexpr_cast_ptr<uint8_t>(input.data()), input.size());
250 return validate_utf8(
reinterpret_cast<const char *
>(input.data()),
257#if SIMDUTF_FEATURE_UTF8
// validate_utf8_with_errors: two overloads returning `result`.
//  * (const char *buf, size_t len): declaration only.
//  * overload taking a detail::input_span_of_byte_like, marked
//    simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf8::validate_with_errors on
//    detail::constexpr_cast_ptr<uint8_t>(input.data()); the second visible
//    return forwards data()/size() to the pointer overload via
//    reinterpret_cast<const char *>.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract -- confirm against the full header.
270simdutf_warn_unused result validate_utf8_with_errors(
const char *buf,
271 size_t len)
noexcept;
273simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused result
274validate_utf8_with_errors(
275 const detail::input_span_of_byte_like
auto &input)
noexcept {
276 #if SIMDUTF_CPLUSPLUS23
278 return scalar::utf8::validate_with_errors(
279 detail::constexpr_cast_ptr<uint8_t>(input.data()), input.size());
283 return validate_utf8_with_errors(
284 reinterpret_cast<const char *
>(input.data()), input.size());
290#if SIMDUTF_FEATURE_ASCII
// validate_ascii: two overloads returning bool.
//  * (const char *buf, size_t len): declaration only.
//  * overload taking a detail::input_span_of_byte_like, marked
//    simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::ascii::validate on
//    detail::constexpr_cast_ptr<std::uint8_t>(input.data()); the second
//    visible return forwards to the pointer overload via reinterpret_cast.
// NOTE(review): branch selection, #endif, closing braces and the tail of the
// second call are missing from this extract.
300simdutf_warn_unused
bool validate_ascii(
const char *buf,
size_t len)
noexcept;
302simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
bool
303validate_ascii(
const detail::input_span_of_byte_like
auto &input)
noexcept {
304 #if SIMDUTF_CPLUSPLUS23
306 return scalar::ascii::validate(
307 detail::constexpr_cast_ptr<std::uint8_t>(input.data()), input.size());
311 return validate_ascii(
reinterpret_cast<const char *
>(input.data()),
// validate_ascii_with_errors: two overloads returning `result`.
//  * (const char *buf, size_t len): declaration only.
//  * overload taking a detail::input_span_of_byte_like, marked
//    simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::ascii::validate_with_errors on
//    detail::constexpr_cast_ptr<std::uint8_t>(input.data()); the second
//    visible return forwards data()/size() to the pointer overload.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
330simdutf_warn_unused result validate_ascii_with_errors(
const char *buf,
331 size_t len)
noexcept;
333simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
334validate_ascii_with_errors(
335 const detail::input_span_of_byte_like
auto &input)
noexcept {
336 #if SIMDUTF_CPLUSPLUS23
338 return scalar::ascii::validate_with_errors(
339 detail::constexpr_cast_ptr<std::uint8_t>(input.data()), input.size());
343 return validate_ascii_with_errors(
344 reinterpret_cast<const char *
>(input.data()), input.size());
350#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
// validate_utf16_as_ascii: two overloads returning bool.
//  * (const char16_t *buf, size_t len): declaration only.
//  * (std::span<const char16_t> input), marked simdutf_constexpr23. After
//    `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf16::validate_as_ascii<endianness::NATIVE>; the second visible
//    return forwards input.data()/input.size() to the pointer overload.
// NOTE(review): the tail of the scalar call, branch selection, #endif and
// closing braces are missing from this extract.
362simdutf_warn_unused
bool validate_utf16_as_ascii(
const char16_t *buf,
363 size_t len)
noexcept;
365simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
bool
366validate_utf16_as_ascii(std::span<const char16_t> input)
noexcept {
367 #if SIMDUTF_CPLUSPLUS23
369 return scalar::utf16::validate_as_ascii<endianness::NATIVE>(input.data(),
374 return validate_utf16_as_ascii(input.data(), input.size());
// validate_utf16be_as_ascii: two overloads returning bool.
//  * (const char16_t *buf, size_t len): declaration only.
//  * (std::span<const char16_t> input), marked simdutf_constexpr23. After
//    `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf16::validate_as_ascii<endianness::BIG>; the second visible
//    return forwards input.data()/input.size() to the pointer overload.
// NOTE(review): the tail of the scalar call, branch selection, #endif and
// closing braces are missing from this extract.
390simdutf_warn_unused
bool validate_utf16be_as_ascii(
const char16_t *buf,
391 size_t len)
noexcept;
393simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
bool
394validate_utf16be_as_ascii(std::span<const char16_t> input)
noexcept {
395 #if SIMDUTF_CPLUSPLUS23
397 return scalar::utf16::validate_as_ascii<endianness::BIG>(input.data(),
402 return validate_utf16be_as_ascii(input.data(), input.size());
// validate_utf16le_as_ascii: two overloads returning bool.
//  * (const char16_t *buf, size_t len): declaration only.
//  * (std::span<const char16_t> input), marked simdutf_constexpr23. After
//    `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf16::validate_as_ascii<endianness::LITTLE>; the second visible
//    return forwards input.data()/input.size() to the pointer overload.
// NOTE(review): the tail of the scalar call, branch selection, #endif and
// closing braces are missing from this extract.
418simdutf_warn_unused
bool validate_utf16le_as_ascii(
const char16_t *buf,
419 size_t len)
noexcept;
421simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
bool
422validate_utf16le_as_ascii(std::span<const char16_t> input)
noexcept {
423 #if SIMDUTF_CPLUSPLUS23
425 return scalar::utf16::validate_as_ascii<endianness::LITTLE>(input.data(),
430 return validate_utf16le_as_ascii(input.data(), input.size());
436#if SIMDUTF_FEATURE_UTF16
// validate_utf16: two overloads returning bool.
//  * (const char16_t *buf, size_t len): declaration only.
//  * (std::span<const char16_t> input), marked simdutf_constexpr23. After
//    `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf16::validate<endianness::NATIVE>; the second visible return
//    forwards input.data()/input.size() to the pointer overload.
// NOTE(review): the tail of the scalar call, branch selection, #endif and
// closing braces are missing from this extract.
451simdutf_warn_unused
bool validate_utf16(
const char16_t *buf,
452 size_t len)
noexcept;
454simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
bool
455validate_utf16(std::span<const char16_t> input)
noexcept {
456 #if SIMDUTF_CPLUSPLUS23
458 return scalar::utf16::validate<endianness::NATIVE>(input.data(),
463 return validate_utf16(input.data(), input.size());
469#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
// validate_utf16le: two overloads returning bool.
//  * (const char16_t *buf, size_t len): declaration only.
//  * (std::span<const char16_t> input), marked simdutf_constexpr23. After
//    `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf16::validate<endianness::LITTLE>; the second visible return
//    forwards input.data()/input.size() to the pointer overload.
// NOTE(review): the tail of the scalar call, branch selection, #endif and
// closing braces are missing from this extract.
484simdutf_warn_unused
bool validate_utf16le(
const char16_t *buf,
485 size_t len)
noexcept;
487simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused
bool
488validate_utf16le(std::span<const char16_t> input)
noexcept {
489 #if SIMDUTF_CPLUSPLUS23
491 return scalar::utf16::validate<endianness::LITTLE>(input.data(),
496 return validate_utf16le(input.data(), input.size());
502#if SIMDUTF_FEATURE_UTF16
// validate_utf16be: two overloads returning bool.
//  * (const char16_t *buf, size_t len): declaration only.
//  * (std::span<const char16_t> input), marked simdutf_constexpr23. After
//    `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf16::validate<endianness::BIG>(input.data(), input.size());
//    the second visible return forwards the same arguments to the pointer
//    overload.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
517simdutf_warn_unused
bool validate_utf16be(
const char16_t *buf,
518 size_t len)
noexcept;
520simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
bool
521validate_utf16be(std::span<const char16_t> input)
noexcept {
522 #if SIMDUTF_CPLUSPLUS23
524 return scalar::utf16::validate<endianness::BIG>(input.data(), input.size());
528 return validate_utf16be(input.data(), input.size());
// validate_utf16_with_errors: two overloads returning `result`.
//  * (const char16_t *buf, size_t len): declaration only.
//  * (std::span<const char16_t> input), marked simdutf_constexpr23. After
//    `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf16::validate_with_errors<endianness::NATIVE>; the second
//    visible return forwards input.data()/input.size() to the pointer
//    overload.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
550simdutf_warn_unused result validate_utf16_with_errors(
const char16_t *buf,
551 size_t len)
noexcept;
553simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
554validate_utf16_with_errors(std::span<const char16_t> input)
noexcept {
555 #if SIMDUTF_CPLUSPLUS23
557 return scalar::utf16::validate_with_errors<endianness::NATIVE>(
558 input.data(), input.size());
562 return validate_utf16_with_errors(input.data(), input.size());
// validate_utf16le_with_errors: two overloads returning `result`.
//  * (const char16_t *buf, size_t len): declaration only.
//  * (std::span<const char16_t> input), marked simdutf_constexpr23. After
//    `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf16::validate_with_errors<endianness::LITTLE>; the second
//    visible return forwards input.data()/input.size() to the pointer
//    overload.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
583simdutf_warn_unused result validate_utf16le_with_errors(
const char16_t *buf,
584 size_t len)
noexcept;
586simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
587validate_utf16le_with_errors(std::span<const char16_t> input)
noexcept {
588 #if SIMDUTF_CPLUSPLUS23
590 return scalar::utf16::validate_with_errors<endianness::LITTLE>(
591 input.data(), input.size());
595 return validate_utf16le_with_errors(input.data(), input.size());
// validate_utf16be_with_errors: two overloads returning `result`.
//  * (const char16_t *buf, size_t len): declaration only.
//  * (std::span<const char16_t> input), marked simdutf_constexpr23. After
//    `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf16::validate_with_errors<endianness::BIG>; the second visible
//    return forwards input.data()/input.size() to the pointer overload.
// NOTE(review): the tail of the scalar call, branch selection, #endif and
// closing braces are missing from this extract.
616simdutf_warn_unused result validate_utf16be_with_errors(
const char16_t *buf,
617 size_t len)
noexcept;
619simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
620validate_utf16be_with_errors(std::span<const char16_t> input)
noexcept {
621 #if SIMDUTF_CPLUSPLUS23
623 return scalar::utf16::validate_with_errors<endianness::BIG>(input.data(),
628 return validate_utf16be_with_errors(input.data(), input.size());
// to_well_formed_utf16le: two overloads returning void.
//  * (const char16_t *input, size_t len, char16_t *output): declaration only.
//  * (std::span<const char16_t> input, std::span<char16_t> output), marked
//    simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf16::to_well_formed_utf16<endianness::LITTLE>; the second
//    visible call forwards to the pointer overload. In both calls only
//    output.data() is passed: output.size() is never read here, so the caller
//    must supply a large enough output (presumably >= input.size() -- confirm).
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
645void to_well_formed_utf16le(
const char16_t *input,
size_t len,
646 char16_t *output)
noexcept;
648simdutf_really_inline simdutf_constexpr23
void
649to_well_formed_utf16le(std::span<const char16_t> input,
650 std::span<char16_t> output)
noexcept {
651 #if SIMDUTF_CPLUSPLUS23
653 scalar::utf16::to_well_formed_utf16<endianness::LITTLE>(
654 input.data(), input.size(), output.data());
658 to_well_formed_utf16le(input.data(), input.size(), output.data());
// to_well_formed_utf16be: two overloads returning void.
//  * (const char16_t *input, size_t len, char16_t *output): declaration only.
//  * (std::span<const char16_t> input, std::span<char16_t> output), marked
//    simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf16::to_well_formed_utf16<endianness::BIG>; the second visible
//    call forwards to the pointer overload. In both calls only output.data()
//    is passed: output.size() is never read here, so the caller must supply a
//    large enough output (presumably >= input.size() -- confirm).
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
675void to_well_formed_utf16be(
const char16_t *input,
size_t len,
676 char16_t *output)
noexcept;
678simdutf_really_inline simdutf_constexpr23
void
679to_well_formed_utf16be(std::span<const char16_t> input,
680 std::span<char16_t> output)
noexcept {
681 #if SIMDUTF_CPLUSPLUS23
683 scalar::utf16::to_well_formed_utf16<endianness::BIG>(
684 input.data(), input.size(), output.data());
688 to_well_formed_utf16be(input.data(), input.size(), output.data());
// to_well_formed_utf16: two overloads returning void.
//  * (const char16_t *input, size_t len, char16_t *output): declaration only.
//  * (std::span<const char16_t> input, std::span<char16_t> output), marked
//    simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf16::to_well_formed_utf16<endianness::NATIVE>; the second
//    visible call forwards to the pointer overload. In both calls only
//    output.data() is passed: output.size() is never read here, so the caller
//    must supply a large enough output (presumably >= input.size() -- confirm).
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
705void to_well_formed_utf16(
const char16_t *input,
size_t len,
706 char16_t *output)
noexcept;
708simdutf_really_inline simdutf_constexpr23
void
709to_well_formed_utf16(std::span<const char16_t> input,
710 std::span<char16_t> output)
noexcept {
711 #if SIMDUTF_CPLUSPLUS23
713 scalar::utf16::to_well_formed_utf16<endianness::NATIVE>(
714 input.data(), input.size(), output.data());
718 to_well_formed_utf16(input.data(), input.size(), output.data());
725#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
// validate_utf32: two overloads returning bool.
//  * (const char32_t *buf, size_t len): declaration only.
//  * (std::span<const char32_t> input), marked simdutf_constexpr23. After
//    `#if SIMDUTF_CPLUSPLUS23` it calls scalar::utf32::validate on
//    detail::constexpr_cast_ptr<std::uint32_t>(input.data()); the second
//    visible return forwards input.data()/input.size() to the pointer
//    overload.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
740simdutf_warn_unused
bool validate_utf32(
const char32_t *buf,
741 size_t len)
noexcept;
743simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
bool
744validate_utf32(std::span<const char32_t> input)
noexcept {
745 #if SIMDUTF_CPLUSPLUS23
747 return scalar::utf32::validate(
748 detail::constexpr_cast_ptr<std::uint32_t>(input.data()), input.size());
752 return validate_utf32(input.data(), input.size());
758#if SIMDUTF_FEATURE_UTF32
// validate_utf32_with_errors: two overloads returning `result`.
//  * (const char32_t *buf, size_t len): declaration only.
//  * (std::span<const char32_t> input), marked simdutf_constexpr23. After
//    `#if SIMDUTF_CPLUSPLUS23` it calls scalar::utf32::validate_with_errors
//    on detail::constexpr_cast_ptr<std::uint32_t>(input.data()); the second
//    visible return forwards input.data()/input.size() to the pointer
//    overload.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
775simdutf_warn_unused result validate_utf32_with_errors(
const char32_t *buf,
776 size_t len)
noexcept;
778simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
779validate_utf32_with_errors(std::span<const char32_t> input)
noexcept {
780 #if SIMDUTF_CPLUSPLUS23
782 return scalar::utf32::validate_with_errors(
783 detail::constexpr_cast_ptr<std::uint32_t>(input.data()), input.size());
787 return validate_utf32_with_errors(input.data(), input.size());
793#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
// convert_latin1_to_utf8: two overloads returning size_t.
//  * pointer form: declaration only. NOTE(review): its parameter list looks
//    truncated in this extract (a length parameter is presumably missing
//    between `input` and `utf8_output`).
//  * overload taking an input_span_of_byte_like and an
//    output_span_of_byte_like (forwarding reference), marked
//    simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::latin1_to_utf8::convert with constexpr_cast_ptr<char> /
//    constexpr_cast_writeptr<char>; the second visible return forwards to the
//    pointer overload with both data() pointers reinterpret_cast to char.
//    utf8_output.size() is never read here, so no bounds check is visible.
// NOTE(review): branch selection, #endif, closing braces and one argument
// line of the scalar call are missing from this extract.
804simdutf_warn_unused
size_t convert_latin1_to_utf8(
const char *input,
806 char *utf8_output)
noexcept;
808simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
809convert_latin1_to_utf8(
810 const detail::input_span_of_byte_like
auto &latin1_input,
811 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
812 #if SIMDUTF_CPLUSPLUS23
814 return scalar::latin1_to_utf8::convert(
815 detail::constexpr_cast_ptr<char>(latin1_input.data()),
817 detail::constexpr_cast_writeptr<char>(utf8_output.data()));
821 return convert_latin1_to_utf8(
822 reinterpret_cast<const char *
>(latin1_input.data()),
823 latin1_input.size(),
reinterpret_cast<char *
>(utf8_output.data()));
// convert_latin1_to_utf8_safe: two overloads returning size_t.
//  * (const char *input, size_t length, char *utf8_output, size_t utf8_len):
//    declaration only.
//  * overload taking an input_span_of_byte_like and an
//    output_span_of_byte_like, marked simdutf_constexpr23. Unlike
//    convert_latin1_to_utf8, both visible calls pass utf8_output.size() as
//    the output capacity: after `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::latin1_to_utf8::convert_safe_constexpr; the second return
//    forwards to the pointer overload via reinterpret_cast.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
841simdutf_warn_unused
size_t
842convert_latin1_to_utf8_safe(
const char *input,
size_t length,
char *utf8_output,
843 size_t utf8_len)
noexcept;
845simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
846convert_latin1_to_utf8_safe(
847 const detail::input_span_of_byte_like
auto &input,
848 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
855 #if SIMDUTF_CPLUSPLUS23
857 return scalar::latin1_to_utf8::convert_safe_constexpr(
858 input.data(), input.size(), utf8_output.data(), utf8_output.size());
862 return convert_latin1_to_utf8_safe(
863 reinterpret_cast<const char *
>(input.data()), input.size(),
864 reinterpret_cast<char *
>(utf8_output.data()), utf8_output.size());
870#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
// convert_latin1_to_utf16le: two overloads returning size_t.
//  * (const char *input, size_t length, char16_t *utf16_output):
//    declaration only.
//  * overload taking an input_span_of_byte_like and std::span<char16_t>,
//    marked simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::latin1_to_utf16::convert<endianness::LITTLE>; the second visible
//    return forwards to the pointer overload (input cast to const char *).
//    utf16_output.size() is never read here.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
881simdutf_warn_unused
size_t convert_latin1_to_utf16le(
882 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
884simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
885convert_latin1_to_utf16le(
886 const detail::input_span_of_byte_like
auto &latin1_input,
887 std::span<char16_t> utf16_output)
noexcept {
888 #if SIMDUTF_CPLUSPLUS23
890 return scalar::latin1_to_utf16::convert<endianness::LITTLE>(
891 latin1_input.data(), latin1_input.size(), utf16_output.data());
895 return convert_latin1_to_utf16le(
896 reinterpret_cast<const char *
>(latin1_input.data()),
897 latin1_input.size(), utf16_output.data());
// convert_latin1_to_utf16be: two overloads returning size_t.
//  * (const char *input, size_t length, char16_t *utf16_output):
//    declaration only.
//  * overload taking an input_span_of_byte_like and std::span<char16_t>,
//    marked simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::latin1_to_utf16::convert<endianness::BIG>; the second visible
//    return forwards to the pointer overload (input cast to const char *).
// NOTE(review): the tail of the second call, branch selection, #endif and
// closing braces are missing from this extract.
912simdutf_warn_unused
size_t convert_latin1_to_utf16be(
913 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
915simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
916convert_latin1_to_utf16be(
const detail::input_span_of_byte_like
auto &input,
917 std::span<char16_t> output)
noexcept {
918 #if SIMDUTF_CPLUSPLUS23
920 return scalar::latin1_to_utf16::convert<endianness::BIG>(
921 input.data(), input.size(), output.data());
925 return convert_latin1_to_utf16be(
926 reinterpret_cast<const char *
>(input.data()), input.size(),
// latin1_length_from_utf16(size_t length): inline, simdutf_constexpr23,
// noexcept, returns size_t.
// NOTE(review): the function body is not visible in this extract; presumably
// it returns `length` unchanged -- confirm against the full header.
939simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 size_t
940latin1_length_from_utf16(
size_t length)
noexcept {
// utf16_length_from_latin1(size_t length): inline, simdutf_constexpr23,
// noexcept, returns size_t.
// NOTE(review): the function body is not visible in this extract; presumably
// it returns `length` unchanged -- confirm against the full header.
952simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
953utf16_length_from_latin1(
size_t length)
noexcept {
958#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
// convert_latin1_to_utf32: two overloads returning size_t.
//  * (const char *input, size_t length, char32_t *utf32_buffer):
//    declaration only.
//  * overload taking an input_span_of_byte_like and std::span<char32_t>,
//    marked simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::latin1_to_utf32::convert; the second visible return forwards to
//    the pointer overload (input cast to const char *).
//    utf32_output.size() is never read here.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
969simdutf_warn_unused
size_t convert_latin1_to_utf32(
970 const char *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
972simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
973convert_latin1_to_utf32(
974 const detail::input_span_of_byte_like
auto &latin1_input,
975 std::span<char32_t> utf32_output)
noexcept {
976 #if SIMDUTF_CPLUSPLUS23
978 return scalar::latin1_to_utf32::convert(
979 latin1_input.data(), latin1_input.size(), utf32_output.data());
983 return convert_latin1_to_utf32(
984 reinterpret_cast<const char *
>(latin1_input.data()),
985 latin1_input.size(), utf32_output.data());
991#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
// convert_utf8_to_latin1: two overloads returning size_t.
//  * pointer form: declaration only. NOTE(review): its parameter list looks
//    truncated in this extract (a length parameter is presumably missing
//    between `input` and `latin1_output`).
//  * overload taking an input_span_of_byte_like and an
//    output_span_of_byte_like, marked simdutf_constexpr23. After
//    `#if SIMDUTF_CPLUSPLUS23` it calls scalar::utf8_to_latin1::convert; the
//    second visible return forwards to the pointer overload with both data()
//    pointers reinterpret_cast to char pointers.
// NOTE(review): the tails of both calls, branch selection, #endif and closing
// braces are missing from this extract.
1004simdutf_warn_unused
size_t convert_utf8_to_latin1(
const char *input,
1006 char *latin1_output)
noexcept;
1008simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1009convert_utf8_to_latin1(
1010 const detail::input_span_of_byte_like
auto &input,
1011 detail::output_span_of_byte_like
auto &&output)
noexcept {
1012 #if SIMDUTF_CPLUSPLUS23
1014 return scalar::utf8_to_latin1::convert(input.data(), input.size(),
1019 return convert_utf8_to_latin1(
reinterpret_cast<const char *
>(input.data()),
1021 reinterpret_cast<char *
>(output.data()));
1027#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// convert_utf8_to_utf16: two overloads returning size_t.
//  * (const char *input, size_t length, char16_t *utf16_output):
//    declaration only.
//  * overload taking an input_span_of_byte_like and std::span<char16_t>,
//    marked simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf8_to_utf16::convert<endianness::NATIVE>; the second visible
//    return forwards to the pointer overload (input cast to const char *).
//    output.size() is never read here.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
1041simdutf_warn_unused
size_t convert_utf8_to_utf16(
1042 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
1044simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1045convert_utf8_to_utf16(
const detail::input_span_of_byte_like
auto &input,
1046 std::span<char16_t> output)
noexcept {
1047 #if SIMDUTF_CPLUSPLUS23
1049 return scalar::utf8_to_utf16::convert<endianness::NATIVE>(
1050 input.data(), input.size(), output.data());
1054 return convert_utf8_to_utf16(
reinterpret_cast<const char *
>(input.data()),
1055 input.size(), output.data());
// utf8_length_from_utf16le_with_replacement: two overloads returning `result`.
//  * (const char16_t *input, size_t length): declaration only.
//  * (std::span<const char16_t> valid_utf16_input), marked
//    simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf16::utf8_length_from_utf16_with_replacement<
//    endianness::LITTLE>; the second visible return forwards data()/size() to
//    the pointer overload.
// NOTE(review): the span parameter is named `valid_utf16_input` although the
// function name suggests invalid input is tolerated (replaced) -- confirm the
// intended contract. Branch selection, #endif and closing braces are missing
// from this extract.
1077simdutf_warn_unused result utf8_length_from_utf16le_with_replacement(
1078 const char16_t *input,
size_t length)
noexcept;
1080simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused result
1081utf8_length_from_utf16le_with_replacement(
1082 std::span<const char16_t> valid_utf16_input)
noexcept {
1083 #if SIMDUTF_CPLUSPLUS23
1085 return scalar::utf16::utf8_length_from_utf16_with_replacement<
1086 endianness::LITTLE>(valid_utf16_input.data(), valid_utf16_input.size());
1090 return utf8_length_from_utf16le_with_replacement(valid_utf16_input.data(),
1091 valid_utf16_input.size());
// utf8_length_from_utf16be_with_replacement: two overloads returning `result`.
//  * (const char16_t *input, size_t length): declaration only.
//  * (std::span<const char16_t> valid_utf16_input), marked
//    simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf16::utf8_length_from_utf16_with_replacement<endianness::BIG>;
//    the second visible return forwards data()/size() to the pointer
//    overload.
// NOTE(review): the span parameter is named `valid_utf16_input` although the
// function name suggests invalid input is tolerated (replaced) -- confirm the
// intended contract. Branch selection, #endif and closing braces are missing
// from this extract.
1113simdutf_warn_unused result utf8_length_from_utf16be_with_replacement(
1114 const char16_t *input,
size_t length)
noexcept;
1116simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1117utf8_length_from_utf16be_with_replacement(
1118 std::span<const char16_t> valid_utf16_input)
noexcept {
1119 #if SIMDUTF_CPLUSPLUS23
1121 return scalar::utf16::utf8_length_from_utf16_with_replacement<
1122 endianness::BIG>(valid_utf16_input.data(), valid_utf16_input.size());
1126 return utf8_length_from_utf16be_with_replacement(valid_utf16_input.data(),
1127 valid_utf16_input.size());
1134#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
// convert_latin1_to_utf16: two overloads returning size_t.
//  * (const char *input, size_t length, char16_t *utf16_output):
//    declaration only.
//  * overload taking an input_span_of_byte_like and std::span<char16_t>,
//    marked simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::latin1_to_utf16::convert<endianness::NATIVE>; the second visible
//    return forwards to the pointer overload (input cast to const char *).
//    output.size() is never read here.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
1143simdutf_warn_unused
size_t convert_latin1_to_utf16(
1144 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
1146simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1147convert_latin1_to_utf16(
const detail::input_span_of_byte_like
auto &input,
1148 std::span<char16_t> output)
noexcept {
1149 #if SIMDUTF_CPLUSPLUS23
1151 return scalar::latin1_to_utf16::convert<endianness::NATIVE>(
1152 input.data(), input.size(), output.data());
1156 return convert_latin1_to_utf16(
reinterpret_cast<const char *
>(input.data()),
1157 input.size(), output.data());
1163#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// convert_utf8_to_utf16le: two overloads returning size_t.
//  * (const char *input, size_t length, char16_t *utf16_output):
//    declaration only.
//  * overload taking an input_span_of_byte_like and std::span<char16_t>,
//    marked simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf8_to_utf16::convert<endianness::LITTLE>; the second visible
//    return forwards to the pointer overload (input cast to const char *).
//    utf16_output.size() is never read here.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
1176simdutf_warn_unused
size_t convert_utf8_to_utf16le(
1177 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
1179simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1180convert_utf8_to_utf16le(
const detail::input_span_of_byte_like
auto &utf8_input,
1181 std::span<char16_t> utf16_output)
noexcept {
1182 #if SIMDUTF_CPLUSPLUS23
1184 return scalar::utf8_to_utf16::convert<endianness::LITTLE>(
1185 utf8_input.data(), utf8_input.size(), utf16_output.data());
1189 return convert_utf8_to_utf16le(
1190 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
1191 utf16_output.data());
// convert_utf8_to_utf16be: two overloads returning size_t.
//  * (const char *input, size_t length, char16_t *utf16_output):
//    declaration only.
//  * overload taking an input_span_of_byte_like and std::span<char16_t>,
//    marked simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf8_to_utf16::convert<endianness::BIG>; the second visible
//    return forwards to the pointer overload (input cast to const char *).
//    utf16_output.size() is never read here.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
1208simdutf_warn_unused
size_t convert_utf8_to_utf16be(
1209 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
1211simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1212convert_utf8_to_utf16be(
const detail::input_span_of_byte_like
auto &utf8_input,
1213 std::span<char16_t> utf16_output)
noexcept {
1215 #if SIMDUTF_CPLUSPLUS23
1217 return scalar::utf8_to_utf16::convert<endianness::BIG>(
1218 utf8_input.data(), utf8_input.size(), utf16_output.data());
1222 return convert_utf8_to_utf16be(
1223 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
1224 utf16_output.data());
1230#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
// convert_utf8_to_latin1_with_errors: two overloads returning `result`.
//  * (const char *input, size_t length, char *latin1_output):
//    declaration only.
//  * overload taking an input_span_of_byte_like and an
//    output_span_of_byte_like, marked simdutf_constexpr23. After
//    `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf8_to_latin1::convert_with_errors; the second visible return
//    forwards to the pointer overload with both data() pointers
//    reinterpret_cast to char pointers. latin1_output.size() is never read.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
1247simdutf_warn_unused result convert_utf8_to_latin1_with_errors(
1248 const char *input,
size_t length,
char *latin1_output)
noexcept;
1250simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1251convert_utf8_to_latin1_with_errors(
1252 const detail::input_span_of_byte_like
auto &utf8_input,
1253 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
1254 #if SIMDUTF_CPLUSPLUS23
1256 return scalar::utf8_to_latin1::convert_with_errors(
1257 utf8_input.data(), utf8_input.size(), latin1_output.data());
1261 return convert_utf8_to_latin1_with_errors(
1262 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
1263 reinterpret_cast<char *
>(latin1_output.data()));
1269#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// convert_utf8_to_utf16_with_errors: two overloads returning `result`.
//  * (const char *input, size_t length, char16_t *utf16_output):
//    declaration only.
//  * overload taking an input_span_of_byte_like and std::span<char16_t>,
//    marked simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf8_to_utf16::convert_with_errors<endianness::NATIVE>; the
//    second visible return forwards to the pointer overload (input cast to
//    const char *). utf16_output.size() is never read here.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
1285simdutf_warn_unused result convert_utf8_to_utf16_with_errors(
1286 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
1288simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1289convert_utf8_to_utf16_with_errors(
1290 const detail::input_span_of_byte_like
auto &utf8_input,
1291 std::span<char16_t> utf16_output)
noexcept {
1292 #if SIMDUTF_CPLUSPLUS23
1294 return scalar::utf8_to_utf16::convert_with_errors<endianness::NATIVE>(
1295 utf8_input.data(), utf8_input.size(), utf16_output.data());
1299 return convert_utf8_to_utf16_with_errors(
1300 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
1301 utf16_output.data());
// convert_utf8_to_utf16le_with_errors: two overloads returning `result`.
//  * (const char *input, size_t length, char16_t *utf16_output):
//    declaration only.
//  * overload taking an input_span_of_byte_like and std::span<char16_t>,
//    marked simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf8_to_utf16::convert_with_errors<endianness::LITTLE>; the
//    second visible return forwards to the pointer overload (input cast to
//    const char *). utf16_output.size() is never read here.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
1320simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(
1321 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
1323simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1324convert_utf8_to_utf16le_with_errors(
1325 const detail::input_span_of_byte_like
auto &utf8_input,
1326 std::span<char16_t> utf16_output)
noexcept {
1327 #if SIMDUTF_CPLUSPLUS23
1329 return scalar::utf8_to_utf16::convert_with_errors<endianness::LITTLE>(
1330 utf8_input.data(), utf8_input.size(), utf16_output.data());
1334 return convert_utf8_to_utf16le_with_errors(
1335 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
1336 utf16_output.data());
// convert_utf8_to_utf16be_with_errors: two overloads returning `result`.
//  * (const char *input, size_t length, char16_t *utf16_output):
//    declaration only.
//  * overload taking an input_span_of_byte_like and std::span<char16_t>,
//    marked simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf8_to_utf16::convert_with_errors<endianness::BIG>; the second
//    visible return forwards to the pointer overload (input cast to
//    const char *). utf16_output.size() is never read here.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
1355simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(
1356 const char *input,
size_t length,
char16_t *utf16_output)
noexcept;
1358simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1359convert_utf8_to_utf16be_with_errors(
1360 const detail::input_span_of_byte_like
auto &utf8_input,
1361 std::span<char16_t> utf16_output)
noexcept {
1362 #if SIMDUTF_CPLUSPLUS23
1364 return scalar::utf8_to_utf16::convert_with_errors<endianness::BIG>(
1365 utf8_input.data(), utf8_input.size(), utf16_output.data());
1369 return convert_utf8_to_utf16be_with_errors(
1370 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
1371 utf16_output.data());
1377#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
// convert_utf8_to_utf32: two overloads returning size_t.
//  * (const char *input, size_t length, char32_t *utf32_output):
//    declaration only.
//  * overload taking an input_span_of_byte_like and std::span<char32_t>,
//    marked simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf8_to_utf32::convert; the second visible return forwards to
//    the pointer overload (input cast to const char *).
//    utf32_output.size() is never read here.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
1390simdutf_warn_unused
size_t convert_utf8_to_utf32(
1391 const char *input,
size_t length,
char32_t *utf32_output)
noexcept;
1393simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1394convert_utf8_to_utf32(
const detail::input_span_of_byte_like
auto &utf8_input,
1395 std::span<char32_t> utf32_output)
noexcept {
1396 #if SIMDUTF_CPLUSPLUS23
1398 return scalar::utf8_to_utf32::convert(utf8_input.data(), utf8_input.size(),
1399 utf32_output.data());
1403 return convert_utf8_to_utf32(
1404 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
1405 utf32_output.data());
// convert_utf8_to_utf32_with_errors: two overloads returning `result`.
//  * (const char *input, size_t length, char32_t *utf32_output):
//    declaration only.
//  * overload taking an input_span_of_byte_like and std::span<char32_t>,
//    marked simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf8_to_utf32::convert_with_errors; the second visible return
//    forwards to the pointer overload (input cast to const char *).
//    utf32_output.size() is never read here.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
1424simdutf_warn_unused result convert_utf8_to_utf32_with_errors(
1425 const char *input,
size_t length,
char32_t *utf32_output)
noexcept;
1427simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
1428convert_utf8_to_utf32_with_errors(
1429 const detail::input_span_of_byte_like
auto &utf8_input,
1430 std::span<char32_t> utf32_output)
noexcept {
1431 #if SIMDUTF_CPLUSPLUS23
1433 return scalar::utf8_to_utf32::convert_with_errors(
1434 utf8_input.data(), utf8_input.size(), utf32_output.data());
1438 return convert_utf8_to_utf32_with_errors(
1439 reinterpret_cast<const char *
>(utf8_input.data()), utf8_input.size(),
1440 utf32_output.data());
1446#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
// convert_valid_utf8_to_latin1: two overloads returning size_t.
//  * (const char *input, size_t length, char *latin1_output):
//    declaration only.
//  * overload taking an input_span_of_byte_like and an
//    output_span_of_byte_like, marked simdutf_constexpr23. After
//    `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf8_to_latin1::convert_valid; the second visible return
//    forwards to the pointer overload with the input cast to const char *.
// NOTE(review): in the forwarding call latin1_output.data() is passed WITHOUT
// reinterpret_cast<char *>, unlike convert_utf8_to_latin1 and
// convert_utf8_to_latin1_with_errors. For an output span whose element type
// is not char (e.g. unsigned char or std::byte) that pointer does not convert
// implicitly to the `char *` parameter -- looks like an inconsistency worth
// fixing; confirm against the full header.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
1466simdutf_warn_unused
size_t convert_valid_utf8_to_latin1(
1467 const char *input,
size_t length,
char *latin1_output)
noexcept;
1469simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1470convert_valid_utf8_to_latin1(
1471 const detail::input_span_of_byte_like
auto &valid_utf8_input,
1472 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
1473 #if SIMDUTF_CPLUSPLUS23
1475 return scalar::utf8_to_latin1::convert_valid(
1476 valid_utf8_input.data(), valid_utf8_input.size(), latin1_output.data());
1480 return convert_valid_utf8_to_latin1(
1481 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1482 valid_utf8_input.size(), latin1_output.data());
1488#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// convert_valid_utf8_to_utf16: two overloads returning size_t.
//  * (const char *input, size_t length, char16_t *utf16_buffer):
//    declaration only.
//  * overload taking an input_span_of_byte_like and std::span<char16_t>,
//    marked simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf8_to_utf16::convert_valid<endianness::NATIVE>; the second
//    visible return forwards to the pointer overload (input cast to
//    const char *). utf16_output.size() is never read here.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
1499simdutf_warn_unused
size_t convert_valid_utf8_to_utf16(
1500 const char *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
1502simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1503convert_valid_utf8_to_utf16(
1504 const detail::input_span_of_byte_like
auto &valid_utf8_input,
1505 std::span<char16_t> utf16_output)
noexcept {
1506 #if SIMDUTF_CPLUSPLUS23
1508 return scalar::utf8_to_utf16::convert_valid<endianness::NATIVE>(
1509 valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data());
1513 return convert_valid_utf8_to_utf16(
1514 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1515 valid_utf8_input.size(), utf16_output.data());
// convert_valid_utf8_to_utf16le: two overloads returning size_t.
//  * (const char *input, size_t length, char16_t *utf16_buffer):
//    declaration only.
//  * overload taking an input_span_of_byte_like and std::span<char16_t>,
//    marked simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf8_to_utf16::convert_valid<endianness::LITTLE>; the second
//    visible return forwards to the pointer overload (input cast to
//    const char *). utf16_output.size() is never read here.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
1530simdutf_warn_unused
size_t convert_valid_utf8_to_utf16le(
1531 const char *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
1533simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1534convert_valid_utf8_to_utf16le(
1535 const detail::input_span_of_byte_like
auto &valid_utf8_input,
1536 std::span<char16_t> utf16_output)
noexcept {
1538 #if SIMDUTF_CPLUSPLUS23
1540 return scalar::utf8_to_utf16::convert_valid<endianness::LITTLE>(
1541 valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data());
1545 return convert_valid_utf8_to_utf16le(
1546 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1547 valid_utf8_input.size(), utf16_output.data());
// convert_valid_utf8_to_utf16be: two overloads returning size_t.
//  * (const char *input, size_t length, char16_t *utf16_buffer):
//    declaration only.
//  * overload taking an input_span_of_byte_like and std::span<char16_t>,
//    marked simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf8_to_utf16::convert_valid<endianness::BIG>; the second
//    visible return forwards to the pointer overload (input cast to
//    const char *). utf16_output.size() is never read here.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
1562simdutf_warn_unused
size_t convert_valid_utf8_to_utf16be(
1563 const char *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
1565simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1566convert_valid_utf8_to_utf16be(
1567 const detail::input_span_of_byte_like
auto &valid_utf8_input,
1568 std::span<char16_t> utf16_output)
noexcept {
1569 #if SIMDUTF_CPLUSPLUS23
1571 return scalar::utf8_to_utf16::convert_valid<endianness::BIG>(
1572 valid_utf8_input.data(), valid_utf8_input.size(), utf16_output.data());
1576 return convert_valid_utf8_to_utf16be(
1577 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1578 valid_utf8_input.size(), utf16_output.data());
1584#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
// convert_valid_utf8_to_utf32: two overloads returning size_t.
//  * (const char *input, size_t length, char32_t *utf32_buffer):
//    declaration only.
//  * overload taking an input_span_of_byte_like and std::span<char32_t>,
//    marked simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf8_to_utf32::convert_valid; the second visible return forwards
//    to the pointer overload (input cast to const char *).
//    utf32_output.size() is never read here.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
1595simdutf_warn_unused
size_t convert_valid_utf8_to_utf32(
1596 const char *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
1598simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1599convert_valid_utf8_to_utf32(
1600 const detail::input_span_of_byte_like
auto &valid_utf8_input,
1601 std::span<char32_t> utf32_output)
noexcept {
1602 #if SIMDUTF_CPLUSPLUS23
1604 return scalar::utf8_to_utf32::convert_valid(
1605 valid_utf8_input.data(), valid_utf8_input.size(), utf32_output.data());
1609 return convert_valid_utf8_to_utf32(
1610 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1611 valid_utf8_input.size(), utf32_output.data());
1617#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
// utf8_length_from_latin1: two overloads returning size_t.
//  * (const char *input, size_t length): declaration only.
//  * overload taking an input_span_of_byte_like, marked simdutf_constexpr23.
//    After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::latin1_to_utf8::utf8_length_from_latin1 with data()/size(); the
//    second visible return forwards to the pointer overload (input cast to
//    const char *).
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
1626simdutf_warn_unused
size_t utf8_length_from_latin1(
const char *input,
1627 size_t length)
noexcept;
1629simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1630utf8_length_from_latin1(
1631 const detail::input_span_of_byte_like
auto &latin1_input)
noexcept {
1632 #if SIMDUTF_CPLUSPLUS23
1634 return scalar::latin1_to_utf8::utf8_length_from_latin1(latin1_input.data(),
1635 latin1_input.size());
1639 return utf8_length_from_latin1(
1640 reinterpret_cast<const char *
>(latin1_input.data()),
1641 latin1_input.size());
// latin1_length_from_utf8: two overloads returning size_t.
//  * (const char *input, size_t length): declaration only.
//  * overload taking an input_span_of_byte_like, marked simdutf_constexpr23.
//    After `#if SIMDUTF_CPLUSPLUS23` it calls scalar::utf8::count_code_points
//    with data()/size() (the same scalar helper utf32_length_from_utf8 uses);
//    the second visible return forwards to the pointer overload (input cast
//    to const char *).
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
1659simdutf_warn_unused
size_t latin1_length_from_utf8(
const char *input,
1660 size_t length)
noexcept;
1662simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1663latin1_length_from_utf8(
1664 const detail::input_span_of_byte_like
auto &valid_utf8_input)
noexcept {
1665 #if SIMDUTF_CPLUSPLUS23
1667 return scalar::utf8::count_code_points(valid_utf8_input.data(),
1668 valid_utf8_input.size());
1672 return latin1_length_from_utf8(
1673 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1674 valid_utf8_input.size());
1680#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// utf16_length_from_utf8: two overloads returning size_t.
//  * (const char *input, size_t length): declaration only.
//  * overload taking an input_span_of_byte_like, marked simdutf_constexpr23.
//    After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf8::utf16_length_from_utf8 with data()/size(); the second
//    visible return forwards to the pointer overload (input cast to
//    const char *).
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
1695simdutf_warn_unused
size_t utf16_length_from_utf8(
const char *input,
1696 size_t length)
noexcept;
1698simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1699utf16_length_from_utf8(
1700 const detail::input_span_of_byte_like
auto &valid_utf8_input)
noexcept {
1701 #if SIMDUTF_CPLUSPLUS23
1703 return scalar::utf8::utf16_length_from_utf8(valid_utf8_input.data(),
1704 valid_utf8_input.size());
1708 return utf16_length_from_utf8(
1709 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1710 valid_utf8_input.size());
1716#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
// utf32_length_from_utf8: two overloads returning size_t.
//  * (const char *input, size_t length): declaration only.
//  * overload taking an input_span_of_byte_like, marked simdutf_constexpr23.
//    After `#if SIMDUTF_CPLUSPLUS23` it calls scalar::utf8::count_code_points
//    with data()/size(); the second visible return forwards to the pointer
//    overload (input cast to const char *).
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
1733simdutf_warn_unused
size_t utf32_length_from_utf8(
const char *input,
1734 size_t length)
noexcept;
1736simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1737utf32_length_from_utf8(
1738 const detail::input_span_of_byte_like
auto &valid_utf8_input)
noexcept {
1740 #if SIMDUTF_CPLUSPLUS23
1742 return scalar::utf8::count_code_points(valid_utf8_input.data(),
1743 valid_utf8_input.size());
1747 return utf32_length_from_utf8(
1748 reinterpret_cast<const char *
>(valid_utf8_input.data()),
1749 valid_utf8_input.size());
1755#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// convert_utf16_to_utf8: two overloads returning size_t.
//  * pointer form: declaration only. NOTE(review): its parameter list looks
//    truncated in this extract (a length parameter is presumably missing
//    between `input` and `utf8_buffer`).
//  * (std::span<const char16_t> utf16_input, output_span_of_byte_like),
//    marked simdutf_constexpr23. After `#if SIMDUTF_CPLUSPLUS23` it calls
//    scalar::utf16_to_utf8::convert<endianness::NATIVE>; the second visible
//    return forwards to the pointer overload with the output pointer
//    reinterpret_cast to char *. utf8_output.size() is never read here.
// NOTE(review): branch selection, #endif and closing braces are missing from
// this extract.
1771simdutf_warn_unused
size_t convert_utf16_to_utf8(
const char16_t *input,
1773 char *utf8_buffer)
noexcept;
1775simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1776convert_utf16_to_utf8(
1777 std::span<const char16_t> utf16_input,
1778 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
1779 #if SIMDUTF_CPLUSPLUS23
1781 return scalar::utf16_to_utf8::convert<endianness::NATIVE>(
1782 utf16_input.data(), utf16_input.size(), utf8_output.data());
1786 return convert_utf16_to_utf8(utf16_input.data(), utf16_input.size(),
1787 reinterpret_cast<char *
>(utf8_output.data()));
// convert_utf16_to_utf8_safe: two overloads returning size_t.
//  * pointer form: declaration only. NOTE(review): its parameter list looks
//    truncated in this extract (only `input` and `utf8_len` are visible).
//  * (std::span<const char16_t> utf16_input, output_span_of_byte_like),
//    marked simdutf_constexpr23. Both visible paths pass utf8_output.size()
//    as the output capacity.
//    - After `#if SIMDUTF_CPLUSPLUS23`: calls
//      scalar::utf16_to_utf8::convert_with_errors<endianness::NATIVE, true>
//      into a full_result `r`, tests whether r.error is neither SUCCESS nor
//      OUTPUT_BUFFER_TOO_SMALL (the action taken in that case is not visible
//      here), and returns r.output_count.
//    - The second visible return forwards to the pointer overload with the
//      output pointer reinterpret_cast to char *.
// NOTE(review): branch selection, the body of the error `if`, #endif and
// closing braces are missing from this extract.
1810simdutf_warn_unused
size_t convert_utf16_to_utf8_safe(
const char16_t *input,
1813 size_t utf8_len)
noexcept;
1815simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1816convert_utf16_to_utf8_safe(
1817 std::span<const char16_t> utf16_input,
1818 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
1825 #if SIMDUTF_CPLUSPLUS23
1827 const full_result r =
1828 scalar::utf16_to_utf8::convert_with_errors<endianness::NATIVE, true>(
1829 utf16_input.data(), utf16_input.size(), utf8_output.data(),
1830 utf8_output.size());
1831 if (r.error != error_code::SUCCESS &&
1832 r.error != error_code::OUTPUT_BUFFER_TOO_SMALL) {
1835 return r.output_count;
1839 return convert_utf16_to_utf8_safe(
1840 utf16_input.data(), utf16_input.size(),
1841 reinterpret_cast<char *
>(utf8_output.data()), utf8_output.size());
1847#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
// convert_utf16_to_latin1 (pointer overload): declaration only; no definition
// appears in this excerpt.
1863simdutf_warn_unused
size_t convert_utf16_to_latin1(
1864 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// convert_utf16_to_latin1 (span overload): the SIMDUTF_CPLUSPLUS23 section
// calls scalar::utf16_to_latin1::convert<endianness::NATIVE>; the remaining
// return forwards to the pointer overload, casting the output data() to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
1866simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1867convert_utf16_to_latin1(
1868 std::span<const char16_t> utf16_input,
1869 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
1870 #if SIMDUTF_CPLUSPLUS23
1872 return scalar::utf16_to_latin1::convert<endianness::NATIVE>(
1873 utf16_input.data(), utf16_input.size(), latin1_output.data());
1877 return convert_utf16_to_latin1(
1878 utf16_input.data(), utf16_input.size(),
1879 reinterpret_cast<char *
>(latin1_output.data()));
// convert_utf16le_to_latin1 (pointer overload): declaration only; no
// definition appears in this excerpt.
1900simdutf_warn_unused
size_t convert_utf16le_to_latin1(
1901 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// convert_utf16le_to_latin1 (span overload): the SIMDUTF_CPLUSPLUS23 section
// calls scalar::utf16_to_latin1::convert<endianness::LITTLE>; the remaining
// return forwards to the pointer overload, casting the output data() to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
1903simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1904convert_utf16le_to_latin1(
1905 std::span<const char16_t> utf16_input,
1906 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
1907 #if SIMDUTF_CPLUSPLUS23
1909 return scalar::utf16_to_latin1::convert<endianness::LITTLE>(
1910 utf16_input.data(), utf16_input.size(), latin1_output.data());
1914 return convert_utf16le_to_latin1(
1915 utf16_input.data(), utf16_input.size(),
1916 reinterpret_cast<char *
>(latin1_output.data()));
// convert_utf16be_to_latin1 (pointer overload): declaration only; no
// definition appears in this excerpt.
1935simdutf_warn_unused
size_t convert_utf16be_to_latin1(
1936 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// convert_utf16be_to_latin1 (span overload): the SIMDUTF_CPLUSPLUS23 section
// calls scalar::utf16_to_latin1::convert<endianness::BIG>; the remaining
// return forwards to the pointer overload, casting the output data() to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
1938simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1939convert_utf16be_to_latin1(
1940 std::span<const char16_t> utf16_input,
1941 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
1942 #if SIMDUTF_CPLUSPLUS23
1944 return scalar::utf16_to_latin1::convert<endianness::BIG>(
1945 utf16_input.data(), utf16_input.size(), latin1_output.data());
1949 return convert_utf16be_to_latin1(
1950 utf16_input.data(), utf16_input.size(),
1951 reinterpret_cast<char *
>(latin1_output.data()));
1957#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// convert_utf16le_to_utf8 (pointer overload): declaration only; no definition
// appears in this excerpt.
// NOTE(review): the call below passes three arguments, so a parameter line of
// this declaration appears to be missing from the excerpt.
1972simdutf_warn_unused
size_t convert_utf16le_to_utf8(
const char16_t *input,
1974 char *utf8_buffer)
noexcept;
// convert_utf16le_to_utf8 (span overload): the SIMDUTF_CPLUSPLUS23 section
// calls scalar::utf16_to_utf8::convert<endianness::LITTLE>; the remaining
// return forwards to the pointer overload, casting the output data() to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
1976simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
1977convert_utf16le_to_utf8(
1978 std::span<const char16_t> utf16_input,
1979 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
1980 #if SIMDUTF_CPLUSPLUS23
1982 return scalar::utf16_to_utf8::convert<endianness::LITTLE>(
1983 utf16_input.data(), utf16_input.size(), utf8_output.data());
1987 return convert_utf16le_to_utf8(
1988 utf16_input.data(), utf16_input.size(),
1989 reinterpret_cast<char *
>(utf8_output.data()));
// convert_utf16be_to_utf8 (pointer overload): declaration only; no definition
// appears in this excerpt.
// NOTE(review): the call below passes three arguments, so a parameter line of
// this declaration appears to be missing from the excerpt.
2008simdutf_warn_unused
size_t convert_utf16be_to_utf8(
const char16_t *input,
2010 char *utf8_buffer)
noexcept;
// convert_utf16be_to_utf8 (span overload): the SIMDUTF_CPLUSPLUS23 section
// calls scalar::utf16_to_utf8::convert<endianness::BIG>; the remaining return
// forwards to the pointer overload, casting the output data() to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
2012simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2013convert_utf16be_to_utf8(
2014 std::span<const char16_t> utf16_input,
2015 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2016 #if SIMDUTF_CPLUSPLUS23
2018 return scalar::utf16_to_utf8::convert<endianness::BIG>(
2019 utf16_input.data(), utf16_input.size(), utf8_output.data());
2023 return convert_utf16be_to_utf8(
2024 utf16_input.data(), utf16_input.size(),
2025 reinterpret_cast<char *
>(utf8_output.data()));
2031#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
// convert_utf16_to_latin1_with_errors (pointer overload): declaration only,
// returning `result`; no definition appears in this excerpt.
2048simdutf_warn_unused result convert_utf16_to_latin1_with_errors(
2049 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// convert_utf16_to_latin1_with_errors (span overload): the SIMDUTF_CPLUSPLUS23
// section calls scalar::utf16_to_latin1::convert_with_errors<endianness::NATIVE>;
// the remaining return forwards to the pointer overload, casting the output
// data() to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
2051simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2052convert_utf16_to_latin1_with_errors(
2053 std::span<const char16_t> utf16_input,
2054 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
2055 #if SIMDUTF_CPLUSPLUS23
2057 return scalar::utf16_to_latin1::convert_with_errors<endianness::NATIVE>(
2058 utf16_input.data(), utf16_input.size(), latin1_output.data());
2062 return convert_utf16_to_latin1_with_errors(
2063 utf16_input.data(), utf16_input.size(),
2064 reinterpret_cast<char *
>(latin1_output.data()));
// convert_utf16le_to_latin1_with_errors (pointer overload): declaration only,
// returning `result`; no definition appears in this excerpt.
2084simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(
2085 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// convert_utf16le_to_latin1_with_errors (span overload): the
// SIMDUTF_CPLUSPLUS23 section calls
// scalar::utf16_to_latin1::convert_with_errors<endianness::LITTLE>; the
// remaining return forwards to the pointer overload, casting the output data()
// to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
2087simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2088convert_utf16le_to_latin1_with_errors(
2089 std::span<const char16_t> utf16_input,
2090 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
2091 #if SIMDUTF_CPLUSPLUS23
2093 return scalar::utf16_to_latin1::convert_with_errors<endianness::LITTLE>(
2094 utf16_input.data(), utf16_input.size(), latin1_output.data());
2098 return convert_utf16le_to_latin1_with_errors(
2099 utf16_input.data(), utf16_input.size(),
2100 reinterpret_cast<char *
>(latin1_output.data()));
// convert_utf16be_to_latin1_with_errors (pointer overload): declaration only,
// returning `result`; no definition appears in this excerpt.
2122simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(
2123 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// convert_utf16be_to_latin1_with_errors (span overload): the
// SIMDUTF_CPLUSPLUS23 section calls
// scalar::utf16_to_latin1::convert_with_errors<endianness::BIG>; the remaining
// return forwards to the pointer overload, casting the output data() to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
2125simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2126convert_utf16be_to_latin1_with_errors(
2127 std::span<const char16_t> utf16_input,
2128 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
2129 #if SIMDUTF_CPLUSPLUS23
2131 return scalar::utf16_to_latin1::convert_with_errors<endianness::BIG>(
2132 utf16_input.data(), utf16_input.size(), latin1_output.data());
2136 return convert_utf16be_to_latin1_with_errors(
2137 utf16_input.data(), utf16_input.size(),
2138 reinterpret_cast<char *
>(latin1_output.data()));
2144#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// convert_utf16_to_utf8_with_errors (pointer overload): declaration only,
// returning `result`; no definition appears in this excerpt.
2162simdutf_warn_unused result convert_utf16_to_utf8_with_errors(
2163 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
// convert_utf16_to_utf8_with_errors (span overload): the SIMDUTF_CPLUSPLUS23
// section calls scalar::utf16_to_utf8::convert_with_errors<endianness::NATIVE>;
// the remaining return forwards to the pointer overload, casting the output
// data() to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
2165simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2166convert_utf16_to_utf8_with_errors(
2167 std::span<const char16_t> utf16_input,
2168 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2169 #if SIMDUTF_CPLUSPLUS23
2171 return scalar::utf16_to_utf8::convert_with_errors<endianness::NATIVE>(
2172 utf16_input.data(), utf16_input.size(), utf8_output.data());
2176 return convert_utf16_to_utf8_with_errors(
2177 utf16_input.data(), utf16_input.size(),
2178 reinterpret_cast<char *
>(utf8_output.data()));
// convert_utf16le_to_utf8_with_errors (pointer overload): declaration only,
// returning `result`; no definition appears in this excerpt.
2199simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(
2200 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
// convert_utf16le_to_utf8_with_errors (span overload): the SIMDUTF_CPLUSPLUS23
// section calls scalar::utf16_to_utf8::convert_with_errors<endianness::LITTLE>;
// the remaining return forwards to the pointer overload, casting the output
// data() to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
2202simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2203convert_utf16le_to_utf8_with_errors(
2204 std::span<const char16_t> utf16_input,
2205 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2206 #if SIMDUTF_CPLUSPLUS23
2208 return scalar::utf16_to_utf8::convert_with_errors<endianness::LITTLE>(
2209 utf16_input.data(), utf16_input.size(), utf8_output.data());
2213 return convert_utf16le_to_utf8_with_errors(
2214 utf16_input.data(), utf16_input.size(),
2215 reinterpret_cast<char *
>(utf8_output.data()));
// convert_utf16be_to_utf8_with_errors (pointer overload): declaration only,
// returning `result`; no definition appears in this excerpt.
2236simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(
2237 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
// convert_utf16be_to_utf8_with_errors (span overload): the SIMDUTF_CPLUSPLUS23
// section calls scalar::utf16_to_utf8::convert_with_errors<endianness::BIG>;
// the remaining return forwards to the pointer overload, casting the output
// data() to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
2239simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2240convert_utf16be_to_utf8_with_errors(
2241 std::span<const char16_t> utf16_input,
2242 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2243 #if SIMDUTF_CPLUSPLUS23
2245 return scalar::utf16_to_utf8::convert_with_errors<endianness::BIG>(
2246 utf16_input.data(), utf16_input.size(), utf8_output.data());
2250 return convert_utf16be_to_utf8_with_errors(
2251 utf16_input.data(), utf16_input.size(),
2252 reinterpret_cast<char *
>(utf8_output.data()));
// convert_valid_utf16_to_utf8 (pointer overload): declaration only; no
// definition appears in this excerpt.
2270simdutf_warn_unused
size_t convert_valid_utf16_to_utf8(
2271 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
// convert_valid_utf16_to_utf8 (span overload): the SIMDUTF_CPLUSPLUS23 section
// calls scalar::utf16_to_utf8::convert_valid<endianness::NATIVE>; the
// remaining return forwards to the pointer overload, casting the output data()
// to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
2273simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2274convert_valid_utf16_to_utf8(
2275 std::span<const char16_t> valid_utf16_input,
2276 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2277 #if SIMDUTF_CPLUSPLUS23
2279 return scalar::utf16_to_utf8::convert_valid<endianness::NATIVE>(
2280 valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data());
2284 return convert_valid_utf16_to_utf8(
2285 valid_utf16_input.data(), valid_utf16_input.size(),
2286 reinterpret_cast<char *
>(utf8_output.data()));
2292#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
// convert_valid_utf16_to_latin1 (pointer overload): declaration only; no
// definition appears in this excerpt.
2312simdutf_warn_unused
size_t convert_valid_utf16_to_latin1(
2313 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// convert_valid_utf16_to_latin1 (span overload): the SIMDUTF_CPLUSPLUS23
// section calls scalar::utf16_to_latin1::convert_valid_impl<endianness::NATIVE>,
// wrapping the input pointer with detail::constexpr_cast_ptr<uint16_t> and the
// output pointer with detail::constexpr_cast_writeptr<char>; the remaining
// return forwards to the pointer overload, casting the output data() to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
2315simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2316convert_valid_utf16_to_latin1(
2317 std::span<const char16_t> valid_utf16_input,
2318 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
2319 #if SIMDUTF_CPLUSPLUS23
2321 return scalar::utf16_to_latin1::convert_valid_impl<endianness::NATIVE>(
2322 detail::constexpr_cast_ptr<uint16_t>(valid_utf16_input.data()),
2323 valid_utf16_input.size(),
2324 detail::constexpr_cast_writeptr<char>(latin1_output.data()));
2328 return convert_valid_utf16_to_latin1(
2329 valid_utf16_input.data(), valid_utf16_input.size(),
2330 reinterpret_cast<char *
>(latin1_output.data()));
// convert_valid_utf16le_to_latin1 (pointer overload): declaration only; no
// definition appears in this excerpt.
2354simdutf_warn_unused
size_t convert_valid_utf16le_to_latin1(
2355 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// convert_valid_utf16le_to_latin1 (span overload): the SIMDUTF_CPLUSPLUS23
// section calls scalar::utf16_to_latin1::convert_valid_impl<endianness::LITTLE>,
// wrapping the input pointer with detail::constexpr_cast_ptr<uint16_t> and the
// output pointer with detail::constexpr_cast_writeptr<char>; the remaining
// return forwards to the pointer overload, casting the output data() to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
2357simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused
size_t
2358convert_valid_utf16le_to_latin1(
2359 std::span<const char16_t> valid_utf16_input,
2360 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
2361 #if SIMDUTF_CPLUSPLUS23
2363 return scalar::utf16_to_latin1::convert_valid_impl<endianness::LITTLE>(
2364 detail::constexpr_cast_ptr<uint16_t>(valid_utf16_input.data()),
2365 valid_utf16_input.size(),
2366 detail::constexpr_cast_writeptr<char>(latin1_output.data()));
2370 return convert_valid_utf16le_to_latin1(
2371 valid_utf16_input.data(), valid_utf16_input.size(),
2372 reinterpret_cast<char *
>(latin1_output.data()));
// convert_valid_utf16be_to_latin1 (pointer overload): declaration only; no
// definition appears in this excerpt.
2396simdutf_warn_unused
size_t convert_valid_utf16be_to_latin1(
2397 const char16_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// convert_valid_utf16be_to_latin1 (span overload): the SIMDUTF_CPLUSPLUS23
// section calls scalar::utf16_to_latin1::convert_valid_impl<endianness::BIG>,
// wrapping the input pointer with detail::constexpr_cast_ptr<uint16_t> and the
// output pointer with detail::constexpr_cast_writeptr<char>; the remaining
// return forwards to the pointer overload, casting the output data() to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
2399simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused
size_t
2400convert_valid_utf16be_to_latin1(
2401 std::span<const char16_t> valid_utf16_input,
2402 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
2403 #if SIMDUTF_CPLUSPLUS23
2405 return scalar::utf16_to_latin1::convert_valid_impl<endianness::BIG>(
2406 detail::constexpr_cast_ptr<uint16_t>(valid_utf16_input.data()),
2407 valid_utf16_input.size(),
2408 detail::constexpr_cast_writeptr<char>(latin1_output.data()));
2412 return convert_valid_utf16be_to_latin1(
2413 valid_utf16_input.data(), valid_utf16_input.size(),
2414 reinterpret_cast<char *
>(latin1_output.data()));
2420#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// convert_valid_utf16le_to_utf8 (pointer overload): declaration only; no
// definition appears in this excerpt.
2434simdutf_warn_unused
size_t convert_valid_utf16le_to_utf8(
2435 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
// convert_valid_utf16le_to_utf8 (span overload): the SIMDUTF_CPLUSPLUS23
// section calls scalar::utf16_to_utf8::convert_valid on the input's
// data()/size() and the output's data(); the remaining return forwards to the
// pointer overload, casting the output data() to char *.
// The scalar call is instantiated with endianness::LITTLE so that it matches
// this function's "le" contract regardless of the host byte order, in line
// with the other *le* wrappers in this file (NATIVE would only coincide with
// LITTLE on little-endian hosts).
// NOTE(review): the lines choosing between the two returns are not visible here.
2437simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2438convert_valid_utf16le_to_utf8(
2439 std::span<const char16_t> valid_utf16_input,
2440 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2441 #if SIMDUTF_CPLUSPLUS23
2443 return scalar::utf16_to_utf8::convert_valid<endianness::LITTLE>(
2444 valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data());
2448 return convert_valid_utf16le_to_utf8(
2449 valid_utf16_input.data(), valid_utf16_input.size(),
2450 reinterpret_cast<char *
>(utf8_output.data()));
// convert_valid_utf16be_to_utf8 (pointer overload): declaration only; no
// definition appears in this excerpt.
2468simdutf_warn_unused
size_t convert_valid_utf16be_to_utf8(
2469 const char16_t *input,
size_t length,
char *utf8_buffer)
noexcept;
// convert_valid_utf16be_to_utf8 (span overload): the SIMDUTF_CPLUSPLUS23
// section calls scalar::utf16_to_utf8::convert_valid<endianness::BIG>; the
// remaining return forwards to the pointer overload, casting the output data()
// to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
2471simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2472convert_valid_utf16be_to_utf8(
2473 std::span<const char16_t> valid_utf16_input,
2474 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2475 #if SIMDUTF_CPLUSPLUS23
2477 return scalar::utf16_to_utf8::convert_valid<endianness::BIG>(
2478 valid_utf16_input.data(), valid_utf16_input.size(), utf8_output.data());
2482 return convert_valid_utf16be_to_utf8(
2483 valid_utf16_input.data(), valid_utf16_input.size(),
2484 reinterpret_cast<char *
>(utf8_output.data()));
2490#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
// convert_utf16_to_utf32 (pointer overload): declaration only; no definition
// appears in this excerpt.
2506simdutf_warn_unused
size_t convert_utf16_to_utf32(
2507 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
// convert_utf16_to_utf32 (span overload): the SIMDUTF_CPLUSPLUS23 section
// calls scalar::utf16_to_utf32::convert<endianness::NATIVE>; the remaining
// return forwards the spans' data()/size() to the pointer overload. The output
// span's size() is not passed to either call.
// NOTE(review): the lines choosing between the two returns are not visible here.
2509simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2510convert_utf16_to_utf32(std::span<const char16_t> utf16_input,
2511 std::span<char32_t> utf32_output)
noexcept {
2513 #if SIMDUTF_CPLUSPLUS23
2515 return scalar::utf16_to_utf32::convert<endianness::NATIVE>(
2516 utf16_input.data(), utf16_input.size(), utf32_output.data());
2520 return convert_utf16_to_utf32(utf16_input.data(), utf16_input.size(),
2521 utf32_output.data());
// convert_utf16le_to_utf32 (pointer overload): declaration only; no definition
// appears in this excerpt.
2540simdutf_warn_unused
size_t convert_utf16le_to_utf32(
2541 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
// convert_utf16le_to_utf32 (span overload): the SIMDUTF_CPLUSPLUS23 section
// calls scalar::utf16_to_utf32::convert<endianness::LITTLE>; the remaining
// return forwards the spans' data()/size() to the pointer overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
2543simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2544convert_utf16le_to_utf32(std::span<const char16_t> utf16_input,
2545 std::span<char32_t> utf32_output)
noexcept {
2546 #if SIMDUTF_CPLUSPLUS23
2548 return scalar::utf16_to_utf32::convert<endianness::LITTLE>(
2549 utf16_input.data(), utf16_input.size(), utf32_output.data());
2553 return convert_utf16le_to_utf32(utf16_input.data(), utf16_input.size(),
2554 utf32_output.data());
// convert_utf16be_to_utf32 (pointer overload): declaration only; no definition
// appears in this excerpt.
2573simdutf_warn_unused
size_t convert_utf16be_to_utf32(
2574 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
// convert_utf16be_to_utf32 (span overload): the SIMDUTF_CPLUSPLUS23 section
// calls scalar::utf16_to_utf32::convert<endianness::BIG>; the remaining return
// forwards the spans' data()/size() to the pointer overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
2576simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2577convert_utf16be_to_utf32(std::span<const char16_t> utf16_input,
2578 std::span<char32_t> utf32_output)
noexcept {
2579 #if SIMDUTF_CPLUSPLUS23
2581 return scalar::utf16_to_utf32::convert<endianness::BIG>(
2582 utf16_input.data(), utf16_input.size(), utf32_output.data());
2586 return convert_utf16be_to_utf32(utf16_input.data(), utf16_input.size(),
2587 utf32_output.data());
// convert_utf16_to_utf32_with_errors (pointer overload): declaration only,
// returning `result`; no definition appears in this excerpt.
2609simdutf_warn_unused result convert_utf16_to_utf32_with_errors(
2610 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
// convert_utf16_to_utf32_with_errors (span overload): the SIMDUTF_CPLUSPLUS23
// section calls scalar::utf16_to_utf32::convert_with_errors<endianness::NATIVE>;
// the remaining return forwards the spans' data()/size() to the pointer
// overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
2612simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2613convert_utf16_to_utf32_with_errors(std::span<const char16_t> utf16_input,
2614 std::span<char32_t> utf32_output)
noexcept {
2615 #if SIMDUTF_CPLUSPLUS23
2617 return scalar::utf16_to_utf32::convert_with_errors<endianness::NATIVE>(
2618 utf16_input.data(), utf16_input.size(), utf32_output.data());
2622 return convert_utf16_to_utf32_with_errors(
2623 utf16_input.data(), utf16_input.size(), utf32_output.data());
// convert_utf16le_to_utf32_with_errors (pointer overload): declaration only,
// returning `result`; no definition appears in this excerpt.
2644simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(
2645 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
// convert_utf16le_to_utf32_with_errors (span overload): the
// SIMDUTF_CPLUSPLUS23 section calls
// scalar::utf16_to_utf32::convert_with_errors<endianness::LITTLE>; the
// remaining return forwards the spans' data()/size() to the pointer overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
2647simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2648convert_utf16le_to_utf32_with_errors(
2649 std::span<const char16_t> utf16_input,
2650 std::span<char32_t> utf32_output)
noexcept {
2651 #if SIMDUTF_CPLUSPLUS23
2653 return scalar::utf16_to_utf32::convert_with_errors<endianness::LITTLE>(
2654 utf16_input.data(), utf16_input.size(), utf32_output.data());
2658 return convert_utf16le_to_utf32_with_errors(
2659 utf16_input.data(), utf16_input.size(), utf32_output.data());
// convert_utf16be_to_utf32_with_errors (pointer overload): declaration only,
// returning `result`; no definition appears in this excerpt.
2680simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(
2681 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
// convert_utf16be_to_utf32_with_errors (span overload): the
// SIMDUTF_CPLUSPLUS23 section calls
// scalar::utf16_to_utf32::convert_with_errors<endianness::BIG>; the remaining
// return forwards the spans' data()/size() to the pointer overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
2683simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2684convert_utf16be_to_utf32_with_errors(
2685 std::span<const char16_t> utf16_input,
2686 std::span<char32_t> utf32_output)
noexcept {
2687 #if SIMDUTF_CPLUSPLUS23
2689 return scalar::utf16_to_utf32::convert_with_errors<endianness::BIG>(
2690 utf16_input.data(), utf16_input.size(), utf32_output.data());
2694 return convert_utf16be_to_utf32_with_errors(
2695 utf16_input.data(), utf16_input.size(), utf32_output.data());
// convert_valid_utf16_to_utf32 (pointer overload): declaration only; no
// definition appears in this excerpt.
2714simdutf_warn_unused
size_t convert_valid_utf16_to_utf32(
2715 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
// convert_valid_utf16_to_utf32 (span overload): the SIMDUTF_CPLUSPLUS23
// section calls scalar::utf16_to_utf32::convert_valid<endianness::NATIVE>; the
// remaining return forwards the spans' data()/size() to the pointer overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
2717simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2718convert_valid_utf16_to_utf32(std::span<const char16_t> valid_utf16_input,
2719 std::span<char32_t> utf32_output)
noexcept {
2720 #if SIMDUTF_CPLUSPLUS23
2722 return scalar::utf16_to_utf32::convert_valid<endianness::NATIVE>(
2723 valid_utf16_input.data(), valid_utf16_input.size(),
2724 utf32_output.data());
2728 return convert_valid_utf16_to_utf32(valid_utf16_input.data(),
2729 valid_utf16_input.size(),
2730 utf32_output.data());
// convert_valid_utf16le_to_utf32 (pointer overload): declaration only; no
// definition appears in this excerpt.
2748simdutf_warn_unused
size_t convert_valid_utf16le_to_utf32(
2749 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
// convert_valid_utf16le_to_utf32 (span overload): the SIMDUTF_CPLUSPLUS23
// section calls scalar::utf16_to_utf32::convert_valid<endianness::LITTLE>; the
// remaining return forwards the spans' data()/size() to the pointer overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
2751simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2752convert_valid_utf16le_to_utf32(std::span<const char16_t> valid_utf16_input,
2753 std::span<char32_t> utf32_output)
noexcept {
2754 #if SIMDUTF_CPLUSPLUS23
2756 return scalar::utf16_to_utf32::convert_valid<endianness::LITTLE>(
2757 valid_utf16_input.data(), valid_utf16_input.size(),
2758 utf32_output.data());
2762 return convert_valid_utf16le_to_utf32(valid_utf16_input.data(),
2763 valid_utf16_input.size(),
2764 utf32_output.data());
// convert_valid_utf16be_to_utf32 (pointer overload): declaration only; no
// definition appears in this excerpt.
2782simdutf_warn_unused
size_t convert_valid_utf16be_to_utf32(
2783 const char16_t *input,
size_t length,
char32_t *utf32_buffer)
noexcept;
// convert_valid_utf16be_to_utf32 (span overload): the SIMDUTF_CPLUSPLUS23
// section calls scalar::utf16_to_utf32::convert_valid<endianness::BIG>; the
// remaining return forwards the spans' data()/size() to the pointer overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
2785simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2786convert_valid_utf16be_to_utf32(std::span<const char16_t> valid_utf16_input,
2787 std::span<char32_t> utf32_output)
noexcept {
2788 #if SIMDUTF_CPLUSPLUS23
2790 return scalar::utf16_to_utf32::convert_valid<endianness::BIG>(
2791 valid_utf16_input.data(), valid_utf16_input.size(),
2792 utf32_output.data());
2796 return convert_valid_utf16be_to_utf32(valid_utf16_input.data(),
2797 valid_utf16_input.size(),
2798 utf32_output.data());
2804#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
// utf8_length_from_utf16 (pointer overload): declaration only; no definition
// appears in this excerpt.
2816simdutf_warn_unused
size_t utf8_length_from_utf16(
const char16_t *input,
2817 size_t length)
noexcept;
// utf8_length_from_utf16 (span overload): the SIMDUTF_CPLUSPLUS23 section
// calls scalar::utf16::utf8_length_from_utf16<endianness::NATIVE> on
// data()/size(); the remaining return forwards data()/size() to the pointer
// overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
2819simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2820utf8_length_from_utf16(std::span<const char16_t> valid_utf16_input)
noexcept {
2821 #if SIMDUTF_CPLUSPLUS23
2823 return scalar::utf16::utf8_length_from_utf16<endianness::NATIVE>(
2824 valid_utf16_input.data(), valid_utf16_input.size());
2828 return utf8_length_from_utf16(valid_utf16_input.data(),
2829 valid_utf16_input.size());
// utf8_length_from_utf16_with_replacement (pointer overload): declaration
// only, returning `result`; no definition appears in this excerpt.
2852simdutf_warn_unused result utf8_length_from_utf16_with_replacement(
2853 const char16_t *input,
size_t length)
noexcept;
// utf8_length_from_utf16_with_replacement (span overload): the
// SIMDUTF_CPLUSPLUS23 section calls
// scalar::utf16::utf8_length_from_utf16_with_replacement<endianness::NATIVE>
// on data()/size(); the remaining return forwards data()/size() to the pointer
// overload.
// NOTE(review): the parameter is named valid_utf16_input although the function
// name suggests invalid input is tolerated -- confirm against the API docs.
// The lines choosing between the two returns are not visible here.
2855simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2856utf8_length_from_utf16_with_replacement(
2857 std::span<const char16_t> valid_utf16_input)
noexcept {
2858 #if SIMDUTF_CPLUSPLUS23
2860 return scalar::utf16::utf8_length_from_utf16_with_replacement<
2861 endianness::NATIVE>(valid_utf16_input.data(), valid_utf16_input.size());
2865 return utf8_length_from_utf16_with_replacement(valid_utf16_input.data(),
2866 valid_utf16_input.size());
// utf8_length_from_utf16le (pointer overload): declaration only; no definition
// appears in this excerpt.
2882simdutf_warn_unused
size_t utf8_length_from_utf16le(
const char16_t *input,
2883 size_t length)
noexcept;
// utf8_length_from_utf16le (span overload): the SIMDUTF_CPLUSPLUS23 section
// calls scalar::utf16::utf8_length_from_utf16<endianness::LITTLE> on
// data()/size(); the remaining return forwards data()/size() to the pointer
// overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
2885simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused
size_t
2886utf8_length_from_utf16le(std::span<const char16_t> valid_utf16_input)
noexcept {
2887 #if SIMDUTF_CPLUSPLUS23
2889 return scalar::utf16::utf8_length_from_utf16<endianness::LITTLE>(
2890 valid_utf16_input.data(), valid_utf16_input.size());
2894 return utf8_length_from_utf16le(valid_utf16_input.data(),
2895 valid_utf16_input.size());
// utf8_length_from_utf16be (pointer overload): declaration only; no definition
// appears in this excerpt.
2911simdutf_warn_unused
size_t utf8_length_from_utf16be(
const char16_t *input,
2912 size_t length)
noexcept;
// utf8_length_from_utf16be (span overload): the SIMDUTF_CPLUSPLUS23 section
// calls scalar::utf16::utf8_length_from_utf16<endianness::BIG> on
// data()/size(); the remaining return forwards data()/size() to the pointer
// overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
2914simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2915utf8_length_from_utf16be(std::span<const char16_t> valid_utf16_input)
noexcept {
2916 #if SIMDUTF_CPLUSPLUS23
2918 return scalar::utf16::utf8_length_from_utf16<endianness::BIG>(
2919 valid_utf16_input.data(), valid_utf16_input.size());
2923 return utf8_length_from_utf16be(valid_utf16_input.data(),
2924 valid_utf16_input.size());
2930#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
// convert_utf32_to_utf8 (pointer overload): declaration only; no definition
// appears in this excerpt.
// NOTE(review): the call below passes three arguments, so a parameter line of
// this declaration appears to be missing from the excerpt.
2944simdutf_warn_unused
size_t convert_utf32_to_utf8(
const char32_t *input,
2946 char *utf8_buffer)
noexcept;
// convert_utf32_to_utf8 (span overload): the SIMDUTF_CPLUSPLUS23 section calls
// scalar::utf32_to_utf8::convert; the remaining return forwards to the pointer
// overload, casting the output data() to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
2948simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
2949convert_utf32_to_utf8(
2950 std::span<const char32_t> utf32_input,
2951 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2952 #if SIMDUTF_CPLUSPLUS23
2954 return scalar::utf32_to_utf8::convert(
2955 utf32_input.data(), utf32_input.size(), utf8_output.data());
2959 return convert_utf32_to_utf8(utf32_input.data(), utf32_input.size(),
2960 reinterpret_cast<char *
>(utf8_output.data()));
// convert_utf32_to_utf8_with_errors (pointer overload): declaration only,
// returning `result`; no definition appears in this excerpt.
2981simdutf_warn_unused result convert_utf32_to_utf8_with_errors(
2982 const char32_t *input,
size_t length,
char *utf8_buffer)
noexcept;
// convert_utf32_to_utf8_with_errors (span overload): the SIMDUTF_CPLUSPLUS23
// section calls scalar::utf32_to_utf8::convert_with_errors; the remaining
// return forwards to the pointer overload, casting the output data() to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
2984simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
2985convert_utf32_to_utf8_with_errors(
2986 std::span<const char32_t> utf32_input,
2987 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
2988 #if SIMDUTF_CPLUSPLUS23
2990 return scalar::utf32_to_utf8::convert_with_errors(
2991 utf32_input.data(), utf32_input.size(), utf8_output.data());
2995 return convert_utf32_to_utf8_with_errors(
2996 utf32_input.data(), utf32_input.size(),
2997 reinterpret_cast<char *
>(utf8_output.data()));
// convert_valid_utf32_to_utf8 (pointer overload): declaration only; no
// definition appears in this excerpt.
3015simdutf_warn_unused
size_t convert_valid_utf32_to_utf8(
3016 const char32_t *input,
size_t length,
char *utf8_buffer)
noexcept;
// convert_valid_utf32_to_utf8 (span overload): the SIMDUTF_CPLUSPLUS23 section
// calls scalar::utf32_to_utf8::convert_valid; the remaining return forwards to
// the pointer overload, casting the output data() to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
3018simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3019convert_valid_utf32_to_utf8(
3020 std::span<const char32_t> valid_utf32_input,
3021 detail::output_span_of_byte_like
auto &&utf8_output)
noexcept {
3022 #if SIMDUTF_CPLUSPLUS23
3024 return scalar::utf32_to_utf8::convert_valid(
3025 valid_utf32_input.data(), valid_utf32_input.size(), utf8_output.data());
3029 return convert_valid_utf32_to_utf8(
3030 valid_utf32_input.data(), valid_utf32_input.size(),
3031 reinterpret_cast<char *
>(utf8_output.data()));
3037#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
// convert_utf32_to_utf16 (pointer overload): declaration only; no definition
// appears in this excerpt.
3052simdutf_warn_unused
size_t convert_utf32_to_utf16(
3053 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
// convert_utf32_to_utf16 (span overload): the SIMDUTF_CPLUSPLUS23 section
// calls scalar::utf32_to_utf16::convert<endianness::NATIVE>; the remaining
// return forwards the spans' data()/size() to the pointer overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
3055simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3056convert_utf32_to_utf16(std::span<const char32_t> utf32_input,
3057 std::span<char16_t> utf16_output)
noexcept {
3058 #if SIMDUTF_CPLUSPLUS23
3060 return scalar::utf32_to_utf16::convert<endianness::NATIVE>(
3061 utf32_input.data(), utf32_input.size(), utf16_output.data());
3065 return convert_utf32_to_utf16(utf32_input.data(), utf32_input.size(),
3066 utf16_output.data());
// convert_utf32_to_utf16le (pointer overload): declaration only; no definition
// appears in this excerpt.
3084simdutf_warn_unused
size_t convert_utf32_to_utf16le(
3085 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
// convert_utf32_to_utf16le (span overload): the SIMDUTF_CPLUSPLUS23 section
// calls scalar::utf32_to_utf16::convert<endianness::LITTLE>; the remaining
// return forwards the spans' data()/size() to the pointer overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
3087simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3088convert_utf32_to_utf16le(std::span<const char32_t> utf32_input,
3089 std::span<char16_t> utf16_output)
noexcept {
3090 #if SIMDUTF_CPLUSPLUS23
3092 return scalar::utf32_to_utf16::convert<endianness::LITTLE>(
3093 utf32_input.data(), utf32_input.size(), utf16_output.data());
3097 return convert_utf32_to_utf16le(utf32_input.data(), utf32_input.size(),
3098 utf16_output.data());
3104#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
// convert_utf32_to_latin1 (pointer overload): declaration only; no definition
// appears in this excerpt.
3119simdutf_warn_unused
size_t convert_utf32_to_latin1(
3120 const char32_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// convert_utf32_to_latin1 (span overload): the SIMDUTF_CPLUSPLUS23 section
// calls scalar::utf32_to_latin1::convert; the remaining return forwards to the
// pointer overload, casting the output data() to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
3122simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3123convert_utf32_to_latin1(
3124 std::span<const char32_t> utf32_input,
3125 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
3126 #if SIMDUTF_CPLUSPLUS23
3128 return scalar::utf32_to_latin1::convert(
3129 utf32_input.data(), utf32_input.size(), latin1_output.data());
3133 return convert_utf32_to_latin1(
3134 utf32_input.data(), utf32_input.size(),
3135 reinterpret_cast<char *
>(latin1_output.data()));
// convert_utf32_to_latin1_with_errors (pointer overload): declaration only,
// returning `result`; no definition appears in this excerpt.
3157simdutf_warn_unused result convert_utf32_to_latin1_with_errors(
3158 const char32_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// convert_utf32_to_latin1_with_errors (span overload): the SIMDUTF_CPLUSPLUS23
// section calls scalar::utf32_to_latin1::convert_with_errors; the remaining
// return forwards to the pointer overload, casting the output data() to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
3160simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3161convert_utf32_to_latin1_with_errors(
3162 std::span<const char32_t> utf32_input,
3163 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
3164 #if SIMDUTF_CPLUSPLUS23
3166 return scalar::utf32_to_latin1::convert_with_errors(
3167 utf32_input.data(), utf32_input.size(), latin1_output.data());
3171 return convert_utf32_to_latin1_with_errors(
3172 utf32_input.data(), utf32_input.size(),
3173 reinterpret_cast<char *
>(latin1_output.data()));
// convert_valid_utf32_to_latin1 (pointer overload): declaration only; no
// definition appears in this excerpt.
3198simdutf_warn_unused
size_t convert_valid_utf32_to_latin1(
3199 const char32_t *input,
size_t length,
char *latin1_buffer)
noexcept;
// convert_valid_utf32_to_latin1 (span overload): the SIMDUTF_CPLUSPLUS23
// section calls scalar::utf32_to_latin1::convert_valid, wrapping the input
// pointer with detail::constexpr_cast_ptr<uint32_t> and the output pointer
// with detail::constexpr_cast_writeptr<char>; the remaining return forwards to
// the pointer overload, casting the output data() to char *.
// NOTE(review): the lines choosing between the two returns are not visible here.
3201simdutf_really_inline simdutf_constexpr23 simdutf_warn_unused
size_t
3202convert_valid_utf32_to_latin1(
3203 std::span<const char32_t> valid_utf32_input,
3204 detail::output_span_of_byte_like
auto &&latin1_output)
noexcept {
3205 #if SIMDUTF_CPLUSPLUS23
3207 return scalar::utf32_to_latin1::convert_valid(
3208 detail::constexpr_cast_ptr<uint32_t>(valid_utf32_input.data()),
3209 valid_utf32_input.size(),
3210 detail::constexpr_cast_writeptr<char>(latin1_output.data()));
3214 return convert_valid_utf32_to_latin1(
3215 valid_utf32_input.data(), valid_utf32_input.size(),
3216 reinterpret_cast<char *
>(latin1_output.data()));
// latin1_length_from_utf32: inline noexcept function taking only a length and
// returning size_t; marked simdutf_constexpr23.
// NOTE(review): the body is not visible in this excerpt, so the mapping from
// `length` to the result cannot be documented from here.
3233simdutf_warn_unused simdutf_really_inline simdutf_constexpr23
size_t
3234latin1_length_from_utf32(
size_t length)
noexcept {
// utf32_length_from_latin1: inline noexcept function taking only a length and
// returning size_t; marked simdutf_constexpr23.
// NOTE(review): the body is not visible in this excerpt, so the mapping from
// `length` to the result cannot be documented from here.
3246simdutf_warn_unused simdutf_really_inline simdutf_constexpr23
size_t
3247utf32_length_from_latin1(
size_t length)
noexcept {
3252#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
// convert_utf32_to_utf16be (pointer overload): declaration only; no definition
// appears in this excerpt.
3266simdutf_warn_unused
size_t convert_utf32_to_utf16be(
3267 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
// convert_utf32_to_utf16be (span overload): the SIMDUTF_CPLUSPLUS23 section
// calls scalar::utf32_to_utf16::convert<endianness::BIG>; the remaining return
// forwards the spans' data()/size() to the pointer overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
3269simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3270convert_utf32_to_utf16be(std::span<const char32_t> utf32_input,
3271 std::span<char16_t> utf16_output)
noexcept {
3272 #if SIMDUTF_CPLUSPLUS23
3274 return scalar::utf32_to_utf16::convert<endianness::BIG>(
3275 utf32_input.data(), utf32_input.size(), utf16_output.data());
3279 return convert_utf32_to_utf16be(utf32_input.data(), utf32_input.size(),
3280 utf16_output.data());
// convert_utf32_to_utf16_with_errors (pointer overload): declaration only,
// returning `result`; no definition appears in this excerpt.
3302simdutf_warn_unused result convert_utf32_to_utf16_with_errors(
3303 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
// convert_utf32_to_utf16_with_errors (span overload): the SIMDUTF_CPLUSPLUS23
// section calls scalar::utf32_to_utf16::convert_with_errors<endianness::NATIVE>;
// the remaining return forwards the spans' data()/size() to the pointer
// overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
3305simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3306convert_utf32_to_utf16_with_errors(std::span<const char32_t> utf32_input,
3307 std::span<char16_t> utf16_output)
noexcept {
3308 #if SIMDUTF_CPLUSPLUS23
3310 return scalar::utf32_to_utf16::convert_with_errors<endianness::NATIVE>(
3311 utf32_input.data(), utf32_input.size(), utf16_output.data());
3315 return convert_utf32_to_utf16_with_errors(
3316 utf32_input.data(), utf32_input.size(), utf16_output.data());
// convert_utf32_to_utf16le_with_errors (pointer overload): declaration only,
// returning `result`; no definition appears in this excerpt.
3337simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(
3338 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
// convert_utf32_to_utf16le_with_errors (span overload): the
// SIMDUTF_CPLUSPLUS23 section calls
// scalar::utf32_to_utf16::convert_with_errors<endianness::LITTLE>; the
// remaining return forwards the spans' data()/size() to the pointer overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
3340simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3341convert_utf32_to_utf16le_with_errors(
3342 std::span<const char32_t> utf32_input,
3343 std::span<char16_t> utf16_output)
noexcept {
3344 #if SIMDUTF_CPLUSPLUS23
3346 return scalar::utf32_to_utf16::convert_with_errors<endianness::LITTLE>(
3347 utf32_input.data(), utf32_input.size(), utf16_output.data());
3351 return convert_utf32_to_utf16le_with_errors(
3352 utf32_input.data(), utf32_input.size(), utf16_output.data());
// convert_utf32_to_utf16be_with_errors (pointer overload): declaration only,
// returning `result`; no definition appears in this excerpt.
3373simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(
3374 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
// convert_utf32_to_utf16be_with_errors (span overload): the
// SIMDUTF_CPLUSPLUS23 section calls
// scalar::utf32_to_utf16::convert_with_errors<endianness::BIG>; the remaining
// return forwards the spans' data()/size() to the pointer overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
3376simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
3377convert_utf32_to_utf16be_with_errors(
3378 std::span<const char32_t> utf32_input,
3379 std::span<char16_t> utf16_output)
noexcept {
3380 #if SIMDUTF_CPLUSPLUS23
3382 return scalar::utf32_to_utf16::convert_with_errors<endianness::BIG>(
3383 utf32_input.data(), utf32_input.size(), utf16_output.data());
3387 return convert_utf32_to_utf16be_with_errors(
3388 utf32_input.data(), utf32_input.size(), utf16_output.data());
// convert_valid_utf32_to_utf16 (pointer overload): declaration only; no
// definition appears in this excerpt.
3406simdutf_warn_unused
size_t convert_valid_utf32_to_utf16(
3407 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
// convert_valid_utf32_to_utf16 (span overload): the SIMDUTF_CPLUSPLUS23
// section calls scalar::utf32_to_utf16::convert_valid<endianness::NATIVE>; the
// remaining return forwards the spans' data()/size() to the pointer overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
3409simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3410convert_valid_utf32_to_utf16(std::span<const char32_t> valid_utf32_input,
3411 std::span<char16_t> utf16_output)
noexcept {
3413 #if SIMDUTF_CPLUSPLUS23
3415 return scalar::utf32_to_utf16::convert_valid<endianness::NATIVE>(
3416 valid_utf32_input.data(), valid_utf32_input.size(),
3417 utf16_output.data());
3421 return convert_valid_utf32_to_utf16(valid_utf32_input.data(),
3422 valid_utf32_input.size(),
3423 utf16_output.data());
// convert_valid_utf32_to_utf16le (pointer overload): declaration only; no
// definition appears in this excerpt.
3441simdutf_warn_unused
size_t convert_valid_utf32_to_utf16le(
3442 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
// convert_valid_utf32_to_utf16le (span overload): the SIMDUTF_CPLUSPLUS23
// section calls scalar::utf32_to_utf16::convert_valid<endianness::LITTLE>; the
// remaining return forwards the spans' data()/size() to the pointer overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
3444simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3445convert_valid_utf32_to_utf16le(std::span<const char32_t> valid_utf32_input,
3446 std::span<char16_t> utf16_output)
noexcept {
3447 #if SIMDUTF_CPLUSPLUS23
3449 return scalar::utf32_to_utf16::convert_valid<endianness::LITTLE>(
3450 valid_utf32_input.data(), valid_utf32_input.size(),
3451 utf16_output.data());
3455 return convert_valid_utf32_to_utf16le(valid_utf32_input.data(),
3456 valid_utf32_input.size(),
3457 utf16_output.data());
// convert_valid_utf32_to_utf16be (pointer overload): declaration only; no
// definition appears in this excerpt.
3475simdutf_warn_unused
size_t convert_valid_utf32_to_utf16be(
3476 const char32_t *input,
size_t length,
char16_t *utf16_buffer)
noexcept;
// convert_valid_utf32_to_utf16be (span overload): the SIMDUTF_CPLUSPLUS23
// section calls scalar::utf32_to_utf16::convert_valid<endianness::BIG>; the
// remaining return forwards the spans' data()/size() to the pointer overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
3478simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3479convert_valid_utf32_to_utf16be(std::span<const char32_t> valid_utf32_input,
3480 std::span<char16_t> utf16_output)
noexcept {
3481 #if SIMDUTF_CPLUSPLUS23
3483 return scalar::utf32_to_utf16::convert_valid<endianness::BIG>(
3484 valid_utf32_input.data(), valid_utf32_input.size(),
3485 utf16_output.data());
3489 return convert_valid_utf32_to_utf16be(valid_utf32_input.data(),
3490 valid_utf32_input.size(),
3491 utf16_output.data());
3497#if SIMDUTF_FEATURE_UTF16
// change_endianness_utf16 (pointer overload): declaration only, returning
// void; no definition appears in this excerpt.
3511void change_endianness_utf16(
const char16_t *input,
size_t length,
3512 char16_t *output)
noexcept;
// change_endianness_utf16 (span overload): the SIMDUTF_CPLUSPLUS23 section
// calls scalar::utf16::change_endianness_utf16; the remaining return forwards
// the spans' data()/size() to the pointer overload. Both are `return` of a
// void call, and the output span's size() is not passed to either.
// NOTE(review): the lines choosing between the two returns are not visible here.
3514simdutf_really_inline simdutf_constexpr23
void
3515change_endianness_utf16(std::span<const char16_t> utf16_input,
3516 std::span<char16_t> utf16_output)
noexcept {
3517 #if SIMDUTF_CPLUSPLUS23
3519 return scalar::utf16::change_endianness_utf16(
3520 utf16_input.data(), utf16_input.size(), utf16_output.data());
3524 return change_endianness_utf16(utf16_input.data(), utf16_input.size(),
3525 utf16_output.data());
3531#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
// utf8_length_from_utf32 (pointer overload): declaration only; no definition
// appears in this excerpt.
3543simdutf_warn_unused
size_t utf8_length_from_utf32(
const char32_t *input,
3544 size_t length)
noexcept;
// utf8_length_from_utf32 (span overload): the SIMDUTF_CPLUSPLUS23 section
// calls scalar::utf32::utf8_length_from_utf32 on data()/size(); the remaining
// return forwards data()/size() to the pointer overload.
// NOTE(review): the lines choosing between the two returns are not visible here.
3546simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3547utf8_length_from_utf32(std::span<const char32_t> valid_utf32_input)
noexcept {
3548 #if SIMDUTF_CPLUSPLUS23
3550 return scalar::utf32::utf8_length_from_utf32(valid_utf32_input.data(),
3551 valid_utf32_input.size());
3555 return utf8_length_from_utf32(valid_utf32_input.data(),
3556 valid_utf32_input.size());
3562#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
/**
 * Pointer/length overload of utf16_length_from_utf32. Declaration only.
 *
 * @param input  read-only char32_t buffer
 * @param length size paired with `input` (presumably a count of char32_t
 *               elements -- TODO confirm)
 * @return size_t tagged simdutf_warn_unused (going by the name, presumably
 *         the UTF-16 size of the input -- TODO confirm)
 */
3574simdutf_warn_unused
size_t utf16_length_from_utf32(
const char32_t *input,
3575 size_t length)
noexcept;
/**
 * std::span overload of utf16_length_from_utf32.
 *
 * Both visible returns pass the span's data() and size().
 * NOTE(review): the lines that select between the two returns and close the
 * function are absent from this listing.
 */
3577simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3578utf16_length_from_utf32(std::span<const char32_t> valid_utf32_input)
noexcept {
3579 #if SIMDUTF_CPLUSPLUS23
// Listed under the C++23 guard: the scalar routine of the same name.
3581 return scalar::utf32::utf16_length_from_utf32(valid_utf32_input.data(),
3582 valid_utf32_input.size());
// Other visible return: forward to the pointer/length overload.
3586 return utf16_length_from_utf32(valid_utf32_input.data(),
3587 valid_utf32_input.size());
/**
 * Pointer/length overload of utf32_length_from_utf16. Declaration only.
 *
 * @param input  read-only char16_t buffer
 * @param length size paired with `input` (presumably a count of char16_t
 *               elements -- TODO confirm)
 * @return size_t tagged simdutf_warn_unused (going by the name, presumably
 *         the UTF-32 size of the input -- TODO confirm)
 */
3607simdutf_warn_unused
size_t utf32_length_from_utf16(
const char16_t *input,
3608 size_t length)
noexcept;
/**
 * std::span overload of utf32_length_from_utf16.
 *
 * Both visible returns pass the span's data() and size().
 * NOTE(review): the lines that select between the two returns and close the
 * function are absent from this listing.
 */
3610simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3611utf32_length_from_utf16(std::span<const char16_t> valid_utf16_input)
noexcept {
3612 #if SIMDUTF_CPLUSPLUS23
// Listed under the C++23 guard: scalar routine instantiated for
// endianness::NATIVE.
3614 return scalar::utf16::utf32_length_from_utf16<endianness::NATIVE>(
3615 valid_utf16_input.data(), valid_utf16_input.size());
// Other visible return: forward to the pointer/length overload.
3619 return utf32_length_from_utf16(valid_utf16_input.data(),
3620 valid_utf16_input.size());
/**
 * Pointer/length overload of utf32_length_from_utf16le. Declaration only.
 *
 * @param input  read-only char16_t buffer
 * @param length size paired with `input` (presumably a count of char16_t
 *               elements -- TODO confirm)
 * @return size_t tagged simdutf_warn_unused (going by the name, presumably
 *         the UTF-32 size of little-endian UTF-16 input -- TODO confirm)
 */
3640simdutf_warn_unused
size_t utf32_length_from_utf16le(
const char16_t *input,
3641 size_t length)
noexcept;
/**
 * std::span overload of utf32_length_from_utf16le.
 *
 * Both visible returns pass the span's data() and size().
 * NOTE(review): the lines that select between the two returns and close the
 * function are absent from this listing.
 */
3643simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3644utf32_length_from_utf16le(
3645 std::span<const char16_t> valid_utf16_input)
noexcept {
3646 #if SIMDUTF_CPLUSPLUS23
// Listed under the C++23 guard: scalar routine instantiated for
// endianness::LITTLE.
3648 return scalar::utf16::utf32_length_from_utf16<endianness::LITTLE>(
3649 valid_utf16_input.data(), valid_utf16_input.size());
// Other visible return: forward to the pointer/length overload.
3653 return utf32_length_from_utf16le(valid_utf16_input.data(),
3654 valid_utf16_input.size());
/**
 * Pointer/length overload of utf32_length_from_utf16be. Declaration only.
 *
 * @param input  read-only char16_t buffer
 * @param length size paired with `input` (presumably a count of char16_t
 *               elements -- TODO confirm)
 * @return size_t tagged simdutf_warn_unused (going by the name, presumably
 *         the UTF-32 size of big-endian UTF-16 input -- TODO confirm)
 */
3674simdutf_warn_unused
size_t utf32_length_from_utf16be(
const char16_t *input,
3675 size_t length)
noexcept;
/**
 * std::span overload of utf32_length_from_utf16be.
 *
 * Both visible returns pass the span's data() and size().
 * NOTE(review): the lines that select between the two returns and close the
 * function are absent from this listing.
 */
3677simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3678utf32_length_from_utf16be(
3679 std::span<const char16_t> valid_utf16_input)
noexcept {
3680 #if SIMDUTF_CPLUSPLUS23
// Listed under the C++23 guard: scalar routine instantiated for
// endianness::BIG.
3682 return scalar::utf16::utf32_length_from_utf16<endianness::BIG>(
3683 valid_utf16_input.data(), valid_utf16_input.size());
// Other visible return: forward to the pointer/length overload.
3687 return utf32_length_from_utf16be(valid_utf16_input.data(),
3688 valid_utf16_input.size());
3694#if SIMDUTF_FEATURE_UTF16
/**
 * Pointer/length overload of count_utf16. Declaration only.
 *
 * @param input  read-only char16_t buffer
 * @param length size paired with `input` (presumably a count of char16_t
 *               elements -- TODO confirm)
 * @return size_t tagged simdutf_warn_unused (presumably a code-point count,
 *         native byte order -- TODO confirm against the definition)
 */
3709simdutf_warn_unused
size_t count_utf16(
const char16_t *input,
3710 size_t length)
noexcept;
/**
 * std::span overload of count_utf16.
 *
 * Both visible returns pass the span's data() and size().
 * NOTE(review): the lines that select between the two returns and close the
 * function are absent from this listing.
 */
3712simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3713count_utf16(std::span<const char16_t> valid_utf16_input)
noexcept {
3714 #if SIMDUTF_CPLUSPLUS23
// Listed under the C++23 guard: scalar code-point counter for
// endianness::NATIVE.
3716 return scalar::utf16::count_code_points<endianness::NATIVE>(
3717 valid_utf16_input.data(), valid_utf16_input.size());
// Other visible return: forward to the pointer/length overload.
3721 return count_utf16(valid_utf16_input.data(), valid_utf16_input.size());
/**
 * Pointer/length overload of count_utf16le. Declaration only.
 *
 * @param input  read-only char16_t buffer
 * @param length size paired with `input` (presumably a count of char16_t
 *               elements -- TODO confirm)
 * @return size_t tagged simdutf_warn_unused (presumably a code-point count
 *         for little-endian input -- TODO confirm against the definition)
 */
3740simdutf_warn_unused
size_t count_utf16le(
const char16_t *input,
3741 size_t length)
noexcept;
/**
 * std::span overload of count_utf16le.
 *
 * Both visible returns pass the span's data() and size().
 * NOTE(review): the lines that select between the two returns and close the
 * function are absent from this listing.
 */
3743simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3744count_utf16le(std::span<const char16_t> valid_utf16_input)
noexcept {
3745 #if SIMDUTF_CPLUSPLUS23
// Listed under the C++23 guard: scalar code-point counter for
// endianness::LITTLE.
3747 return scalar::utf16::count_code_points<endianness::LITTLE>(
3748 valid_utf16_input.data(), valid_utf16_input.size());
// Other visible return: forward to the pointer/length overload.
3752 return count_utf16le(valid_utf16_input.data(), valid_utf16_input.size());
/**
 * Pointer/length overload of count_utf16be. Declaration only.
 *
 * @param input  read-only char16_t buffer
 * @param length size paired with `input` (presumably a count of char16_t
 *               elements -- TODO confirm)
 * @return size_t tagged simdutf_warn_unused (presumably a code-point count
 *         for big-endian input -- TODO confirm against the definition)
 */
3771simdutf_warn_unused
size_t count_utf16be(
const char16_t *input,
3772 size_t length)
noexcept;
/**
 * std::span overload of count_utf16be.
 *
 * Both visible returns pass the span's data() and size().
 * NOTE(review): the lines that select between the two returns and close the
 * function are absent from this listing.
 */
3774simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3775count_utf16be(std::span<const char16_t> valid_utf16_input)
noexcept {
3776 #if SIMDUTF_CPLUSPLUS23
// Listed under the C++23 guard: scalar code-point counter for
// endianness::BIG.
3778 return scalar::utf16::count_code_points<endianness::BIG>(
3779 valid_utf16_input.data(), valid_utf16_input.size());
// Other visible return: forward to the pointer/length overload.
3783 return count_utf16be(valid_utf16_input.data(), valid_utf16_input.size());
3789#if SIMDUTF_FEATURE_UTF8
/**
 * Pointer/length overload of count_utf8. Declaration only.
 *
 * @param input  read-only char buffer
 * @param length size paired with `input` (presumably in bytes -- TODO
 *               confirm)
 * @return size_t tagged simdutf_warn_unused (presumably a code-point count
 *         -- TODO confirm against the definition)
 */
3802simdutf_warn_unused
size_t count_utf8(
const char *input,
3803 size_t length)
noexcept;
/**
 * Overload of count_utf8 for any type that satisfies
 * detail::input_span_of_byte_like.
 *
 * Two returns are visible: one hands data()/size() to
 * scalar::utf8::count_code_points unchanged, the other reinterpret_casts
 * data() to `const char *` and calls the pointer/length overload.
 * NOTE(review): the lines that select between the two returns and close the
 * function are absent from this listing.
 */
3805simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t count_utf8(
3806 const detail::input_span_of_byte_like
auto &valid_utf8_input)
noexcept {
3807 #if SIMDUTF_CPLUSPLUS23
// Listed under the C++23 guard: scalar counter, element type passed as-is.
3809 return scalar::utf8::count_code_points(valid_utf8_input.data(),
3810 valid_utf8_input.size());
// Other visible return: cast to const char * for the pointer overload.
3814 return count_utf8(
reinterpret_cast<const char *
>(valid_utf8_input.data()),
3815 valid_utf8_input.size());
/**
 * Pointer/length overload of trim_partial_utf8. Declaration only. Unlike the
 * neighbouring declarations, no noexcept is visible on this one.
 *
 * @param input  read-only char buffer
 * @param length size paired with `input` (presumably in bytes -- TODO
 *               confirm)
 * @return size_t tagged simdutf_warn_unused (going by the name, presumably a
 *         length that excludes a trailing incomplete sequence -- TODO
 *         confirm)
 */
3834simdutf_warn_unused
size_t trim_partial_utf8(
const char *input,
size_t length);
/**
 * Overload for any type that satisfies detail::input_span_of_byte_like.
 * NOTE(review): the line carrying the function name is absent from this
 * listing; the pointer-overload call below suggests it is trim_partial_utf8.
 *
 * Two returns are visible: one hands data()/size() to
 * scalar::utf8::trim_partial_utf8 unchanged, the other reinterpret_casts
 * data() to `const char *` and calls the pointer/length overload. The lines
 * that select between them and close the function are also absent.
 */
3836simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3838 const detail::input_span_of_byte_like
auto &valid_utf8_input)
noexcept {
3839 #if SIMDUTF_CPLUSPLUS23
// Listed under the C++23 guard: the scalar routine of the same name.
3841 return scalar::utf8::trim_partial_utf8(valid_utf8_input.data(),
3842 valid_utf8_input.size());
// Other visible return: cast to const char * for the pointer overload.
3846 return trim_partial_utf8(
3847 reinterpret_cast<const char *
>(valid_utf8_input.data()),
3848 valid_utf8_input.size());
3854#if SIMDUTF_FEATURE_UTF16
/**
 * Start of the pointer overload declaration of trim_partial_utf16be: returns
 * size_t, tagged simdutf_warn_unused, first parameter a read-only char16_t
 * buffer.
 * NOTE(review): the rest of the parameter list and the terminating `;` are
 * absent from this listing.
 */
3869simdutf_warn_unused
size_t trim_partial_utf16be(
const char16_t *input,
/**
 * std::span overload of trim_partial_utf16be.
 *
 * Both visible returns pass the span's data() and size().
 * NOTE(review): the lines that select between the two returns and close the
 * function are absent from this listing.
 */
3872simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3873trim_partial_utf16be(std::span<const char16_t> valid_utf16_input)
noexcept {
3874 #if SIMDUTF_CPLUSPLUS23
// Listed under the C++23 guard: scalar routine instantiated for
// endianness::BIG.
3876 return scalar::utf16::trim_partial_utf16<endianness::BIG>(
3877 valid_utf16_input.data(), valid_utf16_input.size());
// Other visible return: forward to the pointer/length overload.
3881 return trim_partial_utf16be(valid_utf16_input.data(),
3882 valid_utf16_input.size());
/**
 * Start of the pointer overload declaration of trim_partial_utf16le: returns
 * size_t, tagged simdutf_warn_unused, first parameter a read-only char16_t
 * buffer.
 * NOTE(review): the rest of the parameter list and the terminating `;` are
 * absent from this listing.
 */
3901simdutf_warn_unused
size_t trim_partial_utf16le(
const char16_t *input,
/**
 * std::span overload of trim_partial_utf16le.
 *
 * Both visible returns pass the span's data() and size().
 * NOTE(review): the lines that select between the two returns and close the
 * function are absent from this listing.
 */
3904simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3905trim_partial_utf16le(std::span<const char16_t> valid_utf16_input)
noexcept {
3906 #if SIMDUTF_CPLUSPLUS23
// Listed under the C++23 guard: scalar routine instantiated for
// endianness::LITTLE.
3908 return scalar::utf16::trim_partial_utf16<endianness::LITTLE>(
3909 valid_utf16_input.data(), valid_utf16_input.size());
// Other visible return: forward to the pointer/length overload.
3913 return trim_partial_utf16le(valid_utf16_input.data(),
3914 valid_utf16_input.size());
/**
 * Start of the pointer overload declaration of trim_partial_utf16: returns
 * size_t, tagged simdutf_warn_unused, first parameter a read-only char16_t
 * buffer.
 * NOTE(review): the rest of the parameter list and the terminating `;` are
 * absent from this listing.
 */
3933simdutf_warn_unused
size_t trim_partial_utf16(
const char16_t *input,
/**
 * std::span overload of trim_partial_utf16.
 *
 * Both visible returns pass the span's data() and size().
 * NOTE(review): the lines that select between the two returns and close the
 * function are absent from this listing.
 */
3936simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
3937trim_partial_utf16(std::span<const char16_t> valid_utf16_input)
noexcept {
3938 #if SIMDUTF_CPLUSPLUS23
// Listed under the C++23 guard: scalar routine instantiated for
// endianness::NATIVE.
3940 return scalar::utf16::trim_partial_utf16<endianness::NATIVE>(
3941 valid_utf16_input.data(), valid_utf16_input.size());
// Other visible return: forward to the pointer/length overload.
3945 return trim_partial_utf16(valid_utf16_input.data(),
3946 valid_utf16_input.size());
// When the base64, UTF-16 or encoding-detection feature is enabled, define
// SIMDUTF_NEED_TRAILING_ZEROES as 1 unless it has already been defined.
// NOTE(review): the matching #endif lines are absent from this listing.
3952#if SIMDUTF_FEATURE_BASE64 || SIMDUTF_FEATURE_UTF16 || \
3953 SIMDUTF_FEATURE_DETECT_ENCODING
3954 #ifndef SIMDUTF_NEED_TRAILING_ZEROES
3955 #define SIMDUTF_NEED_TRAILING_ZEROES 1
3960#if SIMDUTF_FEATURE_BASE64
// uint64_t constant that the base64_options enumerators below combine with
// other values. NOTE(review): its initializer is absent from this listing.
3965constexpr uint64_t base64_reverse_padding =
/**
 * Unscoped enumeration of base64 options with underlying type uint64_t.
 * NOTE(review): several enumerators and most initializers are absent from
 * this listing, so the numeric values cannot be documented from here.
 */
3967enum base64_options : uint64_t {
3970 base64_default_no_padding =
3972 base64_reverse_padding,
// Visible in full: base64_url OR-ed with base64_reverse_padding.
3973 base64_url_with_padding =
3974 base64_url | base64_reverse_padding,
3975 base64_default_accept_garbage =
3978 base64_url_accept_garbage =
3981 base64_default_or_url =
3983 base64_default_or_url_accept_garbage =
/**
 * Unscoped enumeration with underlying type uint64_t; it is used as the type
 * of the `last_chunk_options` parameters of the base64 decoders in this file.
 * NOTE(review): only the name of one enumerator, stop_before_partial, is
 * visible here; the other enumerators and all values are absent.
 */
3992enum last_chunk_handling_options : uint64_t {
3996 stop_before_partial =
/**
 * Tells whether `options` is one of the two "partial" modes.
 *
 * @param options last-chunk handling mode to test
 * @return true when `options` equals stop_before_partial or
 *         only_full_chunks, false otherwise
 */
4002inline simdutf_constexpr23
bool
4003is_partial(last_chunk_handling_options options) {
4004 return (options == stop_before_partial) || (options == only_full_chunks);
/**
 * Declaration of find over a [start, end) range of char.
 * NOTE(review): the inline find in this file calls detail::find, so this is
 * presumably declared inside namespace detail -- the namespace line is not
 * visible here.
 *
 * @param start     beginning of the range
 * @param end       end of the range
 * @param character value to look for
 * @return pointer into the range, tagged simdutf_warn_unused (presumably the
 *         first match, or `end` when there is none -- TODO confirm)
 */
4008simdutf_warn_unused
const char *find(
const char *start,
const char *end,
4009 char character)
noexcept;
/**
 * Declaration of find over a [start, end) range of char16_t.
 * NOTE(review): presumably declared inside namespace detail, like its char
 * sibling -- the namespace line is not visible here.
 *
 * @param start     beginning of the range
 * @param end       end of the range
 * @param character value to look for
 * @return pointer into the range, tagged simdutf_warn_unused (presumably the
 *         first match, or `end` when there is none -- TODO confirm)
 */
4010simdutf_warn_unused
const char16_t *
4011find(
const char16_t *start,
const char16_t *end,
char16_t character)
noexcept;
/**
 * Inline find over a [start, end) range of char.
 *
 * The visible code contains a forward scan that compares each element with
 * `character`, and a call that delegates to detail::find.
 * NOTE(review): the return statements of the scan, the lines that choose
 * between scan and delegation, and the closing braces are absent from this
 * listing.
 */
4024simdutf_warn_unused simdutf_really_inline simdutf_constexpr23
const char *
4025find(
const char *start,
const char *end,
char character)
noexcept {
4026 #if SIMDUTF_CPLUSPLUS23
// Listed under the C++23 guard: element-by-element scan.
4028 for (; start != end; ++start)
4029 if (*start == character)
// Other visible return: delegate to detail::find.
4035 return detail::find(start, end, character);
/**
 * Inline find over a [start, end) range of char16_t.
 *
 * The visible code contains a forward scan that compares each element with
 * `character`, and a call that delegates to detail::find.
 * NOTE(review): the return statements of the scan, the lines that choose
 * between scan and delegation, and the closing braces are absent from this
 * listing.
 */
4038simdutf_warn_unused simdutf_really_inline simdutf_constexpr23
const char16_t *
4039find(
const char16_t *start,
const char16_t *end,
char16_t character)
noexcept {
4042 #if SIMDUTF_CPLUSPLUS23
// Listed under the C++23 guard: element-by-element scan.
4044 for (; start != end; ++start)
4045 if (*start == character)
// Other visible return: delegate to detail::find.
4051 return detail::find(start, end, character);
4056 #include <simdutf/base64_tables.h>
4057 #include <simdutf/scalar/base64.h>
/**
 * Maps a base64_options value to a std::string_view; every visible case
 * returns the enumerator's own spelling. Guarded by SIMDUTF_CPLUSPLUS17.
 * NOTE(review): the switch header, the label in front of the "base64_url"
 * return, whatever is returned for unlisted values, and the closing lines
 * are absent from this listing.
 */
4061 #if SIMDUTF_CPLUSPLUS17
4062inline std::string_view to_string(base64_options options) {
4064 case base64_default:
4065 return "base64_default";
4067 return "base64_url";
4068 case base64_reverse_padding:
4069 return "base64_reverse_padding";
4070 case base64_url_with_padding:
4071 return "base64_url_with_padding";
4072 case base64_default_accept_garbage:
4073 return "base64_default_accept_garbage";
4074 case base64_url_accept_garbage:
4075 return "base64_url_accept_garbage";
4076 case base64_default_or_url:
4077 return "base64_default_or_url";
4078 case base64_default_or_url_accept_garbage:
4079 return "base64_default_or_url_accept_garbage";
/**
 * Maps a last_chunk_handling_options value to a std::string_view; the two
 * visible cases return the enumerator's own spelling. Guarded by
 * SIMDUTF_CPLUSPLUS17.
 * NOTE(review): the switch header, the remaining cases and the closing lines
 * are absent from this listing.
 */
4085 #if SIMDUTF_CPLUSPLUS17
4086inline std::string_view to_string(last_chunk_handling_options options) {
4092 case stop_before_partial:
4093 return "stop_before_partial";
4094 case only_full_chunks:
4095 return "only_full_chunks";
/**
 * Pointer/length overload of maximal_binary_length_from_base64 for char
 * input. Declaration only.
 *
 * @param input  read-only char buffer
 * @param length size paired with `input` (presumably in bytes -- TODO
 *               confirm)
 * @return size_t tagged simdutf_warn_unused (going by the name, presumably an
 *         upper bound on the decoded size -- TODO confirm)
 */
4114simdutf_warn_unused
size_t
4115maximal_binary_length_from_base64(
const char *input,
size_t length)
noexcept;
/**
 * Overload of maximal_binary_length_from_base64 for any type that satisfies
 * detail::input_span_of_byte_like.
 *
 * Two returns are visible: one converts data() with
 * detail::constexpr_cast_ptr<uint8_t> for the scalar routine, the other
 * reinterpret_casts data() to `const char *` for the pointer overload.
 * NOTE(review): the lines that select between the two returns and close the
 * function are absent from this listing.
 */
4117simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
4118maximal_binary_length_from_base64(
4119 const detail::input_span_of_byte_like
auto &input)
noexcept {
4120 #if SIMDUTF_CPLUSPLUS23
// Listed under the C++23 guard: the scalar routine of the same name.
4122 return scalar::base64::maximal_binary_length_from_base64(
4123 detail::constexpr_cast_ptr<uint8_t>(input.data()), input.size());
// Other visible return: cast to const char * for the pointer overload.
4127 return maximal_binary_length_from_base64(
4128 reinterpret_cast<const char *
>(input.data()), input.size());
/**
 * Pointer/length overload of maximal_binary_length_from_base64 for char16_t
 * input. Declaration only.
 *
 * @param input  read-only char16_t buffer
 * @param length size paired with `input` (presumably a count of char16_t
 *               elements -- TODO confirm)
 * @return size_t tagged simdutf_warn_unused (going by the name, presumably an
 *         upper bound on the decoded size -- TODO confirm)
 */
4147simdutf_warn_unused
size_t maximal_binary_length_from_base64(
4148 const char16_t *input,
size_t length)
noexcept;
/**
 * std::span<const char16_t> overload of maximal_binary_length_from_base64.
 *
 * Two returns are visible: one to the scalar routine of the same name, one
 * to the pointer/length overload.
 * NOTE(review): the tail of the scalar call, the lines that select between
 * the two returns, and the closing braces are absent from this listing.
 */
4150simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
4151maximal_binary_length_from_base64(std::span<const char16_t> input)
noexcept {
4152 #if SIMDUTF_CPLUSPLUS23
// Listed under the C++23 guard: the scalar routine (call is cut short here).
4154 return scalar::base64::maximal_binary_length_from_base64(input.data(),
// Other visible return: forward to the pointer/length overload.
4159 return maximal_binary_length_from_base64(input.data(), input.size());
/**
 * Pointer/length overload of base64_to_binary for char input. Declaration
 * only.
 *
 * @param input              read-only char buffer
 * @param length             size paired with `input`
 * @param output             writable char destination
 * @param options            defaults to base64_default
 * @param last_chunk_options defaults to loose
 * @return a `result` tagged simdutf_warn_unused; its fields are defined
 *         outside this listing
 */
4218simdutf_warn_unused result base64_to_binary(
4219 const char *input,
size_t length,
char *output,
4220 base64_options options = base64_default,
4221 last_chunk_handling_options last_chunk_options = loose)
noexcept;
/**
 * Overload taking a byte-like input span and a byte-like output span.
 * NOTE(review): the line carrying the function name is absent from this
 * listing; the pointer-overload call below suggests it is base64_to_binary.
 *
 * Two returns are visible: one to
 * scalar::base64::base64_to_binary_details_impl with the spans' data()
 * passed unchanged, one to the pointer overload with both data() pointers
 * reinterpret_cast to char pointers. binary_output.size() is not read in the
 * visible code. The lines that select between the returns, one argument line
 * of the second call, and the closing braces are also absent.
 */
4223simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
4225 const detail::input_span_of_byte_like
auto &input,
4226 detail::output_span_of_byte_like
auto &&binary_output,
4227 base64_options options = base64_default,
4228 last_chunk_handling_options last_chunk_options = loose)
noexcept {
4229 #if SIMDUTF_CPLUSPLUS23
// Listed under the C++23 guard: the scalar decoder.
4231 return scalar::base64::base64_to_binary_details_impl(
4232 input.data(), input.size(), binary_output.data(), options,
4233 last_chunk_options);
// Other visible return: cast to char pointers for the pointer overload.
4237 return base64_to_binary(
reinterpret_cast<const char *
>(input.data()),
4239 reinterpret_cast<char *
>(binary_output.data()),
4240 options, last_chunk_options);
/**
 * Thin wrapper: returns
 * scalar::base64::base64_length_from_binary(length, options).
 *
 * @param length  forwarded unchanged (going by the name, presumably the size
 *                of the binary input -- TODO confirm)
 * @param options forwarded unchanged; defaults to base64_default
 */
4251inline simdutf_warn_unused simdutf_constexpr23
size_t base64_length_from_binary(
4252 size_t length, base64_options options = base64_default)
noexcept {
4253 return scalar::base64::base64_length_from_binary(length, options);
/**
 * Thin wrapper around
 * scalar::base64::base64_length_from_binary_with_lines.
 *
 * @param length      forwarded as the first argument
 * @param options     forwarded as the second argument; defaults to
 *                    base64_default
 * @param line_length defaults to default_line_length
 * NOTE(review): the remaining argument(s) of the call and the closing brace
 * are absent from this listing; line_length is presumably forwarded too.
 */
4265inline simdutf_warn_unused simdutf_constexpr23
size_t
4266base64_length_from_binary_with_lines(
4267 size_t length, base64_options options = base64_default,
4268 size_t line_length = default_line_length)
noexcept {
4269 return scalar::base64::base64_length_from_binary_with_lines(length, options,
/**
 * Pointer/length overload of binary_to_base64. Declaration only.
 *
 * @param input   read-only char buffer
 * @param length  size paired with `input`
 * @param output  writable char destination
 * @param options defaults to base64_default
 * @return size_t (presumably the number of characters written -- TODO
 *         confirm against the definition)
 */
4294size_t binary_to_base64(
const char *input,
size_t length,
char *output,
4295 base64_options options = base64_default)
noexcept;
/**
 * Overload of binary_to_base64 taking a byte-like input span and a byte-like
 * output span.
 *
 * Two returns are visible: one to scalar::base64::tail_encode_base64 (output
 * pointer first, then input pointer and size), one to the pointer overload
 * with both data() pointers reinterpret_cast to char pointers.
 * binary_output.size() is not read in the visible code.
 * NOTE(review): the lines that select between the two returns and close the
 * function are absent from this listing.
 */
4297simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
4298binary_to_base64(
const detail::input_span_of_byte_like
auto &input,
4299 detail::output_span_of_byte_like
auto &&binary_output,
4300 base64_options options = base64_default)
noexcept {
4301 #if SIMDUTF_CPLUSPLUS23
// Listed under the C++23 guard: the scalar encoder; note the argument order.
4303 return scalar::base64::tail_encode_base64(
4304 binary_output.data(), input.data(), input.size(), options);
// Other visible return: cast to char pointers for the pointer overload.
4308 return binary_to_base64(
4309 reinterpret_cast<const char *
>(input.data()), input.size(),
4310 reinterpret_cast<char *
>(binary_output.data()), options);
/**
 * Pointer/length overload of binary_to_base64_with_lines. Declaration only.
 * NOTE(review): the line with the return type is absent from this listing.
 *
 * @param input       read-only char buffer
 * @param length      size paired with `input`
 * @param output      writable char destination
 * @param line_length defaults to simdutf::default_line_length
 * @param options     defaults to base64_default
 */
4340binary_to_base64_with_lines(
const char *input,
size_t length,
char *output,
4341 size_t line_length = simdutf::default_line_length,
4342 base64_options options = base64_default)
noexcept;
/**
 * Overload of binary_to_base64_with_lines taking a byte-like input span and
 * a byte-like output span.
 *
 * Two returns are visible: one to
 * scalar::base64::tail_encode_base64_impl<true> (output pointer first, with
 * options before line_length), one to the pointer overload (line_length
 * before options) with both data() pointers reinterpret_cast to char
 * pointers. binary_output.size() is not read in the visible code.
 * NOTE(review): the lines that select between the two returns and close the
 * function are absent from this listing.
 */
4344simdutf_really_inline simdutf_warn_unused simdutf_constexpr23
size_t
4345binary_to_base64_with_lines(
4346 const detail::input_span_of_byte_like
auto &input,
4347 detail::output_span_of_byte_like
auto &&binary_output,
4348 size_t line_length = simdutf::default_line_length,
4349 base64_options options = base64_default)
noexcept {
4350 #if SIMDUTF_CPLUSPLUS23
// Listed under the C++23 guard: the scalar encoder, <true> instantiation.
4352 return scalar::base64::tail_encode_base64_impl<true>(
4353 binary_output.data(), input.data(), input.size(), options, line_length);
// Other visible return: cast to char pointers for the pointer overload.
4357 return binary_to_base64_with_lines(
4358 reinterpret_cast<const char *
>(input.data()), input.size(),
4359 reinterpret_cast<char *
>(binary_output.data()), line_length, options);
4364 #if SIMDUTF_ATOMIC_REF
/**
 * Pointer/length overload of atomic_binary_to_base64. Declaration only; it
 * follows an `#if SIMDUTF_ATOMIC_REF` line in this listing.
 * NOTE(review): the line with the return type is absent from this listing,
 * and how this differs from binary_to_base64 cannot be told from here.
 *
 * @param input   read-only char buffer
 * @param length  size paired with `input`
 * @param output  writable char destination
 * @param options defaults to base64_default
 */
4407atomic_binary_to_base64(
const char *input,
size_t length,
char *output,
4408 base64_options options = base64_default)
noexcept;
/**
 * Overload of atomic_binary_to_base64 taking a byte-like input span and a
 * byte-like output span. Unlike its non-atomic sibling it is not marked
 * simdutf_constexpr23 and shows a single return: both data() pointers are
 * reinterpret_cast to char pointers and handed to the pointer overload.
 * binary_output.size() is not read in the visible code.
 * NOTE(review): the closing brace is absent from this listing.
 */
4410simdutf_really_inline simdutf_warn_unused
size_t
4411atomic_binary_to_base64(
const detail::input_span_of_byte_like
auto &input,
4412 detail::output_span_of_byte_like
auto &&binary_output,
4413 base64_options options = base64_default)
noexcept {
4414 return atomic_binary_to_base64(
4415 reinterpret_cast<const char *
>(input.data()), input.size(),
4416 reinterpret_cast<char *
>(binary_output.data()), options);
/**
 * Pointer/length overload of base64_to_binary for char16_t input.
 * Declaration only.
 *
 * @param input              read-only char16_t buffer
 * @param length             size paired with `input`
 * @param output             writable char destination
 * @param options            defaults to base64_default
 * @param last_chunk_options defaults to last_chunk_handling_options::loose
 * @return a `result` tagged simdutf_warn_unused; its fields are defined
 *         outside this listing
 */
4477simdutf_warn_unused result
4478base64_to_binary(
const char16_t *input,
size_t length,
char *output,
4479 base64_options options = base64_default,
4480 last_chunk_handling_options last_chunk_options =
4481 last_chunk_handling_options::loose)
noexcept;
/**
 * Overload taking std::span<const char16_t> input and a byte-like output
 * span.
 * NOTE(review): the line carrying the function name is absent from this
 * listing; the pointer-overload call below suggests it is base64_to_binary.
 *
 * Two returns are visible: one to
 * scalar::base64::base64_to_binary_details_impl, one to the pointer overload
 * with the output pointer reinterpret_cast to `char *`.
 * binary_output.size() is not read in the visible code. The lines that
 * select between the returns and close the function are also absent.
 */
4483simdutf_really_inline simdutf_warn_unused simdutf_constexpr23 result
4485 std::span<const char16_t> input,
4486 detail::output_span_of_byte_like
auto &&binary_output,
4487 base64_options options = base64_default,
4488 last_chunk_handling_options last_chunk_options = loose)
noexcept {
4489 #if SIMDUTF_CPLUSPLUS23
// Listed under the C++23 guard: the scalar decoder.
4491 return scalar::base64::base64_to_binary_details_impl(
4492 input.data(), input.size(), binary_output.data(), options,
4493 last_chunk_options);
// Other visible return: forward to the pointer/length overload.
4497 return base64_to_binary(input.data(), input.size(),
4498 reinterpret_cast<char *
>(binary_output.data()),
4499 options, last_chunk_options);
/**
 * Thin wrapper: returns scalar::base64::is_ignorable(input, options).
 *
 * @param input   char to classify
 * @param options defaults to base64_default
 */
4514simdutf_warn_unused simdutf_really_inline simdutf_constexpr23
bool
4515base64_ignorable(
char input, base64_options options = base64_default)
noexcept {
4516 return scalar::base64::is_ignorable(input, options);
/**
 * Thin wrapper: returns scalar::base64::is_ignorable(input, options).
 *
 * @param input   char16_t to classify
 * @param options defaults to base64_default
 */
4518simdutf_warn_unused simdutf_really_inline simdutf_constexpr23
bool
4519base64_ignorable(
char16_t input,
4520 base64_options options = base64_default)
noexcept {
4521 return scalar::base64::is_ignorable(input, options);
/**
 * Thin wrapper: returns scalar::base64::is_base64(input, options).
 *
 * @param input   char to classify
 * @param options defaults to base64_default
 */
4535simdutf_warn_unused simdutf_really_inline simdutf_constexpr23
bool
4536base64_valid(
char input, base64_options options = base64_default)
noexcept {
4537 return scalar::base64::is_base64(input, options);
/**
 * Thin wrapper: returns scalar::base64::is_base64(input, options).
 *
 * @param input   char16_t to classify
 * @param options defaults to base64_default
 */
4539simdutf_warn_unused simdutf_really_inline simdutf_constexpr23
bool
4540base64_valid(
char16_t input, base64_options options = base64_default)
noexcept {
4541 return scalar::base64::is_base64(input, options);
/**
 * Thin wrapper: returns
 * scalar::base64::is_base64_or_padding(input, options).
 *
 * @param input   char to classify
 * @param options defaults to base64_default
 */
4553simdutf_warn_unused simdutf_really_inline simdutf_constexpr23
bool
4554base64_valid_or_padding(
char input,
4555 base64_options options = base64_default)
noexcept {
4556 return scalar::base64::is_base64_or_padding(input, options);
/**
 * Thin wrapper: returns
 * scalar::base64::is_base64_or_padding(input, options).
 *
 * @param input   char16_t to classify
 * @param options defaults to base64_default
 */
4558simdutf_warn_unused simdutf_really_inline simdutf_constexpr23
bool
4559base64_valid_or_padding(
char16_t input,
4560 base64_options options = base64_default)
noexcept {
4561 return scalar::base64::is_base64_or_padding(input, options);
/**
 * Pointer/length overload of base64_to_binary_safe for char input.
 * Declaration only.
 *
 * @param input                 read-only char buffer
 * @param length                size paired with `input`
 * @param output                writable char destination
 * @param outlen                passed by non-const reference (presumably the
 *                              output capacity on entry and the bytes
 *                              written on return -- TODO confirm)
 * @param options               defaults to base64_default
 * @param last_chunk_options    defaults to last_chunk_handling_options::loose
 * @param decode_up_to_bad_char defaults to false
 * @return a `result` tagged simdutf_warn_unused
 */
4631simdutf_warn_unused result
4632base64_to_binary_safe(
const char *input,
size_t length,
char *output,
4633 size_t &outlen, base64_options options = base64_default,
4634 last_chunk_handling_options last_chunk_options =
4635 last_chunk_handling_options::loose,
4636 bool decode_up_to_bad_char =
false) noexcept;
/**
 * Pointer/length overload of base64_to_binary_safe for char16_t input.
 * Declaration only.
 *
 * @param input                 read-only char16_t buffer
 * @param length                size paired with `input`
 * @param output                writable char destination
 * @param outlen                passed by non-const reference (presumably the
 *                              output capacity on entry and the bytes
 *                              written on return -- TODO confirm)
 * @param options               defaults to base64_default
 * @param last_chunk_options    defaults to last_chunk_handling_options::loose
 * @param decode_up_to_bad_char defaults to false
 * @return a `result` tagged simdutf_warn_unused
 */
4639simdutf_warn_unused result
4640base64_to_binary_safe(const
char16_t *input,
size_t length,
char *output,
4641 size_t &outlen, base64_options options = base64_default,
4642 last_chunk_handling_options last_chunk_options =
4643 last_chunk_handling_options::loose,
4644 bool decode_up_to_bad_char = false) noexcept;
4647 #if SIMDUTF_ATOMIC_REF
/**
 * Pointer/length overload of atomic_base64_to_binary_safe for char input.
 * Declaration only; it follows an `#if SIMDUTF_ATOMIC_REF` line in this
 * listing. The parameter list matches base64_to_binary_safe.
 *
 * @param outlen passed by non-const reference (presumably capacity in,
 *               bytes written out -- TODO confirm)
 * @return a `result` tagged simdutf_warn_unused
 */
4687simdutf_warn_unused result atomic_base64_to_binary_safe(
4688 const char *input,
size_t length,
char *output,
size_t &outlen,
4689 base64_options options = base64_default,
4690 last_chunk_handling_options last_chunk_options =
4691 last_chunk_handling_options::loose,
4692 bool decode_up_to_bad_char =
false) noexcept;
/**
 * Pointer/length overload of atomic_base64_to_binary_safe for char16_t
 * input. Declaration only. The parameter list matches the char16_t
 * base64_to_binary_safe declaration.
 *
 * @param outlen passed by non-const reference (presumably capacity in,
 *               bytes written out -- TODO confirm)
 * @return a `result` tagged simdutf_warn_unused
 */
4693simdutf_warn_unused result atomic_base64_to_binary_safe(
4694 const
char16_t *input,
size_t length,
char *output,
size_t &outlen,
4695 base64_options options = base64_default,
4696 last_chunk_handling_options last_chunk_options = loose,
4697 bool decode_up_to_bad_char = false) noexcept;
/**
 * Overload of atomic_base64_to_binary_safe taking a byte-like input span and
 * a byte-like output span.
 *
 * Seeds `outlen` with output.size(), calls the pointer overload with both
 * data() pointers reinterpret_cast to char pointers, and returns the pair
 * {result, outlen}. `outlen` is handed over by reference, so the returned
 * value is whatever the pointer overload left in it.
 * NOTE(review): the closing brace is absent from this listing.
 */
4703simdutf_really_inline simdutf_warn_unused std::tuple<result, std::size_t>
4704atomic_base64_to_binary_safe(
4705 const detail::input_span_of_byte_like
auto &binary_input,
4706 detail::output_span_of_byte_like
auto &&output,
4707 base64_options options = base64_default,
4708 last_chunk_handling_options last_chunk_options =
4709 last_chunk_handling_options::loose,
4710 bool decode_up_to_bad_char =
false) noexcept {
// Start from the size of the output span.
4711 size_t outlen = output.size();
4712 auto ret = atomic_base64_to_binary_safe(
4713 reinterpret_cast<const char *
>(binary_input.data()), binary_input.size(),
4714 reinterpret_cast<char *
>(output.data()), outlen, options,
4715 last_chunk_options, decode_up_to_bad_char);
4716 return {ret, outlen};
/**
 * Overload of atomic_base64_to_binary_safe taking std::span<const char16_t>
 * input and a byte-like output span. No simdutf_really_inline is visible on
 * this one, unlike the byte-like sibling.
 *
 * Seeds `outlen` with binary_output.size(), calls the pointer overload with
 * the output pointer reinterpret_cast to `char *`, and returns the pair
 * {result, outlen}. `outlen` is handed over by reference, so the returned
 * value is whatever the pointer overload left in it.
 * NOTE(review): the closing brace is absent from this listing.
 */
4722simdutf_warn_unused std::tuple<result, std::size_t>
4723atomic_base64_to_binary_safe(
4724 std::span<const char16_t> base64_input,
4725 detail::output_span_of_byte_like
auto &&binary_output,
4726 base64_options options = base64_default,
4727 last_chunk_handling_options last_chunk_options = loose,
4728 bool decode_up_to_bad_char =
false) noexcept {
// Start from the size of the output span.
4729 size_t outlen = binary_output.size();
4730 auto ret = atomic_base64_to_binary_safe(
4731 base64_input.data(), base64_input.size(),
4732 reinterpret_cast<char *
>(binary_output.data()), outlen, options,
4733 last_chunk_options, decode_up_to_bad_char);
4734 return {ret, outlen};
// Virtual accessor: returns a std::string built from the `_name` member.
// A new string is constructed on every call.
4759 virtual std::string
name()
const {
return std::string(_name); }
// Virtual accessor: returns a std::string built from the `_description`
// member (a `const char *`). A new string is constructed on every call.
4770 virtual std::string
description()
const {
return std::string(_description); }
4783#if SIMDUTF_FEATURE_DETECT_ENCODING
4791 size_t length)
const noexcept;
4800 size_t length)
const noexcept = 0;
// Virtual accessor: returns the `_required_instruction_sets` member, a
// const uint32_t set by the constructor. NOTE(review): presumably a bit mask
// of instruction-set flags -- the flag values are not visible here. The
// closing brace is absent from this listing.
4810 virtual uint32_t required_instruction_sets()
const {
4811 return _required_instruction_sets;
4814#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
4825 size_t len)
const noexcept = 0;
4828#if SIMDUTF_FEATURE_UTF8
4841 simdutf_warn_unused
virtual result
4845#if SIMDUTF_FEATURE_ASCII
4855 simdutf_warn_unused
virtual bool
4870 simdutf_warn_unused
virtual result
4875#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
4887 simdutf_warn_unused
virtual bool
4901 simdutf_warn_unused
virtual bool
4905#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
4920 simdutf_warn_unused
virtual bool
4924#if SIMDUTF_FEATURE_UTF16
4939 simdutf_warn_unused
virtual bool
4958 simdutf_warn_unused
virtual result
4960 size_t len)
const noexcept = 0;
4978 simdutf_warn_unused
virtual result
4980 size_t len)
const noexcept = 0;
4994 char16_t *output)
const noexcept = 0;
5008 char16_t *output)
const noexcept = 0;
5011#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
5024 simdutf_warn_unused
virtual bool
5028#if SIMDUTF_FEATURE_UTF32
5044 simdutf_warn_unused
virtual result
5046 size_t len)
const noexcept = 0;
5049#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
5060 simdutf_warn_unused
virtual size_t
5062 char *utf8_output)
const noexcept = 0;
5065#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
5076 simdutf_warn_unused
virtual size_t
5078 char16_t *utf16_output)
const noexcept = 0;
5090 simdutf_warn_unused
virtual size_t
5092 char16_t *utf16_output)
const noexcept = 0;
5095#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
5106 simdutf_warn_unused
virtual size_t
5108 char32_t *utf32_buffer)
const noexcept = 0;
5111#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
5124 simdutf_warn_unused
virtual size_t
5126 char *latin1_output)
const noexcept = 0;
5144 simdutf_warn_unused
virtual result
5146 char *latin1_output)
const noexcept = 0;
5167 simdutf_warn_unused
virtual size_t
5169 char *latin1_output)
const noexcept = 0;
5172#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5185 simdutf_warn_unused
virtual size_t
5187 char16_t *utf16_output)
const noexcept = 0;
5201 simdutf_warn_unused
virtual size_t
5203 char16_t *utf16_output)
const noexcept = 0;
5221 const char *input,
size_t length,
5222 char16_t *utf16_output)
const noexcept = 0;
5240 const char *input,
size_t length,
5241 char16_t *utf16_output)
const noexcept = 0;
5262 const char16_t *input,
size_t length)
const noexcept = 0;
5284 const char16_t *input,
size_t length)
const noexcept = 0;
5288#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5301 simdutf_warn_unused
virtual size_t
5303 char32_t *utf32_output)
const noexcept = 0;
5319 simdutf_warn_unused
virtual result
5321 char32_t *utf32_output)
const noexcept = 0;
5324#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5335 simdutf_warn_unused
virtual size_t
5337 char16_t *utf16_buffer)
const noexcept = 0;
5349 simdutf_warn_unused
virtual size_t
5351 char16_t *utf16_buffer)
const noexcept = 0;
5354#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5365 simdutf_warn_unused
virtual size_t
5367 char32_t *utf32_buffer)
const noexcept = 0;
5370#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5383 simdutf_warn_unused
virtual size_t
5387#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5402 simdutf_warn_unused
virtual size_t
5406#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
5423 simdutf_warn_unused
virtual size_t
5425 char *latin1_buffer)
const noexcept = 0;
5443 simdutf_warn_unused
virtual size_t
5445 char *latin1_buffer)
const noexcept = 0;
5466 simdutf_warn_unused
virtual result
5468 char *latin1_buffer)
const noexcept = 0;
5489 simdutf_warn_unused
virtual result
5491 char *latin1_buffer)
const noexcept = 0;
5513 simdutf_warn_unused
virtual size_t
5515 char *latin1_buffer)
const noexcept = 0;
5537 simdutf_warn_unused
virtual size_t
5539 char *latin1_buffer)
const noexcept = 0;
5542#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5558 simdutf_warn_unused
virtual size_t
5560 char *utf8_buffer)
const noexcept = 0;
5577 simdutf_warn_unused
virtual size_t
5579 char *utf8_buffer)
const noexcept = 0;
5599 simdutf_warn_unused
virtual result
5601 char *utf8_buffer)
const noexcept = 0;
5621 simdutf_warn_unused
virtual result
5623 char *utf8_buffer)
const noexcept = 0;
5639 simdutf_warn_unused
virtual size_t
5641 char *utf8_buffer)
const noexcept = 0;
5657 simdutf_warn_unused
virtual size_t
5659 char *utf8_buffer)
const noexcept = 0;
5662#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
5678 simdutf_warn_unused
virtual size_t
5680 char32_t *utf32_buffer)
const noexcept = 0;
5697 simdutf_warn_unused
virtual size_t
5699 char32_t *utf32_buffer)
const noexcept = 0;
5720 const char16_t *input,
size_t length,
5721 char32_t *utf32_buffer)
const noexcept = 0;
5742 const char16_t *input,
size_t length,
5743 char32_t *utf32_buffer)
const noexcept = 0;
5759 simdutf_warn_unused
virtual size_t
5761 char32_t *utf32_buffer)
const noexcept = 0;
5777 simdutf_warn_unused
virtual size_t
5779 char32_t *utf32_buffer)
const noexcept = 0;
5782#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
5797 simdutf_warn_unused
virtual size_t
5799 size_t length)
const noexcept = 0;
5815 simdutf_warn_unused
virtual size_t
5817 size_t length)
const noexcept = 0;
5820#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
5837 simdutf_warn_unused
virtual size_t
5839 char *latin1_buffer)
const noexcept = 0;
5842#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
5862 simdutf_warn_unused
virtual result
5864 char *latin1_buffer)
const noexcept = 0;
5886 simdutf_warn_unused
virtual size_t
5888 char *latin1_buffer)
const noexcept = 0;
5891#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
5907 simdutf_warn_unused
virtual size_t
5909 char *utf8_buffer)
const noexcept = 0;
5928 simdutf_warn_unused
virtual result
5930 char *utf8_buffer)
const noexcept = 0;
5946 simdutf_warn_unused
virtual size_t
5948 char *utf8_buffer)
const noexcept = 0;
5951#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
5962 simdutf_warn_unused
virtual size_t
5968#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
5984 simdutf_warn_unused
virtual size_t
5986 char16_t *utf16_buffer)
const noexcept = 0;
6003 simdutf_warn_unused
virtual size_t
6005 char16_t *utf16_buffer)
const noexcept = 0;
6026 const char32_t *input,
size_t length,
6027 char16_t *utf16_buffer)
const noexcept = 0;
6048 const char32_t *input,
size_t length,
6049 char16_t *utf16_buffer)
const noexcept = 0;
6065 simdutf_warn_unused
virtual size_t
6067 char16_t *utf16_buffer)
const noexcept = 0;
6083 simdutf_warn_unused
virtual size_t
6085 char16_t *utf16_buffer)
const noexcept = 0;
6088#if SIMDUTF_FEATURE_UTF16
6104 char16_t *output)
const noexcept = 0;
6107#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
6116 simdutf_warn_unused
virtual size_t
6120#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
6133 simdutf_warn_unused
virtual size_t
6135 size_t length)
const noexcept = 0;
6138#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6150 simdutf_warn_unused
virtual size_t
6156#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
6168 simdutf_warn_unused
virtual size_t
6172#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
6188 simdutf_warn_unused
virtual size_t
6194#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
6207 simdutf_warn_unused
virtual size_t
6209 size_t length)
const noexcept = 0;
6212#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
6221 simdutf_warn_unused
virtual size_t
6227#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
6245 simdutf_warn_unused
virtual size_t
6247 size_t length)
const noexcept = 0;
6266 simdutf_warn_unused
virtual size_t
6268 size_t length)
const noexcept = 0;
6271#if SIMDUTF_FEATURE_UTF16
6287 simdutf_warn_unused
virtual size_t
6305 simdutf_warn_unused
virtual size_t
6309#if SIMDUTF_FEATURE_UTF8
6322 simdutf_warn_unused
virtual size_t
6326#if SIMDUTF_FEATURE_BASE64
6341 const char *input,
size_t length)
const noexcept;
6358 const char16_t *input,
size_t length)
const noexcept;
6392 simdutf_warn_unused
virtual result
6394 base64_options options = base64_default,
6395 last_chunk_handling_options last_chunk_options =
6396 last_chunk_handling_options::loose)
const noexcept = 0;
6430 const char *input,
size_t length,
char *output,
6431 base64_options options = base64_default,
6432 last_chunk_handling_options last_chunk_options =
6433 last_chunk_handling_options::loose)
const noexcept = 0;
6468 simdutf_warn_unused
virtual result
6470 base64_options options = base64_default,
6471 last_chunk_handling_options last_chunk_options =
6472 last_chunk_handling_options::loose)
const noexcept = 0;
6506 const char16_t *input,
size_t length,
char *output,
6507 base64_options options = base64_default,
6508 last_chunk_handling_options last_chunk_options =
6509 last_chunk_handling_options::loose)
const noexcept = 0;
6520 size_t length, base64_options options = base64_default)
const noexcept;
6545 base64_options options = base64_default)
const noexcept = 0;
6573 const char *input,
size_t length,
char *output,
6574 size_t line_length = simdutf::default_line_length,
6575 base64_options options = base64_default)
const noexcept = 0;
// Pure virtual: find over a [start, end) range of char; each concrete
// implementation must override it. The tooltip text at the end of this
// listing describes it as finding the first occurrence of a character in a
// string.
6587 virtual const char *
find(
const char *start,
const char *end,
6588 char character)
const noexcept = 0;
// Pure virtual: find over a [start, end) range of char16_t; each concrete
// implementation must override it. NOTE(review): presumably the char16_t
// counterpart of the char overload (first occurrence) -- TODO confirm.
6589 virtual const char16_t *
find(
const char16_t *start,
const char16_t *end,
6590 char16_t character)
const noexcept = 0;
6593#ifdef SIMDUTF_INTERNAL_TESTS
6602 struct TestProcedure {
6610 virtual std::vector<TestProcedure> internal_tests()
const;
6618 uint32_t required_instruction_sets)
6620 _required_instruction_sets(required_instruction_sets) {}
6623 ~implementation() =
default;
6634 const char *_description;
6639 const uint32_t _required_instruction_sets;
/**
 * Iterable list of `const implementation *`.
 *
 * size(), begin(), end() and detect_best_supported() are declared here and
 * defined outside this listing. operator[] walks the list with a range-for
 * and compares each entry's name() with the requested name.
 * NOTE(review): what operator[] returns on a match or on no match, the
 * access specifiers, and the closing braces are absent from this listing.
 */
6648class available_implementation_list {
6651 simdutf_really_inline available_implementation_list() {}
6653 size_t size() const noexcept;
6655 const implementation *const *begin() const noexcept;
6657 const implementation *const *end() const noexcept;
// Lookup by name: linear scan; implementation::name() returns a std::string
// by value, so one string is built per entry visited.
6672 const implementation *operator[](const std::
string &name) const noexcept {
6673 for (
const implementation *impl : *this) {
6674 if (impl->name() == name) {
6694 const implementation *detect_best_supported() const noexcept;
/**
 * Pointer wrapper with conversion, dereference, arrow and assignment
 * operators.
 *
 * Two sets of operators are listed. The first, after
 * `#if defined(SIMDUTF_NO_THREADS)`, returns `ptr` directly. The second reads
 * it with ptr.load() (operator* dereferences `*ptr` in both sets). The only
 * member declaration visible is `std::atomic<T *> ptr`.
 * NOTE(review): the #else/#endif lines, the bodies of both operator=, the
 * member used when SIMDUTF_NO_THREADS is defined (presumably a plain `T *`)
 * and the closing braces are absent from this listing.
 */
6697template <typename T> class atomic_ptr {
// Converting constructor (not explicit): stores the given pointer.
6699 atomic_ptr(T *_ptr) : ptr{_ptr} {}
6701#if defined(SIMDUTF_NO_THREADS)
// Operator set that reads `ptr` directly.
6702 operator const T *()
const {
return ptr; }
6703 const T &operator*()
const {
return *ptr; }
6704 const T *operator->()
const {
return ptr; }
6706 operator T *() {
return ptr; }
6707 T &operator*() {
return *ptr; }
6708 T *operator->() {
return ptr; }
6709 atomic_ptr &operator=(T *_ptr) {
// Operator set that reads through ptr.load().
6715 operator const T *()
const {
return ptr.load(); }
6716 const T &operator*()
const {
return *ptr; }
6717 const T *operator->()
const {
return ptr.load(); }
6719 operator T *() {
return ptr.load(); }
6720 T &operator*() {
return *ptr; }
6721 T *operator->() {
return ptr.load(); }
6722 atomic_ptr &operator=(T *_ptr) {
6730#if defined(SIMDUTF_NO_THREADS)
6733 std::atomic<T *> ptr;
// Forward declaration only; the class is defined outside this listing.
6737class detect_best_supported_implementation_on_first_use;
// Exported accessor (declaration only): returns a const reference to an
// internal::available_implementation_list.
6744extern SIMDUTF_DLLIMPORTEXPORT
const internal::available_implementation_list &
6745get_available_implementations();
// Exported accessor (declaration only): returns a non-const reference to an
// internal::atomic_ptr<const implementation>, so callers can both read the
// pointer and assign a new one through atomic_ptr::operator=.
6753extern SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation> &
6754get_active_implementation();
6758#if SIMDUTF_FEATURE_BASE64
6760 #include <simdutf/base64_implementation.h>
/**
 * Overload of base64_to_binary_safe taking a byte-like input span and a
 * byte-like output span; returns std::tuple<result, std::size_t>.
 *
 * `outlen` starts as binary_output.size() and is passed on to
 * base64_to_binary_safe_impl. Two calls are visible:
 *  - after `#if SIMDUTF_CPLUSPLUS23`: static_asserts require both element
 *    types to be exactly `char`, then the spans' data() pointers are passed
 *    unchanged;
 *  - the other call names base64_to_binary_safe_impl<char> and
 *    reinterpret_casts both data() pointers to char pointers.
 * NOTE(review): the lines that select between the two calls, the return
 * statement(s) and the closing braces are absent from this listing.
 */
6768simdutf_really_inline
6769 simdutf_constexpr23 simdutf_warn_unused std::tuple<result, std::size_t>
6770 base64_to_binary_safe(
6771 const detail::input_span_of_byte_like
auto &input,
6772 detail::output_span_of_byte_like
auto &&binary_output,
6773 base64_options options = base64_default,
6774 last_chunk_handling_options last_chunk_options = loose,
6775 bool decode_up_to_bad_char =
false) noexcept {
// Start from the size of the output span.
6776 size_t outlen = binary_output.size();
6777 #if SIMDUTF_CPLUSPLUS23
// Element type of the input span, with cv/ref qualifiers removed.
6779 using CInput = std::decay_t<
decltype(*input.data())>;
6780 static_assert(std::is_same_v<CInput, char>,
6781 "sorry, the constexpr implementation is for now limited to "
6782 "input of type char");
// Element type of the output span, with cv/ref qualifiers removed.
6783 using COutput = std::decay_t<
decltype(*binary_output.data())>;
6784 static_assert(std::is_same_v<COutput, char>,
6785 "sorry, the constexpr implementation is for now limited to "
6786 "output of type char");
6787 auto r = base64_to_binary_safe_impl(
6788 input.data(), input.size(), binary_output.data(), outlen, options,
6789 last_chunk_options, decode_up_to_bad_char);
// Other visible call: explicit <char> instantiation on cast pointers.
6794 auto r = base64_to_binary_safe_impl<char>(
6795 reinterpret_cast<const char *
>(input.data()), input.size(),
6796 reinterpret_cast<char *
>(binary_output.data()), outlen, options,
6797 last_chunk_options, decode_up_to_bad_char);
/**
 * Overload of base64_to_binary_safe taking std::span<const char16_t> input
 * and a byte-like output span; returns std::tuple<result, std::size_t>.
 *
 * `outlen` starts as binary_output.size(). Two calls are visible: one to
 * base64_to_binary_safe_impl (after `#if SIMDUTF_CPLUSPLUS23`) with data()
 * pointers passed unchanged, one to the pointer overload of
 * base64_to_binary_safe with the output pointer reinterpret_cast to
 * `char *`.
 * NOTE(review): the lines that select between the two calls, the return
 * statement(s) and the closing braces are absent from this listing.
 */
6807simdutf_really_inline
6808 simdutf_warn_unused simdutf_constexpr23 std::tuple<result, std::size_t>
6809 base64_to_binary_safe(
6810 std::span<const char16_t> input,
6811 detail::output_span_of_byte_like
auto &&binary_output,
6812 base64_options options = base64_default,
6813 last_chunk_handling_options last_chunk_options = loose,
6814 bool decode_up_to_bad_char =
false) noexcept {
// Start from the size of the output span.
6815 size_t outlen = binary_output.size();
6816 #if SIMDUTF_CPLUSPLUS23
6818 auto r = base64_to_binary_safe_impl(
6819 input.data(), input.size(), binary_output.data(), outlen, options,
6820 last_chunk_options, decode_up_to_bad_char);
// Other visible call: forward to the pointer/length overload.
6825 auto r = base64_to_binary_safe(
6826 input.data(), input.size(),
6827 reinterpret_cast<char *
>(binary_output.data()), outlen, options,
6828 last_chunk_options, decode_up_to_bad_char);
An implementation of simdutf for a particular CPU architecture.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-32 string into Latin1 string.
virtual const char * find(const char *start, const char *end, char character) const noexcept=0
Find the first occurrence of a character in a string.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-16LE string.
virtual simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept
Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
virtual simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, size_t length) const noexcept=0
Compute the number of 4-byte code units that this UTF-16BE string would require in UTF-32 format.
virtual simdutf_warn_unused size_t convert_utf8_to_utf16be(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16BE string.
virtual simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-16BE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-16BE string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_latin1_to_utf16le(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert Latin1 string into UTF-16LE string.
virtual simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16BE string.
virtual simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into Latin1 string and stop on error.
virtual simdutf_warn_unused full_result base64_to_binary_details(const char *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output while returning more details than base64_to_binary.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-16LE string into UTF-32 string.
virtual simdutf_warn_unused size_t latin1_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-8 string would require in Latin1 format.
virtual simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-32 string into Latin1 string and stop on error.
virtual simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
virtual simdutf_warn_unused size_t convert_latin1_to_utf16be(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert Latin1 string into UTF-16BE string.
virtual simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept
Compute the number of bytes that this UTF-32 string would require in Latin1 format.
virtual simdutf_warn_unused full_result base64_to_binary_details(const char16_t *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output while returning more details than base64_to_binary.
virtual simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16LE string.
virtual simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept=0
Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
virtual simdutf_warn_unused size_t count_utf8(const char *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-16BE string.
virtual simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of 4-byte code units that this UTF-8 string would require in UTF-32 format.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-16LE string into UTF-8 string.
virtual void to_well_formed_utf16le(const char16_t *input, size_t len, char16_t *output) const noexcept=0
Copies the UTF-16LE string while replacing mismatched surrogates with the Unicode replacement charact...
virtual simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16BE string and stop on error.
virtual simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept=0
Validate the UTF-8 string and stop on errors.
virtual simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
virtual size_t binary_to_base64_with_lines(const char *input, size_t length, char *output, size_t line_length=simdutf::default_line_length, base64_options options=base64_default) const noexcept=0
Convert a binary input to a base64 output with lines of given length.
virtual simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf8_to_utf16le(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16LE string.
virtual simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into Latin1 string and stop on error.
virtual simdutf_warn_unused result utf8_length_from_utf16le_with_replacement(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16LE string would require in UTF-8 format even when the UTF...
virtual simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16LE string and stop on error.
virtual simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-8 string and stop on error.
virtual size_t binary_to_base64(const char *input, size_t length, char *output, base64_options options=base64_default) const noexcept=0
Convert a binary input to a base64 output.
virtual simdutf_warn_unused result utf8_length_from_utf16be_with_replacement(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16BE string would require in UTF-8 format even when the UTF...
virtual simdutf_warn_unused size_t convert_latin1_to_utf32(const char *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert Latin1 string into UTF-32 string.
virtual simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char *input, size_t length, char32_t *utf32_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-8 string.
virtual simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept
Return the number of 4-byte code units that this Latin1 string would require in UTF-32 format.
virtual simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16LE string. This function may be best when you expect the input to be almost always ...
virtual simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16LE string and stop on error.
virtual simdutf_warn_unused size_t count_utf16le(const char16_t *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-32 string into Latin1 string.
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert valid UTF-16BE string into Latin1 string.
bool supported_by_runtime_system() const
The instruction sets this implementation is compiled against and the current CPU match.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-32 string.
virtual simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into Latin1 string.
virtual simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept=0
Validate the UTF-16BE string and stop on error.
virtual simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-8 string and stop on error.
virtual simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into UTF-32 string and stop on error.
virtual simdutf_warn_unused result base64_to_binary(const char *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output.
virtual simdutf_warn_unused bool validate_utf16be_as_ascii(const char16_t *buf, size_t len) const noexcept=0
Validate the ASCII string as a UTF-16BE sequence.
virtual simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16BE string.
virtual simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept=0
Validate the UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, size_t length) const noexcept=0
Compute the number of 4-byte code units that this UTF-16LE string would require in UTF-32 format.
virtual void change_endianness_utf16(const char16_t *input, size_t length, char16_t *output) const noexcept=0
Change the endianness of the input.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-8 string into UTF-16BE string.
virtual simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert valid UTF-8 string into Latin1 string.
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char *input, size_t length) const noexcept
Provide the maximal binary length in bytes given the base64 input.
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert valid UTF-32 string into UTF-16LE string.
virtual std::string name() const
The name of this implementation.
virtual simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept=0
Validate the ASCII string and stop on error.
virtual simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept=0
Validate the UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf8_to_utf32(const char *input, size_t length, char32_t *utf32_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-32 string.
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char16_t *input, size_t length) const noexcept
Provide the maximal binary length in bytes given the base64 input.
virtual simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept=0
Validate the UTF-32 string.
virtual simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-32 string and stop on error.
virtual simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-16LE string into UTF-8 string.
virtual simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept=0
Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
virtual simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-8 string.
virtual simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
Convert possibly broken UTF-16BE string into Latin1 string.
virtual simdutf_warn_unused size_t convert_utf8_to_latin1(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert possibly broken UTF-8 string into Latin1 string.
virtual simdutf_warn_unused result base64_to_binary(const char16_t *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
Convert a base64 input to a binary output.
simdutf_warn_unused size_t base64_length_from_binary(size_t length, base64_options options=base64_default) const noexcept
Provide the base64 length in bytes given the length of a binary input.
virtual int detect_encodings(const char *input, size_t length) const noexcept=0
This function will try to detect the possible encodings in one pass.
virtual simdutf_warn_unused size_t count_utf16be(const char16_t *input, size_t length) const noexcept=0
Count the number of code points (characters) in the string assuming that it is valid.
virtual simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-8 string and stop on error.
virtual simdutf_warn_unused bool validate_utf16le_as_ascii(const char16_t *buf, size_t len) const noexcept=0
Validate the ASCII string as a UTF-16LE sequence.
virtual void to_well_formed_utf16be(const char16_t *input, size_t len, char16_t *output) const noexcept=0
Copies the UTF-16BE string while replacing mismatched surrogates with the Unicode replacement charact...
virtual simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept=0
Validate the ASCII string.
virtual simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, size_t length) const noexcept=0
Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
virtual simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char *input, size_t length, char16_t *utf16_output) const noexcept=0
Convert possibly broken UTF-8 string into UTF-16BE string and stop on error.
virtual simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char *input, size_t length, char *latin1_output) const noexcept=0
Convert possibly broken UTF-8 string into Latin1 string and stop on error.
virtual std::string description() const
The description of this implementation.
virtual simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept
Return the number of 2-byte code units that this Latin1 string would require in UTF-16 format.
virtual simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
Convert possibly broken UTF-32 string into UTF-16LE string and stop on error.
virtual simdutf_warn_unused size_t convert_latin1_to_utf8(const char *input, size_t length, char *utf8_output) const noexcept=0
Convert Latin1 string into UTF-8 string.
virtual encoding_type autodetect_encoding(const char *input, size_t length) const noexcept
This function will try to detect the encoding.
virtual simdutf_warn_unused size_t utf8_length_from_latin1(const char *input, size_t length) const noexcept=0
Return the number of bytes that this Latin1 string would require in UTF-8 format.