1#ifndef SIMDUTF_UTF32_TO_UTF8_H
2#define SIMDUTF_UTF32_TO_UTF8_H
7namespace utf32_to_utf8 {
// Transcodes UTF-32 input read from `data` into UTF-8 bytes written through
// `utf8_output`, and returns how far the output position advanced past its
// initial value (one step per byte stored).
//
// Parameters:
//   data        - indexable source of 32-bit code points (read as data[pos]).
//   len         - element count of the input; its use is not visible in this
//                 excerpt (presumably the loop bound - TODO confirm).
//   utf8_output - output position; every store is `*utf8_output++ = char(...)`.
//
// NOTE(review): this listing appears to be missing lines. `pos` and `v` are
// used without a visible declaration, no loop header or `pos` increments are
// shown, the `#if` directives have no visible `#endif`, and several closing
// braces are absent. Confirm everything below against the complete file.
9template <
typename InputPtr,
typename OutputPtr>
// When SIMDUTF_CPLUSPLUS20 is set, the template is constrained so that
// InputPtr satisfies indexes_into_utf32 and OutputPtr satisfies
// index_assignable_from_char.
10#if SIMDUTF_CPLUSPLUS20
11 requires(simdutf::detail::indexes_into_utf32<InputPtr> &&
12 simdutf::detail::index_assignable_from_char<OutputPtr>)
14simdutf_constexpr23
size_t convert(InputPtr data,
size_t len,
15 OutputPtr utf8_output) {
// Remember the initial output position so the final return can report the
// distance travelled.
17 auto start = utf8_output;
// NOTE(review): what this SIMDUTF_CPLUSPLUS23 conditional guards is not
// visible in this excerpt.
19#if SIMDUTF_CPLUSPLUS23
// Fast path: copy sizeof(uint64_t) bytes starting at data + pos into `v`.
// Assumes the input elements are 32 bits wide, so `v` holds data[pos] and
// data[pos + 1] - TODO confirm, the declaration of `v` is not shown.
26 ::memcpy(&v, data + pos,
sizeof(uint64_t));
// The mask keeps every bit except the low 7 of each 32-bit half; a zero
// result means both halves are below 0x80 and can each be stored as one byte.
27 if ((v & 0xFFFFFF80FFFFFF80) == 0) {
28 *utf8_output++ = char(data[pos]);
29 *utf8_output++ = char(data[pos + 1]);
// General path: encode one code point according to its magnitude.
36 uint32_t word = data[pos];
// No bits above the low 7 are set (word < 0x80): emit a single byte.
37 if ((word & 0xFFFFFF80) == 0) {
39 *utf8_output++ = char(word);
41 }
// No bits above the low 11 are set (word < 0x800): emit two bytes,
// 0b110xxxxx followed by 0b10xxxxxx.
else if ((word & 0xFFFFF800) == 0) {
44 *utf8_output++ = char((word >> 6) | 0b11000000);
45 *utf8_output++ = char((word & 0b111111) | 0b10000000);
47 }
// No bits above the low 16 are set (word < 0x10000): three-byte form.
else if ((word & 0xFFFF0000) == 0) {
// Values in 0xD800..0xDFFF (the UTF-16 surrogate range) are singled out here.
// NOTE(review): the body of this check is not visible in this excerpt.
50 if (word >= 0xD800 && word <= 0xDFFF) {
// 0b1110xxxx 0b10xxxxxx 0b10xxxxxx
53 *utf8_output++ = char((word >> 12) | 0b11100000);
54 *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000);
55 *utf8_output++ = char((word & 0b111111) | 0b10000000);
// Values above 0x10FFFF are singled out here.
// NOTE(review): the body of this check and the branch that introduces the
// four-byte case are not visible in this excerpt.
60 if (word > 0x10FFFF) {
// 0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx
63 *utf8_output++ = char((word >> 18) | 0b11110000);
64 *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000);
65 *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000);
66 *utf8_output++ = char((word & 0b111111) | 0b10000000);
// Distance between the current and the initial output position.
70 return utf8_output - start;
// Transcodes UTF-32 input read from `data` into UTF-8 bytes written through
// `utf8_output`, reporting the outcome as a `result`.
//
// Visible return values:
//   result(error_code::SURROGATE, pos)  - data[pos] lies in 0xD800..0xDFFF.
//   result(error_code::TOO_LARGE, pos)  - data[pos] is greater than 0x10FFFF.
//   result(error_code::SUCCESS, utf8_output - start)
//                                       - second field is the distance the
//                                         output position advanced.
//
// Parameters:
//   data        - indexable source of 32-bit code points (read as data[pos]).
//   len         - element count of the input; its use is not visible in this
//                 excerpt (presumably the loop bound - TODO confirm).
//   utf8_output - output position; every store is `*utf8_output++ = char(...)`.
//
// NOTE(review): this listing appears to be missing lines. `pos` and `v` are
// used without a visible declaration, no loop header or `pos` increments are
// shown, the `#if` directives have no visible `#endif`, and several closing
// braces are absent. Confirm everything below against the complete file.
73template <
typename InputPtr,
typename OutputPtr>
// When SIMDUTF_CPLUSPLUS20 is set, the template is constrained so that
// InputPtr satisfies indexes_into_utf32 and OutputPtr satisfies
// index_assignable_from_char.
74#if SIMDUTF_CPLUSPLUS20
75 requires(simdutf::detail::indexes_into_utf32<InputPtr> &&
76 simdutf::detail::index_assignable_from_char<OutputPtr>)
78simdutf_constexpr23 result convert_with_errors(InputPtr data,
size_t len,
79 OutputPtr utf8_output) {
// Remember the initial output position so the success result can report the
// distance travelled.
81 auto start = utf8_output;
// NOTE(review): what this SIMDUTF_CPLUSPLUS23 conditional guards is not
// visible in this excerpt.
83#if SIMDUTF_CPLUSPLUS23
// Fast path: copy sizeof(uint64_t) bytes starting at data + pos into `v`.
// Assumes the input elements are 32 bits wide, so `v` holds data[pos] and
// data[pos + 1] - TODO confirm, the declaration of `v` is not shown.
90 ::memcpy(&v, data + pos,
sizeof(uint64_t));
// The mask keeps every bit except the low 7 of each 32-bit half; a zero
// result means both halves are below 0x80 and can each be stored as one byte.
91 if ((v & 0xFFFFFF80FFFFFF80) == 0) {
92 *utf8_output++ = char(data[pos]);
93 *utf8_output++ = char(data[pos + 1]);
// General path: encode one code point according to its magnitude.
100 uint32_t word = data[pos];
// No bits above the low 7 are set (word < 0x80): emit a single byte.
101 if ((word & 0xFFFFFF80) == 0) {
103 *utf8_output++ = char(word);
105 }
// No bits above the low 11 are set (word < 0x800): emit two bytes,
// 0b110xxxxx followed by 0b10xxxxxx.
else if ((word & 0xFFFFF800) == 0) {
108 *utf8_output++ = char((word >> 6) | 0b11000000);
109 *utf8_output++ = char((word & 0b111111) | 0b10000000);
111 }
// No bits above the low 16 are set (word < 0x10000): three-byte form.
else if ((word & 0xFFFF0000) == 0) {
// Values in 0xD800..0xDFFF (the UTF-16 surrogate range) are rejected; the
// error carries the input index `pos` of the offending element.
114 if (word >= 0xD800 && word <= 0xDFFF) {
115 return result(error_code::SURROGATE, pos);
// 0b1110xxxx 0b10xxxxxx 0b10xxxxxx
117 *utf8_output++ = char((word >> 12) | 0b11100000);
118 *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000);
119 *utf8_output++ = char((word & 0b111111) | 0b10000000);
// Values above 0x10FFFF are rejected; the error carries the input index `pos`.
// NOTE(review): the branch that introduces the four-byte case is not visible
// in this excerpt.
124 if (word > 0x10FFFF) {
125 return result(error_code::TOO_LARGE, pos);
// 0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx
127 *utf8_output++ = char((word >> 18) | 0b11110000);
128 *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000);
129 *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000);
130 *utf8_output++ = char((word & 0b111111) | 0b10000000);
// Success: the second field is the distance between the current and the
// initial output position.
134 return result(error_code::SUCCESS, utf8_output - start);