1#ifndef SIMDUTF_VALID_UTF8_TO_UTF32_H
2#define SIMDUTF_VALID_UTF8_TO_UTF32_H
7namespace utf8_to_utf32 {
9template <
typename InputPtr>
10#if SIMDUTF_CPLUSPLUS20
11 requires simdutf::detail::indexes_into_byte_like<InputPtr>
13simdutf_constexpr23
size_t convert_valid(InputPtr data,
size_t len,
14 char32_t *utf32_output) {
16 char32_t *start{utf32_output};
18#if SIMDUTF_CPLUSPLUS23
26 ::memcpy(&v, data + pos,
sizeof(uint64_t));
27 if ((v & 0x8080808080808080) == 0) {
28 size_t final_pos = pos + 8;
29 while (pos < final_pos) {
30 *utf32_output++ = uint8_t(data[pos]);
37 auto leading_byte = uint8_t(data[pos]);
38 if (leading_byte < 0b10000000) {
40 *utf32_output++ = char32_t(leading_byte);
42 }
else if ((leading_byte & 0b11100000) == 0b11000000) {
47 *utf32_output++ = char32_t(((leading_byte & 0b00011111) << 6) |
48 (uint8_t(data[pos + 1]) & 0b00111111));
50 }
else if ((leading_byte & 0b11110000) == 0b11100000) {
55 *utf32_output++ = char32_t(((leading_byte & 0b00001111) << 12) |
56 ((uint8_t(data[pos + 1]) & 0b00111111) << 6) |
57 (uint8_t(data[pos + 2]) & 0b00111111));
59 }
else if ((leading_byte & 0b11111000) == 0b11110000) {
64 uint32_t code_word = ((leading_byte & 0b00000111) << 18) |
65 ((uint8_t(data[pos + 1]) & 0b00111111) << 12) |
66 ((uint8_t(data[pos + 2]) & 0b00111111) << 6) |
67 (uint8_t(data[pos + 3]) & 0b00111111);
68 *utf32_output++ = char32_t(code_word);
75 return utf32_output - start;