simdutf 8.0.0
Unicode at GB/s.
Loading...
Searching...
No Matches
valid_utf16_to_utf32.h
1#ifndef SIMDUTF_VALID_UTF16_TO_UTF32_H
2#define SIMDUTF_VALID_UTF16_TO_UTF32_H
3
4namespace simdutf {
5namespace scalar {
6namespace {
7namespace utf16_to_utf32 {
8
9template <endianness big_endian>
10simdutf_constexpr23 size_t convert_valid(const char16_t *data, size_t len,
11 char32_t *utf32_output) {
12 size_t pos = 0;
13 char32_t *start{utf32_output};
14 while (pos < len) {
15 uint16_t word =
16 !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos];
17 if ((word & 0xF800) != 0xD800) {
18 // No surrogate pair, extend 16-bit word to 32-bit word
19 *utf32_output++ = char32_t(word);
20 pos++;
21 } else {
22 // must be a surrogate pair
23 uint16_t diff = uint16_t(word - 0xD800);
24 if (pos + 1 >= len) {
25 return 0;
26 } // minimal bound checking
27 uint16_t next_word = !match_system(big_endian)
28 ? u16_swap_bytes(data[pos + 1])
29 : data[pos + 1];
30 uint16_t diff2 = uint16_t(next_word - 0xDC00);
31 uint32_t value = (diff << 10) + diff2 + 0x10000;
32 *utf32_output++ = char32_t(value);
33 pos += 2;
34 }
35 }
36 return utf32_output - start;
37}
38
39} // namespace utf16_to_utf32
40} // unnamed namespace
41} // namespace scalar
42} // namespace simdutf
43
44#endif