simdutf 8.0.0
Unicode at GB/s.
Loading...
Searching...
No Matches
utf32_to_utf16.h
1#ifndef SIMDUTF_UTF32_TO_UTF16_H
2#define SIMDUTF_UTF32_TO_UTF16_H
3
4namespace simdutf {
5namespace scalar {
6namespace {
7namespace utf32_to_utf16 {
8
9template <endianness big_endian>
10simdutf_constexpr23 size_t convert(const char32_t *data, size_t len,
11 char16_t *utf16_output) {
12 size_t pos = 0;
13 char16_t *start{utf16_output};
14 while (pos < len) {
15 uint32_t word = data[pos];
16 if ((word & 0xFFFF0000) == 0) {
17 if (word >= 0xD800 && word <= 0xDFFF) {
18 return 0;
19 }
20 // will not generate a surrogate pair
21 *utf16_output++ = !match_system(big_endian)
22 ? char16_t(u16_swap_bytes(uint16_t(word)))
23 : char16_t(word);
24 } else {
25 // will generate a surrogate pair
26 if (word > 0x10FFFF) {
27 return 0;
28 }
29 word -= 0x10000;
30 uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
31 uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
32 if simdutf_constexpr (!match_system(big_endian)) {
33 high_surrogate = u16_swap_bytes(high_surrogate);
34 low_surrogate = u16_swap_bytes(low_surrogate);
35 }
36 *utf16_output++ = char16_t(high_surrogate);
37 *utf16_output++ = char16_t(low_surrogate);
38 }
39 pos++;
40 }
41 return utf16_output - start;
42}
43
44template <endianness big_endian>
45simdutf_constexpr23 result convert_with_errors(const char32_t *data, size_t len,
46 char16_t *utf16_output) {
47 size_t pos = 0;
48 char16_t *start{utf16_output};
49 while (pos < len) {
50 uint32_t word = data[pos];
51 if ((word & 0xFFFF0000) == 0) {
52 if (word >= 0xD800 && word <= 0xDFFF) {
53 return result(error_code::SURROGATE, pos);
54 }
55 // will not generate a surrogate pair
56 *utf16_output++ = !match_system(big_endian)
57 ? char16_t(u16_swap_bytes(uint16_t(word)))
58 : char16_t(word);
59 } else {
60 // will generate a surrogate pair
61 if (word > 0x10FFFF) {
62 return result(error_code::TOO_LARGE, pos);
63 }
64 word -= 0x10000;
65 uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
66 uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
67 if simdutf_constexpr (!match_system(big_endian)) {
68 high_surrogate = u16_swap_bytes(high_surrogate);
69 low_surrogate = u16_swap_bytes(low_surrogate);
70 }
71 *utf16_output++ = char16_t(high_surrogate);
72 *utf16_output++ = char16_t(low_surrogate);
73 }
74 pos++;
75 }
76 return result(error_code::SUCCESS, utf16_output - start);
77}
78
79} // namespace utf32_to_utf16
80} // unnamed namespace
81} // namespace scalar
82} // namespace simdutf
83
84#endif