simdutf 8.0.0
Unicode at GB/s.
Loading...
Searching...
No Matches
valid_utf32_to_latin1.h
#ifndef SIMDUTF_VALID_UTF32_TO_LATIN1_H
#define SIMDUTF_VALID_UTF32_TO_LATIN1_H

namespace simdutf {
namespace scalar {
namespace {
namespace utf32_to_latin1 {

/**
 * Scalar conversion of UTF-32 code units to Latin1 bytes.
 *
 * Each input code unit whose value fits in 8 bits (<= 0xFF) is narrowed to a
 * single output byte. Exactly one byte is written per converted code unit, so
 * the output must have room for `len` bytes.
 *
 * @param data          readable sequence of `len` code units; dereferencing
 *                      it must yield a uint32_t (enforced by static_assert).
 * @param len           number of 32-bit code units to convert.
 * @param latin1_output destination for the Latin1 bytes.
 * @return the number of bytes written, or 0 as soon as a code unit above 0xFF
 *         is encountered. Note that an empty input also returns 0. When 0 is
 *         returned because of an unrepresentable code unit, part of the output
 *         may already have been written.
 */
template <typename ReadPtr, typename WritePtr>
simdutf_constexpr23 size_t convert_valid(ReadPtr data, size_t len,
                                         WritePtr latin1_output) {
  static_assert(
      std::is_same<typename std::decay<decltype(*data)>::type, uint32_t>::value,
      "dereferencing the data pointer must result in a uint32_t");
  const auto start = latin1_output;
  size_t pos = 0;

  while (pos < len) {
    const uint32_t utf32_char = data[pos];

#if SIMDUTF_CPLUSPLUS23
    // avoid using the 8 byte at a time optimization in constant evaluation
    // mode. memcpy can't be used and replacing it with bitwise or gave worse
    // codegen (when not during constant evaluation).
    if !consteval {
#endif
      if (pos + 2 <= len) {
        // At least two code units remain, so reading 8 bytes is in bounds.
        // Load both at once and test the upper 24 bits of each 32-bit half;
        // the mask is symmetric, so the check does not depend on endianness.
        uint64_t v;
        std::memcpy(&v, data + pos, sizeof(uint64_t));
        if ((v & 0xFFFFFF00FFFFFF00) != 0) {
          // output can not be represented in latin1
          return 0;
        }
        *latin1_output++ = char(data[pos]);
        *latin1_output++ = char(data[pos + 1]);
        pos += 2;
        continue;
      }
#if SIMDUTF_CPLUSPLUS23
    } // if ! consteval
#endif
    // Single code unit path: the trailing element of an odd-length input, or
    // every element during constant evaluation.
    if ((utf32_char & 0xFFFFFF00) != 0) {
      // output can not be represented in latin1
      return 0;
    }
    *latin1_output++ = char(utf32_char);
    pos++;
  }
  return static_cast<size_t>(latin1_output - start);
}

/**
 * Convenience overload for char32_t input and a raw char output buffer.
 *
 * Reinterprets `buf` as a sequence of uint32_t and forwards to the generic
 * implementation above; the return value has the same meaning.
 */
simdutf_really_inline size_t convert_valid(const char32_t *buf, size_t len,
                                           char *latin1_output) {
  return convert_valid(reinterpret_cast<const uint32_t *>(buf), len,
                       latin1_output);
}

} // namespace utf32_to_latin1
} // unnamed namespace
} // namespace scalar
} // namespace simdutf

#endif