simdutf 8.0.0
Unicode at GB/s.
Loading...
Searching...
No Matches
utf16_to_latin1.h
1#ifndef SIMDUTF_UTF16_TO_LATIN1_H
2#define SIMDUTF_UTF16_TO_LATIN1_H
3
4#include <cstring> // for std::memcpy
5
6namespace simdutf {
7namespace scalar {
8namespace {
9namespace utf16_to_latin1 {
10
11template <endianness big_endian, typename InputPtr, typename OutputPtr>
12#if SIMDUTF_CPLUSPLUS20
13 requires(simdutf::detail::indexes_into_utf16<InputPtr> &&
14 simdutf::detail::index_assignable_from_char<OutputPtr>)
15#endif
16simdutf_constexpr23 size_t convert(InputPtr data, size_t len,
17 OutputPtr latin_output) {
18 if (len == 0) {
19 return 0;
20 }
21 size_t pos = 0;
22 const auto latin_output_start = latin_output;
23 uint16_t word = 0;
24 uint16_t too_large = 0;
25
26 while (pos < len) {
27 word = !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos];
28 too_large |= word;
29 *latin_output++ = char(word & 0xFF);
30 pos++;
31 }
32 if ((too_large & 0xFF00) != 0) {
33 return 0;
34 }
35
36 return latin_output - latin_output_start;
37}
38
39template <endianness big_endian, typename InputPtr, typename OutputPtr>
40#if SIMDUTF_CPLUSPLUS20
41 requires(simdutf::detail::indexes_into_utf16<InputPtr> &&
42 simdutf::detail::index_assignable_from_char<OutputPtr>)
43#endif
44simdutf_constexpr23 result convert_with_errors(InputPtr data, size_t len,
45 OutputPtr latin_output) {
46 if (len == 0) {
47 return result(error_code::SUCCESS, 0);
48 }
49 size_t pos = 0;
50 auto start = latin_output;
51 uint16_t word;
52
53 while (pos < len) {
54#if SIMDUTF_CPLUSPLUS23
55 if !consteval
56#endif
57 {
58 if (pos + 16 <= len) { // if it is safe to read 32 more bytes, check that
59 // they are Latin1
60 uint64_t v1, v2, v3, v4;
61 ::memcpy(&v1, data + pos, sizeof(uint64_t));
62 ::memcpy(&v2, data + pos + 4, sizeof(uint64_t));
63 ::memcpy(&v3, data + pos + 8, sizeof(uint64_t));
64 ::memcpy(&v4, data + pos + 12, sizeof(uint64_t));
65
66 if simdutf_constexpr (!match_system(big_endian)) {
67 v1 = (v1 >> 8) | (v1 << (64 - 8));
68 }
69 if simdutf_constexpr (!match_system(big_endian)) {
70 v2 = (v2 >> 8) | (v2 << (64 - 8));
71 }
72 if simdutf_constexpr (!match_system(big_endian)) {
73 v3 = (v3 >> 8) | (v3 << (64 - 8));
74 }
75 if simdutf_constexpr (!match_system(big_endian)) {
76 v4 = (v4 >> 8) | (v4 << (64 - 8));
77 }
78
79 if (((v1 | v2 | v3 | v4) & 0xFF00FF00FF00FF00) == 0) {
80 size_t final_pos = pos + 16;
81 while (pos < final_pos) {
82 *latin_output++ = !match_system(big_endian)
83 ? char(u16_swap_bytes(data[pos]))
84 : char(data[pos]);
85 pos++;
86 }
87 continue;
88 }
89 }
90 }
91
92 word = !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos];
93 if ((word & 0xFF00) == 0) {
94 *latin_output++ = char(word & 0xFF);
95 pos++;
96 } else {
97 return result(error_code::TOO_LARGE, pos);
98 }
99 }
100 return result(error_code::SUCCESS, latin_output - start);
101}
102
103} // namespace utf16_to_latin1
104} // unnamed namespace
105} // namespace scalar
106} // namespace simdutf
107
108#endif