16 #if __has_include(<uchar.h>)
19 #define char16_t uint16_t
20 #define char32_t uint32_t
23 #define char16_t uint16_t
24 #define char32_t uint32_t
32typedef enum simdutf_error_code {
33 SIMDUTF_ERROR_SUCCESS = 0,
34 SIMDUTF_ERROR_HEADER_BITS,
35 SIMDUTF_ERROR_TOO_SHORT,
36 SIMDUTF_ERROR_TOO_LONG,
37 SIMDUTF_ERROR_OVERLONG,
38 SIMDUTF_ERROR_TOO_LARGE,
39 SIMDUTF_ERROR_SURROGATE,
40 SIMDUTF_ERROR_INVALID_BASE64_CHARACTER,
41 SIMDUTF_ERROR_BASE64_INPUT_REMAINDER,
42 SIMDUTF_ERROR_BASE64_EXTRA_BITS,
43 SIMDUTF_ERROR_OUTPUT_BUFFER_TOO_SMALL,
48 simdutf_error_code error;
/**
 * Identifiers for the text encodings named by this API.
 *
 * SIMDUTF_ENCODING_UNSPECIFIED is 0; every other constant is a distinct
 * power of two, so the values do not share any bits.
 * NOTE(review): presumably this allows several encodings to be OR-ed into
 * one integer (e.g. the int returned by simdutf_detect_encodings) — confirm
 * against the implementation.
 */
typedef enum simdutf_encoding_type {
  SIMDUTF_ENCODING_UNSPECIFIED = 0,
  SIMDUTF_ENCODING_UTF8 = 1,
  SIMDUTF_ENCODING_UTF16_LE = 2,
  SIMDUTF_ENCODING_UTF16_BE = 4,
  SIMDUTF_ENCODING_UTF32_LE = 8,
  SIMDUTF_ENCODING_UTF32_BE = 16
} simdutf_encoding_type;
/**
 * UTF-8 validation entry point.
 *
 * @param buf  Pointer to the bytes to examine; not written through.
 * @param len  NOTE(review): presumably the number of bytes readable at buf —
 *             confirm against the implementation.
 * @return     NOTE(review): presumably true when the input is valid UTF-8;
 *             the implementation is not visible from this declaration.
 */
bool simdutf_validate_utf8(const char *buf, size_t len);
65simdutf_result simdutf_validate_utf8_with_errors(
const char *buf,
size_t len);
68simdutf_encoding_type simdutf_autodetect_encoding(
const char *input,
/**
 * Encoding detection over a byte buffer.
 *
 * @param input   Pointer to the bytes to examine; not written through.
 * @param length  NOTE(review): presumably the number of bytes at input.
 * @return        An int. NOTE(review): presumably a bitwise OR of
 *                simdutf_encoding_type constants — confirm against the
 *                implementation.
 */
int simdutf_detect_encodings(const char *input, size_t length);

/**
 * ASCII validation entry point.
 *
 * @param buf  Pointer to the bytes to examine; not written through.
 * @param len  NOTE(review): presumably the number of bytes at buf.
 * @return     NOTE(review): presumably true when every byte is ASCII;
 *             the implementation is not visible here.
 */
bool simdutf_validate_ascii(const char *buf, size_t len);
74simdutf_result simdutf_validate_ascii_with_errors(
const char *buf,
size_t len);
/*
 * UTF-16 validation entry points.
 *
 * All six take a read-only char16_t buffer plus a length and return bool.
 * NOTE(review): len is presumably a count of char16_t units (not bytes), and
 * the unsuffixed variants presumably use the platform's native byte order
 * while the "le"/"be" variants fix it — confirm against the implementation.
 */

/** NOTE(review): presumably true when every UTF-16 unit is in the ASCII range. */
bool simdutf_validate_utf16_as_ascii(const char16_t *buf, size_t len);

/** Big-endian counterpart of simdutf_validate_utf16_as_ascii (per its name). */
bool simdutf_validate_utf16be_as_ascii(const char16_t *buf, size_t len);

/** Little-endian counterpart of simdutf_validate_utf16_as_ascii (per its name). */
bool simdutf_validate_utf16le_as_ascii(const char16_t *buf, size_t len);

/** NOTE(review): presumably true when buf holds well-formed UTF-16. */
bool simdutf_validate_utf16(const char16_t *buf, size_t len);

/** Little-endian counterpart of simdutf_validate_utf16 (per its name). */
bool simdutf_validate_utf16le(const char16_t *buf, size_t len);

/** Big-endian counterpart of simdutf_validate_utf16 (per its name). */
bool simdutf_validate_utf16be(const char16_t *buf, size_t len);
85simdutf_result simdutf_validate_utf16_with_errors(
const char16_t *buf,
87simdutf_result simdutf_validate_utf16le_with_errors(
const char16_t *buf,
89simdutf_result simdutf_validate_utf16be_with_errors(
const char16_t *buf,
/**
 * UTF-32 validation entry point.
 *
 * @param buf  Pointer to the char32_t units to examine; not written through.
 * @param len  NOTE(review): presumably a count of char32_t units, not bytes.
 * @return     NOTE(review): presumably true when the input is valid UTF-32;
 *             the implementation is not visible here.
 */
bool simdutf_validate_utf32(const char32_t *buf, size_t len);
93simdutf_result simdutf_validate_utf32_with_errors(
const char32_t *buf,
97void simdutf_to_well_formed_utf16le(
const char16_t *input,
size_t len,
99void simdutf_to_well_formed_utf16be(
const char16_t *input,
size_t len,
101void simdutf_to_well_formed_utf16(
const char16_t *input,
size_t len,
/*
 * Counting entry points.
 *
 * Each takes a read-only buffer plus a length and returns a size_t.
 * NOTE(review): the result is presumably the number of code points in the
 * input, and length is presumably in units of the pointed-to type — confirm
 * against the implementation.
 */

/** Count over UTF-16 input (byte order not fixed by the name). */
size_t simdutf_count_utf16(const char16_t *input, size_t length);

/** Count over UTF-16 input, little-endian variant (per its name). */
size_t simdutf_count_utf16le(const char16_t *input, size_t length);

/** Count over UTF-16 input, big-endian variant (per its name). */
size_t simdutf_count_utf16be(const char16_t *input, size_t length);

/** Count over UTF-8 input. */
size_t simdutf_count_utf8(const char *input, size_t length);
/*
 * Output-length queries, named <target>_length_from_<source>.
 *
 * NOTE(review): each presumably returns how many units of the target encoding
 * are needed to hold the converted input, so callers can size an output
 * buffer — confirm against the implementation.
 */

/** Takes Latin-1 bytes and their length. */
size_t simdutf_utf8_length_from_latin1(const char *input, size_t length);

/** Takes UTF-8 bytes and their length. */
size_t simdutf_latin1_length_from_utf8(const char *input, size_t length);

/** Takes only a length; no input buffer is inspected. */
size_t simdutf_latin1_length_from_utf16(size_t length);

/** Takes only a length; no input buffer is inspected. */
size_t simdutf_latin1_length_from_utf32(size_t length);

/** Takes UTF-8 bytes and their length. */
size_t simdutf_utf16_length_from_utf8(const char *input, size_t length);

/** Takes UTF-8 bytes and their length. */
size_t simdutf_utf32_length_from_utf8(const char *input, size_t length);

/** Takes UTF-16 units and their length. */
size_t simdutf_utf8_length_from_utf16(const char16_t *input, size_t length);
119simdutf_utf8_length_from_utf16_with_replacement(
const char16_t *input,
/**
 * UTF-8 length query for little-endian UTF-16 input (per its name).
 *
 * @param input   Pointer to the UTF-16 units; not written through.
 * @param length  NOTE(review): presumably a count of char16_t units.
 * @return        NOTE(review): presumably the number of UTF-8 bytes the
 *                converted input would occupy — confirm.
 */
size_t simdutf_utf8_length_from_utf16le(const char16_t *input, size_t length);

/**
 * UTF-8 length query for big-endian UTF-16 input (per its name).
 *
 * Same parameter list and return type as simdutf_utf8_length_from_utf16le.
 */
size_t simdutf_utf8_length_from_utf16be(const char16_t *input, size_t length);
124simdutf_utf8_length_from_utf16le_with_replacement(
const char16_t *input,
127simdutf_utf8_length_from_utf16be_with_replacement(
const char16_t *input,
131size_t simdutf_convert_latin1_to_utf8(
const char *input,
size_t length,
/**
 * Latin-1 to UTF-8 conversion that also receives a size for the output.
 *
 * @param input     Pointer to the Latin-1 bytes; not written through.
 * @param length    NOTE(review): presumably the number of input bytes.
 * @param output    Destination buffer written by the conversion.
 * @param utf8_len  NOTE(review): presumably the capacity of output in bytes,
 *                  which the "_safe" variant will not exceed — confirm.
 * @return          NOTE(review): presumably the number of bytes written.
 */
size_t simdutf_convert_latin1_to_utf8_safe(const char *input, size_t length,
                                           char *output, size_t utf8_len);
135size_t simdutf_convert_latin1_to_utf16le(
const char *input,
size_t length,
137size_t simdutf_convert_latin1_to_utf16be(
const char *input,
size_t length,
139size_t simdutf_convert_latin1_to_utf32(
const char *input,
size_t length,
142size_t simdutf_convert_utf8_to_latin1(
const char *input,
size_t length,
144size_t simdutf_convert_utf8_to_utf16le(
const char *input,
size_t length,
146size_t simdutf_convert_utf8_to_utf16be(
const char *input,
size_t length,
148size_t simdutf_convert_utf8_to_utf16(
const char *input,
size_t length,
151size_t simdutf_convert_utf8_to_utf32(
const char *input,
size_t length,
153simdutf_result simdutf_convert_utf8_to_latin1_with_errors(
const char *input,
156simdutf_result simdutf_convert_utf8_to_utf16_with_errors(
const char *input,
159simdutf_result simdutf_convert_utf8_to_utf16le_with_errors(
const char *input,
162simdutf_result simdutf_convert_utf8_to_utf16be_with_errors(
const char *input,
165simdutf_result simdutf_convert_utf8_to_utf32_with_errors(
const char *input,
170size_t simdutf_convert_valid_utf8_to_latin1(
const char *input,
size_t length,
172size_t simdutf_convert_valid_utf8_to_utf16le(
const char *input,
size_t length,
174size_t simdutf_convert_valid_utf8_to_utf16be(
const char *input,
size_t length,
176size_t simdutf_convert_valid_utf8_to_utf32(
const char *input,
size_t length,
180size_t simdutf_convert_utf16_to_utf8(
const char16_t *input,
size_t length,
182size_t simdutf_convert_utf16le_to_utf8(
const char16_t *input,
size_t length,
184size_t simdutf_convert_utf16be_to_utf8(
const char16_t *input,
size_t length,
/**
 * UTF-16 to UTF-8 conversion that also receives a size for the output.
 *
 * @param input     Pointer to the UTF-16 units; not written through.
 * @param length    NOTE(review): presumably a count of char16_t units.
 * @param output    Destination buffer written by the conversion.
 * @param utf8_len  NOTE(review): presumably the capacity of output in bytes,
 *                  which the "_safe" variant will not exceed — confirm.
 * @return          NOTE(review): presumably the number of bytes written.
 */
size_t simdutf_convert_utf16_to_utf8_safe(const char16_t *input, size_t length,
                                          char *output, size_t utf8_len);
188size_t simdutf_convert_utf16_to_latin1(
const char16_t *input,
size_t length,
190size_t simdutf_convert_utf16le_to_latin1(
const char16_t *input,
size_t length,
192size_t simdutf_convert_utf16be_to_latin1(
const char16_t *input,
size_t length,
195simdutf_convert_utf16_to_latin1_with_errors(
const char16_t *input,
196 size_t length,
char *output);
198simdutf_convert_utf16le_to_latin1_with_errors(
const char16_t *input,
199 size_t length,
char *output);
201simdutf_convert_utf16be_to_latin1_with_errors(
const char16_t *input,
202 size_t length,
char *output);
204simdutf_result simdutf_convert_utf16_to_utf8_with_errors(
const char16_t *input,
208simdutf_convert_utf16le_to_utf8_with_errors(
const char16_t *input,
209 size_t length,
char *output);
211simdutf_convert_utf16be_to_utf8_with_errors(
const char16_t *input,
212 size_t length,
char *output);
214size_t simdutf_convert_valid_utf16_to_utf8(
const char16_t *input,
size_t length,
/*
 * "convert_valid" UTF-16 conversions to Latin-1 and UTF-8.
 *
 * All five read char16_t units from input, write char data to output and
 * return a size_t.
 * NOTE(review): the "valid" prefix presumably means the caller guarantees
 * well-formed input and no validation is performed; the return value is
 * presumably the number of output units written, and output must presumably
 * be large enough for the whole result — confirm against the implementation.
 */

/** UTF-16 (byte order not fixed by the name) to Latin-1. */
size_t simdutf_convert_valid_utf16_to_latin1(const char16_t *input,
                                             size_t length, char *output);

/** Little-endian UTF-16 to Latin-1 (per its name). */
size_t simdutf_convert_valid_utf16le_to_latin1(const char16_t *input,
                                               size_t length, char *output);

/** Big-endian UTF-16 to Latin-1 (per its name). */
size_t simdutf_convert_valid_utf16be_to_latin1(const char16_t *input,
                                               size_t length, char *output);

/** Little-endian UTF-16 to UTF-8 (per its name). */
size_t simdutf_convert_valid_utf16le_to_utf8(const char16_t *input,
                                             size_t length, char *output);

/** Big-endian UTF-16 to UTF-8 (per its name). */
size_t simdutf_convert_valid_utf16be_to_utf8(const char16_t *input,
                                             size_t length, char *output);
229size_t simdutf_convert_utf16_to_utf32(
const char16_t *input,
size_t length,
231size_t simdutf_convert_utf16le_to_utf32(
const char16_t *input,
size_t length,
233size_t simdutf_convert_utf16be_to_utf32(
const char16_t *input,
size_t length,
235simdutf_result simdutf_convert_utf16_to_utf32_with_errors(
const char16_t *input,
239simdutf_convert_utf16le_to_utf32_with_errors(
const char16_t *input,
240 size_t length,
char32_t *output);
242simdutf_convert_utf16be_to_utf32_with_errors(
const char16_t *input,
243 size_t length,
char32_t *output);
/*
 * "convert_valid" UTF-16 to UTF-32 conversions.
 *
 * All three read char16_t units from input, write char32_t units to output
 * and return a size_t.
 * NOTE(review): the "valid" prefix presumably means the caller guarantees
 * well-formed input; the return value is presumably the number of char32_t
 * units written — confirm against the implementation.
 */

/** UTF-16 (byte order not fixed by the name) to UTF-32. */
size_t simdutf_convert_valid_utf16_to_utf32(const char16_t *input,
                                            size_t length, char32_t *output);

/** Little-endian UTF-16 to UTF-32 (per its name). */
size_t simdutf_convert_valid_utf16le_to_utf32(const char16_t *input,
                                              size_t length, char32_t *output);

/** Big-endian UTF-16 to UTF-32 (per its name). */
size_t simdutf_convert_valid_utf16be_to_utf32(const char16_t *input,
                                              size_t length, char32_t *output);
254size_t simdutf_convert_utf32_to_utf8(
const char32_t *input,
size_t length,
256simdutf_result simdutf_convert_utf32_to_utf8_with_errors(
const char32_t *input,
259size_t simdutf_convert_valid_utf32_to_utf8(
const char32_t *input,
size_t length,
262size_t simdutf_convert_utf32_to_utf16(
const char32_t *input,
size_t length,
264size_t simdutf_convert_utf32_to_utf16le(
const char32_t *input,
size_t length,
266size_t simdutf_convert_utf32_to_utf16be(
const char32_t *input,
size_t length,
269simdutf_convert_utf32_to_latin1_with_errors(
const char32_t *input,
270 size_t length,
char *output);
/**
 * Character search over the byte range delimited by start and end.
 *
 * @param start      Pointer to the first byte of the range.
 * @param end        NOTE(review): presumably one past the last byte of the
 *                   range — confirm against the implementation.
 * @param character  The byte value to look for.
 * @return           A pointer into the input. NOTE(review): presumably the
 *                   first match, or end when there is none — confirm.
 */
const char *simdutf_find(const char *start, const char *end, char character);
274const char16_t *simdutf_find_utf16(
const char16_t *start,
const char16_t *end,
/**
 * Option values accepted by the base64 functions in this header.
 *
 * The numeric values compose bitwise: URL_WITH_PADDING (3) equals
 * URL | DEFAULT_NO_PADDING, URL_ACCEPT_GARBAGE (5) equals
 * URL | DEFAULT_ACCEPT_GARBAGE, and DEFAULT_OR_URL_ACCEPT_GARBAGE (12)
 * equals DEFAULT_OR_URL | DEFAULT_ACCEPT_GARBAGE.
 * NOTE(review): presumably bit 1 selects the URL alphabet, bit 2 flips the
 * alphabet's default padding behaviour, bit 4 tolerates invalid characters
 * and bit 8 accepts either alphabet — confirm against the implementation.
 */
typedef enum simdutf_base64_options {
  SIMDUTF_BASE64_DEFAULT = 0,
  SIMDUTF_BASE64_URL = 1,
  SIMDUTF_BASE64_DEFAULT_NO_PADDING = 2,
  SIMDUTF_BASE64_URL_WITH_PADDING = 3,
  SIMDUTF_BASE64_DEFAULT_ACCEPT_GARBAGE = 4,
  SIMDUTF_BASE64_URL_ACCEPT_GARBAGE = 5,
  SIMDUTF_BASE64_DEFAULT_OR_URL = 8,
  SIMDUTF_BASE64_DEFAULT_OR_URL_ACCEPT_GARBAGE = 12
} simdutf_base64_options;
/**
 * Selects how the base64 decoding functions treat the final chunk of input.
 *
 * The four constants take the consecutive values 0 through 3.
 * NOTE(review): the exact semantics of each mode (loose, strict,
 * stop-before-partial, only-full-chunks) are defined by the implementation
 * and are not visible from this declaration.
 */
typedef enum simdutf_last_chunk_handling_options {
  SIMDUTF_LAST_CHUNK_LOOSE = 0,
  SIMDUTF_LAST_CHUNK_STRICT = 1,
  SIMDUTF_LAST_CHUNK_STOP_BEFORE_PARTIAL = 2,
  SIMDUTF_LAST_CHUNK_ONLY_FULL_CHUNKS = 3
} simdutf_last_chunk_handling_options;
297size_t simdutf_maximal_binary_length_from_base64(
const char *input,
299size_t simdutf_maximal_binary_length_from_base64_utf16(
const char16_t *input,
304 const char *input,
size_t length,
char *output,
305 simdutf_base64_options options,
306 simdutf_last_chunk_handling_options last_chunk_options);
308 const char16_t *input,
size_t length,
char *output,
309 simdutf_base64_options options,
310 simdutf_last_chunk_handling_options last_chunk_options);
312size_t simdutf_base64_length_from_binary(
size_t length,
313 simdutf_base64_options options);
314size_t simdutf_base64_length_from_binary_with_lines(
315 size_t length, simdutf_base64_options options,
size_t line_length);
317size_t simdutf_binary_to_base64(
const char *input,
size_t length,
char *output,
318 simdutf_base64_options options);
319size_t simdutf_binary_to_base64_with_lines(
const char *input,
size_t length,
320 char *output,
size_t line_length,
321 simdutf_base64_options options);
325 const char *input,
size_t length,
char *output,
size_t *outlen,
326 simdutf_base64_options options,
327 simdutf_last_chunk_handling_options last_chunk_options,
328 bool decode_up_to_bad_char);
330 const char16_t *input,
size_t length,
char *output,
size_t *outlen,
331 simdutf_base64_options options,
332 simdutf_last_chunk_handling_options last_chunk_options,
333 bool decode_up_to_bad_char);