16 #if __has_include(<uchar.h>)
19 #define char16_t uint16_t
20 #define char32_t uint32_t
23 #define char16_t uint16_t
24 #define char32_t uint32_t
32typedef enum simdutf_error_code {
33 SIMDUTF_ERROR_SUCCESS = 0,
34 SIMDUTF_ERROR_HEADER_BITS,
35 SIMDUTF_ERROR_TOO_SHORT,
36 SIMDUTF_ERROR_TOO_LONG,
37 SIMDUTF_ERROR_OVERLONG,
38 SIMDUTF_ERROR_TOO_LARGE,
39 SIMDUTF_ERROR_SURROGATE,
40 SIMDUTF_ERROR_INVALID_BASE64_CHARACTER,
41 SIMDUTF_ERROR_BASE64_INPUT_REMAINDER,
42 SIMDUTF_ERROR_BASE64_EXTRA_BITS,
43 SIMDUTF_ERROR_OUTPUT_BUFFER_TOO_SMALL,
48 simdutf_error_code error;
53 simdutf_error_code error;
/**
 * Identifiers for the text encodings this API distinguishes.
 *
 * Every non-zero enumerator is a distinct power of two, so no two values
 * share a bit.
 * NOTE(review): presumably this allows several encodings to be OR-ed into a
 * single mask (simdutf_detect_encodings returns a plain int) -- confirm
 * against the implementation.
 */
typedef enum simdutf_encoding_type {
  SIMDUTF_ENCODING_UNSPECIFIED = 0, /* zero value: no encoding bit set */
  SIMDUTF_ENCODING_UTF8 = 1,
  SIMDUTF_ENCODING_UTF16_LE = 2,
  SIMDUTF_ENCODING_UTF16_BE = 4,
  SIMDUTF_ENCODING_UTF32_LE = 8,
  SIMDUTF_ENCODING_UTF32_BE = 16
} simdutf_encoding_type;
/**
 * UTF-8 validation entry point (declaration only; implemented elsewhere).
 *
 * @param buf input buffer, passed as a read-only (const-qualified) pointer.
 * @param len size of buf -- presumably a byte count; confirm.
 * @return presumably true when the whole input is valid UTF-8 (inferred from
 *         the name; the implementation is not visible from this header).
 */
bool simdutf_validate_utf8(const char *buf, size_t len);
71simdutf_result simdutf_validate_utf8_with_errors(
const char *buf,
size_t len);
74simdutf_encoding_type simdutf_autodetect_encoding(
const char *input,
/**
 * Encoding detection entry point (declaration only; implemented elsewhere).
 *
 * @param input  input buffer, passed as a read-only (const-qualified) pointer.
 * @param length size of input -- presumably a byte count; confirm.
 * @return an int. NOTE(review): presumably a bitwise OR of
 *         simdutf_encoding_type values for every encoding the input could
 *         be -- confirm against the implementation.
 */
int simdutf_detect_encodings(const char *input, size_t length);
/**
 * ASCII validation entry point (declaration only; implemented elsewhere).
 *
 * @param buf input buffer, passed as a read-only (const-qualified) pointer.
 * @param len size of buf -- presumably a byte count; confirm.
 * @return presumably true when every byte is ASCII (inferred from the name;
 *         the implementation is not visible from this header).
 */
bool simdutf_validate_ascii(const char *buf, size_t len);
80simdutf_result simdutf_validate_ascii_with_errors(
const char *buf,
size_t len);
/**
 * "UTF-16 as ASCII" checks (declarations only; implemented elsewhere).
 *
 * All three share one signature:
 * @param buf input buffer of char16_t units, passed as a read-only pointer.
 * @param len size of buf -- presumably counted in char16_t units rather than
 *            bytes; confirm.
 * @return presumably true when every code unit is in the ASCII range
 *         (inferred from the name; confirm against the implementation).
 *
 * NOTE(review): the un-suffixed variant presumably uses the host byte order,
 * while the "be"/"le" variants fix big-/little-endian input -- confirm.
 */
bool simdutf_validate_utf16_as_ascii(const char16_t *buf, size_t len);
bool simdutf_validate_utf16be_as_ascii(const char16_t *buf, size_t len);
bool simdutf_validate_utf16le_as_ascii(const char16_t *buf, size_t len);
/**
 * UTF-16 validation entry points (declarations only; implemented elsewhere).
 *
 * All three share one signature:
 * @param buf input buffer of char16_t units, passed as a read-only pointer.
 * @param len size of buf -- presumably counted in char16_t units rather than
 *            bytes; confirm.
 * @return presumably true when the whole input is valid UTF-16 (inferred
 *         from the name; confirm against the implementation).
 *
 * NOTE(review): the un-suffixed variant presumably uses the host byte order,
 * while the "le"/"be" variants fix little-/big-endian input -- confirm.
 */
bool simdutf_validate_utf16(const char16_t *buf, size_t len);
bool simdutf_validate_utf16le(const char16_t *buf, size_t len);
bool simdutf_validate_utf16be(const char16_t *buf, size_t len);
91simdutf_result simdutf_validate_utf16_with_errors(
const char16_t *buf,
93simdutf_result simdutf_validate_utf16le_with_errors(
const char16_t *buf,
95simdutf_result simdutf_validate_utf16be_with_errors(
const char16_t *buf,
/**
 * UTF-32 validation entry point (declaration only; implemented elsewhere).
 *
 * @param buf input buffer of char32_t units, passed as a read-only pointer.
 * @param len size of buf -- presumably counted in char32_t units rather than
 *            bytes; confirm.
 * @return presumably true when the whole input is valid UTF-32 (inferred
 *         from the name; confirm against the implementation).
 */
bool simdutf_validate_utf32(const char32_t *buf, size_t len);
99simdutf_result simdutf_validate_utf32_with_errors(
const char32_t *buf,
103void simdutf_to_well_formed_utf16le(
const char16_t *input,
size_t len,
105void simdutf_to_well_formed_utf16be(
const char16_t *input,
size_t len,
107void simdutf_to_well_formed_utf16(
const char16_t *input,
size_t len,
/**
 * Counting entry points (declarations only; implemented elsewhere).
 *
 * @param input  input buffer, passed as a read-only pointer (char16_t units
 *               for the UTF-16 variants, char for the UTF-8 variant).
 * @param length size of input -- presumably counted in units of the pointed-to
 *               type; confirm.
 * @return a size_t. NOTE(review): presumably the number of code points in the
 *         input -- inferred from the name; confirm against the
 *         implementation, including what happens on invalid input.
 */
size_t simdutf_count_utf16(const char16_t *input, size_t length);
size_t simdutf_count_utf16le(const char16_t *input, size_t length);
size_t simdutf_count_utf16be(const char16_t *input, size_t length);
size_t simdutf_count_utf8(const char *input, size_t length);
/**
 * Output-length helpers, named <target>_length_from_<source>
 * (declarations only; implemented elsewhere).
 *
 * NOTE(review): each presumably returns how many units of the target encoding
 * are needed to hold the converted input -- inferred from the names; confirm
 * against the implementation, including behaviour on invalid input.
 *
 * Most variants take the input buffer (read-only pointer) plus its length,
 * presumably counted in units of the pointed-to type. The two
 * latin1_length_from_utf16 / latin1_length_from_utf32 variants take only a
 * length and never see the input data.
 */
size_t simdutf_utf8_length_from_latin1(const char *input, size_t length);
size_t simdutf_latin1_length_from_utf8(const char *input, size_t length);
size_t simdutf_latin1_length_from_utf16(size_t length);
size_t simdutf_latin1_length_from_utf32(size_t length);
size_t simdutf_utf16_length_from_utf8(const char *input, size_t length);
size_t simdutf_utf32_length_from_utf8(const char *input, size_t length);
size_t simdutf_utf8_length_from_utf16(const char16_t *input, size_t length);
size_t simdutf_utf8_length_from_utf32(const char32_t *input, size_t length);
126simdutf_utf8_length_from_utf16_with_replacement(
const char16_t *input,
/**
 * Endianness-specific UTF-16 -> UTF-8 length helpers
 * (declarations only; implemented elsewhere).
 *
 * @param input  input buffer of char16_t units, passed as a read-only pointer.
 * @param length size of input -- presumably counted in char16_t units; confirm.
 * @return a size_t. NOTE(review): presumably the number of UTF-8 bytes the
 *         converted input would occupy -- inferred from the name; confirm.
 */
size_t simdutf_utf8_length_from_utf16le(const char16_t *input, size_t length);
size_t simdutf_utf8_length_from_utf16be(const char16_t *input, size_t length);
131simdutf_utf8_length_from_utf16le_with_replacement(
const char16_t *input,
134simdutf_utf8_length_from_utf16be_with_replacement(
const char16_t *input,
138size_t simdutf_convert_latin1_to_utf8(
const char *input,
size_t length,
/**
 * Latin-1 -> UTF-8 conversion with an explicit output size
 * (declaration only; implemented elsewhere).
 *
 * @param input    source buffer, passed as a read-only pointer.
 * @param length   size of input -- presumably a byte count; confirm.
 * @param output   destination buffer the function may write to.
 * @param utf8_len NOTE(review): presumably the capacity of output in bytes,
 *                 which is what makes this the "safe" variant -- confirm.
 * @return a size_t. NOTE(review): presumably the number of bytes written to
 *         output -- confirm against the implementation.
 */
size_t simdutf_convert_latin1_to_utf8_safe(const char *input, size_t length,
                                           char *output, size_t utf8_len);
142size_t simdutf_convert_latin1_to_utf16le(
const char *input,
size_t length,
144size_t simdutf_convert_latin1_to_utf16be(
const char *input,
size_t length,
146size_t simdutf_convert_latin1_to_utf16(
const char *input,
size_t length,
148size_t simdutf_convert_latin1_to_utf32(
const char *input,
size_t length,
151size_t simdutf_convert_utf8_to_latin1(
const char *input,
size_t length,
153size_t simdutf_convert_utf8_to_utf16le(
const char *input,
size_t length,
155size_t simdutf_convert_utf8_to_utf16be(
const char *input,
size_t length,
157size_t simdutf_convert_utf8_to_utf16(
const char *input,
size_t length,
160size_t simdutf_convert_utf8_to_utf32(
const char *input,
size_t length,
162simdutf_result simdutf_convert_utf8_to_latin1_with_errors(
const char *input,
165simdutf_result simdutf_convert_utf8_to_utf16_with_errors(
const char *input,
168simdutf_result simdutf_convert_utf8_to_utf16le_with_errors(
const char *input,
171simdutf_result simdutf_convert_utf8_to_utf16be_with_errors(
const char *input,
174simdutf_result simdutf_convert_utf8_to_utf32_with_errors(
const char *input,
179size_t simdutf_convert_valid_utf8_to_latin1(
const char *input,
size_t length,
181size_t simdutf_convert_valid_utf8_to_utf16le(
const char *input,
size_t length,
183size_t simdutf_convert_valid_utf8_to_utf16be(
const char *input,
size_t length,
185size_t simdutf_convert_valid_utf8_to_utf32(
const char *input,
size_t length,
189size_t simdutf_convert_utf16_to_utf8(
const char16_t *input,
size_t length,
191size_t simdutf_convert_utf16le_to_utf8(
const char16_t *input,
size_t length,
193size_t simdutf_convert_utf16be_to_utf8(
const char16_t *input,
size_t length,
/**
 * UTF-16 -> UTF-8 conversion with an explicit output size
 * (declaration only; implemented elsewhere).
 *
 * @param input    source buffer of char16_t units, passed as a read-only
 *                 pointer.
 * @param length   size of input -- presumably counted in char16_t units;
 *                 confirm.
 * @param output   destination buffer the function may write to.
 * @param utf8_len NOTE(review): presumably the capacity of output in bytes,
 *                 which is what makes this the "safe" variant -- confirm.
 * @return a size_t. NOTE(review): presumably the number of bytes written to
 *         output -- confirm against the implementation.
 */
size_t simdutf_convert_utf16_to_utf8_safe(const char16_t *input, size_t length,
                                          char *output, size_t utf8_len);
197size_t simdutf_convert_utf16_to_latin1(
const char16_t *input,
size_t length,
199size_t simdutf_convert_utf16le_to_latin1(
const char16_t *input,
size_t length,
201size_t simdutf_convert_utf16be_to_latin1(
const char16_t *input,
size_t length,
204simdutf_convert_utf16_to_latin1_with_errors(
const char16_t *input,
205 size_t length,
char *output);
207simdutf_convert_utf16le_to_latin1_with_errors(
const char16_t *input,
208 size_t length,
char *output);
210simdutf_convert_utf16be_to_latin1_with_errors(
const char16_t *input,
211 size_t length,
char *output);
213simdutf_result simdutf_convert_utf16_to_utf8_with_errors(
const char16_t *input,
217simdutf_convert_utf16le_to_utf8_with_errors(
const char16_t *input,
218 size_t length,
char *output);
220simdutf_convert_utf16be_to_utf8_with_errors(
const char16_t *input,
221 size_t length,
char *output);
223size_t simdutf_convert_valid_utf16_to_utf8(
const char16_t *input,
size_t length,
/**
 * "convert_valid" UTF-16 -> Latin-1 / UTF-8 conversions
 * (declarations only; implemented elsewhere).
 *
 * All five share one signature:
 * @param input  source buffer of char16_t units, passed as a read-only
 *               pointer.
 * @param length size of input -- presumably counted in char16_t units;
 *               confirm.
 * @param output destination buffer the function may write to. No capacity is
 *               passed, so the caller is responsible for sizing it.
 * @return a size_t. NOTE(review): presumably the number of bytes written to
 *         output -- confirm against the implementation.
 *
 * NOTE(review): the "valid" in the names suggests the input is assumed to be
 * well-formed UTF-16 and is not re-validated -- confirm before passing
 * untrusted data.
 */
size_t simdutf_convert_valid_utf16_to_latin1(const char16_t *input,
                                             size_t length, char *output);
size_t simdutf_convert_valid_utf16le_to_latin1(const char16_t *input,
                                               size_t length, char *output);
size_t simdutf_convert_valid_utf16be_to_latin1(const char16_t *input,
                                               size_t length, char *output);
size_t simdutf_convert_valid_utf16le_to_utf8(const char16_t *input,
                                             size_t length, char *output);
size_t simdutf_convert_valid_utf16be_to_utf8(const char16_t *input,
                                             size_t length, char *output);
238size_t simdutf_convert_utf16_to_utf32(
const char16_t *input,
size_t length,
240size_t simdutf_convert_utf16le_to_utf32(
const char16_t *input,
size_t length,
242size_t simdutf_convert_utf16be_to_utf32(
const char16_t *input,
size_t length,
244simdutf_result simdutf_convert_utf16_to_utf32_with_errors(
const char16_t *input,
248simdutf_convert_utf16le_to_utf32_with_errors(
const char16_t *input,
249 size_t length,
char32_t *output);
251simdutf_convert_utf16be_to_utf32_with_errors(
const char16_t *input,
252 size_t length,
char32_t *output);
/**
 * "convert_valid" UTF-16 -> UTF-32 conversions
 * (declarations only; implemented elsewhere).
 *
 * All three share one signature:
 * @param input  source buffer of char16_t units, passed as a read-only
 *               pointer.
 * @param length size of input -- presumably counted in char16_t units;
 *               confirm.
 * @param output destination buffer of char32_t units the function may write
 *               to. No capacity is passed, so the caller is responsible for
 *               sizing it.
 * @return a size_t. NOTE(review): presumably the number of char32_t units
 *         written to output -- confirm against the implementation.
 *
 * NOTE(review): the "valid" in the names suggests the input is assumed to be
 * well-formed UTF-16 and is not re-validated -- confirm before passing
 * untrusted data.
 */
size_t simdutf_convert_valid_utf16_to_utf32(const char16_t *input,
                                            size_t length, char32_t *output);
size_t simdutf_convert_valid_utf16le_to_utf32(const char16_t *input,
                                              size_t length, char32_t *output);
size_t simdutf_convert_valid_utf16be_to_utf32(const char16_t *input,
                                              size_t length, char32_t *output);
263size_t simdutf_convert_utf32_to_utf8(
const char32_t *input,
size_t length,
265simdutf_result simdutf_convert_utf32_to_utf8_with_errors(
const char32_t *input,
268size_t simdutf_convert_valid_utf32_to_utf8(
const char32_t *input,
size_t length,
271size_t simdutf_convert_utf32_to_utf16(
const char32_t *input,
size_t length,
273size_t simdutf_convert_utf32_to_utf16le(
const char32_t *input,
size_t length,
275size_t simdutf_convert_utf32_to_utf16be(
const char32_t *input,
size_t length,
278simdutf_convert_utf32_to_latin1_with_errors(
const char32_t *input,
279 size_t length,
char *output);
/**
 * Character search over a pointer range (declaration only; implemented
 * elsewhere).
 *
 * @param start     first position of the range, passed as a read-only pointer.
 * @param end       end of the range -- presumably one past the last element
 *                  (half-open range); confirm.
 * @param character value to look for.
 * @return a read-only pointer. NOTE(review): presumably the first position in
 *         the range holding character, and end when there is none -- inferred
 *         from the signature; confirm against the implementation.
 */
const char *simdutf_find(const char *start, const char *end, char character);
283const char16_t *simdutf_find_utf16(
const char16_t *start,
const char16_t *end,
/**
 * Option values accepted by the base64 routines declared in this header.
 *
 * Numeric relationships visible in the values below:
 *   3  == 1 | 2  (URL_WITH_PADDING  == URL | DEFAULT_NO_PADDING)
 *   5  == 1 | 4  (URL_ACCEPT_GARBAGE == URL | DEFAULT_ACCEPT_GARBAGE)
 *   12 == 8 | 4  (DEFAULT_OR_URL_ACCEPT_GARBAGE ==
 *                 DEFAULT_OR_URL | DEFAULT_ACCEPT_GARBAGE)
 *
 * NOTE(review): this suggests bit 1 selects the URL alphabet, bit 2 flips the
 * alphabet's default padding behaviour, bit 4 tolerates invalid characters
 * and bit 8 accepts either alphabet -- inferred from names and values only;
 * confirm against the implementation.
 */
typedef enum simdutf_base64_options {
  SIMDUTF_BASE64_DEFAULT = 0,
  SIMDUTF_BASE64_URL = 1,
  SIMDUTF_BASE64_DEFAULT_NO_PADDING = 2,
  SIMDUTF_BASE64_URL_WITH_PADDING = 3,
  SIMDUTF_BASE64_DEFAULT_ACCEPT_GARBAGE = 4,
  SIMDUTF_BASE64_URL_ACCEPT_GARBAGE = 5,
  SIMDUTF_BASE64_DEFAULT_OR_URL = 8,
  SIMDUTF_BASE64_DEFAULT_OR_URL_ACCEPT_GARBAGE = 12
} simdutf_base64_options;
/**
 * Policies for the final chunk of base64 input, passed to the decoding
 * routines that take a last_chunk_options argument.
 *
 * The values are consecutive (0..3) and mutually exclusive; they are not bit
 * flags.
 * NOTE(review): the exact meaning of each policy is inferred from its name
 * only -- confirm against the implementation before relying on it.
 */
typedef enum simdutf_last_chunk_handling_options {
  SIMDUTF_LAST_CHUNK_LOOSE = 0,
  SIMDUTF_LAST_CHUNK_STRICT = 1,
  SIMDUTF_LAST_CHUNK_STOP_BEFORE_PARTIAL = 2,
  SIMDUTF_LAST_CHUNK_ONLY_FULL_CHUNKS = 3
} simdutf_last_chunk_handling_options;
306size_t simdutf_maximal_binary_length_from_base64(
const char *input,
308size_t simdutf_maximal_binary_length_from_base64_utf16(
const char16_t *input,
313 const char *input,
size_t length,
char *output,
314 simdutf_base64_options options,
315 simdutf_last_chunk_handling_options last_chunk_options);
317 const char16_t *input,
size_t length,
char *output,
318 simdutf_base64_options options,
319 simdutf_last_chunk_handling_options last_chunk_options);
321size_t simdutf_base64_length_from_binary(
size_t length,
322 simdutf_base64_options options);
323size_t simdutf_base64_length_from_binary_with_lines(
324 size_t length, simdutf_base64_options options,
size_t line_length);
326size_t simdutf_binary_to_base64(
const char *input,
size_t length,
char *output,
327 simdutf_base64_options options);
328size_t simdutf_binary_to_base64_with_lines(
const char *input,
size_t length,
329 char *output,
size_t line_length,
330 simdutf_base64_options options);
334 const char *input,
size_t length,
char *output,
size_t *outlen,
335 simdutf_base64_options options,
336 simdutf_last_chunk_handling_options last_chunk_options,
337 bool decode_up_to_bad_char);
339 const char16_t *input,
size_t length,
char *output,
size_t *outlen,
340 simdutf_base64_options options,
341 simdutf_last_chunk_handling_options last_chunk_options,
342 bool decode_up_to_bad_char);
346 const char *input,
size_t length,
char *output,
347 simdutf_base64_options options,
348 simdutf_last_chunk_handling_options last_chunk_options);
350 const char16_t *input,
size_t length,
char *output,
351 simdutf_base64_options options,
352 simdutf_last_chunk_handling_options last_chunk_options);
355bool simdutf_base64_valid(
char input, simdutf_base64_options options);
356bool simdutf_base64_valid_utf16(
char16_t input, simdutf_base64_options options);