![]() |
simdutf 6.4.0
Unicode at GB/s.
|
An implementation of simdutf for a particular CPU architecture. More...
#include <implementation.h>
Public Member Functions | |
virtual std::string | name () const |
The name of this implementation. | |
virtual std::string | description () const |
The description of this implementation. | |
bool | supported_by_runtime_system () const |
Whether the instruction sets this implementation is compiled against match the current CPU. | |
virtual encoding_type | autodetect_encoding (const char *input, size_t length) const noexcept |
This function will try to detect the encoding. | |
virtual int | detect_encodings (const char *input, size_t length) const noexcept=0 |
This function will try to detect the possible encodings in one pass. | |
virtual simdutf_warn_unused bool | validate_utf8 (const char *buf, size_t len) const noexcept=0 |
Validate the UTF-8 string. | |
virtual simdutf_warn_unused result | validate_utf8_with_errors (const char *buf, size_t len) const noexcept=0 |
Validate the UTF-8 string and stop on errors. | |
virtual simdutf_warn_unused bool | validate_ascii (const char *buf, size_t len) const noexcept=0 |
Validate the ASCII string. | |
virtual simdutf_warn_unused result | validate_ascii_with_errors (const char *buf, size_t len) const noexcept=0 |
Validate the ASCII string and stop on error. | |
virtual simdutf_warn_unused bool | validate_utf16le (const char16_t *buf, size_t len) const noexcept=0 |
Validate the UTF-16LE string. This function may be best when you expect the input to be almost always valid. | |
virtual simdutf_warn_unused bool | validate_utf16be (const char16_t *buf, size_t len) const noexcept=0 |
Validate the UTF-16BE string. | |
virtual simdutf_warn_unused result | validate_utf16le_with_errors (const char16_t *buf, size_t len) const noexcept=0 |
Validate the UTF-16LE string and stop on error. | |
virtual simdutf_warn_unused result | validate_utf16be_with_errors (const char16_t *buf, size_t len) const noexcept=0 |
Validate the UTF-16BE string and stop on error. | |
virtual simdutf_warn_unused bool | validate_utf32 (const char32_t *buf, size_t len) const noexcept=0 |
Validate the UTF-32 string. | |
virtual simdutf_warn_unused result | validate_utf32_with_errors (const char32_t *buf, size_t len) const noexcept=0 |
Validate the UTF-32 string and stop on error. | |
virtual simdutf_warn_unused size_t | convert_latin1_to_utf8 (const char *input, size_t length, char *utf8_output) const noexcept=0 |
Convert Latin1 string into UTF-8 string. | |
virtual simdutf_warn_unused size_t | convert_latin1_to_utf16le (const char *input, size_t length, char16_t *utf16_output) const noexcept=0 |
Convert Latin1 string into UTF-16LE string. | |
virtual simdutf_warn_unused size_t | convert_latin1_to_utf16be (const char *input, size_t length, char16_t *utf16_output) const noexcept=0 |
Convert Latin1 string into UTF-16BE string. | |
virtual simdutf_warn_unused size_t | convert_latin1_to_utf32 (const char *input, size_t length, char32_t *utf32_buffer) const noexcept=0 |
Convert Latin1 string into UTF-32 string. | |
virtual simdutf_warn_unused size_t | convert_utf8_to_latin1 (const char *input, size_t length, char *latin1_output) const noexcept=0 |
Convert possibly broken UTF-8 string into Latin1 string. | |
virtual simdutf_warn_unused result | convert_utf8_to_latin1_with_errors (const char *input, size_t length, char *latin1_output) const noexcept=0 |
Convert possibly broken UTF-8 string into Latin1 string and stop on error. | |
virtual simdutf_warn_unused size_t | convert_valid_utf8_to_latin1 (const char *input, size_t length, char *latin1_output) const noexcept=0 |
Convert valid UTF-8 string into Latin1 string. | |
virtual simdutf_warn_unused size_t | convert_utf8_to_utf16le (const char *input, size_t length, char16_t *utf16_output) const noexcept=0 |
Convert possibly broken UTF-8 string into UTF-16LE string. | |
virtual simdutf_warn_unused size_t | convert_utf8_to_utf16be (const char *input, size_t length, char16_t *utf16_output) const noexcept=0 |
Convert possibly broken UTF-8 string into UTF-16BE string. | |
virtual simdutf_warn_unused result | convert_utf8_to_utf16le_with_errors (const char *input, size_t length, char16_t *utf16_output) const noexcept=0 |
Convert possibly broken UTF-8 string into UTF-16LE string and stop on error. | |
virtual simdutf_warn_unused result | convert_utf8_to_utf16be_with_errors (const char *input, size_t length, char16_t *utf16_output) const noexcept=0 |
Convert possibly broken UTF-8 string into UTF-16BE string and stop on error. | |
virtual simdutf_warn_unused size_t | convert_utf8_to_utf32 (const char *input, size_t length, char32_t *utf32_output) const noexcept=0 |
Convert possibly broken UTF-8 string into UTF-32 string. | |
virtual simdutf_warn_unused result | convert_utf8_to_utf32_with_errors (const char *input, size_t length, char32_t *utf32_output) const noexcept=0 |
Convert possibly broken UTF-8 string into UTF-32 string and stop on error. | |
virtual simdutf_warn_unused size_t | convert_valid_utf8_to_utf16le (const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0 |
Convert valid UTF-8 string into UTF-16LE string. | |
virtual simdutf_warn_unused size_t | convert_valid_utf8_to_utf16be (const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0 |
Convert valid UTF-8 string into UTF-16BE string. | |
virtual simdutf_warn_unused size_t | convert_valid_utf8_to_utf32 (const char *input, size_t length, char32_t *utf32_buffer) const noexcept=0 |
Convert valid UTF-8 string into UTF-32 string. | |
virtual simdutf_warn_unused size_t | utf16_length_from_utf8 (const char *input, size_t length) const noexcept=0 |
Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format. | |
virtual simdutf_warn_unused size_t | utf32_length_from_utf8 (const char *input, size_t length) const noexcept=0 |
Compute the number of 4-byte code units that this UTF-8 string would require in UTF-32 format. | |
virtual simdutf_warn_unused size_t | convert_utf16le_to_latin1 (const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0 |
Convert possibly broken UTF-16LE string into Latin1 string. | |
virtual simdutf_warn_unused size_t | convert_utf16be_to_latin1 (const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0 |
Convert possibly broken UTF-16BE string into Latin1 string. | |
virtual simdutf_warn_unused result | convert_utf16le_to_latin1_with_errors (const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0 |
Convert possibly broken UTF-16LE string into Latin1 string. | |
virtual simdutf_warn_unused result | convert_utf16be_to_latin1_with_errors (const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0 |
Convert possibly broken UTF-16BE string into Latin1 string. | |
virtual simdutf_warn_unused size_t | convert_valid_utf16le_to_latin1 (const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0 |
Convert valid UTF-16LE string into Latin1 string. | |
virtual simdutf_warn_unused size_t | convert_valid_utf16be_to_latin1 (const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0 |
Convert valid UTF-16BE string into Latin1 string. | |
virtual simdutf_warn_unused size_t | convert_utf16le_to_utf8 (const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0 |
Convert possibly broken UTF-16LE string into UTF-8 string. | |
virtual simdutf_warn_unused size_t | convert_utf16be_to_utf8 (const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0 |
Convert possibly broken UTF-16BE string into UTF-8 string. | |
virtual simdutf_warn_unused result | convert_utf16le_to_utf8_with_errors (const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0 |
Convert possibly broken UTF-16LE string into UTF-8 string and stop on error. | |
virtual simdutf_warn_unused result | convert_utf16be_to_utf8_with_errors (const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0 |
Convert possibly broken UTF-16BE string into UTF-8 string and stop on error. | |
virtual simdutf_warn_unused size_t | convert_valid_utf16le_to_utf8 (const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0 |
Convert valid UTF-16LE string into UTF-8 string. | |
virtual simdutf_warn_unused size_t | convert_valid_utf16be_to_utf8 (const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0 |
Convert valid UTF-16BE string into UTF-8 string. | |
virtual simdutf_warn_unused size_t | convert_utf16le_to_utf32 (const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0 |
Convert possibly broken UTF-16LE string into UTF-32 string. | |
virtual simdutf_warn_unused size_t | convert_utf16be_to_utf32 (const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0 |
Convert possibly broken UTF-16BE string into UTF-32 string. | |
virtual simdutf_warn_unused result | convert_utf16le_to_utf32_with_errors (const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0 |
Convert possibly broken UTF-16LE string into UTF-32 string and stop on error. | |
virtual simdutf_warn_unused result | convert_utf16be_to_utf32_with_errors (const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0 |
Convert possibly broken UTF-16BE string into UTF-32 string and stop on error. | |
virtual simdutf_warn_unused size_t | convert_valid_utf16le_to_utf32 (const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0 |
Convert valid UTF-16LE string into UTF-32 string. | |
virtual simdutf_warn_unused size_t | convert_valid_utf16be_to_utf32 (const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0 |
Convert valid UTF-16BE string into UTF-32 string. | |
virtual simdutf_warn_unused size_t | utf8_length_from_utf16le (const char16_t *input, size_t length) const noexcept=0 |
Compute the number of bytes that this UTF-16LE string would require in UTF-8 format. | |
virtual simdutf_warn_unused size_t | utf8_length_from_utf16be (const char16_t *input, size_t length) const noexcept=0 |
Compute the number of bytes that this UTF-16BE string would require in UTF-8 format. | |
virtual simdutf_warn_unused size_t | convert_utf32_to_latin1 (const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0 |
Convert possibly broken UTF-32 string into Latin1 string. | |
virtual simdutf_warn_unused result | convert_utf32_to_latin1_with_errors (const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0 |
Convert possibly broken UTF-32 string into Latin1 string and stop on error. | |
virtual simdutf_warn_unused size_t | convert_valid_utf32_to_latin1 (const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0 |
Convert valid UTF-32 string into Latin1 string. | |
virtual simdutf_warn_unused size_t | convert_utf32_to_utf8 (const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0 |
Convert possibly broken UTF-32 string into UTF-8 string. | |
virtual simdutf_warn_unused result | convert_utf32_to_utf8_with_errors (const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0 |
Convert possibly broken UTF-32 string into UTF-8 string and stop on error. | |
virtual simdutf_warn_unused size_t | convert_valid_utf32_to_utf8 (const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0 |
Convert valid UTF-32 string into UTF-8 string. | |
virtual simdutf_warn_unused size_t | utf16_length_from_latin1 (size_t length) const noexcept |
Return the number of 2-byte code units that this Latin1 string would require in UTF-16 format. | |
virtual simdutf_warn_unused size_t | convert_utf32_to_utf16le (const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0 |
Convert possibly broken UTF-32 string into UTF-16LE string. | |
virtual simdutf_warn_unused size_t | convert_utf32_to_utf16be (const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0 |
Convert possibly broken UTF-32 string into UTF-16BE string. | |
virtual simdutf_warn_unused result | convert_utf32_to_utf16le_with_errors (const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0 |
Convert possibly broken UTF-32 string into UTF-16LE string and stop on error. | |
virtual simdutf_warn_unused result | convert_utf32_to_utf16be_with_errors (const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0 |
Convert possibly broken UTF-32 string into UTF-16BE string and stop on error. | |
virtual simdutf_warn_unused size_t | convert_valid_utf32_to_utf16le (const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0 |
Convert valid UTF-32 string into UTF-16LE string. | |
virtual simdutf_warn_unused size_t | convert_valid_utf32_to_utf16be (const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0 |
Convert valid UTF-32 string into UTF-16BE string. | |
virtual void | change_endianness_utf16 (const char16_t *input, size_t length, char16_t *output) const noexcept=0 |
Change the endianness of the input. | |
virtual simdutf_warn_unused size_t | utf8_length_from_latin1 (const char *input, size_t length) const noexcept=0 |
Return the number of bytes that this Latin1 string would require in UTF-8 format. | |
virtual simdutf_warn_unused size_t | utf8_length_from_utf32 (const char32_t *input, size_t length) const noexcept=0 |
Compute the number of bytes that this UTF-32 string would require in UTF-8 format. | |
virtual simdutf_warn_unused size_t | latin1_length_from_utf32 (size_t length) const noexcept |
Compute the number of bytes that this UTF-32 string would require in Latin1 format. | |
virtual simdutf_warn_unused size_t | latin1_length_from_utf8 (const char *input, size_t length) const noexcept=0 |
Compute the number of bytes that this UTF-8 string would require in Latin1 format. | |
virtual simdutf_warn_unused size_t | latin1_length_from_utf16 (size_t length) const noexcept |
Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format. | |
virtual simdutf_warn_unused size_t | utf16_length_from_utf32 (const char32_t *input, size_t length) const noexcept=0 |
Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format. | |
virtual simdutf_warn_unused size_t | utf32_length_from_latin1 (size_t length) const noexcept |
Return the number of 4-byte code units that this Latin1 string would require in UTF-32 format. | |
virtual simdutf_warn_unused size_t | utf32_length_from_utf16le (const char16_t *input, size_t length) const noexcept=0 |
Compute the number of 4-byte code units that this UTF-16LE string would require in UTF-32 format. | |
virtual simdutf_warn_unused size_t | utf32_length_from_utf16be (const char16_t *input, size_t length) const noexcept=0 |
Compute the number of 4-byte code units that this UTF-16BE string would require in UTF-32 format. | |
virtual simdutf_warn_unused size_t | count_utf16le (const char16_t *input, size_t length) const noexcept=0 |
Count the number of code points (characters) in the string assuming that it is valid. | |
virtual simdutf_warn_unused size_t | count_utf16be (const char16_t *input, size_t length) const noexcept=0 |
Count the number of code points (characters) in the string assuming that it is valid. | |
virtual simdutf_warn_unused size_t | count_utf8 (const char *input, size_t length) const noexcept=0 |
Count the number of code points (characters) in the string assuming that it is valid. | |
simdutf_warn_unused size_t | maximal_binary_length_from_base64 (const char *input, size_t length) const noexcept |
Provide the maximal binary length in bytes given the base64 input. | |
simdutf_warn_unused size_t | maximal_binary_length_from_base64 (const char16_t *input, size_t length) const noexcept |
Provide the maximal binary length in bytes given the base64 input. | |
virtual simdutf_warn_unused result | base64_to_binary (const char *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0 |
Convert a base64 input to a binary output. | |
virtual simdutf_warn_unused full_result | base64_to_binary_details (const char *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0 |
Convert a base64 input to a binary output while returning more details than base64_to_binary. | |
virtual simdutf_warn_unused result | base64_to_binary (const char16_t *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0 |
Convert a base64 input to a binary output. | |
virtual simdutf_warn_unused full_result | base64_to_binary_details (const char16_t *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0 |
Convert a base64 input to a binary output while returning more details than base64_to_binary. | |
simdutf_warn_unused size_t | base64_length_from_binary (size_t length, base64_options options=base64_default) const noexcept |
Provide the base64 length in bytes given the length of a binary input. | |
virtual size_t | binary_to_base64 (const char *input, size_t length, char *output, base64_options options=base64_default) const noexcept=0 |
Convert a binary input to a base64 output. | |
An implementation of simdutf for a particular CPU architecture.
Also used to maintain the currently active implementation. The active implementation is automatically initialized on first use to the most advanced implementation supported by the host.
Definition at line 3127 of file implementation.h.
|
virtualnoexcept |
This function will try to detect the encoding.
input | the string to identify |
length | the length of the string in bytes. |
|
noexcept |
Provide the base64 length in bytes given the length of a binary input.
length | the length of the input in bytes |
options | the base64 options to use, can be base64_default or base64_url, is base64_default by default. |
|
pure virtualnoexcept |
Convert a base64 input to a binary output.
This function follows the WHATWG forgiving-base64 format, which means that it will ignore any ASCII spaces in the input. You may provide a padded input (with one or two equal signs at the end) or an unpadded input (without any equal signs at the end).
See https://infra.spec.whatwg.org/#forgiving-base64-decode
This function will fail in case of invalid input. When last_chunk_options = loose, there are two possible reasons for failure: the input contains a number of base64 characters that when divided by 4, leaves a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character that is not a valid base64 character (INVALID_BASE64_CHARACTER).
You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long. If you fail to provide that much space, the function may cause a buffer overflow.
input | the base64 string to process |
length | the length of the string in bytes |
output | the pointer to a buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long). |
options | the base64 options to use, can be base64_default or base64_url, is base64_default by default. |
|
pure virtualnoexcept |
Convert a base64 input to a binary output.
This function follows the WHATWG forgiving-base64 format, which means that it will ignore any ASCII spaces in the input. You may provide a padded input (with one or two equal signs at the end) or an unpadded input (without any equal signs at the end).
See https://infra.spec.whatwg.org/#forgiving-base64-decode
This function will fail in case of invalid input. When last_chunk_options = loose, there are two possible reasons for failure: the input contains a number of base64 characters that when divided by 4, leaves a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character that is not a valid base64 character (INVALID_BASE64_CHARACTER).
You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long. If you fail to provide that much space, the function may cause a buffer overflow.
input | the base64 string to process, in ASCII stored as 16-bit units |
length | the length of the string in 16-bit units |
output | the pointer to a buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long). |
options | the base64 options to use, can be base64_default or base64_url, is base64_default by default. |
|
pure virtualnoexcept |
Convert a base64 input to a binary output while returning more details than base64_to_binary.
This function follows the WHATWG forgiving-base64 format, which means that it will ignore any ASCII spaces in the input. You may provide a padded input (with one or two equal signs at the end) or an unpadded input (without any equal signs at the end).
See https://infra.spec.whatwg.org/#forgiving-base64-decode
This function will fail in case of invalid input. When last_chunk_options = loose, there are two possible reasons for failure: the input contains a number of base64 characters that when divided by 4, leaves a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character that is not a valid base64 character (INVALID_BASE64_CHARACTER).
You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long. If you fail to provide that much space, the function may cause a buffer overflow.
input | the base64 string to process |
length | the length of the string in bytes |
output | the pointer to a buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long). |
options | the base64 options to use, can be base64_default or base64_url, is base64_default by default. |
|
pure virtualnoexcept |
Convert a base64 input to a binary output while returning more details than base64_to_binary.
This function follows the WHATWG forgiving-base64 format, which means that it will ignore any ASCII spaces in the input. You may provide a padded input (with one or two equal signs at the end) or an unpadded input (without any equal signs at the end).
See https://infra.spec.whatwg.org/#forgiving-base64-decode
This function will fail in case of invalid input. When last_chunk_options = loose, there are two possible reasons for failure: the input contains a number of base64 characters that when divided by 4, leaves a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character that is not a valid base64 character (INVALID_BASE64_CHARACTER).
You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long. If you fail to provide that much space, the function may cause a buffer overflow.
input | the base64 string to process |
length | the length of the string in bytes |
output | the pointer to a buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long). |
options | the base64 options to use, can be base64_default or base64_url, is base64_default by default. |
|
pure virtualnoexcept |
Convert a binary input to a base64 output.
The default option (simdutf::base64_default) uses the characters + and / as part of its alphabet. Further, it adds padding (=) at the end of the output to ensure that the output length is a multiple of four.
The URL option (simdutf::base64_url) uses the characters - and _ as part of its alphabet. No padding is added at the end of the output.
This function always succeeds.
input | the binary to process |
length | the length of the input in bytes |
output | the pointer to a buffer that can hold the conversion result (should be at least base64_length_from_binary(length) bytes long) |
options | the base64 options to use, can be base64_default or base64_url, is base64_default by default. |
|
pure virtualnoexcept |
Change the endianness of the input.
Can be used to go from UTF-16LE to UTF-16BE or from UTF-16BE to UTF-16LE.
This function does not validate the input.
This function is not BOM-aware.
input | the UTF-16 string to process |
length | the length of the string in 2-byte code units (char16_t) |
output | the pointer to a buffer that can hold the conversion result |
|
pure virtualnoexcept |
Convert Latin1 string into UTF-16BE string.
This function is suitable to work with inputs from untrusted sources.
input | the Latin1 string to convert |
length | the length of the string in bytes |
utf16_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert Latin1 string into UTF-16LE string.
This function is suitable to work with inputs from untrusted sources.
input | the Latin1 string to convert |
length | the length of the string in bytes |
utf16_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert Latin1 string into UTF-32 string.
This function is suitable to work with inputs from untrusted sources.
input | the Latin1 string to convert |
length | the length of the string in bytes |
utf32_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert Latin1 string into UTF-8 string.
This function is suitable to work with inputs from untrusted sources.
input | the Latin1 string to convert |
length | the length of the string in bytes |
utf8_output | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-16BE string into Latin1 string.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
input | the UTF-16BE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
latin1_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-16BE string into Latin1 string.
If the string cannot be represented as Latin1, an error is returned.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources. This function is not BOM-aware.
input | the UTF-16BE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
latin1_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-16BE string into UTF-32 string.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
input | the UTF-16BE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
utf32_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-16BE string into UTF-32 string and stop on error.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
input | the UTF-16BE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
utf32_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-16BE string into UTF-8 string.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
input | the UTF-16BE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
utf8_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-16BE string into UTF-8 string and stop on error.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
input | the UTF-16BE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
utf8_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-16LE string into Latin1 string.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
input | the UTF-16LE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
latin1_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-16LE string into Latin1 string.
If the string cannot be represented as Latin1, an error is returned.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources. This function is not BOM-aware.
input | the UTF-16LE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
latin1_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-16LE string into UTF-32 string.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
input | the UTF-16LE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
utf32_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-16LE string into UTF-32 string and stop on error.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
input | the UTF-16LE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
utf32_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-16LE string into UTF-8 string.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
input | the UTF-16LE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
utf8_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-16LE string into UTF-8 string and stop on error.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
input | the UTF-16LE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
utf8_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-32 string into Latin1 string.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
input | the UTF-32 string to convert |
length | the length of the string in 4-byte code units (char32_t) |
latin1_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-32 string into Latin1 string and stop on error.
If the string cannot be represented as Latin1, an error is returned.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
input | the UTF-32 string to convert |
length | the length of the string in 4-byte code units (char32_t) |
latin1_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-32 string into UTF-16BE string.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
input | the UTF-32 string to convert |
length | the length of the string in 4-byte code units (char32_t) |
utf16_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-32 string into UTF-16BE string and stop on error.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
input | the UTF-32 string to convert |
length | the length of the string in 4-byte code units (char32_t) |
utf16_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-32 string into UTF-16LE string.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
input | the UTF-32 string to convert |
length | the length of the string in 4-byte code units (char32_t) |
utf16_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-32 string into UTF-16LE string and stop on error.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
input | the UTF-32 string to convert |
length | the length of the string in 4-byte code units (char32_t) |
utf16_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-32 string into UTF-8 string.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
input | the UTF-32 string to convert |
length | the length of the string in 4-byte code units (char32_t) |
utf8_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-32 string into UTF-8 string and stop on error.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
This function is not BOM-aware.
input | the UTF-32 string to convert |
length | the length of the string in 4-byte code units (char32_t) |
utf8_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-8 string into Latin1 string.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
input | the UTF-8 string to convert |
length | the length of the string in bytes |
latin1_output | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-8 string into Latin1 string and stop on error.
If the string cannot be represented as Latin1, an error code is returned.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
input | the UTF-8 string to convert |
length | the length of the string in bytes |
latin1_output | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-8 string into UTF-16BE string.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
input | the UTF-8 string to convert |
length | the length of the string in bytes |
utf16_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-8 string into UTF-16BE string and stop on error.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
input | the UTF-8 string to convert |
length | the length of the string in bytes |
utf16_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-8 string into UTF-16LE string.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
input | the UTF-8 string to convert |
length | the length of the string in bytes |
utf16_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-8 string into UTF-16LE string and stop on error.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
input | the UTF-8 string to convert |
length | the length of the string in bytes |
utf16_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-8 string into UTF-32 string.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
input | the UTF-8 string to convert |
length | the length of the string in bytes |
utf32_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert possibly broken UTF-8 string into UTF-32 string and stop on error.
During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.
input | the UTF-8 string to convert |
length | the length of the string in bytes |
utf32_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert valid UTF-16BE string into Latin1 string.
This function assumes that the input string is valid UTF-16BE and that it can be represented as Latin1. If you violate this assumption, the result is implementation defined and may include system-dependent behavior such as crashes.
This function is for expert users only and not part of our public API. Use convert_utf16be_to_latin1 instead.
This function is not BOM-aware.
input | the UTF-16BE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
latin1_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert valid UTF-16BE string into UTF-32 string.
This function assumes that the input string is valid UTF-16BE.
This function is not BOM-aware.
input | the UTF-16BE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
utf32_buffer | the pointer to a buffer that can hold the conversion result |
|
pure virtualnoexcept |
Convert valid UTF-16BE string into UTF-8 string.
This function assumes that the input string is valid UTF-16BE.
This function is not BOM-aware.
input | the UTF-16BE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
utf8_buffer | the pointer to a buffer that can hold the conversion result |
|
pure virtualnoexcept |
Convert valid UTF-16LE string into Latin1 string.
This function assumes that the input string is valid UTF-16LE and that it can be represented as Latin1. If you violate this assumption, the result is implementation defined and may include system-dependent behavior such as crashes.
This function is for expert users only and not part of our public API. Use convert_utf16le_to_latin1 instead.
This function is not BOM-aware.
input | the UTF-16LE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
latin1_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert valid UTF-16LE string into UTF-32 string.
This function assumes that the input string is valid UTF-16LE.
This function is not BOM-aware.
input | the UTF-16LE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
utf32_buffer | the pointer to a buffer that can hold the conversion result |
|
pure virtualnoexcept |
Convert valid UTF-16LE string into UTF-8 string.
This function assumes that the input string is valid UTF-16LE.
This function is not BOM-aware.
input | the UTF-16LE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
utf8_buffer | the pointer to a buffer that can hold the conversion result |
|
pure virtualnoexcept |
Convert valid UTF-32 string into Latin1 string.
This function assumes that the input string is valid UTF-32 and can be represented as Latin1. If you violate this assumption, the result is implementation defined and may include system-dependent behavior such as crashes.
This function is for expert users only and not part of our public API. Use convert_utf32_to_latin1 instead.
This function is not BOM-aware.
input | the UTF-32 string to convert |
length | the length of the string in 4-byte code units (char32_t) |
latin1_buffer | the pointer to a buffer that can hold the conversion result |
|
pure virtualnoexcept |
Convert valid UTF-32 string into UTF-16BE string.
This function assumes that the input string is valid UTF-32.
This function is not BOM-aware.
input | the UTF-32 string to convert |
length | the length of the string in 4-byte code units (char32_t) |
utf16_buffer | the pointer to a buffer that can hold the conversion result |
|
pure virtualnoexcept |
Convert valid UTF-32 string into UTF-16LE string.
This function assumes that the input string is valid UTF-32.
This function is not BOM-aware.
input | the UTF-32 string to convert |
length | the length of the string in 4-byte code units (char32_t) |
utf16_buffer | the pointer to a buffer that can hold the conversion result |
|
pure virtualnoexcept |
Convert valid UTF-32 string into UTF-8 string.
This function assumes that the input string is valid UTF-32.
This function is not BOM-aware.
input | the UTF-32 string to convert |
length | the length of the string in 4-byte code units (char32_t) |
utf8_buffer | the pointer to a buffer that can hold the conversion result |
|
pure virtualnoexcept |
Convert valid UTF-8 string into Latin1 string.
This function assumes that the input string is valid UTF-8 and that it can be represented as Latin1. If you violate this assumption, the result is implementation defined and may include system-dependent behavior such as crashes.
This function is for expert users only and not part of our public API. Use convert_utf8_to_latin1 instead.
This function is not BOM-aware.
input | the UTF-8 string to convert |
length | the length of the string in bytes |
latin1_output | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert valid UTF-8 string into UTF-16BE string.
This function assumes that the input string is valid UTF-8.
input | the UTF-8 string to convert |
length | the length of the string in bytes |
utf16_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert valid UTF-8 string into UTF-16LE string.
This function assumes that the input string is valid UTF-8.
input | the UTF-8 string to convert |
length | the length of the string in bytes |
utf16_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Convert valid UTF-8 string into UTF-32 string.
This function assumes that the input string is valid UTF-8.
input | the UTF-8 string to convert |
length | the length of the string in bytes |
utf32_buffer | the pointer to buffer that can hold conversion result |
|
pure virtualnoexcept |
Count the number of code points (characters) in the string assuming that it is valid.
This function assumes that the input string is valid UTF-16BE. It is acceptable to pass invalid UTF-16 strings but in such cases the result is implementation defined.
This function is not BOM-aware.
input | the UTF-16BE string to process |
length | the length of the string in 2-byte code units (char16_t) |
|
pure virtualnoexcept |
Count the number of code points (characters) in the string assuming that it is valid.
This function assumes that the input string is valid UTF-16LE. It is acceptable to pass invalid UTF-16 strings but in such cases the result is implementation defined.
This function is not BOM-aware.
input | the UTF-16LE string to process |
length | the length of the string in 2-byte code units (char16_t) |
|
pure virtualnoexcept |
Count the number of code points (characters) in the string assuming that it is valid.
This function assumes that the input string is valid UTF-8. It is acceptable to pass invalid UTF-8 strings but in such cases the result is implementation defined.
input | the UTF-8 string to process |
length | the length of the string in bytes |
|
inlinevirtual |
The description of this implementation.
const implementation *impl = simdutf::active_implementation; cout << "simdutf is optimized for " << impl->name() << "(" <<
impl->description() << ")" << endl;
Definition at line 3149 of file implementation.h.
|
pure virtualnoexcept |
This function will try to detect the possible encodings in one pass.
input | the string to identify |
length | the length of the string in bytes. |
|
inlinevirtualnoexcept |
Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases the result is implementation defined.
This function is not BOM-aware.
input | the UTF-16LE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
Definition at line 4465 of file implementation.h.
|
inlinevirtualnoexcept |
Compute the number of bytes that this UTF-32 string would require in Latin1 format.
This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases the result is implementation defined.
length | the length of the string in 4-byte code units (char32_t) |
Definition at line 4427 of file implementation.h.
|
pure virtualnoexcept |
Compute the number of bytes that this UTF-8 string would require in Latin1 format.
This function does not validate the input. It is acceptable to pass invalid UTF-8 strings but in such cases the result is implementation defined.
input | the UTF-8 string to convert |
length | the length of the string in bytes |
|
noexcept |
Provide the maximal binary length in bytes given the base64 input.
In general, if the input contains ASCII spaces, the result will be less than the maximum length. It is acceptable to pass invalid base64 strings but in such cases the result is implementation defined.
input | the base64 input to process |
length | the length of the base64 input in bytes |
|
noexcept |
Provide the maximal binary length in bytes given the base64 input.
In general, if the input contains ASCII spaces, the result will be less than the maximum length. It is acceptable to pass invalid base64 strings but in such cases the result is implementation defined.
input | the base64 input to process, in ASCII stored as 16-bit units |
length | the length of the base64 input in 16-bit units |
|
inlinevirtual |
The name of this implementation.
const implementation *impl = simdutf::active_implementation; cout << "simdutf is optimized for " << impl->name() << "(" <<
impl->description() << ")" << endl;
Definition at line 3138 of file implementation.h.
bool simdutf::implementation::supported_by_runtime_system | ( | ) | const |
The instruction sets this implementation is compiled against and the current CPU match.
This function may poll the current CPU/system and should therefore not be called too often if performance is a concern.
|
inlinevirtualnoexcept |
Return the number of bytes that this UTF-16 string would require in Latin1 format.
input | the UTF-16 string to convert |
length | the length of the string in 2-byte code units (char16_t) |
Definition at line 4239 of file implementation.h.
|
pure virtualnoexcept |
Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases the result is implementation defined.
input | the UTF-32 string to convert |
length | the length of the string in 4-byte code units (char32_t) |
|
pure virtualnoexcept |
Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
This function does not validate the input. It is acceptable to pass invalid UTF-8 strings but in such cases the result is implementation defined.
input | the UTF-8 string to process |
length | the length of the string in bytes |
|
inlinevirtualnoexcept |
Return the number of bytes that this UTF-32 string would require in Latin1 format.
length | the length of the string in 4-byte code units (char32_t) |
Definition at line 4498 of file implementation.h.
|
pure virtualnoexcept |
Compute the number of 4-byte code units that this UTF-16BE string would require in UTF-32 format.
This function is equivalent to count_utf16be.
This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases the result is implementation defined.
This function is not BOM-aware.
input | the UTF-16BE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
|
pure virtualnoexcept |
Compute the number of 4-byte code units that this UTF-16LE string would require in UTF-32 format.
This function is equivalent to count_utf16le.
This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases the result is implementation defined.
This function is not BOM-aware.
input | the UTF-16LE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
|
pure virtualnoexcept |
Compute the number of 4-byte code units that this UTF-8 string would require in UTF-32 format.
This function is equivalent to count_utf8. It is acceptable to pass invalid UTF-8 strings but in such cases the result is implementation defined.
This function does not validate the input.
input | the UTF-8 string to process |
length | the length of the string in bytes |
|
pure virtualnoexcept |
Return the number of bytes that this Latin1 string would require in UTF-8 format.
input | the Latin1 string to convert |
length | the length of the string in bytes |
|
pure virtualnoexcept |
Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases the result is implementation defined.
This function is not BOM-aware.
input | the UTF-16BE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
|
pure virtualnoexcept |
Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases the result is implementation defined.
This function is not BOM-aware.
input | the UTF-16LE string to convert |
length | the length of the string in 2-byte code units (char16_t) |
|
pure virtualnoexcept |
Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases the result is implementation defined.
input | the UTF-32 string to convert |
length | the length of the string in 4-byte code units (char32_t) |
|
pure virtualnoexcept |
Validate the ASCII string.
Overridden by each implementation.
buf | the ASCII string to validate. |
len | the length of the string in bytes. |
|
pure virtualnoexcept |
Validate the ASCII string and stop on error.
Overridden by each implementation.
buf | the ASCII string to validate. |
len | the length of the string in bytes. |
|
pure virtualnoexcept |
Validate the UTF-16BE string.
This function may be best when you expect the input to be almost always valid. Otherwise, consider using validate_utf16be_with_errors.
Overridden by each implementation.
This function is not BOM-aware.
buf | the UTF-16BE string to validate. |
len | the length of the string in number of 2-byte code units (char16_t). |
|
pure virtualnoexcept |
Validate the UTF-16BE string and stop on error.
It might be faster than validate_utf16be when an error is expected to occur early.
Overridden by each implementation.
This function is not BOM-aware.
buf | the UTF-16BE string to validate. |
len | the length of the string in number of 2-byte code units (char16_t). |
|
pure virtualnoexcept |
Validate the UTF-16LE string. This function may be best when you expect the input to be almost always valid.
Otherwise, consider using validate_utf16le_with_errors.
Overridden by each implementation.
This function is not BOM-aware.
buf | the UTF-16LE string to validate. |
len | the length of the string in number of 2-byte code units (char16_t). |
|
pure virtualnoexcept |
Validate the UTF-16LE string and stop on error.
It might be faster than validate_utf16le when an error is expected to occur early.
Overridden by each implementation.
This function is not BOM-aware.
buf | the UTF-16LE string to validate. |
len | the length of the string in number of 2-byte code units (char16_t). |
|
pure virtualnoexcept |
Validate the UTF-32 string.
Overridden by each implementation.
This function is not BOM-aware.
buf | the UTF-32 string to validate. |
len | the length of the string in number of 4-byte code units (char32_t). |
|
pure virtualnoexcept |
Validate the UTF-32 string and stop on error.
Overridden by each implementation.
This function is not BOM-aware.
buf | the UTF-32 string to validate. |
len | the length of the string in number of 4-byte code units (char32_t). |
|
pure virtualnoexcept |
Validate the UTF-8 string.
Overridden by each implementation.
buf | the UTF-8 string to validate. |
len | the length of the string in bytes. |
|
pure virtualnoexcept |
Validate the UTF-8 string and stop on errors.
Overridden by each implementation.
buf | the UTF-8 string to validate. |
len | the length of the string in bytes. |