simdutf 6.4.0
Unicode at GB/s.
Loading...
Searching...
No Matches
simdutf::implementation Class Reference (abstract)

An implementation of simdutf for a particular CPU architecture. More...

#include <implementation.h>

Public Member Functions

virtual std::string name () const
 The name of this implementation.
 
virtual std::string description () const
 The description of this implementation.
 
bool supported_by_runtime_system () const
 Whether the instruction sets this implementation is compiled against are supported by the current CPU.
 
virtual encoding_type autodetect_encoding (const char *input, size_t length) const noexcept
 This function will try to detect the encoding.
 
virtual int detect_encodings (const char *input, size_t length) const noexcept=0
 This function will try to detect the possible encodings in one pass.
 
virtual simdutf_warn_unused bool validate_utf8 (const char *buf, size_t len) const noexcept=0
 Validate the UTF-8 string.
 
virtual simdutf_warn_unused result validate_utf8_with_errors (const char *buf, size_t len) const noexcept=0
 Validate the UTF-8 string and stop on errors.
 
virtual simdutf_warn_unused bool validate_ascii (const char *buf, size_t len) const noexcept=0
 Validate the ASCII string.
 
virtual simdutf_warn_unused result validate_ascii_with_errors (const char *buf, size_t len) const noexcept=0
 Validate the ASCII string and stop on error.
 
virtual simdutf_warn_unused bool validate_utf16le (const char16_t *buf, size_t len) const noexcept=0
 Validate the UTF-16LE string. This function may be best when you expect the input to be almost always valid.
 
virtual simdutf_warn_unused bool validate_utf16be (const char16_t *buf, size_t len) const noexcept=0
 Validate the UTF-16BE string.
 
virtual simdutf_warn_unused result validate_utf16le_with_errors (const char16_t *buf, size_t len) const noexcept=0
 Validate the UTF-16LE string and stop on error.
 
virtual simdutf_warn_unused result validate_utf16be_with_errors (const char16_t *buf, size_t len) const noexcept=0
 Validate the UTF-16BE string and stop on error.
 
virtual simdutf_warn_unused bool validate_utf32 (const char32_t *buf, size_t len) const noexcept=0
 Validate the UTF-32 string.
 
virtual simdutf_warn_unused result validate_utf32_with_errors (const char32_t *buf, size_t len) const noexcept=0
 Validate the UTF-32 string and stop on error.
 
virtual simdutf_warn_unused size_t convert_latin1_to_utf8 (const char *input, size_t length, char *utf8_output) const noexcept=0
 Convert Latin1 string into UTF-8 string.
 
virtual simdutf_warn_unused size_t convert_latin1_to_utf16le (const char *input, size_t length, char16_t *utf16_output) const noexcept=0
 Convert Latin1 string into UTF-16LE string.
 
virtual simdutf_warn_unused size_t convert_latin1_to_utf16be (const char *input, size_t length, char16_t *utf16_output) const noexcept=0
 Convert Latin1 string into UTF-16BE string.
 
virtual simdutf_warn_unused size_t convert_latin1_to_utf32 (const char *input, size_t length, char32_t *utf32_buffer) const noexcept=0
 Convert Latin1 string into UTF-32 string.
 
virtual simdutf_warn_unused size_t convert_utf8_to_latin1 (const char *input, size_t length, char *latin1_output) const noexcept=0
 Convert possibly broken UTF-8 string into latin1 string.
 
virtual simdutf_warn_unused result convert_utf8_to_latin1_with_errors (const char *input, size_t length, char *latin1_output) const noexcept=0
 Convert possibly broken UTF-8 string into latin1 string with errors.
 
virtual simdutf_warn_unused size_t convert_valid_utf8_to_latin1 (const char *input, size_t length, char *latin1_output) const noexcept=0
 Convert valid UTF-8 string into latin1 string.
 
virtual simdutf_warn_unused size_t convert_utf8_to_utf16le (const char *input, size_t length, char16_t *utf16_output) const noexcept=0
 Convert possibly broken UTF-8 string into UTF-16LE string.
 
virtual simdutf_warn_unused size_t convert_utf8_to_utf16be (const char *input, size_t length, char16_t *utf16_output) const noexcept=0
 Convert possibly broken UTF-8 string into UTF-16BE string.
 
virtual simdutf_warn_unused result convert_utf8_to_utf16le_with_errors (const char *input, size_t length, char16_t *utf16_output) const noexcept=0
 Convert possibly broken UTF-8 string into UTF-16LE string and stop on error.
 
virtual simdutf_warn_unused result convert_utf8_to_utf16be_with_errors (const char *input, size_t length, char16_t *utf16_output) const noexcept=0
 Convert possibly broken UTF-8 string into UTF-16BE string and stop on error.
 
virtual simdutf_warn_unused size_t convert_utf8_to_utf32 (const char *input, size_t length, char32_t *utf32_output) const noexcept=0
 Convert possibly broken UTF-8 string into UTF-32 string.
 
virtual simdutf_warn_unused result convert_utf8_to_utf32_with_errors (const char *input, size_t length, char32_t *utf32_output) const noexcept=0
 Convert possibly broken UTF-8 string into UTF-32 string and stop on error.
 
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf16le (const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0
 Convert valid UTF-8 string into UTF-16LE string.
 
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf16be (const char *input, size_t length, char16_t *utf16_buffer) const noexcept=0
 Convert valid UTF-8 string into UTF-16BE string.
 
virtual simdutf_warn_unused size_t convert_valid_utf8_to_utf32 (const char *input, size_t length, char32_t *utf32_buffer) const noexcept=0
 Convert valid UTF-8 string into UTF-32 string.
 
virtual simdutf_warn_unused size_t utf16_length_from_utf8 (const char *input, size_t length) const noexcept=0
 Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
 
virtual simdutf_warn_unused size_t utf32_length_from_utf8 (const char *input, size_t length) const noexcept=0
 Compute the number of 4-byte code units that this UTF-8 string would require in UTF-32 format.
 
virtual simdutf_warn_unused size_t convert_utf16le_to_latin1 (const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
 Convert possibly broken UTF-16LE string into Latin1 string.
 
virtual simdutf_warn_unused size_t convert_utf16be_to_latin1 (const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
 Convert possibly broken UTF-16BE string into Latin1 string.
 
virtual simdutf_warn_unused result convert_utf16le_to_latin1_with_errors (const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
 Convert possibly broken UTF-16LE string into Latin1 string and stop on error.
 
virtual simdutf_warn_unused result convert_utf16be_to_latin1_with_errors (const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
 Convert possibly broken UTF-16BE string into Latin1 string and stop on error.
 
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_latin1 (const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
 Convert valid UTF-16LE string into Latin1 string.
 
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_latin1 (const char16_t *input, size_t length, char *latin1_buffer) const noexcept=0
 Convert valid UTF-16BE string into Latin1 string.
 
virtual simdutf_warn_unused size_t convert_utf16le_to_utf8 (const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
 Convert possibly broken UTF-16LE string into UTF-8 string.
 
virtual simdutf_warn_unused size_t convert_utf16be_to_utf8 (const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
 Convert possibly broken UTF-16BE string into UTF-8 string.
 
virtual simdutf_warn_unused result convert_utf16le_to_utf8_with_errors (const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
 Convert possibly broken UTF-16LE string into UTF-8 string and stop on error.
 
virtual simdutf_warn_unused result convert_utf16be_to_utf8_with_errors (const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
 Convert possibly broken UTF-16BE string into UTF-8 string and stop on error.
 
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_utf8 (const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
 Convert valid UTF-16LE string into UTF-8 string.
 
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_utf8 (const char16_t *input, size_t length, char *utf8_buffer) const noexcept=0
 Convert valid UTF-16BE string into UTF-8 string.
 
virtual simdutf_warn_unused size_t convert_utf16le_to_utf32 (const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
 Convert possibly broken UTF-16LE string into UTF-32 string.
 
virtual simdutf_warn_unused size_t convert_utf16be_to_utf32 (const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
 Convert possibly broken UTF-16BE string into UTF-32 string.
 
virtual simdutf_warn_unused result convert_utf16le_to_utf32_with_errors (const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
 Convert possibly broken UTF-16LE string into UTF-32 string and stop on error.
 
virtual simdutf_warn_unused result convert_utf16be_to_utf32_with_errors (const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
 Convert possibly broken UTF-16BE string into UTF-32 string and stop on error.
 
virtual simdutf_warn_unused size_t convert_valid_utf16le_to_utf32 (const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
 Convert valid UTF-16LE string into UTF-32 string.
 
virtual simdutf_warn_unused size_t convert_valid_utf16be_to_utf32 (const char16_t *input, size_t length, char32_t *utf32_buffer) const noexcept=0
 Convert valid UTF-16BE string into UTF-32 string.
 
virtual simdutf_warn_unused size_t utf8_length_from_utf16le (const char16_t *input, size_t length) const noexcept=0
 Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
 
virtual simdutf_warn_unused size_t utf8_length_from_utf16be (const char16_t *input, size_t length) const noexcept=0
 Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
 
virtual simdutf_warn_unused size_t convert_utf32_to_latin1 (const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
 Convert possibly broken UTF-32 string into Latin1 string.
 
virtual simdutf_warn_unused result convert_utf32_to_latin1_with_errors (const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
 Convert possibly broken UTF-32 string into Latin1 string and stop on error.
 
virtual simdutf_warn_unused size_t convert_valid_utf32_to_latin1 (const char32_t *input, size_t length, char *latin1_buffer) const noexcept=0
 Convert valid UTF-32 string into Latin1 string.
 
virtual simdutf_warn_unused size_t convert_utf32_to_utf8 (const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
 Convert possibly broken UTF-32 string into UTF-8 string.
 
virtual simdutf_warn_unused result convert_utf32_to_utf8_with_errors (const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
 Convert possibly broken UTF-32 string into UTF-8 string and stop on error.
 
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf8 (const char32_t *input, size_t length, char *utf8_buffer) const noexcept=0
 Convert valid UTF-32 string into UTF-8 string.
 
virtual simdutf_warn_unused size_t utf16_length_from_latin1 (size_t length) const noexcept
 Return the number of 2-byte code units that this Latin1 string would require in UTF-16 format.
 
virtual simdutf_warn_unused size_t convert_utf32_to_utf16le (const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
 Convert possibly broken UTF-32 string into UTF-16LE string.
 
virtual simdutf_warn_unused size_t convert_utf32_to_utf16be (const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
 Convert possibly broken UTF-32 string into UTF-16BE string.
 
virtual simdutf_warn_unused result convert_utf32_to_utf16le_with_errors (const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
 Convert possibly broken UTF-32 string into UTF-16LE string and stop on error.
 
virtual simdutf_warn_unused result convert_utf32_to_utf16be_with_errors (const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
 Convert possibly broken UTF-32 string into UTF-16BE string and stop on error.
 
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf16le (const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
 Convert valid UTF-32 string into UTF-16LE string.
 
virtual simdutf_warn_unused size_t convert_valid_utf32_to_utf16be (const char32_t *input, size_t length, char16_t *utf16_buffer) const noexcept=0
 Convert valid UTF-32 string into UTF-16BE string.
 
virtual void change_endianness_utf16 (const char16_t *input, size_t length, char16_t *output) const noexcept=0
 Change the endianness of the input.
 
virtual simdutf_warn_unused size_t utf8_length_from_latin1 (const char *input, size_t length) const noexcept=0
 Return the number of bytes that this Latin1 string would require in UTF-8 format.
 
virtual simdutf_warn_unused size_t utf8_length_from_utf32 (const char32_t *input, size_t length) const noexcept=0
 Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
 
virtual simdutf_warn_unused size_t latin1_length_from_utf32 (size_t length) const noexcept
 Compute the number of bytes that this UTF-32 string would require in Latin1 format.
 
virtual simdutf_warn_unused size_t latin1_length_from_utf8 (const char *input, size_t length) const noexcept=0
 Compute the number of bytes that this UTF-8 string would require in Latin1 format.
 
virtual simdutf_warn_unused size_t latin1_length_from_utf16 (size_t length) const noexcept
 Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
 
virtual simdutf_warn_unused size_t utf16_length_from_utf32 (const char32_t *input, size_t length) const noexcept=0
 Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
 
virtual simdutf_warn_unused size_t utf32_length_from_latin1 (size_t length) const noexcept
 Return the number of 4-byte code units that this Latin1 string would require in UTF-32 format.
 
virtual simdutf_warn_unused size_t utf32_length_from_utf16le (const char16_t *input, size_t length) const noexcept=0
 Compute the number of 4-byte code units that this UTF-16LE string would require in UTF-32 format.
 
virtual simdutf_warn_unused size_t utf32_length_from_utf16be (const char16_t *input, size_t length) const noexcept=0
 Compute the number of 4-byte code units that this UTF-16BE string would require in UTF-32 format.
 
virtual simdutf_warn_unused size_t count_utf16le (const char16_t *input, size_t length) const noexcept=0
 Count the number of code points (characters) in the string assuming that it is valid.
 
virtual simdutf_warn_unused size_t count_utf16be (const char16_t *input, size_t length) const noexcept=0
 Count the number of code points (characters) in the string assuming that it is valid.
 
virtual simdutf_warn_unused size_t count_utf8 (const char *input, size_t length) const noexcept=0
 Count the number of code points (characters) in the string assuming that it is valid.
 
simdutf_warn_unused size_t maximal_binary_length_from_base64 (const char *input, size_t length) const noexcept
 Provide the maximal binary length in bytes given the base64 input.
 
simdutf_warn_unused size_t maximal_binary_length_from_base64 (const char16_t *input, size_t length) const noexcept
 Provide the maximal binary length in bytes given the base64 input.
 
virtual simdutf_warn_unused result base64_to_binary (const char *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
 Convert a base64 input to a binary output.
 
virtual simdutf_warn_unused full_result base64_to_binary_details (const char *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
 Convert a base64 input to a binary output while returning more details than base64_to_binary.
 
virtual simdutf_warn_unused result base64_to_binary (const char16_t *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
 Convert a base64 input to a binary output.
 
virtual simdutf_warn_unused full_result base64_to_binary_details (const char16_t *input, size_t length, char *output, base64_options options=base64_default, last_chunk_handling_options last_chunk_options=last_chunk_handling_options::loose) const noexcept=0
 Convert a base64 input to a binary output while returning more details than base64_to_binary.
 
simdutf_warn_unused size_t base64_length_from_binary (size_t length, base64_options options=base64_default) const noexcept
 Provide the base64 length in bytes given the length of a binary input.
 
virtual size_t binary_to_base64 (const char *input, size_t length, char *output, base64_options options=base64_default) const noexcept=0
 Convert a binary input to a base64 output.
 

Detailed Description

An implementation of simdutf for a particular CPU architecture.

Also used to maintain the currently active implementation. The active implementation is automatically initialized on first use to the most advanced implementation supported by the host.

Definition at line 3127 of file implementation.h.

Member Function Documentation

◆ autodetect_encoding()

virtual encoding_type simdutf::implementation::autodetect_encoding ( const char *  input,
size_t  length 
) const
virtualnoexcept

This function will try to detect the encoding.

Parameters
inputthe string to identify
lengththe length of the string in bytes.
Returns
the encoding type detected

◆ base64_length_from_binary()

simdutf_warn_unused size_t simdutf::implementation::base64_length_from_binary ( size_t  length,
base64_options  options = base64_default 
) const
noexcept

Provide the base64 length in bytes given the length of a binary input.

Parameters
lengththe length of the input in bytes
optionsthe base64 options to use, can be base64_default or base64_url, is base64_default by default.
Returns
number of base64 bytes

◆ base64_to_binary() [1/2]

virtual simdutf_warn_unused result simdutf::implementation::base64_to_binary ( const char *  input,
size_t  length,
char *  output,
base64_options  options = base64_default,
last_chunk_handling_options  last_chunk_options = last_chunk_handling_options::loose 
) const
pure virtualnoexcept

Convert a base64 input to a binary output.

This function follows the WHATWG forgiving-base64 format, which means that it will ignore any ASCII spaces in the input. You may provide a padded input (with one or two equal signs at the end) or an unpadded input (without any equal signs at the end).

See https://infra.spec.whatwg.org/#forgiving-base64-decode

This function will fail in case of invalid input. When last_chunk_options = loose, there are two possible reasons for failure: the input contains a number of base64 characters that when divided by 4, leaves a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character that is not a valid base64 character (INVALID_BASE64_CHARACTER).

You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long. If you fail to provide that much space, the function may cause a buffer overflow.

Parameters
inputthe base64 string to process
lengththe length of the string in bytes
outputthe pointer to a buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long).
optionsthe base64 options to use, can be base64_default or base64_url, is base64_default by default.
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in bytes) if any, or the number of bytes written if successful.

◆ base64_to_binary() [2/2]

virtual simdutf_warn_unused result simdutf::implementation::base64_to_binary ( const char16_t *  input,
size_t  length,
char *  output,
base64_options  options = base64_default,
last_chunk_handling_options  last_chunk_options = last_chunk_handling_options::loose 
) const
pure virtualnoexcept

Convert a base64 input to a binary output.

This function follows the WHATWG forgiving-base64 format, which means that it will ignore any ASCII spaces in the input. You may provide a padded input (with one or two equal signs at the end) or an unpadded input (without any equal signs at the end).

See https://infra.spec.whatwg.org/#forgiving-base64-decode

This function will fail in case of invalid input. When last_chunk_options = loose, there are two possible reasons for failure: the input contains a number of base64 characters that when divided by 4, leaves a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character that is not a valid base64 character (INVALID_BASE64_CHARACTER).

You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long. If you fail to provide that much space, the function may cause a buffer overflow.

Parameters
inputthe base64 string to process, in ASCII stored as 16-bit units
lengththe length of the string in 16-bit units
outputthe pointer to a buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long).
optionsthe base64 options to use, can be base64_default or base64_url, is base64_default by default.
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and position of the INVALID_BASE64_CHARACTER error (in the input in units) if any, or the number of bytes written if successful.

◆ base64_to_binary_details() [1/2]

virtual simdutf_warn_unused full_result simdutf::implementation::base64_to_binary_details ( const char *  input,
size_t  length,
char *  output,
base64_options  options = base64_default,
last_chunk_handling_options  last_chunk_options = last_chunk_handling_options::loose 
) const
pure virtualnoexcept

Convert a base64 input to a binary output while returning more details than base64_to_binary.

This function follows the WHATWG forgiving-base64 format, which means that it will ignore any ASCII spaces in the input. You may provide a padded input (with one or two equal signs at the end) or an unpadded input (without any equal signs at the end).

See https://infra.spec.whatwg.org/#forgiving-base64-decode

This function will fail in case of invalid input. When last_chunk_options = loose, there are two possible reasons for failure: the input contains a number of base64 characters that when divided by 4, leaves a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character that is not a valid base64 character (INVALID_BASE64_CHARACTER).

You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long. If you fail to provide that much space, the function may cause a buffer overflow.

Parameters
inputthe base64 string to process
lengththe length of the string in bytes
outputthe pointer to a buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long).
optionsthe base64 options to use, can be base64_default or base64_url, is base64_default by default.
Returns
a full_result struct (of type simdutf::full_result containing the three fields error, input_count and output_count).

◆ base64_to_binary_details() [2/2]

virtual simdutf_warn_unused full_result simdutf::implementation::base64_to_binary_details ( const char16_t *  input,
size_t  length,
char *  output,
base64_options  options = base64_default,
last_chunk_handling_options  last_chunk_options = last_chunk_handling_options::loose 
) const
pure virtualnoexcept

Convert a base64 input to a binary output while returning more details than base64_to_binary.

This function follows the WHATWG forgiving-base64 format, which means that it will ignore any ASCII spaces in the input. You may provide a padded input (with one or two equal signs at the end) or an unpadded input (without any equal signs at the end).

See https://infra.spec.whatwg.org/#forgiving-base64-decode

This function will fail in case of invalid input. When last_chunk_options = loose, there are two possible reasons for failure: the input contains a number of base64 characters that when divided by 4, leaves a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character that is not a valid base64 character (INVALID_BASE64_CHARACTER).

You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long. If you fail to provide that much space, the function may cause a buffer overflow.

Parameters
inputthe base64 string to process, in ASCII stored as 16-bit units
lengththe length of the string in 16-bit units
outputthe pointer to a buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long).
optionsthe base64 options to use, can be base64_default or base64_url, is base64_default by default.
Returns
a full_result struct (of type simdutf::full_result containing the three fields error, input_count and output_count).

◆ binary_to_base64()

virtual size_t simdutf::implementation::binary_to_base64 ( const char *  input,
size_t  length,
char *  output,
base64_options  options = base64_default 
) const
pure virtualnoexcept

Convert a binary input to a base64 output.

The default option (simdutf::base64_default) uses the characters + and / as part of its alphabet. Further, it adds padding (=) at the end of the output to ensure that the output length is a multiple of four.

The URL option (simdutf::base64_url) uses the characters - and _ as part of its alphabet. No padding is added at the end of the output.

This function always succeeds.

Parameters
inputthe binary to process
lengththe length of the input in bytes
outputthe pointer to a buffer that can hold the conversion result (should be at least base64_length_from_binary(length) bytes long)
optionsthe base64 options to use, can be base64_default or base64_url, is base64_default by default.
Returns
number of written bytes, will be equal to base64_length_from_binary(length, options)

◆ change_endianness_utf16()

virtual void simdutf::implementation::change_endianness_utf16 ( const char16_t *  input,
size_t  length,
char16_t *  output 
) const
pure virtualnoexcept

Change the endianness of the input.

Can be used to go from UTF-16LE to UTF-16BE or from UTF-16BE to UTF-16LE.

This function does not validate the input.

This function is not BOM-aware.

Parameters
inputthe UTF-16 string to process
lengththe length of the string in 2-byte code units (char16_t)
outputthe pointer to a buffer that can hold the conversion result

◆ convert_latin1_to_utf16be()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_latin1_to_utf16be ( const char *  input,
size_t  length,
char16_t *  utf16_output 
) const
pure virtualnoexcept

Convert Latin1 string into UTF-16BE string.

This function is suitable to work with inputs from untrusted sources.

Parameters
inputthe Latin1 string to convert
lengththe length of the string in bytes
utf16_outputthe pointer to buffer that can hold conversion result
Returns
the number of written char16_t; 0 if conversion is not possible

◆ convert_latin1_to_utf16le()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_latin1_to_utf16le ( const char *  input,
size_t  length,
char16_t *  utf16_output 
) const
pure virtualnoexcept

Convert Latin1 string into UTF-16LE string.

This function is suitable to work with inputs from untrusted sources.

Parameters
inputthe Latin1 string to convert
lengththe length of the string in bytes
utf16_outputthe pointer to buffer that can hold conversion result
Returns
the number of written char16_t; 0 if conversion is not possible

◆ convert_latin1_to_utf32()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_latin1_to_utf32 ( const char *  input,
size_t  length,
char32_t *  utf32_buffer 
) const
pure virtualnoexcept

Convert Latin1 string into UTF-32 string.

This function is suitable to work with inputs from untrusted sources.

Parameters
inputthe Latin1 string to convert
lengththe length of the string in bytes
utf32_bufferthe pointer to buffer that can hold conversion result
Returns
the number of written char32_t; 0 if conversion is not possible

◆ convert_latin1_to_utf8()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_latin1_to_utf8 ( const char *  input,
size_t  length,
char *  utf8_output 
) const
pure virtualnoexcept

Convert Latin1 string into UTF-8 string.

This function is suitable to work with inputs from untrusted sources.

Parameters
inputthe Latin1 string to convert
lengththe length of the string in bytes
utf8_outputthe pointer to buffer that can hold conversion result
Returns
the number of written char; 0 if conversion is not possible

◆ convert_utf16be_to_latin1()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_utf16be_to_latin1 ( const char16_t *  input,
size_t  length,
char *  latin1_buffer 
) const
pure virtualnoexcept

Convert possibly broken UTF-16BE string into Latin1 string.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

This function is not BOM-aware.

Parameters
inputthe UTF-16BE string to convert
lengththe length of the string in 2-byte code units (char16_t)
latin1_bufferthe pointer to buffer that can hold conversion result
Returns
number of written code units; 0 if input is not a valid UTF-16BE string or if it cannot be represented as Latin1

◆ convert_utf16be_to_latin1_with_errors()

virtual simdutf_warn_unused result simdutf::implementation::convert_utf16be_to_latin1_with_errors ( const char16_t *  input,
size_t  length,
char *  latin1_buffer 
) const
pure virtualnoexcept

Convert possibly broken UTF-16BE string into Latin1 string and stop on error.

If the string cannot be represented as Latin1, an error is returned.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources. This function is not BOM-aware.

Parameters
inputthe UTF-16BE string to convert
lengththe length of the string in 2-byte code units (char16_t)
latin1_bufferthe pointer to buffer that can hold conversion result
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.

◆ convert_utf16be_to_utf32()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_utf16be_to_utf32 ( const char16_t *  input,
size_t  length,
char32_t *  utf32_buffer 
) const
pure virtualnoexcept

Convert possibly broken UTF-16BE string into UTF-32 string.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

This function is not BOM-aware.

Parameters
inputthe UTF-16BE string to convert
lengththe length of the string in 2-byte code units (char16_t)
utf32_bufferthe pointer to buffer that can hold conversion result
Returns
number of written code units; 0 if input is not a valid UTF-16BE string

◆ convert_utf16be_to_utf32_with_errors()

virtual simdutf_warn_unused result simdutf::implementation::convert_utf16be_to_utf32_with_errors ( const char16_t *  input,
size_t  length,
char32_t *  utf32_buffer 
) const
pure virtualnoexcept

Convert possibly broken UTF-16BE string into UTF-32 string and stop on error.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

This function is not BOM-aware.

Parameters
inputthe UTF-16BE string to convert
lengththe length of the string in 2-byte code units (char16_t)
utf32_bufferthe pointer to buffer that can hold conversion result
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.

◆ convert_utf16be_to_utf8()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_utf16be_to_utf8 ( const char16_t *  input,
size_t  length,
char *  utf8_buffer 
) const
pure virtualnoexcept

Convert possibly broken UTF-16BE string into UTF-8 string.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

This function is not BOM-aware.

Parameters
inputthe UTF-16BE string to convert
lengththe length of the string in 2-byte code units (char16_t)
utf8_bufferthe pointer to buffer that can hold conversion result
Returns
number of written code units; 0 if input is not a valid UTF-16BE string

◆ convert_utf16be_to_utf8_with_errors()

virtual simdutf_warn_unused result simdutf::implementation::convert_utf16be_to_utf8_with_errors ( const char16_t *  input,
size_t  length,
char *  utf8_buffer 
) const
pure virtualnoexcept

Convert possibly broken UTF-16BE string into UTF-8 string and stop on error.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

This function is not BOM-aware.

Parameters
inputthe UTF-16BE string to convert
lengththe length of the string in 2-byte code units (char16_t)
utf8_bufferthe pointer to buffer that can hold conversion result
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.

◆ convert_utf16le_to_latin1()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_utf16le_to_latin1 ( const char16_t *  input,
size_t  length,
char *  latin1_buffer 
) const
pure virtualnoexcept

Convert possibly broken UTF-16LE string into Latin1 string.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

This function is not BOM-aware.

Parameters
inputthe UTF-16LE string to convert
lengththe length of the string in 2-byte code units (char16_t)
latin1_bufferthe pointer to buffer that can hold conversion result
Returns
number of written code units; 0 if input is not a valid UTF-16LE string or if it cannot be represented as Latin1

◆ convert_utf16le_to_latin1_with_errors()

virtual simdutf_warn_unused result simdutf::implementation::convert_utf16le_to_latin1_with_errors ( const char16_t *  input,
size_t  length,
char *  latin1_buffer 
) const
pure virtualnoexcept

Convert possibly broken UTF-16LE string into Latin1 string.

If the string cannot be represented as Latin1, an error is returned.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources. This function is not BOM-aware.

Parameters
inputthe UTF-16LE string to convert
lengththe length of the string in 2-byte code units (char16_t)
latin1_bufferthe pointer to buffer that can hold conversion result
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.

◆ convert_utf16le_to_utf32()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_utf16le_to_utf32 ( const char16_t *  input,
size_t  length,
char32_t *  utf32_buffer 
) const
pure virtualnoexcept

Convert possibly broken UTF-16LE string into UTF-32 string.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

This function is not BOM-aware.

Parameters
inputthe UTF-16LE string to convert
lengththe length of the string in 2-byte code units (char16_t)
utf32_bufferthe pointer to buffer that can hold conversion result
Returns
number of written code units; 0 if input is not a valid UTF-16LE string

◆ convert_utf16le_to_utf32_with_errors()

virtual simdutf_warn_unused result simdutf::implementation::convert_utf16le_to_utf32_with_errors ( const char16_t *  input,
size_t  length,
char32_t *  utf32_buffer 
) const
pure virtualnoexcept

Convert possibly broken UTF-16LE string into UTF-32 string and stop on error.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

This function is not BOM-aware.

Parameters
inputthe UTF-16LE string to convert
lengththe length of the string in 2-byte code units (char16_t)
utf32_bufferthe pointer to buffer that can hold conversion result
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.

◆ convert_utf16le_to_utf8()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_utf16le_to_utf8 ( const char16_t *  input,
size_t  length,
char *  utf8_buffer 
) const
pure virtualnoexcept

Convert possibly broken UTF-16LE string into UTF-8 string.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

This function is not BOM-aware.

Parameters
inputthe UTF-16LE string to convert
lengththe length of the string in 2-byte code units (char16_t)
utf8_bufferthe pointer to buffer that can hold conversion result
Returns
number of written code units; 0 if input is not a valid UTF-16LE string

◆ convert_utf16le_to_utf8_with_errors()

virtual simdutf_warn_unused result simdutf::implementation::convert_utf16le_to_utf8_with_errors ( const char16_t *  input,
size_t  length,
char *  utf8_buffer 
) const
pure virtualnoexcept

Convert possibly broken UTF-16LE string into UTF-8 string and stop on error.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

This function is not BOM-aware.

Parameters
inputthe UTF-16LE string to convert
lengththe length of the string in 2-byte code units (char16_t)
utf8_bufferthe pointer to buffer that can hold conversion result
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.

◆ convert_utf32_to_latin1()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_utf32_to_latin1 ( const char32_t *  input,
size_t  length,
char *  latin1_buffer 
) const
pure virtualnoexcept

Convert possibly broken UTF-32 string into Latin1 string.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

This function is not BOM-aware.

Parameters
inputthe UTF-32 string to convert
lengththe length of the string in 4-byte code units (char32_t)
latin1_bufferthe pointer to buffer that can hold conversion result
Returns
number of written code units; 0 if input is not a valid UTF-32 string or if it cannot be represented as Latin1

◆ convert_utf32_to_latin1_with_errors()

virtual simdutf_warn_unused result simdutf::implementation::convert_utf32_to_latin1_with_errors ( const char32_t *  input,
size_t  length,
char *  latin1_buffer 
) const
pure virtualnoexcept

Convert possibly broken UTF-32 string into Latin1 string and stop on error.

If the string cannot be represented as Latin1, an error is returned.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

This function is not BOM-aware.

Parameters
inputthe UTF-32 string to convert
lengththe length of the string in 4-byte code units (char32_t)
latin1_bufferthe pointer to buffer that can hold conversion result
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.

◆ convert_utf32_to_utf16be()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_utf32_to_utf16be ( const char32_t *  input,
size_t  length,
char16_t *  utf16_buffer 
) const
pure virtualnoexcept

Convert possibly broken UTF-32 string into UTF-16BE string.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

This function is not BOM-aware.

Parameters
inputthe UTF-32 string to convert
lengththe length of the string in 4-byte code units (char32_t)
utf16_bufferthe pointer to buffer that can hold conversion result
Returns
number of written code units; 0 if input is not a valid UTF-32 string

◆ convert_utf32_to_utf16be_with_errors()

virtual simdutf_warn_unused result simdutf::implementation::convert_utf32_to_utf16be_with_errors ( const char32_t *  input,
size_t  length,
char16_t *  utf16_buffer 
) const
pure virtualnoexcept

Convert possibly broken UTF-32 string into UTF-16BE string and stop on error.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

This function is not BOM-aware.

Parameters
inputthe UTF-32 string to convert
lengththe length of the string in 4-byte code units (char32_t)
utf16_bufferthe pointer to buffer that can hold conversion result
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.

◆ convert_utf32_to_utf16le()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_utf32_to_utf16le ( const char32_t *  input,
size_t  length,
char16_t *  utf16_buffer 
) const
pure virtualnoexcept

Convert possibly broken UTF-32 string into UTF-16LE string.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

This function is not BOM-aware.

Parameters
inputthe UTF-32 string to convert
lengththe length of the string in 4-byte code units (char32_t)
utf16_bufferthe pointer to buffer that can hold conversion result
Returns
number of written code units; 0 if input is not a valid UTF-32 string

◆ convert_utf32_to_utf16le_with_errors()

virtual simdutf_warn_unused result simdutf::implementation::convert_utf32_to_utf16le_with_errors ( const char32_t *  input,
size_t  length,
char16_t *  utf16_buffer 
) const
pure virtualnoexcept

Convert possibly broken UTF-32 string into UTF-16LE string and stop on error.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

This function is not BOM-aware.

Parameters
inputthe UTF-32 string to convert
lengththe length of the string in 4-byte code units (char32_t)
utf16_bufferthe pointer to buffer that can hold conversion result
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.

◆ convert_utf32_to_utf8()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_utf32_to_utf8 ( const char32_t *  input,
size_t  length,
char *  utf8_buffer 
) const
pure virtualnoexcept

Convert possibly broken UTF-32 string into UTF-8 string.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

This function is not BOM-aware.

Parameters
inputthe UTF-32 string to convert
lengththe length of the string in 4-byte code units (char32_t)
utf8_bufferthe pointer to buffer that can hold conversion result
Returns
number of written code units; 0 if input is not a valid UTF-32 string

◆ convert_utf32_to_utf8_with_errors()

virtual simdutf_warn_unused result simdutf::implementation::convert_utf32_to_utf8_with_errors ( const char32_t *  input,
size_t  length,
char *  utf8_buffer 
) const
pure virtualnoexcept

Convert possibly broken UTF-32 string into UTF-8 string and stop on error.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

This function is not BOM-aware.

Parameters
inputthe UTF-32 string to convert
lengththe length of the string in 4-byte code units (char32_t)
utf8_bufferthe pointer to buffer that can hold conversion result
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.

◆ convert_utf8_to_latin1()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_utf8_to_latin1 ( const char *  input,
size_t  length,
char *  latin1_output 
) const
pure virtualnoexcept

Convert possibly broken UTF-8 string into latin1 string.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

Parameters
inputthe UTF-8 string to convert
lengththe length of the string in bytes
latin1_outputthe pointer to buffer that can hold conversion result
Returns
the number of written char; 0 if the input was not valid UTF-8 string or if it cannot be represented as Latin1

◆ convert_utf8_to_latin1_with_errors()

virtual simdutf_warn_unused result simdutf::implementation::convert_utf8_to_latin1_with_errors ( const char *  input,
size_t  length,
char *  latin1_output 
) const
pure virtualnoexcept

Convert possibly broken UTF-8 string into Latin1 string and stop on error.

If the string cannot be represented as Latin1, an error code is returned.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

Parameters
inputthe UTF-8 string to convert
lengththe length of the string in bytes
latin1_outputthe pointer to buffer that can hold conversion result
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.

◆ convert_utf8_to_utf16be()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_utf8_to_utf16be ( const char *  input,
size_t  length,
char16_t *  utf16_output 
) const
pure virtualnoexcept

Convert possibly broken UTF-8 string into UTF-16BE string.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

Parameters
inputthe UTF-8 string to convert
lengththe length of the string in bytes
utf16_outputthe pointer to buffer that can hold conversion result
Returns
the number of written char16_t; 0 if the input was not valid UTF-8 string

◆ convert_utf8_to_utf16be_with_errors()

virtual simdutf_warn_unused result simdutf::implementation::convert_utf8_to_utf16be_with_errors ( const char *  input,
size_t  length,
char16_t *  utf16_output 
) const
pure virtualnoexcept

Convert possibly broken UTF-8 string into UTF-16BE string and stop on error.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

Parameters
inputthe UTF-8 string to convert
lengththe length of the string in bytes
utf16_outputthe pointer to buffer that can hold conversion result
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.

◆ convert_utf8_to_utf16le()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_utf8_to_utf16le ( const char *  input,
size_t  length,
char16_t *  utf16_output 
) const
pure virtualnoexcept

Convert possibly broken UTF-8 string into UTF-16LE string.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

Parameters
inputthe UTF-8 string to convert
lengththe length of the string in bytes
utf16_outputthe pointer to buffer that can hold conversion result
Returns
the number of written char16_t; 0 if the input was not valid UTF-8 string

◆ convert_utf8_to_utf16le_with_errors()

virtual simdutf_warn_unused result simdutf::implementation::convert_utf8_to_utf16le_with_errors ( const char *  input,
size_t  length,
char16_t *  utf16_output 
) const
pure virtualnoexcept

Convert possibly broken UTF-8 string into UTF-16LE string and stop on error.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

Parameters
inputthe UTF-8 string to convert
lengththe length of the string in bytes
utf16_outputthe pointer to buffer that can hold conversion result
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.

◆ convert_utf8_to_utf32()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_utf8_to_utf32 ( const char *  input,
size_t  length,
char32_t *  utf32_output 
) const
pure virtualnoexcept

Convert possibly broken UTF-8 string into UTF-32 string.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

Parameters
inputthe UTF-8 string to convert
lengththe length of the string in bytes
utf32_outputthe pointer to buffer that can hold conversion result
Returns
the number of written char32_t; 0 if the input was not valid UTF-8 string

◆ convert_utf8_to_utf32_with_errors()

virtual simdutf_warn_unused result simdutf::implementation::convert_utf8_to_utf32_with_errors ( const char *  input,
size_t  length,
char32_t *  utf32_output 
) const
pure virtualnoexcept

Convert possibly broken UTF-8 string into UTF-32 string and stop on error.

During the conversion also validation of the input string is done. This function is suitable to work with inputs from untrusted sources.

Parameters
inputthe UTF-8 string to convert
lengththe length of the string in bytes
utf32_outputthe pointer to buffer that can hold conversion result
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.

◆ convert_valid_utf16be_to_latin1()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_valid_utf16be_to_latin1 ( const char16_t *  input,
size_t  length,
char *  latin1_buffer 
) const
pure virtualnoexcept

Convert valid UTF-16BE string into Latin1 string.

This function assumes that the input string is valid UTF-16BE and that it can be represented as Latin1. If you violate this assumption, the result is implementation defined and may include system-dependent behavior such as crashes.

This function is for expert users only and not part of our public API. Use convert_utf16be_to_latin1 instead.

This function is not BOM-aware.

Parameters
inputthe UTF-16BE string to convert
lengththe length of the string in 2-byte code units (char16_t)
latin1_bufferthe pointer to buffer that can hold conversion result
Returns
number of written code units; 0 if conversion is not possible

◆ convert_valid_utf16be_to_utf32()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_valid_utf16be_to_utf32 ( const char16_t *  input,
size_t  length,
char32_t *  utf32_buffer 
) const
pure virtualnoexcept

Convert valid UTF-16BE string into UTF-32 string.

This function assumes that the input string is valid UTF-16BE.

This function is not BOM-aware.

Parameters
inputthe UTF-16BE string to convert
lengththe length of the string in 2-byte code units (char16_t)
utf32_bufferthe pointer to a buffer that can hold the conversion result
Returns
number of written code units; 0 if conversion is not possible

◆ convert_valid_utf16be_to_utf8()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_valid_utf16be_to_utf8 ( const char16_t *  input,
size_t  length,
char *  utf8_buffer 
) const
pure virtualnoexcept

Convert valid UTF-16BE string into UTF-8 string.

This function assumes that the input string is valid UTF-16BE.

This function is not BOM-aware.

Parameters
inputthe UTF-16BE string to convert
lengththe length of the string in 2-byte code units (char16_t)
utf8_bufferthe pointer to a buffer that can hold the conversion result
Returns
number of written code units; 0 if conversion is not possible

◆ convert_valid_utf16le_to_latin1()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_valid_utf16le_to_latin1 ( const char16_t *  input,
size_t  length,
char *  latin1_buffer 
) const
pure virtualnoexcept

Convert valid UTF-16LE string into Latin1 string.

This function assumes that the input string is valid UTF-16LE and that it can be represented as Latin1. If you violate this assumption, the result is implementation defined and may include system-dependent behavior such as crashes.

This function is for expert users only and not part of our public API. Use convert_utf16le_to_latin1 instead.

This function is not BOM-aware.

Parameters
inputthe UTF-16LE string to convert
lengththe length of the string in 2-byte code units (char16_t)
latin1_bufferthe pointer to buffer that can hold conversion result
Returns
number of written code units; 0 if conversion is not possible

◆ convert_valid_utf16le_to_utf32()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_valid_utf16le_to_utf32 ( const char16_t *  input,
size_t  length,
char32_t *  utf32_buffer 
) const
pure virtualnoexcept

Convert valid UTF-16LE string into UTF-32 string.

This function assumes that the input string is valid UTF-16LE.

This function is not BOM-aware.

Parameters
inputthe UTF-16LE string to convert
lengththe length of the string in 2-byte code units (char16_t)
utf32_bufferthe pointer to a buffer that can hold the conversion result
Returns
number of written code units; 0 if conversion is not possible

◆ convert_valid_utf16le_to_utf8()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_valid_utf16le_to_utf8 ( const char16_t *  input,
size_t  length,
char *  utf8_buffer 
) const
pure virtualnoexcept

Convert valid UTF-16LE string into UTF-8 string.

This function assumes that the input string is valid UTF-16LE.

This function is not BOM-aware.

Parameters
inputthe UTF-16LE string to convert
lengththe length of the string in 2-byte code units (char16_t)
utf8_bufferthe pointer to a buffer that can hold the conversion result
Returns
number of written code units; 0 if conversion is not possible

◆ convert_valid_utf32_to_latin1()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_valid_utf32_to_latin1 ( const char32_t *  input,
size_t  length,
char *  latin1_buffer 
) const
pure virtualnoexcept

Convert valid UTF-32 string into Latin1 string.

This function assumes that the input string is valid UTF-32 and can be represented as Latin1. If you violate this assumption, the result is implementation defined and may include system-dependent behavior such as crashes.

This function is for expert users only and not part of our public API. Use convert_utf32_to_latin1 instead.

This function is not BOM-aware.

Parameters
inputthe UTF-32 string to convert
lengththe length of the string in 4-byte code units (char32_t)
latin1_bufferthe pointer to a buffer that can hold the conversion result
Returns
number of written code units; 0 if conversion is not possible

◆ convert_valid_utf32_to_utf16be()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_valid_utf32_to_utf16be ( const char32_t *  input,
size_t  length,
char16_t *  utf16_buffer 
) const
pure virtualnoexcept

Convert valid UTF-32 string into UTF-16BE string.

This function assumes that the input string is valid UTF-32.

This function is not BOM-aware.

Parameters
inputthe UTF-32 string to convert
lengththe length of the string in 4-byte code units (char32_t)
utf16_bufferthe pointer to a buffer that can hold the conversion result
Returns
number of written code units; 0 if conversion is not possible

◆ convert_valid_utf32_to_utf16le()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_valid_utf32_to_utf16le ( const char32_t *  input,
size_t  length,
char16_t *  utf16_buffer 
) const
pure virtualnoexcept

Convert valid UTF-32 string into UTF-16LE string.

This function assumes that the input string is valid UTF-32.

This function is not BOM-aware.

Parameters
inputthe UTF-32 string to convert
lengththe length of the string in 4-byte code units (char32_t)
utf16_bufferthe pointer to a buffer that can hold the conversion result
Returns
number of written code units; 0 if conversion is not possible

◆ convert_valid_utf32_to_utf8()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_valid_utf32_to_utf8 ( const char32_t *  input,
size_t  length,
char *  utf8_buffer 
) const
pure virtualnoexcept

Convert valid UTF-32 string into UTF-8 string.

This function assumes that the input string is valid UTF-32.

This function is not BOM-aware.

Parameters
inputthe UTF-32 string to convert
lengththe length of the string in 4-byte code units (char32_t)
utf8_bufferthe pointer to a buffer that can hold the conversion result
Returns
number of written code units; 0 if conversion is not possible

◆ convert_valid_utf8_to_latin1()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_valid_utf8_to_latin1 ( const char *  input,
size_t  length,
char *  latin1_output 
) const
pure virtualnoexcept

Convert valid UTF-8 string into latin1 string.

This function assumes that the input string is valid UTF-8 and that it can be represented as Latin1. If you violate this assumption, the result is implementation defined and may include system-dependent behavior such as crashes.

This function is for expert users only and not part of our public API. Use convert_utf8_to_latin1 instead.

This function is not BOM-aware.

Parameters
inputthe UTF-8 string to convert
lengththe length of the string in bytes
latin1_outputthe pointer to buffer that can hold conversion result
Returns
the number of written char; 0 if the input was not valid UTF-8 string

◆ convert_valid_utf8_to_utf16be()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_valid_utf8_to_utf16be ( const char *  input,
size_t  length,
char16_t *  utf16_buffer 
) const
pure virtualnoexcept

Convert valid UTF-8 string into UTF-16BE string.

This function assumes that the input string is valid UTF-8.

Parameters
inputthe UTF-8 string to convert
lengththe length of the string in bytes
utf16_bufferthe pointer to buffer that can hold conversion result
Returns
the number of written char16_t

◆ convert_valid_utf8_to_utf16le()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_valid_utf8_to_utf16le ( const char *  input,
size_t  length,
char16_t *  utf16_buffer 
) const
pure virtualnoexcept

Convert valid UTF-8 string into UTF-16LE string.

This function assumes that the input string is valid UTF-8.

Parameters
inputthe UTF-8 string to convert
lengththe length of the string in bytes
utf16_bufferthe pointer to buffer that can hold conversion result
Returns
the number of written char16_t

◆ convert_valid_utf8_to_utf32()

virtual simdutf_warn_unused size_t simdutf::implementation::convert_valid_utf8_to_utf32 ( const char *  input,
size_t  length,
char32_t *  utf32_buffer 
) const
pure virtualnoexcept

Convert valid UTF-8 string into UTF-32 string.

This function assumes that the input string is valid UTF-8.

Parameters
inputthe UTF-8 string to convert
lengththe length of the string in bytes
utf32_bufferthe pointer to buffer that can hold conversion result
Returns
the number of written char32_t

◆ count_utf16be()

virtual simdutf_warn_unused size_t simdutf::implementation::count_utf16be ( const char16_t *  input,
size_t  length 
) const
pure virtualnoexcept

Count the number of code points (characters) in the string assuming that it is valid.

This function assumes that the input string is valid UTF-16BE. It is acceptable to pass invalid UTF-16 strings but in such cases the result is implementation defined.

This function is not BOM-aware.

Parameters
inputthe UTF-16BE string to process
lengththe length of the string in 2-byte code units (char16_t)
Returns
number of code points

◆ count_utf16le()

virtual simdutf_warn_unused size_t simdutf::implementation::count_utf16le ( const char16_t *  input,
size_t  length 
) const
pure virtualnoexcept

Count the number of code points (characters) in the string assuming that it is valid.

This function assumes that the input string is valid UTF-16LE. It is acceptable to pass invalid UTF-16 strings but in such cases the result is implementation defined.

This function is not BOM-aware.

Parameters
inputthe UTF-16LE string to process
lengththe length of the string in 2-byte code units (char16_t)
Returns
number of code points

◆ count_utf8()

virtual simdutf_warn_unused size_t simdutf::implementation::count_utf8 ( const char *  input,
size_t  length 
) const
pure virtualnoexcept

Count the number of code points (characters) in the string assuming that it is valid.

This function assumes that the input string is valid UTF-8. It is acceptable to pass invalid UTF-8 strings but in such cases the result is implementation defined.

Parameters
inputthe UTF-8 string to process
lengththe length of the string in bytes
Returns
number of code points

◆ description()

virtual std::string simdutf::implementation::description ( ) const
inlinevirtual

The description of this implementation.

const implementation *impl = simdutf::active_implementation;
cout << "simdutf is optimized for " << impl->name() << "(" <<
impl->description() << ")" << endl;

Returns
a human-readable description of the implementation

Definition at line 3149 of file implementation.h.

◆ detect_encodings()

virtual int simdutf::implementation::detect_encodings ( const char *  input,
size_t  length 
) const
pure virtualnoexcept

This function will try to detect the possible encodings in one pass.

Parameters
inputthe string to identify
lengththe length of the string in bytes.
Returns
the encoding type detected

◆ latin1_length_from_utf16()

virtual simdutf_warn_unused size_t simdutf::implementation::latin1_length_from_utf16 ( size_t  length) const
inlinevirtualnoexcept

Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.

This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases the result is implementation defined.

This function is not BOM-aware.

Parameters
inputthe UTF-16LE string to convert
lengththe length of the string in 2-byte code units (char16_t)
Returns
the number of bytes required to encode the UTF-16LE string as Latin1

Definition at line 4465 of file implementation.h.

◆ latin1_length_from_utf32()

virtual simdutf_warn_unused size_t simdutf::implementation::latin1_length_from_utf32 ( size_t  length) const
inlinevirtualnoexcept

Compute the number of bytes that this UTF-32 string would require in Latin1 format.

This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases the result is implementation defined.

Parameters
lengththe length of the string in 4-byte code units (char32_t)
Returns
the number of bytes required to encode the UTF-32 string as Latin1

Definition at line 4427 of file implementation.h.

◆ latin1_length_from_utf8()

virtual simdutf_warn_unused size_t simdutf::implementation::latin1_length_from_utf8 ( const char *  input,
size_t  length 
) const
pure virtualnoexcept

Compute the number of bytes that this UTF-8 string would require in Latin1 format.

This function does not validate the input. It is acceptable to pass invalid UTF-8 strings but in such cases the result is implementation defined.

Parameters
inputthe UTF-8 string to convert
lengththe length of the string in bytes
Returns
the number of bytes required to encode the UTF-8 string as Latin1

◆ maximal_binary_length_from_base64() [1/2]

simdutf_warn_unused size_t simdutf::implementation::maximal_binary_length_from_base64 ( const char *  input,
size_t  length 
) const
noexcept

Provide the maximal binary length in bytes given the base64 input.

In general, if the input contains ASCII spaces, the result will be less than the maximum length. It is acceptable to pass invalid base64 strings but in such cases the result is implementation defined.

Parameters
inputthe base64 input to process
lengththe length of the base64 input in bytes
Returns
maximal number of binary bytes

◆ maximal_binary_length_from_base64() [2/2]

simdutf_warn_unused size_t simdutf::implementation::maximal_binary_length_from_base64 ( const char16_t *  input,
size_t  length 
) const
noexcept

Provide the maximal binary length in bytes given the base64 input.

In general, if the input contains ASCII spaces, the result will be less than the maximum length. It is acceptable to pass invalid base64 strings but in such cases the result is implementation defined.

Parameters
inputthe base64 input to process, in ASCII stored as 16-bit units
lengththe length of the base64 input in 16-bit units
Returns
maximal number of binary bytes

◆ name()

virtual std::string simdutf::implementation::name ( ) const
inlinevirtual

The name of this implementation.

const implementation *impl = simdutf::active_implementation;
cout << "simdutf is optimized for " << impl->name() << "(" <<

impl->description() << ")" << endl;

Returns
the name of the implementation, e.g. "haswell", "westmere", "arm64"

Definition at line 3138 of file implementation.h.

◆ supported_by_runtime_system()

bool simdutf::implementation::supported_by_runtime_system ( ) const

Whether the instruction sets this implementation is compiled against are supported by the current CPU.

This function may poll the current CPU/system and should therefore not be called too often if performance is a concern.

Returns
true if the implementation can be safely used on the current system (determined at runtime)

◆ utf16_length_from_latin1()

virtual simdutf_warn_unused size_t simdutf::implementation::utf16_length_from_latin1 ( size_t  length) const
inlinevirtualnoexcept

Return the number of 2-byte code units that this Latin1 string would require in UTF-16 format.

Parameters
lengththe length of the Latin1 string in bytes
Returns
the number of 2-byte code units (char16_t) required to encode the Latin1 string as UTF-16

Definition at line 4239 of file implementation.h.

◆ utf16_length_from_utf32()

virtual simdutf_warn_unused size_t simdutf::implementation::utf16_length_from_utf32 ( const char32_t *  input,
size_t  length 
) const
pure virtualnoexcept

Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.

This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases the result is implementation defined.

Parameters
inputthe UTF-32 string to convert
lengththe length of the string in 4-byte code units (char32_t)
Returns
the number of 2-byte code units (char16_t) required to encode the UTF-32 string as UTF-16

◆ utf16_length_from_utf8()

virtual simdutf_warn_unused size_t simdutf::implementation::utf16_length_from_utf8 ( const char *  input,
size_t  length 
) const
pure virtualnoexcept

Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.

This function does not validate the input. It is acceptable to pass invalid UTF-8 strings but in such cases the result is implementation defined.

Parameters
inputthe UTF-8 string to process
lengththe length of the string in bytes
Returns
the number of char16_t code units required to encode the UTF-8 string as UTF-16LE

◆ utf32_length_from_latin1()

virtual simdutf_warn_unused size_t simdutf::implementation::utf32_length_from_latin1 ( size_t  length) const
inlinevirtualnoexcept

Return the number of 4-byte code units that this Latin1 string would require in UTF-32 format.

Parameters
lengththe length of the Latin1 string in bytes
Returns
the number of 4-byte code units (char32_t) required to encode the Latin1 string as UTF-32

Definition at line 4498 of file implementation.h.

◆ utf32_length_from_utf16be()

virtual simdutf_warn_unused size_t simdutf::implementation::utf32_length_from_utf16be ( const char16_t *  input,
size_t  length 
) const
pure virtualnoexcept

Compute the number of 4-byte code units that this UTF-16BE string would require in UTF-32 format.

This function is equivalent to count_utf16be.

This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases the result is implementation defined.

This function is not BOM-aware.

Parameters
inputthe UTF-16BE string to convert
lengththe length of the string in 2-byte code units (char16_t)
Returns
the number of 4-byte code units (char32_t) required to encode the UTF-16BE string as UTF-32

◆ utf32_length_from_utf16le()

virtual simdutf_warn_unused size_t simdutf::implementation::utf32_length_from_utf16le ( const char16_t *  input,
size_t  length 
) const
pure virtualnoexcept

Compute the number of 4-byte code units that this UTF-16LE string would require in UTF-32 format.

This function is equivalent to count_utf16le.

This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases the result is implementation defined.

This function is not BOM-aware.

Parameters
inputthe UTF-16LE string to convert
lengththe length of the string in 2-byte code units (char16_t)
Returns
the number of 4-byte code units (char32_t) required to encode the UTF-16LE string as UTF-32

◆ utf32_length_from_utf8()

virtual simdutf_warn_unused size_t simdutf::implementation::utf32_length_from_utf8 ( const char *  input,
size_t  length 
) const
pure virtualnoexcept

Compute the number of 4-byte code units that this UTF-8 string would require in UTF-32 format.

This function is equivalent to count_utf8. It is acceptable to pass invalid UTF-8 strings but in such cases the result is implementation defined.

This function does not validate the input.

Parameters
inputthe UTF-8 string to process
lengththe length of the string in bytes
Returns
the number of char32_t code units required to encode the UTF-8 string as UTF-32

◆ utf8_length_from_latin1()

virtual simdutf_warn_unused size_t simdutf::implementation::utf8_length_from_latin1 ( const char *  input,
size_t  length 
) const
pure virtualnoexcept

Return the number of bytes that this Latin1 string would require in UTF-8 format.

Parameters
inputthe Latin1 string to convert
lengththe length of the string in bytes
Returns
the number of bytes required to encode the Latin1 string as UTF-8

◆ utf8_length_from_utf16be()

virtual simdutf_warn_unused size_t simdutf::implementation::utf8_length_from_utf16be ( const char16_t *  input,
size_t  length 
) const
pure virtualnoexcept

Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.

This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases the result is implementation defined.

This function is not BOM-aware.

Parameters
inputthe UTF-16BE string to convert
lengththe length of the string in 2-byte code units (char16_t)
Returns
the number of bytes required to encode the UTF-16BE string as UTF-8

◆ utf8_length_from_utf16le()

virtual simdutf_warn_unused size_t simdutf::implementation::utf8_length_from_utf16le ( const char16_t *  input,
size_t  length 
) const
pure virtualnoexcept

Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.

This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases the result is implementation defined.

This function is not BOM-aware.

Parameters
inputthe UTF-16LE string to convert
lengththe length of the string in 2-byte code units (char16_t)
Returns
the number of bytes required to encode the UTF-16LE string as UTF-8

◆ utf8_length_from_utf32()

virtual simdutf_warn_unused size_t simdutf::implementation::utf8_length_from_utf32 ( const char32_t *  input,
size_t  length 
) const
pure virtualnoexcept

Compute the number of bytes that this UTF-32 string would require in UTF-8 format.

This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases the result is implementation defined.

Parameters
inputthe UTF-32 string to convert
lengththe length of the string in 4-byte code units (char32_t)
Returns
the number of bytes required to encode the UTF-32 string as UTF-8

◆ validate_ascii()

virtual simdutf_warn_unused bool simdutf::implementation::validate_ascii ( const char *  buf,
size_t  len 
) const
pure virtualnoexcept

Validate the ASCII string.

Overridden by each implementation.

Parameters
bufthe ASCII string to validate.
lenthe length of the string in bytes.
Returns
true if and only if the string is valid ASCII.

◆ validate_ascii_with_errors()

virtual simdutf_warn_unused result simdutf::implementation::validate_ascii_with_errors ( const char *  buf,
size_t  len 
) const
pure virtualnoexcept

Validate the ASCII string and stop on error.

Overridden by each implementation.

Parameters
bufthe ASCII string to validate.
lenthe length of the string in bytes.
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.

◆ validate_utf16be()

virtual simdutf_warn_unused bool simdutf::implementation::validate_utf16be ( const char16_t *  buf,
size_t  len 
) const
pure virtualnoexcept

Validate the UTF-16BE string.

This function may be best when you expect the input to be almost always valid. Otherwise, consider using validate_utf16be_with_errors.

Overridden by each implementation.

This function is not BOM-aware.

Parameters
bufthe UTF-16BE string to validate.
lenthe length of the string in number of 2-byte code units (char16_t).
Returns
true if and only if the string is valid UTF-16BE.

◆ validate_utf16be_with_errors()

virtual simdutf_warn_unused result simdutf::implementation::validate_utf16be_with_errors ( const char16_t *  buf,
size_t  len 
) const
pure virtualnoexcept

Validate the UTF-16BE string and stop on error.

It might be faster than validate_utf16be when an error is expected to occur early.

Overridden by each implementation.

This function is not BOM-aware.

Parameters
bufthe UTF-16BE string to validate.
lenthe length of the string in number of 2-byte code units (char16_t).
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.

◆ validate_utf16le()

virtual simdutf_warn_unused bool simdutf::implementation::validate_utf16le ( const char16_t *  buf,
size_t  len 
) const
pure virtualnoexcept

Validate the UTF-16LE string. This function may be best when you expect the input to be almost always valid.

Otherwise, consider using validate_utf16le_with_errors.

Overridden by each implementation.

This function is not BOM-aware.

Parameters
bufthe UTF-16LE string to validate.
lenthe length of the string in number of 2-byte code units (char16_t).
Returns
true if and only if the string is valid UTF-16LE.

◆ validate_utf16le_with_errors()

virtual simdutf_warn_unused result simdutf::implementation::validate_utf16le_with_errors ( const char16_t *  buf,
size_t  len 
) const
pure virtualnoexcept

Validate the UTF-16LE string and stop on error.

It might be faster than validate_utf16le when an error is expected to occur early.

Overridden by each implementation.

This function is not BOM-aware.

Parameters
bufthe UTF-16LE string to validate.
lenthe length of the string in number of 2-byte code units (char16_t).
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.

◆ validate_utf32()

virtual simdutf_warn_unused bool simdutf::implementation::validate_utf32 ( const char32_t *  buf,
size_t  len 
) const
pure virtualnoexcept

Validate the UTF-32 string.

Overridden by each implementation.

This function is not BOM-aware.

Parameters
bufthe UTF-32 string to validate.
lenthe length of the string in number of 4-byte code units (char32_t).
Returns
true if and only if the string is valid UTF-32.

◆ validate_utf32_with_errors()

virtual simdutf_warn_unused result simdutf::implementation::validate_utf32_with_errors ( const char32_t *  buf,
size_t  len 
) const
pure virtualnoexcept

Validate the UTF-32 string and stop on error.

Overridden by each implementation.

This function is not BOM-aware.

Parameters
bufthe UTF-32 string to validate.
lenthe length of the string in number of 4-byte code units (char32_t).
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.

◆ validate_utf8()

virtual simdutf_warn_unused bool simdutf::implementation::validate_utf8 ( const char *  buf,
size_t  len 
) const
pure virtualnoexcept

Validate the UTF-8 string.

Overridden by each implementation.

Parameters
bufthe UTF-8 string to validate.
lenthe length of the string in bytes.
Returns
true if and only if the string is valid UTF-8.

◆ validate_utf8_with_errors()

virtual simdutf_warn_unused result simdutf::implementation::validate_utf8_with_errors ( const char *  buf,
size_t  len 
) const
pure virtualnoexcept

Validate the UTF-8 string and stop on errors.

Overridden by each implementation.

Parameters
bufthe UTF-8 string to validate.
lenthe length of the string in bytes.
Returns
a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.

The documentation for this class was generated from the following file: