1#ifndef SIMDUTF_ENCODING_TYPES_H
2#define SIMDUTF_ENCODING_TYPES_H
4#include "simdutf/portability.h"
5#include "simdutf/common_defs.h"
7#if !defined(SIMDUTF_NO_STD_TEXT_ENCODING) && \
8 defined(__cpp_lib_text_encoding) && __cpp_lib_text_encoding >= 202306L
9 #define SIMDUTF_HAS_STD_TEXT_ENCODING 1
10 #include <text_encoding>
26#ifndef SIMDUTF_IS_BIG_ENDIAN
27 #error "SIMDUTF_IS_BIG_ENDIAN needs to be defined."
34#if SIMDUTF_IS_BIG_ENDIAN
41simdutf_warn_unused simdutf_really_inline
constexpr bool
42match_system(endianness e) {
43 return e == endianness::NATIVE;
/**
 * Declaration only: converts an encoding_type value into a std::string.
 *
 * @param bom the encoding_type value to convert.
 * @return a std::string by value.
 *
 * NOTE(review): presumably the result is a human-readable name of the
 * encoding; the definition is not visible here, so the exact text returned
 * for each enumerator should be confirmed against the implementation.
 */
46simdutf_warn_unused std::string to_string(encoding_type bom);
/**
 * Declaration only: classifies a byte buffer as an encoding_type.
 * This is the overload that takes the buffer as `const uint8_t *`; an
 * overload taking `const char *` is declared right after it.
 *
 * @param byte   pointer to the bytes to inspect (read-only).
 * @param length size of the buffer as a size_t.
 * @return an encoding_type value.
 *
 * NOTE(review): judging by the name, this looks for a byte-order mark at the
 * start of the buffer; which value is returned when no BOM is found, and how
 * short buffers are handled, are defined elsewhere — confirm before relying
 * on either.
 */
58simdutf_warn_unused encoding_type check_bom(
const uint8_t *
byte,
size_t length);
/**
 * Declaration only: `const char *` overload of check_bom, with the same
 * parameter list shape and return type as the `const uint8_t *` overload
 * declared just before it.
 *
 * @param byte   pointer to the characters to inspect (read-only).
 * @param length size of the buffer as a size_t.
 * @return an encoding_type value.
 *
 * NOTE(review): presumably behaves identically to the uint8_t overload on
 * the same bytes; the definition is not visible here — confirm.
 */
59simdutf_warn_unused encoding_type check_bom(
const char *
byte,
size_t length);
/**
 * Declaration only: maps an encoding_type value to a size_t.
 *
 * @param bom the encoding_type value to query.
 * @return a size_t.
 *
 * NOTE(review): the name suggests the result is the length in bytes of the
 * byte-order mark associated with the given encoding; the per-encoding values
 * live in the definition, which is not visible here — confirm.
 */
66simdutf_warn_unused
size_t bom_byte_size(encoding_type bom);
70#ifdef SIMDUTF_HAS_STD_TEXT_ENCODING
78simdutf_warn_unused
constexpr std::text_encoding
79to_std_encoding(encoding_type enc)
noexcept {
82 return std::text_encoding(std::text_encoding::id::UTF8);
84 return std::text_encoding(std::text_encoding::id::UTF16LE);
86 return std::text_encoding(std::text_encoding::id::UTF16BE);
88 return std::text_encoding(std::text_encoding::id::UTF32LE);
90 return std::text_encoding(std::text_encoding::id::UTF32BE);
92 return std::text_encoding(std::text_encoding::id::ISOLatin1);
95 return std::text_encoding(std::text_encoding::id::unknown);
106simdutf_warn_unused
constexpr encoding_type
107from_std_encoding(
const std::text_encoding &enc)
noexcept {
109 case std::text_encoding::id::UTF8:
111 case std::text_encoding::id::UTF16LE:
113 case std::text_encoding::id::UTF16BE:
115 case std::text_encoding::id::UTF32LE:
117 case std::text_encoding::id::UTF32BE:
119 case std::text_encoding::id::ISOLatin1:
131simdutf_warn_unused
constexpr encoding_type native_utf16_encoding() noexcept {
132 #if SIMDUTF_IS_BIG_ENDIAN
144simdutf_warn_unused
constexpr encoding_type native_utf32_encoding() noexcept {
145 #if SIMDUTF_IS_BIG_ENDIAN
163simdutf_warn_unused
constexpr encoding_type
164from_std_encoding_native(
const std::text_encoding &enc)
noexcept {
166 case std::text_encoding::id::UTF8:
168 case std::text_encoding::id::UTF16:
169 return native_utf16_encoding();
170 case std::text_encoding::id::UTF16LE:
172 case std::text_encoding::id::UTF16BE:
174 case std::text_encoding::id::UTF32:
175 return native_utf32_encoding();
176 case std::text_encoding::id::UTF32LE:
178 case std::text_encoding::id::UTF32BE:
180 case std::text_encoding::id::ISOLatin1: