simdutf 8.0.0
Unicode at GB/s.
Loading...
Searching...
No Matches
base64_implementation.h
1#ifndef SIMDUTF_BASE64_IMPLEMENTATION_H
2#define SIMDUTF_BASE64_IMPLEMENTATION_H
3
4// this is not part of the public api
5
6namespace simdutf {
7
8template <typename chartype>
9simdutf_warn_unused simdutf_constexpr23 result slow_base64_to_binary_safe_impl(
10 const chartype *input, size_t length, char *output, size_t &outlen,
11 base64_options options,
12 last_chunk_handling_options last_chunk_options) noexcept {
13 const bool ignore_garbage = (options & base64_default_accept_garbage) != 0;
14 auto ri = simdutf::scalar::base64::find_end(input, length, options);
15 size_t equallocation = ri.equallocation;
16 size_t equalsigns = ri.equalsigns;
17 length = ri.srclen;
18 size_t full_input_length = ri.full_input_length;
19 (void)full_input_length;
20 if (length == 0) {
21 outlen = 0;
22 if (!ignore_garbage && equalsigns > 0) {
23 return {INVALID_BASE64_CHARACTER, equallocation};
24 }
25 return {SUCCESS, 0};
26 }
27
28 // The parameters of base64_tail_decode_safe are:
29 // - dst: the output buffer
30 // - outlen: the size of the output buffer
31 // - srcr: the input buffer
32 // - length: the size of the input buffer
33 // - padded_characters: the number of padding characters
34 // - options: the options for the base64 decoder
35 // - last_chunk_options: the options for the last chunk
36 // The function will return the number of bytes written to the output buffer
37 // and the number of bytes read from the input buffer.
38 // The function will also return an error code if the input buffer is not
39 // valid base64.
40 full_result r = scalar::base64::base64_tail_decode_safe(
41 output, outlen, input, length, equalsigns, options, last_chunk_options);
42 r = scalar::base64::patch_tail_result(r, 0, 0, equallocation,
43 full_input_length, last_chunk_options);
44 outlen = r.output_count;
45 if (!is_partial(last_chunk_options) && r.error == error_code::SUCCESS &&
46 equalsigns > 0) {
47 // additional checks
48 if ((outlen % 3 == 0) || ((outlen % 3) + 1 + equalsigns != 4)) {
49 r.error = error_code::INVALID_BASE64_CHARACTER;
50 }
51 }
52 return {r.error, r.input_count}; // we cannot return r itself because it gets
53 // converted to error/output_count
54}
55
56template <typename chartype>
57simdutf_warn_unused simdutf_constexpr23 result base64_to_binary_safe_impl(
58 const chartype *input, size_t length, char *output, size_t &outlen,
59 base64_options options,
60 last_chunk_handling_options last_chunk_handling_options,
61 bool decode_up_to_bad_char) noexcept {
62 static_assert(std::is_same<chartype, char>::value ||
63 std::is_same<chartype, char16_t>::value,
64 "Only char and char16_t are supported.");
65 size_t remaining_input_length = length;
66 size_t remaining_output_length = outlen;
67 size_t input_position = 0;
68 size_t output_position = 0;
69
70 // We also do a first pass using the fast path to decode as much as possible
71 size_t safe_input = (std::min)(
72 remaining_input_length,
73 base64_length_from_binary(remaining_output_length / 3 * 3, options));
74 bool done_with_partial = (safe_input == remaining_input_length);
76
77#if SIMDUTF_CPLUSPLUS23
78 if consteval {
79 r = scalar::base64::base64_to_binary_details_impl(
80 input + input_position, safe_input, output + output_position, options,
81 done_with_partial
82 ? last_chunk_handling_options
83 : simdutf::last_chunk_handling_options::only_full_chunks);
84 } else
85#endif
86 {
87 r = get_active_implementation()->base64_to_binary_details(
88 input + input_position, safe_input, output + output_position, options,
89 done_with_partial
90 ? last_chunk_handling_options
91 : simdutf::last_chunk_handling_options::only_full_chunks);
92 }
93 simdutf_log_assert(r.input_count <= safe_input,
94 "You should not read more than safe_input");
95 simdutf_log_assert(r.output_count <= remaining_output_length,
96 "You should not write more than remaining_output_length");
97 // Technically redundant, but we want to be explicit about it.
98 input_position += r.input_count;
99 output_position += r.output_count;
100 remaining_input_length -= r.input_count;
101 remaining_output_length -= r.output_count;
102 if (r.error != simdutf::error_code::SUCCESS) {
103 // There is an error. We return.
104 if (decode_up_to_bad_char &&
105 r.error == error_code::INVALID_BASE64_CHARACTER) {
106 return slow_base64_to_binary_safe_impl(
107 input, length, output, outlen, options, last_chunk_handling_options);
108 }
109 outlen = output_position;
110 return {r.error, input_position};
111 }
112
113 if (done_with_partial) {
114 // We are done. We have decoded everything.
115 outlen = output_position;
116 return {simdutf::error_code::SUCCESS, input_position};
117 }
118 // We have decoded some data, but we still have some data to decode.
119 // We need to decode the rest of the input buffer.
120 r = simdutf::scalar::base64::base64_to_binary_details_safe_impl(
121 input + input_position, remaining_input_length, output + output_position,
122 remaining_output_length, options, last_chunk_handling_options);
123 input_position += r.input_count;
124 output_position += r.output_count;
125 remaining_input_length -= r.input_count;
126 remaining_output_length -= r.output_count;
127
128 if (r.error != simdutf::error_code::SUCCESS) {
129 // There is an error. We return.
130 if (decode_up_to_bad_char &&
131 r.error == error_code::INVALID_BASE64_CHARACTER) {
132 return slow_base64_to_binary_safe_impl(
133 input, length, output, outlen, options, last_chunk_handling_options);
134 }
135 outlen = output_position;
136 return {r.error, input_position};
137 }
138 if (input_position < length) {
139 // We cannot process the entire input in one go, so we need to
140 // process it in two steps: first the fast path, then the slow path.
141 // In some cases, the processing might 'eat up' trailing ignorable
142 // characters in the fast path, but that can be a problem.
143 // suppose we have just white space followed by a single base64 character.
144 // If we first process the white space with the fast path, it will
145 // eat all of it. But, by the JavaScript standard, we should consume
146 // no character. See
147 // https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64
148 while (input_position > 0 &&
149 base64_ignorable(input[input_position - 1], options)) {
150 input_position--;
151 }
152 }
153 outlen = output_position;
154 return {simdutf::error_code::SUCCESS, input_position};
155}
156
157} // namespace simdutf
158#endif // SIMDUTF_BASE64_IMPLEMENTATION_H