From 7fa2a134af35fb794f7e618d823b3df38b104719 Mon Sep 17 00:00:00 2001 From: "Node.js GitHub Bot" Date: Sat, 16 Nov 2024 18:24:13 -0500 Subject: [PATCH] deps: update simdutf to 5.6.1 PR-URL: https://github.com/nodejs/node/pull/55850 Reviewed-By: Marco Ippolito Reviewed-By: Rafael Gonzaga Reviewed-By: Luigi Pinca --- deps/simdutf/simdutf.cpp | 615 +++++++++++++++++++++++++++++++++------ deps/simdutf/simdutf.h | 268 +++++++++++------ 2 files changed, 702 insertions(+), 181 deletions(-) diff --git a/deps/simdutf/simdutf.cpp b/deps/simdutf/simdutf.cpp index e6b25c7ce27..d5f6fbd9a4c 100644 --- a/deps/simdutf/simdutf.cpp +++ b/deps/simdutf/simdutf.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2024-10-11 12:35:29 -0400. Do not edit! */ +/* auto-generated on 2024-11-12 20:00:19 -0500. Do not edit! */ /* begin file src/simdutf.cpp */ #include "simdutf.h" // We include base64_tables once. @@ -937,6 +937,10 @@ public: const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; simdutf_warn_unused size_t maximal_binary_length_from_base64( const char16_t *input, size_t length) const noexcept; simdutf_warn_unused result @@ -944,6 +948,11 @@ public: base64_options options, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; simdutf_warn_unused size_t base64_length_from_binary( size_t length, base64_options options) const noexcept; size_t 
binary_to_base64(const char *input, size_t length, char *output, @@ -2547,6 +2556,10 @@ public: const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; simdutf_warn_unused size_t maximal_binary_length_from_base64( const char16_t *input, size_t length) const noexcept; simdutf_warn_unused result @@ -2554,6 +2567,11 @@ public: base64_options options, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; simdutf_warn_unused size_t base64_length_from_binary( size_t length, base64_options options) const noexcept; size_t binary_to_base64(const char *input, size_t length, char *output, @@ -2885,6 +2903,10 @@ public: base64_options options, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused virtual full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; simdutf_warn_unused virtual size_t maximal_binary_length_from_base64(const char16_t *input, size_t length) const noexcept; @@ -2893,6 +2915,11 @@ public: base64_options options, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused virtual full_result base64_to_binary_details( + const char16_t 
*input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; simdutf_warn_unused virtual size_t base64_length_from_binary(size_t length, base64_options options) const noexcept; @@ -4142,6 +4169,10 @@ public: const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; simdutf_warn_unused size_t maximal_binary_length_from_base64( const char16_t *input, size_t length) const noexcept; simdutf_warn_unused result @@ -4149,6 +4180,11 @@ public: base64_options options, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; simdutf_warn_unused size_t base64_length_from_binary( size_t length, base64_options options) const noexcept; size_t binary_to_base64(const char *input, size_t length, char *output, @@ -5386,6 +5422,10 @@ public: const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; simdutf_warn_unused size_t maximal_binary_length_from_base64( const char16_t *input, size_t length) 
const noexcept; simdutf_warn_unused result @@ -5393,6 +5433,11 @@ public: base64_options options, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; simdutf_warn_unused size_t base64_length_from_binary( size_t length, base64_options options) const noexcept; size_t binary_to_base64(const char *input, size_t length, char *output, @@ -6172,6 +6217,10 @@ public: const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; simdutf_warn_unused size_t maximal_binary_length_from_base64( const char16_t *input, size_t length) const noexcept; simdutf_warn_unused result @@ -6179,6 +6228,11 @@ public: base64_options options, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; simdutf_warn_unused size_t base64_length_from_binary( size_t length, base64_options options) const noexcept; size_t binary_to_base64(const char *input, size_t length, char *output, @@ -6579,6 +6633,10 @@ public: simdutf_warn_unused result base64_to_binary( const char *input, size_t length, char *output, base64_options options, last_chunk_handling_options last_chunk_options) const 
noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; simdutf_warn_unused size_t maximal_binary_length_from_base64( const char16_t *input, size_t length) const noexcept; simdutf_warn_unused result base64_to_binary( @@ -6587,6 +6645,11 @@ public: last_chunk_handling_options last_chunk_options) const noexcept; simdutf_warn_unused size_t base64_length_from_binary( size_t length, base64_options options) const noexcept; + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept; size_t binary_to_base64(const char *input, size_t length, char *output, base64_options options) const noexcept; }; @@ -7176,7 +7239,7 @@ template bool is_eight_byte(char_type c) { // Returns true upon success. The destination buffer must be large enough. // This functions assumes that the padding (=) has been removed. template -result +full_result base64_tail_decode(char *dst, const char_type *src, size_t length, size_t padded_characters, // number of padding characters // '=', typically 0, 1, 2. @@ -7229,7 +7292,8 @@ base64_tail_decode(char *dst, const char_type *src, size_t length, if (is_eight_byte(c) && code <= 63) { idx++; } else if (code > 64 || !scalar::base64::is_eight_byte(c)) { - return {INVALID_BASE64_CHARACTER, size_t(src - srcinit)}; + return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; } else { // We have a space or a newline. We ignore it. 
} @@ -7239,17 +7303,23 @@ base64_tail_decode(char *dst, const char_type *src, size_t length, if (last_chunk_options == last_chunk_handling_options::strict && (idx != 1) && ((idx + padded_characters) & 3) != 0) { // The partial chunk was at src - idx - return {BASE64_INPUT_REMAINDER, size_t(dst - dstinit)}; + return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), + size_t(dst - dstinit)}; } else if (last_chunk_options == last_chunk_handling_options::stop_before_partial && (idx != 1) && ((idx + padded_characters) & 3) != 0) { // Rewind src to before partial chunk src -= idx; - return {SUCCESS, size_t(dst - dstinit)}; + return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; } else { if (idx == 2) { uint32_t triple = (uint32_t(buffer[0]) << 3 * 6) + (uint32_t(buffer[1]) << 2 * 6); + if ((last_chunk_options == last_chunk_handling_options::strict) && + (triple & 0xffff)) { + return {BASE64_EXTRA_BITS, size_t(src - srcinit), + size_t(dst - dstinit)}; + } if (match_system(endianness::BIG)) { triple <<= 8; std::memcpy(dst, &triple, 1); @@ -7263,6 +7333,11 @@ base64_tail_decode(char *dst, const char_type *src, size_t length, uint32_t triple = (uint32_t(buffer[0]) << 3 * 6) + (uint32_t(buffer[1]) << 2 * 6) + (uint32_t(buffer[2]) << 1 * 6); + if ((last_chunk_options == last_chunk_handling_options::strict) && + (triple & 0xff)) { + return {BASE64_EXTRA_BITS, size_t(src - srcinit), + size_t(dst - dstinit)}; + } if (match_system(endianness::BIG)) { triple <<= 8; std::memcpy(dst, &triple, 2); @@ -7273,9 +7348,10 @@ base64_tail_decode(char *dst, const char_type *src, size_t length, } dst += 2; } else if (idx == 1) { - return {BASE64_INPUT_REMAINDER, size_t(dst - dstinit)}; + return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), + size_t(dst - dstinit)}; } - return {SUCCESS, size_t(dst - dstinit)}; + return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; } } @@ -7295,17 +7371,15 @@ base64_tail_decode(char *dst, const char_type *src, size_t length, } // like 
base64_tail_decode, but it will not write past the end of the output -// buffer. outlen is modified to reflect the number of bytes written. This -// functions assumes that the padding (=) has been removed. -// like base64_tail_decode, but it will not write past the end of the output -// buffer. outlen is modified to reflect the number of bytes written. This -// functions assumes that the padding (=) has been removed. +// buffer. The outlen paramter is modified to reflect the number of bytes +// written. This functions assumes that the padding (=) has been removed. template result base64_tail_decode_safe( - char *dst, size_t &outlen, const char_type *src, size_t length, + char *dst, size_t &outlen, const char_type *&srcr, size_t length, size_t padded_characters, // number of padding characters '=', typically 0, // 1, 2. base64_options options, last_chunk_handling_options last_chunk_options) { + const char_type *src = srcr; if (length == 0) { outlen = 0; return {SUCCESS, 0}; @@ -7344,6 +7418,7 @@ result base64_tail_decode_safe( d2[uint8_t(src[2])] | d3[uint8_t(src[3])]) < 0x01FFFFFF) { if (dstend - dst < 3) { outlen = size_t(dst - dstinit); + srcr = src; return {OUTPUT_BUFFER_TOO_SMALL, size_t(src - srcinit)}; } if (match_system(endianness::BIG)) { @@ -7365,6 +7440,7 @@ result base64_tail_decode_safe( idx++; } else if (code > 64 || !scalar::base64::is_eight_byte(c)) { outlen = size_t(dst - dstinit); + srcr = src; return {INVALID_BASE64_CHARACTER, size_t(src - srcinit)}; } else { // We have a space or a newline. We ignore it. 
@@ -7375,33 +7451,42 @@ result base64_tail_decode_safe( if (last_chunk_options == last_chunk_handling_options::strict && ((idx + padded_characters) & 3) != 0) { outlen = size_t(dst - dstinit); + srcr = src; return {BASE64_INPUT_REMAINDER, size_t(src - srcinit)}; } else if (last_chunk_options == last_chunk_handling_options::stop_before_partial && ((idx + padded_characters) & 3) != 0) { // Rewind src to before partial chunk - src = srccur; + srcr = srccur; outlen = size_t(dst - dstinit); return {SUCCESS, size_t(dst - dstinit)}; } else { // loose mode if (idx == 0) { // No data left; return success outlen = size_t(dst - dstinit); + srcr = src; return {SUCCESS, size_t(dst - dstinit)}; } else if (idx == 1) { // Error: Incomplete chunk of length 1 is invalid in loose mode outlen = size_t(dst - dstinit); + srcr = src; return {BASE64_INPUT_REMAINDER, size_t(src - srcinit)}; } else if (idx == 2 || idx == 3) { // Check if there's enough space in the destination buffer size_t required_space = (idx == 2) ? 
1 : 2; if (size_t(dstend - dst) < required_space) { outlen = size_t(dst - dstinit); + srcr = src; return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit)}; } uint32_t triple = 0; if (idx == 2) { triple = (uint32_t(buffer[0]) << 18) + (uint32_t(buffer[1]) << 12); + if ((last_chunk_options == last_chunk_handling_options::strict) && + (triple & 0xffff)) { + srcr = src; + return {BASE64_EXTRA_BITS, size_t(src - srcinit)}; + } // Extract the first byte triple >>= 16; dst[0] = static_cast(triple & 0xFF); @@ -7409,6 +7494,11 @@ result base64_tail_decode_safe( } else if (idx == 3) { triple = (uint32_t(buffer[0]) << 18) + (uint32_t(buffer[1]) << 12) + (uint32_t(buffer[2]) << 6); + if ((last_chunk_options == last_chunk_handling_options::strict) && + (triple & 0xff)) { + srcr = src; + return {BASE64_EXTRA_BITS, size_t(src - srcinit)}; + } // Extract the first two bytes triple >>= 8; dst[0] = static_cast((triple >> 8) & 0xFF); @@ -7416,6 +7506,7 @@ result base64_tail_decode_safe( dst += 2; } outlen = size_t(dst - dstinit); + srcr = src; return {SUCCESS, size_t(dst - dstinit)}; } } @@ -7423,6 +7514,7 @@ result base64_tail_decode_safe( if (dstend - dst < 3) { outlen = size_t(dst - dstinit); + srcr = src; return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit)}; } uint32_t triple = (uint32_t(buffer[0]) << 18) + @@ -8243,6 +8335,14 @@ public: last_chunk_handling_options); } + simdutf_warn_unused full_result base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_handling_options = + last_chunk_handling_options::loose) const noexcept override { + return set_best()->base64_to_binary_details(input, length, output, options, + last_chunk_handling_options); + } + simdutf_warn_unused size_t maximal_binary_length_from_base64( const char16_t *input, size_t length) const noexcept override { return set_best()->maximal_binary_length_from_base64(input, length); @@ -8257,6 +8357,15 @@ public: 
last_chunk_handling_options); } + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options, + last_chunk_handling_options last_chunk_handling_options = + last_chunk_handling_options::loose) const noexcept override { + return set_best()->base64_to_binary_details(input, length, output, options, + last_chunk_handling_options); + } + simdutf_warn_unused size_t base64_length_from_binary( size_t length, base64_options options) const noexcept override { return set_best()->base64_length_from_binary(length, options); @@ -8706,6 +8815,12 @@ public: return result(error_code::OTHER, 0); } + simdutf_warn_unused full_result base64_to_binary_details( + const char *, size_t, char *, base64_options, + last_chunk_handling_options) const noexcept override { + return full_result(error_code::OTHER, 0, 0); + } + simdutf_warn_unused size_t maximal_binary_length_from_base64( const char16_t *, size_t) const noexcept override { return 0; @@ -8717,6 +8832,12 @@ public: return result(error_code::OTHER, 0); } + simdutf_warn_unused full_result base64_to_binary_details( + const char16_t *, size_t, char *, base64_options, + last_chunk_handling_options) const noexcept override { + return full_result(error_code::OTHER, 0, 0); + } + simdutf_warn_unused size_t base64_length_from_binary(size_t, base64_options) const noexcept override { return 0; @@ -9365,11 +9486,12 @@ simdutf_warn_unused result base64_to_binary_safe_impl( size_t max_length = maximal_binary_length_from_base64(input, length); if (outlen >= max_length) { // fast path - result r = base64_to_binary(input, length, output, options, - last_chunk_handling_options); - if (r.error != error_code::INVALID_BASE64_CHARACTER) { - outlen = r.count; - r.count = length; + full_result r = get_default_implementation()->base64_to_binary_details( + input, length, output, options, last_chunk_handling_options); + if (r.error != error_code::INVALID_BASE64_CHARACTER && + r.error != 
error_code::BASE64_EXTRA_BITS) { + outlen = r.output_count; + return {r.error, length}; } return r; } @@ -9377,14 +9499,16 @@ simdutf_warn_unused result base64_to_binary_safe_impl( // the input. size_t outlen3 = outlen / 3 * 3; // round down to multiple of 3 size_t safe_input = base64_length_from_binary(outlen3, options); - result r = base64_to_binary(input, safe_input, output, options, loose); + full_result r = get_default_implementation()->base64_to_binary_details( + input, safe_input, output, options, loose); if (r.error == error_code::INVALID_BASE64_CHARACTER) { return r; } - size_t offset = (r.error == error_code::BASE64_INPUT_REMAINDER) - ? 1 - : ((r.count % 3) == 0 ? 0 : (r.count % 3) + 1); - size_t output_index = r.count - (r.count % 3); + size_t offset = + (r.error == error_code::BASE64_INPUT_REMAINDER) + ? 1 + : ((r.output_count % 3) == 0 ? 0 : (r.output_count % 3) + 1); + size_t output_index = r.output_count - (r.output_count % 3); size_t input_index = safe_input; // offset is a value that is no larger than 3. 
We backtrack // by up to offset characters + an undetermined number of @@ -9419,19 +9543,25 @@ simdutf_warn_unused result base64_to_binary_safe_impl( padding_characts++; } } - r = scalar::base64::base64_tail_decode_safe( + // this will advance tail_input and tail_length + result rr = scalar::base64::base64_tail_decode_safe( output + output_index, remaining_out, tail_input, tail_length, padding_characts, options, last_chunk_handling_options); outlen = output_index + remaining_out; if (last_chunk_handling_options != stop_before_partial && - r.error == error_code::SUCCESS && padding_characts > 0) { + rr.error == error_code::SUCCESS && padding_characts > 0) { // additional checks if ((outlen % 3 == 0) || ((outlen % 3) + 1 + padding_characts != 4)) { - r.error = error_code::INVALID_BASE64_CHARACTER; + rr.error = error_code::INVALID_BASE64_CHARACTER; } } - r.count += input_index; - return r; + if (rr.error == error_code::SUCCESS && + last_chunk_handling_options == stop_before_partial) { + rr.count = tail_input - input; + return rr; + } + rr.count += input_index; + return rr; } simdutf_warn_unused size_t convert_latin1_to_utf8_safe( @@ -15614,9 +15744,10 @@ void base64_decode_block(char *out, const char *src) { } template -result compress_decode_base64(char *dst, const char_type *src, size_t srclen, - base64_options options, - last_chunk_handling_options last_chunk_options) { +full_result +compress_decode_base64(char *dst, const char_type *src, size_t srclen, + base64_options options, + last_chunk_handling_options last_chunk_options) { const uint8_t *to_base64 = base64_url ? 
tables::base64::to_base64_url_value : tables::base64::to_base64_value; size_t equallocation = @@ -15644,9 +15775,9 @@ result compress_decode_base64(char *dst, const char_type *src, size_t srclen, } if (srclen == 0) { if (equalsigns > 0) { - return {INVALID_BASE64_CHARACTER, equallocation}; + return {INVALID_BASE64_CHARACTER, equallocation, 0}; } - return {SUCCESS, 0}; + return {SUCCESS, 0, 0}; } const char_type *const srcinit = src; const char *const dstinit = dst; @@ -15673,7 +15804,8 @@ result compress_decode_base64(char *dst, const char_type *src, size_t srclen, if (src < srcend) { // should never happen } - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit)}; + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; } } @@ -15708,7 +15840,8 @@ result compress_decode_base64(char *dst, const char_type *src, size_t srclen, uint8_t val = to_base64[uint8_t(*src)]; *bufferptr = char(val); if (!scalar::base64::is_eight_byte(*src) || val > 64) { - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit)}; + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; } bufferptr += (val <= 63); src++; @@ -15756,20 +15889,22 @@ result compress_decode_base64(char *dst, const char_type *src, size_t srclen, } } if (src < srcend + equalsigns) { - result r = scalar::base64::base64_tail_decode( + full_result r = scalar::base64::base64_tail_decode( dst, src, srcend - src, equalsigns, options, last_chunk_options); - if (r.error == error_code::INVALID_BASE64_CHARACTER) { - r.count += size_t(src - srcinit); + if (r.error == error_code::INVALID_BASE64_CHARACTER || + r.error == error_code::BASE64_EXTRA_BITS) { + r.input_count += size_t(src - srcinit); return r; } else { - r.count += size_t(dst - dstinit); + r.output_count += size_t(dst - dstinit); } if (last_chunk_options != stop_before_partial && r.error == error_code::SUCCESS && equalsigns > 0) { // additional checks - if 
((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { r.error = error_code::INVALID_BASE64_CHARACTER; - r.count = equallocation; + r.input_count = equallocation; } } return r; @@ -15777,10 +15912,10 @@ result compress_decode_base64(char *dst, const char_type *src, size_t srclen, if (equalsigns > 0) { if ((size_t(dst - dstinit) % 3 == 0) || ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation}; + return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; } } - return {SUCCESS, size_t(dst - dstinit)}; + return {SUCCESS, srclen, size_t(dst - dstinit)}; } /* end file src/arm64/arm_base64.cpp */ /* begin file src/arm64/arm_convert_utf32_to_latin1.cpp */ @@ -19261,6 +19396,16 @@ simdutf_warn_unused result implementation::base64_to_binary( last_chunk_options); } +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); +} + simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( const char16_t *input, size_t length) const noexcept { return scalar::base64::maximal_binary_length_from_base64(input, length); @@ -19276,6 +19421,16 @@ simdutf_warn_unused result implementation::base64_to_binary( last_chunk_options); } +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return (options & base64_url) + ? 
compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); +} + simdutf_warn_unused size_t implementation::base64_length_from_binary( size_t length, base64_options options) const noexcept { return scalar::base64::base64_length_from_binary(length, options); @@ -19822,6 +19977,49 @@ simdutf_warn_unused result implementation::base64_to_binary( return r; } +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + size_t equallocation = + length; // location of the first padding character if any + size_t equalsigns = 0; + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + length -= 1; + equalsigns++; + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + equalsigns++; + length -= 1; + } + } + if (length == 0) { + if (equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, 0, 0}; + } + full_result r = scalar::base64::base64_tail_decode( + output, input, length, equalsigns, options, last_chunk_options); + if (last_chunk_options != stop_before_partial && + r.error == error_code::SUCCESS && equalsigns > 0) { + // additional checks + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; + } + } + return r; +} + simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( const char16_t *input, size_t length) const noexcept { return scalar::base64::maximal_binary_length_from_base64(input, length); @@ -19869,6 +20067,49 @@ 
simdutf_warn_unused result implementation::base64_to_binary( return r; } +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + size_t equallocation = + length; // location of the first padding character if any + size_t equalsigns = 0; + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + length -= 1; + equalsigns++; + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + equalsigns++; + length -= 1; + } + } + if (length == 0) { + if (equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, 0, 0}; + } + full_result r = scalar::base64::base64_tail_decode( + output, input, length, equalsigns, options, last_chunk_options); + if (last_chunk_options != stop_before_partial && + r.error == error_code::SUCCESS && equalsigns > 0) { + // additional checks + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; + } + } + return r; +} + simdutf_warn_unused size_t implementation::base64_length_from_binary( size_t length, base64_options options) const noexcept { return scalar::base64::base64_length_from_binary(length, options); @@ -23192,18 +23433,41 @@ size_t encode_base64(char *dst, const char *src, size_t srclen, const __m512i lookup = _mm512_loadu_si512(reinterpret_cast(lookup_tbl)); const __m512i multi_shifts = _mm512_set1_epi64(UINT64_C(0x3036242a1016040a)); - size_t i = 0; - for (; i + 64 <= srclen; i += 48) { - const __m512i v = - _mm512_loadu_si512(reinterpret_cast(input + i)); + size_t size = srclen; + __mmask64 input_mask = 
0xffffffffffff; // (1 << 48) - 1 + while (size >= 48) { + const __m512i v = _mm512_maskz_loadu_epi8( + input_mask, reinterpret_cast(input)); const __m512i in = _mm512_permutexvar_epi8(shuffle_input, v); const __m512i indices = _mm512_multishift_epi64_epi8(multi_shifts, in); const __m512i result = _mm512_permutexvar_epi8(indices, lookup); _mm512_storeu_si512(reinterpret_cast<__m512i *>(out), result); out += 64; + input += 48; + size -= 48; } - return i / 3 * 4 + scalar::base64::tail_encode_base64((char *)out, src + i, - srclen - i, options); + input_mask = ((__mmask64)1 << size) - 1; + const __m512i v = _mm512_maskz_loadu_epi8( + input_mask, reinterpret_cast(input)); + const __m512i in = _mm512_permutexvar_epi8(shuffle_input, v); + const __m512i indices = _mm512_multishift_epi64_epi8(multi_shifts, in); + bool padding_needed = + (((options & base64_url) == 0) ^ + ((options & base64_reverse_padding) == base64_reverse_padding)); + size_t padding_amount = ((size % 3) > 0) ? (3 - (size % 3)) : 0; + size_t output_len = ((size + 2) / 3) * 4; + size_t non_padded_output_len = output_len - padding_amount; + if (!padding_needed) { + output_len = non_padded_output_len; + } + __mmask64 output_mask = output_len == 64 ? 
(__mmask64)UINT64_MAX + : ((__mmask64)1 << output_len) - 1; + __m512i result = _mm512_mask_permutexvar_epi8( + _mm512_set1_epi8('='), ((__mmask64)1 << non_padded_output_len) - 1, + indices, lookup); + _mm512_mask_storeu_epi8(reinterpret_cast<__m512i *>(out), output_mask, + result); + return (size_t)(out - (uint8_t *)dst) + output_len; } template @@ -23309,9 +23573,10 @@ static inline void base64_decode_block(char *out, block64 *b) { } template -result compress_decode_base64(char *dst, const chartype *src, size_t srclen, - base64_options options, - last_chunk_handling_options last_chunk_options) { +full_result +compress_decode_base64(char *dst, const chartype *src, size_t srclen, + base64_options options, + last_chunk_handling_options last_chunk_options) { const uint8_t *to_base64 = base64_url ? tables::base64::to_base64_url_value : tables::base64::to_base64_value; size_t equallocation = @@ -23339,9 +23604,9 @@ result compress_decode_base64(char *dst, const chartype *src, size_t srclen, } if (srclen == 0) { if (equalsigns > 0) { - return {INVALID_BASE64_CHARACTER, equallocation}; + return {INVALID_BASE64_CHARACTER, equallocation, 0}; } - return {SUCCESS, 0}; + return {SUCCESS, 0, 0}; } const chartype *const srcinit = src; const char *const dstinit = dst; @@ -23365,7 +23630,8 @@ result compress_decode_base64(char *dst, const chartype *src, size_t srclen, to_base64[uint8_t(*src)] <= 64) { src++; } - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit)}; + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; } if (badcharmask != 0) { // optimization opportunity: check for simple masks like those made of @@ -23401,7 +23667,8 @@ result compress_decode_base64(char *dst, const chartype *src, size_t srclen, uint8_t val = to_base64[uint8_t(*src)]; *bufferptr = char(val); if (!scalar::base64::is_eight_byte(*src) || val > 64) { - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit)}; + return 
{error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; } bufferptr += (val <= 63); src++; @@ -23447,20 +23714,22 @@ result compress_decode_base64(char *dst, const chartype *src, size_t srclen, } } if (src < srcend + equalsigns) { - result r = scalar::base64::base64_tail_decode( + full_result r = scalar::base64::base64_tail_decode( dst, src, srcend - src, equalsigns, options, last_chunk_options); - if (r.error == error_code::INVALID_BASE64_CHARACTER) { - r.count += size_t(src - srcinit); + if (r.error == error_code::INVALID_BASE64_CHARACTER || + r.error == error_code::BASE64_EXTRA_BITS) { + r.input_count += size_t(src - srcinit); return r; } else { - r.count += size_t(dst - dstinit); + r.output_count += size_t(dst - dstinit); } if (last_chunk_options != stop_before_partial && r.error == error_code::SUCCESS && equalsigns > 0) { // additional checks - if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { r.error = error_code::INVALID_BASE64_CHARACTER; - r.count = equallocation; + r.input_count = equallocation; } } return r; @@ -23468,10 +23737,10 @@ result compress_decode_base64(char *dst, const chartype *src, size_t srclen, if (equalsigns > 0) { if ((size_t(dst - dstinit) % 3 == 0) || ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation}; + return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; } } - return {SUCCESS, size_t(dst - dstinit)}; + return {SUCCESS, srclen, size_t(dst - dstinit)}; } /* end file src/icelake/icelake_base64.inl.cpp */ @@ -25032,6 +25301,16 @@ simdutf_warn_unused result implementation::base64_to_binary( last_chunk_options); } +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return 
(options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); +} + simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( const char16_t *input, size_t length) const noexcept { return scalar::base64::maximal_binary_length_from_base64(input, length); @@ -25047,6 +25326,16 @@ simdutf_warn_unused result implementation::base64_to_binary( last_chunk_options); } +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); +} + simdutf_warn_unused size_t implementation::base64_length_from_binary( size_t length, base64_options options) const noexcept { return scalar::base64::base64_length_from_binary(length, options); @@ -28101,9 +28390,10 @@ static inline void base64_decode_block_safe(char *out, block64 *b) { } template -result compress_decode_base64(char *dst, const chartype *src, size_t srclen, - base64_options options, - last_chunk_handling_options last_chunk_options) { +full_result +compress_decode_base64(char *dst, const chartype *src, size_t srclen, + base64_options options, + last_chunk_handling_options last_chunk_options) { const uint8_t *to_base64 = base64_url ? 
tables::base64::to_base64_url_value : tables::base64::to_base64_value; size_t equallocation = @@ -28131,9 +28421,9 @@ result compress_decode_base64(char *dst, const chartype *src, size_t srclen, } if (srclen == 0) { if (equalsigns > 0) { - return {INVALID_BASE64_CHARACTER, equallocation}; + return {INVALID_BASE64_CHARACTER, equallocation, 0}; } - return {SUCCESS, 0}; + return {SUCCESS, 0, 0}; } char *end_of_safe_64byte_zone = (srclen + 3) / 4 * 3 >= 63 ? dst + (srclen + 3) / 4 * 3 - 63 : dst; @@ -28160,7 +28450,8 @@ result compress_decode_base64(char *dst, const chartype *src, size_t srclen, to_base64[uint8_t(*src)] <= 64) { src++; } - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit)}; + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; } if (badcharmask != 0) { // optimization opportunity: check for simple masks like those made of @@ -28206,7 +28497,8 @@ result compress_decode_base64(char *dst, const chartype *src, size_t srclen, uint8_t val = to_base64[uint8_t(*src)]; *bufferptr = char(val); if (!scalar::base64::is_eight_byte(*src) || val > 64) { - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit)}; + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; } bufferptr += (val <= 63); src++; @@ -28258,20 +28550,22 @@ result compress_decode_base64(char *dst, const chartype *src, size_t srclen, } } if (src < srcend + equalsigns) { - result r = scalar::base64::base64_tail_decode( + full_result r = scalar::base64::base64_tail_decode( dst, src, srcend - src, equalsigns, options, last_chunk_options); - if (r.error == error_code::INVALID_BASE64_CHARACTER) { - r.count += size_t(src - srcinit); + if (r.error == error_code::INVALID_BASE64_CHARACTER || + r.error == error_code::BASE64_EXTRA_BITS) { + r.input_count += size_t(src - srcinit); return r; } else { - r.count += size_t(dst - dstinit); + r.output_count += size_t(dst - dstinit); } if 
(last_chunk_options != stop_before_partial && r.error == error_code::SUCCESS && equalsigns > 0) { // additional checks - if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { r.error = error_code::INVALID_BASE64_CHARACTER; - r.count = equallocation; + r.input_count = equallocation; } } return r; @@ -28279,10 +28573,10 @@ result compress_decode_base64(char *dst, const chartype *src, size_t srclen, if (equalsigns > 0) { if ((size_t(dst - dstinit) % 3 == 0) || ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation}; + return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; } } - return {SUCCESS, size_t(dst - dstinit)}; + return {SUCCESS, srclen, size_t(dst - dstinit)}; } /* end file src/haswell/avx2_base64.cpp */ @@ -31062,6 +31356,16 @@ simdutf_warn_unused result implementation::base64_to_binary( last_chunk_options); } +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); +} + simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( const char16_t *input, size_t length) const noexcept { return scalar::base64::maximal_binary_length_from_base64(input, length); @@ -31077,6 +31381,16 @@ simdutf_warn_unused result implementation::base64_to_binary( last_chunk_options); } +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return (options & base64_url) + ? 
compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); +} + simdutf_warn_unused size_t implementation::base64_length_from_binary( size_t length, base64_options options) const noexcept { return scalar::base64::base64_length_from_binary(length, options); @@ -34707,6 +35021,49 @@ simdutf_warn_unused result implementation::base64_to_binary( return r; } +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + size_t equallocation = + length; // location of the first padding character if any + size_t equalsigns = 0; + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + length -= 1; + equalsigns++; + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + equalsigns++; + length -= 1; + } + } + if (length == 0) { + if (equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, 0, 0}; + } + full_result r = scalar::base64::base64_tail_decode( + output, input, length, equalsigns, options, last_chunk_options); + if (last_chunk_options != stop_before_partial && + r.error == error_code::SUCCESS && equalsigns > 0) { + // additional checks + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; + } + } + return r; +} + simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( const char16_t *input, size_t length) const noexcept { return scalar::base64::maximal_binary_length_from_base64(input, length); @@ -34754,6 +35111,49 @@ 
simdutf_warn_unused result implementation::base64_to_binary( return r; } +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + size_t equallocation = + length; // location of the first padding character if any + size_t equalsigns = 0; + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + length -= 1; + equalsigns++; + while (length > 0 && + scalar::base64::is_ascii_white_space(input[length - 1])) { + length--; + } + if (length > 0 && input[length - 1] == '=') { + equallocation = length - 1; + equalsigns++; + length -= 1; + } + } + if (length == 0) { + if (equalsigns > 0) { + return {INVALID_BASE64_CHARACTER, equallocation, 0}; + } + return {SUCCESS, 0, 0}; + } + full_result r = scalar::base64::base64_tail_decode( + output, input, length, equalsigns, options, last_chunk_options); + if (last_chunk_options != stop_before_partial && + r.error == error_code::SUCCESS && equalsigns > 0) { + // additional checks + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { + return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; + } + } + return r; +} + simdutf_warn_unused size_t implementation::base64_length_from_binary( size_t length, base64_options options) const noexcept { return scalar::base64::base64_length_from_binary(length, options); @@ -37746,9 +38146,10 @@ static inline void base64_decode_block_safe(char *out, block64 *b) { } template -result compress_decode_base64(char *dst, const chartype *src, size_t srclen, - base64_options options, - last_chunk_handling_options last_chunk_options) { +full_result +compress_decode_base64(char *dst, const chartype *src, size_t srclen, + base64_options options, + last_chunk_handling_options 
last_chunk_options) { const uint8_t *to_base64 = base64_url ? tables::base64::to_base64_url_value : tables::base64::to_base64_value; size_t equallocation = @@ -37776,9 +38177,9 @@ result compress_decode_base64(char *dst, const chartype *src, size_t srclen, } if (srclen == 0) { if (equalsigns > 0) { - return {INVALID_BASE64_CHARACTER, equallocation}; + return {INVALID_BASE64_CHARACTER, equallocation, 0}; } - return {SUCCESS, 0}; + return {SUCCESS, 0, 0}; } char *end_of_safe_64byte_zone = (srclen + 3) / 4 * 3 >= 63 ? dst + (srclen + 3) / 4 * 3 - 63 : dst; @@ -37805,7 +38206,8 @@ result compress_decode_base64(char *dst, const chartype *src, size_t srclen, to_base64[uint8_t(*src)] <= 64) { src++; } - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit)}; + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; } if (badcharmask != 0) { // optimization opportunity: check for simple masks like those made of @@ -37850,7 +38252,8 @@ result compress_decode_base64(char *dst, const chartype *src, size_t srclen, uint8_t val = to_base64[uint8_t(*src)]; *bufferptr = char(val); if (!scalar::base64::is_eight_byte(*src) || val > 64) { - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit)}; + return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), + size_t(dst - dstinit)}; } bufferptr += (val <= 63); src++; @@ -37902,20 +38305,22 @@ result compress_decode_base64(char *dst, const chartype *src, size_t srclen, } } if (src < srcend + equalsigns) { - result r = scalar::base64::base64_tail_decode( + full_result r = scalar::base64::base64_tail_decode( dst, src, srcend - src, equalsigns, options, last_chunk_options); - if (r.error == error_code::INVALID_BASE64_CHARACTER) { - r.count += size_t(src - srcinit); + if (r.error == error_code::INVALID_BASE64_CHARACTER || + r.error == error_code::BASE64_EXTRA_BITS) { + r.input_count += size_t(src - srcinit); return r; } else { - r.count += size_t(dst - dstinit); + 
r.output_count += size_t(dst - dstinit); } if (last_chunk_options != stop_before_partial && r.error == error_code::SUCCESS && equalsigns > 0) { // additional checks - if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { + if ((r.output_count % 3 == 0) || + ((r.output_count % 3) + 1 + equalsigns != 4)) { r.error = error_code::INVALID_BASE64_CHARACTER; - r.count = equallocation; + r.input_count = equallocation; } } return r; @@ -37923,10 +38328,10 @@ result compress_decode_base64(char *dst, const chartype *src, size_t srclen, if (equalsigns > 0) { if ((size_t(dst - dstinit) % 3 == 0) || ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation}; + return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; } } - return {SUCCESS, size_t(dst - dstinit)}; + return {SUCCESS, srclen, size_t(dst - dstinit)}; } /* end file src/westmere/sse_base64.cpp */ @@ -40715,6 +41120,16 @@ simdutf_warn_unused result implementation::base64_to_binary( last_chunk_options); } +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return (options & base64_url) + ? 
compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); +} + simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( const char16_t *input, size_t length) const noexcept { return scalar::base64::maximal_binary_length_from_base64(input, length); @@ -40730,6 +41145,16 @@ simdutf_warn_unused result implementation::base64_to_binary( last_chunk_options); } +simdutf_warn_unused full_result implementation::base64_to_binary_details( + const char16_t *input, size_t length, char *output, base64_options options, + last_chunk_handling_options last_chunk_options) const noexcept { + return (options & base64_url) + ? compress_decode_base64(output, input, length, options, + last_chunk_options) + : compress_decode_base64(output, input, length, options, + last_chunk_options); +} + simdutf_warn_unused size_t implementation::base64_length_from_binary( size_t length, base64_options options) const noexcept { return scalar::base64::base64_length_from_binary(length, options); diff --git a/deps/simdutf/simdutf.h b/deps/simdutf/simdutf.h index 4ed08d542b0..a219e96ad9c 100644 --- a/deps/simdutf/simdutf.h +++ b/deps/simdutf/simdutf.h @@ -1,4 +1,4 @@ -/* auto-generated on 2024-10-11 12:35:29 -0400. Do not edit! */ +/* auto-generated on 2024-11-12 20:00:19 -0500. Do not edit! */ /* begin file include/simdutf.h */ #ifndef SIMDUTF_H #define SIMDUTF_H @@ -611,9 +611,12 @@ enum error_code { // and a low surrogate must be preceded by a high surrogate // (UTF-16) OR there must be no surrogate at all (Latin1) INVALID_BASE64_CHARACTER, // Found a character that cannot be part of a valid - // base64 string. + // base64 string. This may include a misplaced + // padding character ('='). BASE64_INPUT_REMAINDER, // The base64 input terminates with a single // character, excluding padding (=). + BASE64_EXTRA_BITS, // The base64 input terminates with non-zero + // padding bits. 
OUTPUT_BUFFER_TOO_SMALL, // The provided buffer is too small. OTHER // Not related to validation/transcoding. }; @@ -626,8 +629,30 @@ struct result { simdutf_really_inline result() : error{error_code::SUCCESS}, count{0} {} - simdutf_really_inline result(error_code _err, size_t _pos) - : error{_err}, count{_pos} {} + simdutf_really_inline result(error_code err, size_t pos) + : error{err}, count{pos} {} +}; + +struct full_result { + error_code error; + size_t input_count; + size_t output_count; + + simdutf_really_inline full_result() + : error{error_code::SUCCESS}, input_count{0}, output_count{0} {} + + simdutf_really_inline full_result(error_code err, size_t pos_in, + size_t pos_out) + : error{err}, input_count{pos_in}, output_count{pos_out} {} + + simdutf_really_inline operator result() const noexcept { + if (error == error_code::SUCCESS || + error == error_code::BASE64_INPUT_REMAINDER) { + return result{error, output_count}; + } else { + return result{error, input_count}; + } + } }; } // namespace simdutf @@ -645,7 +670,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS #define SIMDUTF_SIMDUTF_VERSION_H /** The version of simdutf being used (major.minor.revision) */ -#define SIMDUTF_VERSION "5.6.0" +#define SIMDUTF_VERSION "5.6.1" namespace simdutf { enum { @@ -660,7 +685,7 @@ enum { /** * The revision (major.minor.REVISION) of simdutf being used. */ - SIMDUTF_VERSION_REVISION = 0 + SIMDUTF_VERSION_REVISION = 1 }; } // namespace simdutf @@ -1034,7 +1059,7 @@ simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept; * * @param buf the UTF-8 string to validate. * @param len the length of the string in bytes. - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of code units validated if * successful. 
@@ -1061,7 +1086,7 @@ simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept; * * @param buf the ASCII string to validate. * @param len the length of the string in bytes. - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of code units validated if * successful. @@ -1132,7 +1157,7 @@ simdutf_warn_unused bool validate_utf16be(const char16_t *buf, * @param buf the UTF-16 string to validate. * @param len the length of the string in number of 2-byte code units * (char16_t). - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of code units validated if * successful. @@ -1151,7 +1176,7 @@ simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf, * @param buf the UTF-16LE string to validate. * @param len the length of the string in number of 2-byte code units * (char16_t). - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of code units validated if * successful. @@ -1170,7 +1195,7 @@ simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, * @param buf the UTF-16BE string to validate. * @param len the length of the string in number of 2-byte code units * (char16_t). 
- * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of code units validated if * successful. @@ -1206,7 +1231,7 @@ simdutf_warn_unused bool validate_utf32(const char32_t *buf, * @param buf the UTF-32 string to validate. * @param len the length of the string in number of 4-byte code units * (char32_t). - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of code units validated if * successful. @@ -1366,7 +1391,7 @@ simdutf_warn_unused size_t convert_utf8_to_utf16be( * @param input the UTF-8 string to convert * @param length the length of the string in bytes * @param latin1_output the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of code units validated if * successful. 
@@ -1384,7 +1409,7 @@ simdutf_warn_unused result convert_utf8_to_latin1_with_errors( * @param input the UTF-8 string to convert * @param length the length of the string in bytes * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char16_t written if * successful. @@ -1401,7 +1426,7 @@ simdutf_warn_unused result convert_utf8_to_utf16_with_errors( * @param input the UTF-8 string to convert * @param length the length of the string in bytes * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char16_t written if * successful. @@ -1418,7 +1443,7 @@ simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( * @param input the UTF-8 string to convert * @param length the length of the string in bytes * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char16_t written if * successful. 
@@ -1450,7 +1475,7 @@ simdutf_warn_unused size_t convert_utf8_to_utf32( * @param input the UTF-8 string to convert * @param length the length of the string in bytes * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char32_t written if * successful. @@ -1715,7 +1740,7 @@ simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input, * @param input the UTF-16 string to convert * @param length the length of the string in 2-byte code units (char16_t) * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char written if * successful. @@ -1733,7 +1758,7 @@ simdutf_warn_unused result convert_utf16_to_latin1_with_errors( * @param input the UTF-16LE string to convert * @param length the length of the string in 2-byte code units (char16_t) * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char written if * successful. 
@@ -1753,7 +1778,7 @@ simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( * @param input the UTF-16BE string to convert * @param length the length of the string in 2-byte code units (char16_t) * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char written if * successful. @@ -1773,7 +1798,7 @@ simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( * @param input the UTF-16 string to convert * @param length the length of the string in 2-byte code units (char16_t) * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char written if * successful. @@ -1792,7 +1817,7 @@ simdutf_warn_unused result convert_utf16_to_utf8_with_errors( * @param input the UTF-16LE string to convert * @param length the length of the string in 2-byte code units (char16_t) * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char written if * successful. 
@@ -1811,7 +1836,7 @@ simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( * @param input the UTF-16BE string to convert * @param length the length of the string in 2-byte code units (char16_t) * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char written if * successful. @@ -1998,7 +2023,7 @@ simdutf_warn_unused size_t convert_utf16be_to_utf32( * @param input the UTF-16 string to convert * @param length the length of the string in 2-byte code units (char16_t) * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char32_t written if * successful. @@ -2017,7 +2042,7 @@ simdutf_warn_unused result convert_utf16_to_utf32_with_errors( * @param input the UTF-16LE string to convert * @param length the length of the string in 2-byte code units (char16_t) * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char32_t written if * successful. 
@@ -2036,7 +2061,7 @@ simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( * @param input the UTF-16BE string to convert * @param length the length of the string in 2-byte code units (char16_t) * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char32_t written if * successful. @@ -2177,7 +2202,7 @@ simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input, * @param input the UTF-32 string to convert * @param length the length of the string in 4-byte code units (char32_t) * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char written if * successful. @@ -2263,7 +2288,7 @@ simdutf_warn_unused size_t convert_utf32_to_latin1( * @param input the UTF-32 string to convert * @param length the length of the string in 4-byte code units (char32_t) * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char written if * successful. 
@@ -2322,7 +2347,7 @@ simdutf_warn_unused size_t convert_utf32_to_utf16be( * @param input the UTF-32 string to convert * @param length the length of the string in 4-byte code units (char32_t) * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char16_t written if * successful. @@ -2341,7 +2366,7 @@ simdutf_warn_unused result convert_utf32_to_utf16_with_errors( * @param input the UTF-32 string to convert * @param length the length of the string in 4-byte code units (char32_t) * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char16_t written if * successful. @@ -2360,7 +2385,7 @@ simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( * @param input the UTF-32 string to convert * @param length the length of the string in 4-byte code units (char32_t) * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char16_t written if * successful. @@ -2648,8 +2673,7 @@ simdutf_warn_unused size_t trim_partial_utf16(const char16_t *input, size_t length); // base64_options are used to specify the base64 encoding options. 
-using base64_options = uint64_t; -enum : base64_options { +enum base64_options : uint64_t { base64_default = 0, /* standard base64 format (with padding) */ base64_url = 1, /* base64url format (no padding) */ base64_reverse_padding = 2, /* modifier for base64_default and base64_url */ @@ -2664,9 +2688,9 @@ enum : base64_options { // chunk in base64 decoding. // https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 enum last_chunk_handling_options : uint64_t { - loose = 0, /* standard base64 format, decode partial final chunk */ - strict = - 1, /* error when the last chunk is partial, 2 or 3 chars, and unpadded */ + loose = 0, /* standard base64 format, decode partial final chunk */ + strict = 1, /* error when the last chunk is partial, 2 or 3 chars, and + unpadded, or non-zero bit padding */ stop_before_partial = 2, /* if the last chunk is partial (2 or 3 chars), ignore it (no error) */ }; @@ -2706,11 +2730,11 @@ simdutf_warn_unused size_t maximal_binary_length_from_base64( * * See https://infra.spec.whatwg.org/#forgiving-base64-decode * - * This function will fail in case of invalid input. There are two possible - * reasons for failure: the input contains a number of base64 characters that - * when divided by 4, leaves a single remainder character - * (BASE64_INPUT_REMAINDER), or the input contains a character that is not a - * valid base64 character (INVALID_BASE64_CHARACTER). + * This function will fail in case of invalid input. When last_chunk_options = + * loose, there are two possible reasons for failure: the input contains a + * number of base64 characters that when divided by 4, leaves a single remainder + * character (BASE64_INPUT_REMAINDER), or the input contains a character that is + * not a valid base64 character (INVALID_BASE64_CHARACTER). * * When the error is INVALID_BASE64_CHARACTER, r.count contains the index in the * input where the invalid character was found. 
When the error is @@ -2746,7 +2770,7 @@ simdutf_warn_unused size_t maximal_binary_length_from_base64( * last_chunk_handling_options::loose by default * but can also be last_chunk_handling_options::strict or * last_chunk_handling_options::stop_before_partial. - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in bytes) if any, or the number of bytes written if successful. */ @@ -2798,11 +2822,11 @@ size_t binary_to_base64(const char *input, size_t length, char *output, * * See https://infra.spec.whatwg.org/#forgiving-base64-decode * - * This function will fail in case of invalid input. There are two possible - * reasons for failure: the input contains a number of base64 characters that - * when divided by 4, leaves a single remainder character - * (BASE64_INPUT_REMAINDER), or the input contains a character that is not a - * valid base64 character (INVALID_BASE64_CHARACTER). + * This function will fail in case of invalid input. When last_chunk_options = + * loose, there are two possible reasons for failure: the input contains a + * number of base64 characters that when divided by 4, leaves a single remainder + * character (BASE64_INPUT_REMAINDER), or the input contains a character that is + * not a valid base64 character (INVALID_BASE64_CHARACTER). * * When the error is INVALID_BASE64_CHARACTER, r.count contains the index in the * input where the invalid character was found. When the error is @@ -2839,7 +2863,7 @@ size_t binary_to_base64(const char *input, size_t length, char *output, * last_chunk_handling_options::loose by default * but can also be last_chunk_handling_options::strict or * last_chunk_handling_options::stop_before_partial. 
- * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and position of the * INVALID_BASE64_CHARACTER error (in the input in units) if any, or the number * of bytes written if successful. @@ -2860,12 +2884,12 @@ base64_to_binary(const char16_t *input, size_t length, char *output, * * See https://infra.spec.whatwg.org/#forgiving-base64-decode * - * This function will fail in case of invalid input. There are three possible - * reasons for failure: the input contains a number of base64 characters that - * when divided by 4, leaves a single remainder character - * (BASE64_INPUT_REMAINDER), the input contains a character that is not a valid - * base64 character (INVALID_BASE64_CHARACTER), or the output buffer is too - * small (OUTPUT_BUFFER_TOO_SMALL). + * This function will fail in case of invalid input. When last_chunk_options = + * loose, there are three possible reasons for failure: the input contains a + * number of base64 characters that when divided by 4, leaves a single remainder + * character (BASE64_INPUT_REMAINDER), the input contains a character that is + * not a valid base64 character (INVALID_BASE64_CHARACTER), or the output buffer + * is too small (OUTPUT_BUFFER_TOO_SMALL). * * When OUTPUT_BUFFER_TOO_SMALL, we return both the number of bytes written * and the number of units processed, see description of the parameters and @@ -2906,7 +2930,7 @@ base64_to_binary(const char16_t *input, size_t length, char *output, * last_chunk_handling_options::loose by default * but can also be last_chunk_handling_options::strict or * last_chunk_handling_options::stop_before_partial. 
- * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and position of the * INVALID_BASE64_CHARACTER error (in the input in units) if any, or the number * of units processed if successful. @@ -3012,7 +3036,7 @@ public: * * @param buf the UTF-8 string to validate. * @param len the length of the string in bytes. - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of code units validated * if successful. @@ -3039,7 +3063,7 @@ public: * * @param buf the ASCII string to validate. * @param len the length of the string in bytes. - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of code units validated * if successful. @@ -3092,7 +3116,7 @@ public: * @param buf the UTF-16LE string to validate. * @param len the length of the string in number of 2-byte code units * (char16_t). - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of code units validated * if successful. @@ -3112,7 +3136,7 @@ public: * @param buf the UTF-16BE string to validate. * @param len the length of the string in number of 2-byte code units * (char16_t). 
- * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of code units validated * if successful. @@ -3146,7 +3170,7 @@ public: * @param buf the UTF-32 string to validate. * @param len the length of the string in number of 4-byte code units * (char32_t). - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of code units validated * if successful. @@ -3238,7 +3262,7 @@ public: * @param input the UTF-8 string to convert * @param length the length of the string in bytes * @param latin1_output the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of code units validated * if successful. @@ -3312,7 +3336,7 @@ public: * @param input the UTF-8 string to convert * @param length the length of the string in bytes * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of code units validated * if successful. 
@@ -3331,7 +3355,7 @@ public: * @param input the UTF-8 string to convert * @param length the length of the string in bytes * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of code units validated * if successful. @@ -3365,7 +3389,7 @@ public: * @param input the UTF-8 string to convert * @param length the length of the string in bytes * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char32_t written if * successful. @@ -3502,7 +3526,7 @@ public: * (char16_t) * @param latin1_buffer the pointer to buffer that can hold conversion * result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char written if * successful. @@ -3525,7 +3549,7 @@ public: * (char16_t) * @param latin1_buffer the pointer to buffer that can hold conversion * result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char written if * successful. 
@@ -3633,7 +3657,7 @@ public: * @param length the length of the string in 2-byte code units * (char16_t) * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char written if * successful. @@ -3655,7 +3679,7 @@ public: * @param length the length of the string in 2-byte code units * (char16_t) * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char written if * successful. @@ -3751,7 +3775,7 @@ public: * @param length the length of the string in 2-byte code units * (char16_t) * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char32_t written if * successful. @@ -3773,7 +3797,7 @@ public: * @param length the length of the string in 2-byte code units * (char16_t) * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char32_t written if * successful. 
@@ -3889,7 +3913,7 @@ public: * (char32_t) * @param latin1_buffer the pointer to buffer that can hold conversion * result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char written if * successful. @@ -3953,7 +3977,7 @@ public: * @param length the length of the string in 4-byte code units * (char32_t) * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char written if * successful. @@ -4044,7 +4068,7 @@ public: * @param length the length of the string in 4-byte code units * (char32_t) * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char16_t written if * successful. @@ -4066,7 +4090,7 @@ public: * @param length the length of the string in 4-byte code units * (char32_t) * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in code units) if any, or the number of char16_t written if * successful. 
@@ -4361,11 +4385,11 @@ public: * * See https://infra.spec.whatwg.org/#forgiving-base64-decode * - * This function will fail in case of invalid input. There are two possible - * reasons for failure: the input contains a number of base64 characters that - * when divided by 4, leaves a single remainder character - * (BASE64_INPUT_REMAINDER), or the input contains a character that is not a - * valid base64 character (INVALID_BASE64_CHARACTER). + * This function will fail in case of invalid input. When last_chunk_options = + * loose, there are two possible reasons for failure: the input contains a + * number of base64 characters that when divided by 4, leaves a single + * remainder character (BASE64_INPUT_REMAINDER), or the input contains a + * character that is not a valid base64 character (INVALID_BASE64_CHARACTER). * * You should call this function with a buffer that is at least * maximal_binary_length_from_base64(input, length) bytes long. If you fail to @@ -4378,7 +4402,7 @@ public: * bytes long). * @param options the base64 options to use, can be base64_default or * base64_url, is base64_default by default. - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and either position of the error * (in the input in bytes) if any, or the number of bytes written if * successful. @@ -4389,6 +4413,42 @@ public: last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) const noexcept = 0; + /** + * Convert a base64 input to a binary output while returning more details + * than base64_to_binary. + * + * This function follows the WHATWG forgiving-base64 format, which means that + * it will ignore any ASCII spaces in the input. You may provide a padded + * input (with one or two equal signs at the end) or an unpadded input + * (without any equal signs at the end). 
+ * + * See https://infra.spec.whatwg.org/#forgiving-base64-decode + * + * This function will fail in case of invalid input. When last_chunk_options = + * loose, there are two possible reasons for failure: the input contains a + * number of base64 characters that when divided by 4, leaves a single + * remainder character (BASE64_INPUT_REMAINDER), or the input contains a + * character that is not a valid base64 character (INVALID_BASE64_CHARACTER). + * + * You should call this function with a buffer that is at least + * maximal_binary_length_from_base64(input, length) bytes long. If you fail to + * provide that much space, the function may cause a buffer overflow. + * + * @param input the base64 string to process + * @param length the length of the string in bytes + * @param output the pointer to buffer that can hold the conversion + * result (should be at least maximal_binary_length_from_base64(input, length) + * bytes long). + * @param options the base64 options to use, can be base64_default or + * base64_url, is base64_default by default. + * @return a full_result struct (of type simdutf::full_result containing the + * three fields error, input_count and output_count). + */ + simdutf_warn_unused virtual full_result base64_to_binary_details( + const char *input, size_t length, char *output, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept = 0; /** * Convert a base64 input to a binary output. * @@ -4399,11 +4459,11 @@ public: * * See https://infra.spec.whatwg.org/#forgiving-base64-decode * - * This function will fail in case of invalid input. There are two possible - * reasons for failure: the input contains a number of base64 characters that - * when divided by 4, leaves a single remainder character - * (BASE64_INPUT_REMAINDER), or the input contains a character that is not a - * valid base64 character (INVALID_BASE64_CHARACTER).
+ * This function will fail in case of invalid input. When last_chunk_options = + * loose, there are two possible reasons for failure: the input contains a + * number of base64 characters that when divided by 4, leaves a single + * remainder character (BASE64_INPUT_REMAINDER), or the input contains a + * character that is not a valid base64 character (INVALID_BASE64_CHARACTER). * * You should call this function with a buffer that is at least * maximal_binary_length_from_utf6_base64(input, length) bytes long. If you @@ -4417,7 +4477,7 @@ public: * bytes long). * @param options the base64 options to use, can be base64_default or * base64_url, is base64_default by default. - * @return a result pair struct (of type simdutf::error containing the two + * @return a result pair struct (of type simdutf::result containing the two * fields error and count) with an error code and position of the * INVALID_BASE64_CHARACTER error (in the input in units) if any, or the * number of bytes written if successful. @@ -4428,6 +4488,42 @@ public: last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) const noexcept = 0; + /** + * Convert a base64 input to a binary output while returning more details + * than base64_to_binary. + * + * This function follows the WHATWG forgiving-base64 format, which means that + * it will ignore any ASCII spaces in the input. You may provide a padded + * input (with one or two equal signs at the end) or an unpadded input + * (without any equal signs at the end). + * + * See https://infra.spec.whatwg.org/#forgiving-base64-decode + * + * This function will fail in case of invalid input. When last_chunk_options = + * loose, there are two possible reasons for failure: the input contains a + * number of base64 characters that when divided by 4, leaves a single + * remainder character (BASE64_INPUT_REMAINDER), or the input contains a + * character that is not a valid base64 character (INVALID_BASE64_CHARACTER). 
+ * + * You should call this function with a buffer that is at least + * maximal_binary_length_from_base64(input, length) bytes long. If you fail to + * provide that much space, the function may cause a buffer overflow. + * + * @param input the base64 string to process + * @param length the length of the string in 2-byte code units + * @param output the pointer to buffer that can hold the conversion + * result (should be at least maximal_binary_length_from_base64(input, length) + * bytes long). + * @param options the base64 options to use, can be base64_default or + * base64_url, is base64_default by default. + * @return a full_result struct (of type simdutf::full_result containing the + * three fields error, input_count and output_count). + */ + simdutf_warn_unused virtual full_result base64_to_binary_details( + const char16_t *input, size_t length, char *output, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = + last_chunk_handling_options::loose) const noexcept = 0; /** * Provide the base64 length in bytes given the length of a binary input. *