Skip to content

Commit 606c183

Browse files
anonrig, marco-ippolito
authored and committed May 3, 2024
deps: update simdutf to 5.2.3
PR-URL: #52381 Refs: #51670 Reviewed-By: Daniel Lemire <daniel@lemire.me> Reviewed-By: Vinícius Lourenço Claro Cardoso <contact@viniciusl.com.br> Reviewed-By: Matteo Collina <matteo.collina@gmail.com> Reviewed-By: Robert Nagy <ronagy@icloud.com> Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com> Reviewed-By: Filip Skokan <panva.ip@gmail.com>
1 parent 0f784c9 commit 606c183

File tree

2 files changed

+2204
-959
lines changed

2 files changed

+2204
-959
lines changed
 

‎deps/simdutf/simdutf.cpp

+2,059-941
Large diffs are not rendered by default.

‎deps/simdutf/simdutf.h

+145-18
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2024-03-18 10:58:28 -0400. Do not edit! */
1+
/* auto-generated on 2024-04-05 16:29:02 -0400. Do not edit! */
22
/* begin file include/simdutf.h */
33
#ifndef SIMDUTF_H
44
#define SIMDUTF_H
@@ -566,16 +566,17 @@ enum error_code {
566566
// there must be no surrogate at all (Latin1)
567567
INVALID_BASE64_CHARACTER, // Found a character that cannot be part of a valid base64 string.
568568
BASE64_INPUT_REMAINDER, // The base64 input terminates with a single character, excluding padding (=).
569+
OUTPUT_BUFFER_TOO_SMALL, // The provided buffer is too small.
569570
OTHER // Not related to validation/transcoding.
570571
};
571572

572573
struct result {
573574
error_code error;
574575
size_t count; // In case of error, indicates the position of the error. In case of success, indicates the number of code units validated/written.
575576

576-
simdutf_really_inline result();
577+
simdutf_really_inline result() : error{error_code::SUCCESS}, count{0} {}
577578

578-
simdutf_really_inline result(error_code, size_t);
579+
simdutf_really_inline result(error_code _err, size_t _pos) : error{_err}, count{_pos} {}
579580
};
580581

581582
}
@@ -593,7 +594,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS
593594
#define SIMDUTF_SIMDUTF_VERSION_H
594595

595596
/** The version of simdutf being used (major.minor.revision) */
596-
#define SIMDUTF_VERSION "5.0.0"
597+
#define SIMDUTF_VERSION "5.2.3"
597598

598599
namespace simdutf {
599600
enum {
@@ -604,11 +605,11 @@ enum {
604605
/**
605606
* The minor version (major.MINOR.revision) of simdutf being used.
606607
*/
607-
SIMDUTF_VERSION_MINOR = 0,
608+
SIMDUTF_VERSION_MINOR = 2,
608609
/**
609610
* The revision (major.minor.REVISION) of simdutf being used.
610611
*/
611-
SIMDUTF_VERSION_REVISION = 0
612+
SIMDUTF_VERSION_REVISION = 3
612613
};
613614
} // namespace simdutf
614615

@@ -2285,6 +2286,12 @@ simdutf_warn_unused size_t trim_partial_utf16le(const char16_t* input, size_t le
22852286
*/
22862287
simdutf_warn_unused size_t trim_partial_utf16(const char16_t* input, size_t length);
22872288

2289+
// base64_options are used to specify the base64 encoding options.
2290+
using base64_options = uint64_t;
2291+
enum : base64_options {
2292+
base64_default = 0, /* standard base64 format */
2293+
base64_url = 1 /* base64url format */
2294+
};
22882295

22892296
/**
22902297
* Provide the maximal binary length in bytes given the base64 input.
@@ -2293,10 +2300,21 @@ simdutf_warn_unused size_t trim_partial_utf16(const char16_t* input, size_t leng
22932300
*
22942301
* @param input the base64 input to process
22952302
* @param length the length of the base64 input in bytes
2296-
* @return number of base64 bytes
2303+
* @return maximal number of binary bytes
22972304
*/
22982305
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char * input, size_t length) noexcept;
22992306

2307+
/**
2308+
* Provide the maximal binary length in bytes given the base64 input.
2309+
* In general, if the input contains ASCII spaces, the result will be less than
2310+
* the maximum length.
2311+
*
2312+
* @param input the base64 input to process, in ASCII stored as 16-bit units
2313+
* @param length the length of the base64 input in 16-bit units
2314+
* @return maximal number of binary bytes
2315+
*/
2316+
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char16_t * input, size_t length) noexcept;
2317+
23002318
/**
23012319
* Convert a base64 input to a binary output.
23022320
*
@@ -2307,27 +2325,32 @@ simdutf_warn_unused size_t maximal_binary_length_from_base64(const char * input,
23072325
* See https://infra.spec.whatwg.org/#forgiving-base64-decode
23082326
*
23092327
* This function will fail in case of invalid input. There are two possible reasons for
2310-
* failure: the input is contains a number of base64 characters that when divided by 4, leaves
2311-
* a singler remainder character (BASE64_INPUT_REMAINDER), or the input contains a character
2328+
* failure: the input contains a number of base64 characters that when divided by 4, leaves
2329+
* a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character
23122330
* that is not a valid base64 character (INVALID_BASE64_CHARACTER).
23132331
*
2332+
* When the error is INVALID_BASE64_CHARACTER, r.count contains the index in the input
2333+
* where the invalid character was found. When the error is BASE64_INPUT_REMAINDER, then
2334+
* r.count contains the number of bytes decoded.
2335+
*
23142336
* You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long.
23152337
* If you fail to provide that much space, the function may cause a buffer overflow.
23162338
*
23172339
* @param input the base64 string to process
23182340
* @param length the length of the string in bytes
23192341
* @param output the pointer to buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long).
2342+
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
23202343
* @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either the position of the error (in the input in bytes) if any, or the number of bytes written if successful.
23212344
*/
2322-
simdutf_warn_unused result base64_to_binary(const char * input, size_t length, char* output) noexcept;
2345+
simdutf_warn_unused result base64_to_binary(const char * input, size_t length, char* output, base64_options options = base64_default) noexcept;
23232346

23242347
/**
23252348
* Provide the base64 length in bytes given the length of a binary input.
23262349
*
23272350
* @param length the length of the input in bytes
23282351
* @return number of base64 bytes
23292352
*/
2330-
simdutf_warn_unused size_t base64_length_from_binary(size_t length) noexcept;
2353+
simdutf_warn_unused size_t base64_length_from_binary(size_t length, base64_options options = base64_default) noexcept;
23312354

23322355
/**
23332356
* Convert a binary input to a base64 output. The output is always padded with equal signs so that it is
@@ -2338,9 +2361,74 @@ simdutf_warn_unused size_t base64_length_from_binary(size_t length) noexcept;
23382361
* @param input the binary to process
23392362
* @param length the length of the input in bytes
23402363
* @param output the pointer to buffer that can hold the conversion result (should be at least base64_length_from_binary(length) bytes long)
2364+
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
23412365
* @return number of written bytes, will be equal to base64_length_from_binary(length)
23422366
*/
2343-
size_t binary_to_base64(const char * input, size_t length, char* output) noexcept;
2367+
size_t binary_to_base64(const char * input, size_t length, char* output, base64_options options = base64_default) noexcept;
2368+
2369+
/**
2370+
* Convert a base64 input to a binary output.
2371+
*
2372+
* This function follows the WHATWG forgiving-base64 format, which means that it will
2373+
* ignore any ASCII spaces in the input. You may provide a padded input (with one or two
2374+
* equal signs at the end) or an unpadded input (without any equal signs at the end).
2375+
*
2376+
* See https://infra.spec.whatwg.org/#forgiving-base64-decode
2377+
*
2378+
* This function will fail in case of invalid input. There are two possible reasons for
2379+
* failure: the input contains a number of base64 characters that when divided by 4, leaves
2380+
* a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character
2381+
* that is not a valid base64 character (INVALID_BASE64_CHARACTER).
2382+
*
2383+
* When the error is INVALID_BASE64_CHARACTER, r.count contains the index in the input
2384+
* where the invalid character was found. When the error is BASE64_INPUT_REMAINDER, then
2385+
* r.count contains the number of bytes decoded.
2386+
*
2387+
* You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long.
2388+
* If you fail to provide that much space, the function may cause a buffer overflow.
2389+
*
2390+
* @param input the base64 string to process, in ASCII stored as 16-bit units
2391+
* @param length the length of the string in 16-bit units
2392+
* @param output the pointer to buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long).
2393+
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
2394+
* @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and the position of the INVALID_BASE64_CHARACTER error (in the input in units) if any, or the number of bytes written if successful.
2395+
*/
2396+
simdutf_warn_unused result base64_to_binary(const char16_t * input, size_t length, char* output, base64_options options = base64_default) noexcept;
2397+
2398+
/**
2399+
* Convert a base64 input to a binary output.
2400+
*
2401+
* This function follows the WHATWG forgiving-base64 format, which means that it will
2402+
* ignore any ASCII spaces in the input. You may provide a padded input (with one or two
2403+
* equal signs at the end) or an unpadded input (without any equal signs at the end).
2404+
*
2405+
* See https://infra.spec.whatwg.org/#forgiving-base64-decode
2406+
*
2407+
* This function will fail in case of invalid input. There are three possible reasons for
2408+
* failure: the input contains a number of base64 characters that when divided by 4, leaves
2409+
* a single remainder character (BASE64_INPUT_REMAINDER), the input contains a character
2410+
* that is not a valid base64 character (INVALID_BASE64_CHARACTER), or the output buffer
2411+
* is too small (OUTPUT_BUFFER_TOO_SMALL).
2412+
*
2413+
* When the error is OUTPUT_BUFFER_TOO_SMALL, we return both the number of bytes written
2414+
* and the number of units processed, see description of the parameters and returned value.
2415+
*
2416+
* When the error is INVALID_BASE64_CHARACTER, r.count contains the index in the input
2417+
* where the invalid character was found. When the error is BASE64_INPUT_REMAINDER, then
2418+
* r.count contains the number of bytes decoded.
2419+
*
2420+
* The INVALID_BASE64_CHARACTER cases are considered fatal and you are expected to discard
2421+
* the output.
2422+
*
2423+
* @param input the base64 string to process, in ASCII stored as 8-bit or 16-bit units
2424+
* @param length the length of the string in 8-bit or 16-bit units.
2425+
* @param output the pointer to buffer that can hold the conversion result.
2426+
* @param outlen the number of bytes that can be written in the output buffer. Upon return, it is modified to reflect how many bytes were written.
2427+
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
2428+
* @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and the position of the INVALID_BASE64_CHARACTER error (in the input in units) if any, or the number of units processed if successful.
2429+
*/
2430+
simdutf_warn_unused result base64_to_binary_safe(const char * input, size_t length, char* output, size_t& outlen, base64_options options = base64_default) noexcept;
2431+
simdutf_warn_unused result base64_to_binary_safe(const char16_t * input, size_t length, char* output, size_t& outlen, base64_options options = base64_default) noexcept;
23442432

23452433
/**
23462434
* An implementation of simdutf for a particular CPU architecture.
@@ -3409,10 +3497,21 @@ class implementation {
34093497
*
34103498
* @param input the base64 input to process
34113499
* @param length the length of the base64 input in bytes
3412-
* @return number of base64 bytes
3500+
* @return maximal number of binary bytes
34133501
*/
34143502
simdutf_warn_unused virtual size_t maximal_binary_length_from_base64(const char * input, size_t length) const noexcept = 0;
34153503

3504+
/**
3505+
* Provide the maximal binary length in bytes given the base64 input.
3506+
* In general, if the input contains ASCII spaces, the result will be less than
3507+
* the maximum length.
3508+
*
3509+
* @param input the base64 input to process, in ASCII stored as 16-bit units
3510+
* @param length the length of the base64 input in 16-bit units
3511+
* @return maximal number of binary bytes
3512+
*/
3513+
simdutf_warn_unused virtual size_t maximal_binary_length_from_base64(const char16_t * input, size_t length) const noexcept = 0;
3514+
34163515
/**
34173516
* Convert a base64 input to a binary output.
34183517
*
@@ -3423,8 +3522,8 @@ class implementation {
34233522
* See https://infra.spec.whatwg.org/#forgiving-base64-decode
34243523
*
34253524
* This function will fail in case of invalid input. There are two possible reasons for
3426-
* failure: the input is contains a number of base64 characters that when divided by 4, leaves
3427-
* a singler remainder character (BASE64_INPUT_REMAINDER), or the input contains a character
3525+
* failure: the input contains a number of base64 characters that when divided by 4, leaves
3526+
* a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character
34283527
* that is not a valid base64 character (INVALID_BASE64_CHARACTER).
34293528
*
34303529
* You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long.
@@ -3433,17 +3532,44 @@ class implementation {
34333532
* @param input the base64 string to process
34343533
* @param length the length of the string in bytes
34353534
* @param output the pointer to buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long).
3535+
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
34363536
* @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either the position of the error (in the input in bytes) if any, or the number of bytes written if successful.
34373537
*/
3438-
simdutf_warn_unused virtual result base64_to_binary(const char * input, size_t length, char* output) const noexcept = 0;
3538+
simdutf_warn_unused virtual result base64_to_binary(const char * input, size_t length, char* output, base64_options options = base64_default) const noexcept = 0;
3539+
3540+
/**
3541+
* Convert a base64 input to a binary output.
3542+
*
3543+
* This function follows the WHATWG forgiving-base64 format, which means that it will
3544+
* ignore any ASCII spaces in the input. You may provide a padded input (with one or two
3545+
* equal signs at the end) or an unpadded input (without any equal signs at the end).
3546+
*
3547+
* See https://infra.spec.whatwg.org/#forgiving-base64-decode
3548+
*
3549+
* This function will fail in case of invalid input. There are two possible reasons for
3550+
* failure: the input contains a number of base64 characters that when divided by 4, leaves
3551+
* a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character
3552+
* that is not a valid base64 character (INVALID_BASE64_CHARACTER).
3553+
*
3554+
* You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long.
3555+
* If you fail to provide that much space, the function may cause a buffer overflow.
3556+
*
3557+
* @param input the base64 string to process, in ASCII stored as 16-bit units
3558+
* @param length the length of the string in 16-bit units
3559+
* @param output the pointer to buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long).
3560+
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
3561+
* @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and the position of the INVALID_BASE64_CHARACTER error (in the input in units) if any, or the number of bytes written if successful.
3562+
*/
3563+
simdutf_warn_unused virtual result base64_to_binary(const char16_t * input, size_t length, char* output, base64_options options = base64_default) const noexcept = 0;
34393564

34403565
/**
34413566
* Provide the base64 length in bytes given the length of a binary input.
34423567
*
34433568
* @param length the length of the input in bytes
3569+
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
34443570
* @return number of base64 bytes
34453571
*/
3446-
simdutf_warn_unused virtual size_t base64_length_from_binary(size_t length) const noexcept = 0;
3572+
simdutf_warn_unused virtual size_t base64_length_from_binary(size_t length, base64_options options = base64_default) const noexcept = 0;
34473573

34483574
/**
34493575
* Convert a binary input to a base64 output. The output is always padded with equal signs so that it is
@@ -3454,9 +3580,10 @@ class implementation {
34543580
* @param input the binary to process
34553581
* @param length the length of the input in bytes
34563582
* @param output the pointer to buffer that can hold the conversion result (should be at least base64_length_from_binary(length) bytes long)
3583+
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
34573584
* @return number of written bytes, will be equal to base64_length_from_binary(length)
34583585
*/
3459-
virtual size_t binary_to_base64(const char * input, size_t length, char* output) const noexcept = 0;
3586+
virtual size_t binary_to_base64(const char * input, size_t length, char* output, base64_options options = base64_default) const noexcept = 0;
34603587

34613588

34623589
protected:

0 commit comments

Comments
 (0)
Please sign in to comment.