Skip to content

Commit be30309

Browse files
lemirerichardlau
authored andcommittedApr 26, 2024
deps: update simdutf to 5.0.0
PR-URL: #52138 Reviewed-By: Yagiz Nizipli <yagiz.nizipli@sentry.io> Reviewed-By: Michael Dawson <midawson@redhat.com> Reviewed-By: Marco Ippolito <marcoippolito54@gmail.com> Reviewed-By: Rafael Gonzaga <rafael.nunu@hotmail.com>
1 parent b56f66e commit be30309

File tree

2 files changed

+4522
-195
lines changed

2 files changed

+4522
-195
lines changed
 

‎deps/simdutf/simdutf.cpp

+4,342-190
Large diffs are not rendered by default.

‎deps/simdutf/simdutf.h

+180-5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2024-01-29 10:40:15 -0500. Do not edit! */
1+
/* auto-generated on 2024-03-18 10:58:28 -0400. Do not edit! */
22
/* begin file include/simdutf.h */
33
#ifndef SIMDUTF_H
44
#define SIMDUTF_H
@@ -142,6 +142,30 @@
142142
// s390 IBM system. Big endian.
143143
#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64
144144
// RISC-V 64-bit
145+
#define SIMDUTF_IS_RISCV64 1
146+
147+
#if __clang_major__ >= 19
148+
// Does the compiler support target regions for RISC-V
149+
#define SIMDUTF_HAS_RVV_TARGET_REGION 1
150+
#endif
151+
152+
#if __riscv_v_intrinsic >= 11000 && !(__GNUC__ == 13 && __GNUC_MINOR__ == 2 && __GNUC_PATCHLEVEL__ == 0)
153+
#define SIMDUTF_HAS_RVV_INTRINSICS 1
154+
#endif
155+
156+
#define SIMDUTF_HAS_ZVBB_INTRINSICS 0 // there is currently no way to detect this
157+
158+
#if SIMDUTF_HAS_RVV_INTRINSICS && __riscv_vector && __riscv_v_min_vlen >= 128 && __riscv_v_elen >= 64
159+
// RISC-V V extension
160+
#define SIMDUTF_IS_RVV 1
161+
#if SIMDUTF_HAS_ZVBB_INTRINSICS && __riscv_zvbb >= 1000000
162+
// RISC-V Vector Basic Bit-manipulation
163+
#define SIMDUTF_IS_ZVBB 1
164+
#endif
165+
#endif
166+
167+
#elif defined(__loongarch_lp64)
168+
// LoongArch 64-bit
145169
#else
146170
// The simdutf library is designed
147171
// for 64-bit processors and it seems that you are not
@@ -540,6 +564,8 @@ enum error_code {
540564
SURROGATE, // The decoded character must not be in U+D800...DFFF (UTF-8 or UTF-32) OR
541565
// a high surrogate must be followed by a low surrogate and a low surrogate must be preceded by a high surrogate (UTF-16) OR
542566
// there must be no surrogate at all (Latin1)
567+
INVALID_BASE64_CHARACTER, // Found a character that cannot be part of a valid base64 string.
568+
BASE64_INPUT_REMAINDER, // The base64 input terminates with a single character, excluding padding (=).
543569
OTHER // Not related to validation/transcoding.
544570
};
545571

@@ -567,22 +593,22 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS
567593
#define SIMDUTF_SIMDUTF_VERSION_H
568594

569595
/** The version of simdutf being used (major.minor.revision) */
570-
#define SIMDUTF_VERSION "4.0.9"
596+
#define SIMDUTF_VERSION "5.0.0"
571597

572598
namespace simdutf {
573599
enum {
574600
/**
575601
* The major version (MAJOR.minor.revision) of simdutf being used.
576602
*/
577-
SIMDUTF_VERSION_MAJOR = 4,
603+
SIMDUTF_VERSION_MAJOR = 5,
578604
/**
579605
* The minor version (major.MINOR.revision) of simdutf being used.
580606
*/
581607
SIMDUTF_VERSION_MINOR = 0,
582608
/**
583609
* The revision (major.minor.REVISION) of simdutf being used.
584610
*/
585-
SIMDUTF_VERSION_REVISION = 9
611+
SIMDUTF_VERSION_REVISION = 0
586612
};
587613
} // namespace simdutf
588614

@@ -654,6 +680,7 @@ POSSIBILITY OF SUCH DAMAGE.
654680
#include <cpuid.h>
655681
#endif
656682

683+
657684
namespace simdutf {
658685
namespace internal {
659686

@@ -675,7 +702,9 @@ enum instruction_set {
675702
AVX512BW = 0x4000,
676703
AVX512VL = 0x8000,
677704
AVX512VBMI2 = 0x10000,
678-
AVX512VPOPCNTDQ = 0x2000
705+
AVX512VPOPCNTDQ = 0x2000,
706+
RVV = 0x4000,
707+
ZVBB = 0x8000,
679708
};
680709

681710
#if defined(__PPC64__)
@@ -684,6 +713,40 @@ static inline uint32_t detect_supported_architectures() {
684713
return instruction_set::ALTIVEC;
685714
}
686715

716+
#elif SIMDUTF_IS_RISCV64
717+
718+
#if defined(__linux__)
719+
#include <unistd.h>
720+
// We define these ourselves, for backwards compatibility
721+
struct simdutf_riscv_hwprobe { int64_t key; uint64_t value; };
722+
#define simdutf_riscv_hwprobe(...) syscall(258, __VA_ARGS__)
723+
#define SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0 4
724+
#define SIMDUTF_RISCV_HWPROBE_IMA_V (1 << 2)
725+
#define SIMDUTF_RISCV_HWPROBE_EXT_ZVBB (1 << 17)
726+
#endif
727+
728+
// RISC-V 64 variant: builds a bitmask of instruction_set flags for the host.
// Flags come from two sources that are OR-ed together: the compile-time
// SIMDUTF_IS_RVV / SIMDUTF_IS_ZVBB macros and, on Linux, a runtime query made
// through the simdutf_riscv_hwprobe macro (which expands to syscall(258, ...)).
// Returns instruction_set::DEFAULT combined with any RVV / ZVBB bits found.
static inline uint32_t detect_supported_architectures() {
729+
uint32_t host_isa = instruction_set::DEFAULT;
730+
#if SIMDUTF_IS_RVV
731+
// Set unconditionally when the build itself targets RVV.
host_isa |= instruction_set::RVV;
732+
#endif
733+
#if SIMDUTF_IS_ZVBB
734+
// Set unconditionally when the build itself targets Zvbb.
host_isa |= instruction_set::ZVBB;
735+
#endif
736+
#if defined(__linux__)
737+
// Single key/value pair; the value starts at 0 and is read back after the call.
simdutf_riscv_hwprobe probes[] = { { SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0, 0 } };
738+
// Arguments: pair array, pair count, then 0 / nullptr / 0 for the remaining
// parameters. NOTE(review): presumably cpu-set size, cpu set and flags, i.e.
// "all CPUs, no flags" - confirm against the kernel hwprobe documentation.
long ret = simdutf_riscv_hwprobe(&probes, sizeof probes/sizeof *probes, 0, nullptr, 0);
739+
// Any non-zero return is ignored: only the compile-time flags above remain.
if (ret == 0) {
740+
uint64_t extensions = probes[0].value;
741+
if (extensions & SIMDUTF_RISCV_HWPROBE_IMA_V)
742+
host_isa |= instruction_set::RVV;
743+
if (extensions & SIMDUTF_RISCV_HWPROBE_EXT_ZVBB)
744+
host_isa |= instruction_set::ZVBB;
745+
}
746+
#endif
747+
return host_isa;
748+
}
749+
687750
#elif defined(__aarch64__) || defined(_M_ARM64)
688751

689752
static inline uint32_t detect_supported_architectures() {
@@ -2222,6 +2285,63 @@ simdutf_warn_unused size_t trim_partial_utf16le(const char16_t* input, size_t le
22222285
*/
22232286
simdutf_warn_unused size_t trim_partial_utf16(const char16_t* input, size_t length);
22242287

2288+
2289+
/**
2290+
* Provide the maximal binary length in bytes given the base64 input.
2291+
* In general, if the input contains ASCII spaces, the result will be less than
2292+
* the maximum length.
2293+
*
2294+
* @param input the base64 input to process
2295+
* @param length the length of the base64 input in bytes
2296+
* @return maximal number of binary bytes
2297+
*/
2298+
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char * input, size_t length) noexcept;
2299+
2300+
/**
2301+
* Convert a base64 input to a binary output.
2302+
*
2303+
* This function follows the WHATWG forgiving-base64 format, which means that it will
2304+
* ignore any ASCII spaces in the input. You may provide a padded input (with one or two
2305+
* equal signs at the end) or an unpadded input (without any equal signs at the end).
2306+
*
2307+
* See https://infra.spec.whatwg.org/#forgiving-base64-decode
2308+
*
2309+
* This function will fail in case of invalid input. There are two possible reasons for
2310+
* failure: the input contains a number of base64 characters that when divided by 4, leaves
2311+
* a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character
2312+
* that is not a valid base64 character (INVALID_BASE64_CHARACTER).
2313+
*
2314+
* You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long.
2315+
* If you fail to provide that much space, the function may cause a buffer overflow.
2316+
*
2317+
* @param input the base64 string to process
2318+
* @param length the length of the string in bytes
2319+
* @param output the pointer to buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long).
2320+
* @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either the position of the error (in the input, in bytes) if any, or the number of bytes written if successful.
2321+
*/
2322+
simdutf_warn_unused result base64_to_binary(const char * input, size_t length, char* output) noexcept;
2323+
2324+
/**
2325+
* Provide the base64 length in bytes given the length of a binary input.
2326+
*
2327+
* @param length the length of the input in bytes
2328+
* @return number of base64 bytes
2329+
*/
2330+
simdutf_warn_unused size_t base64_length_from_binary(size_t length) noexcept;
2331+
2332+
/**
2333+
* Convert a binary input to a base64 output. The output is always padded with equal signs so that it is
2334+
* a multiple of 4 bytes long.
2335+
*
2336+
* This function always succeeds.
2337+
*
2338+
* @param input the binary to process
2339+
* @param length the length of the input in bytes
2340+
* @param output the pointer to buffer that can hold the conversion result (should be at least base64_length_from_binary(length) bytes long)
2341+
* @return number of written bytes, will be equal to base64_length_from_binary(length)
2342+
*/
2343+
size_t binary_to_base64(const char * input, size_t length, char* output) noexcept;
2344+
22252345
/**
22262346
* An implementation of simdutf for a particular CPU architecture.
22272347
*
@@ -3282,6 +3402,61 @@ class implementation {
32823402
*/
32833403
simdutf_warn_unused virtual size_t count_utf8(const char * input, size_t length) const noexcept = 0;
32843404

3405+
/**
3406+
* Provide the maximal binary length in bytes given the base64 input.
3407+
* In general, if the input contains ASCII spaces, the result will be less than
3408+
* the maximum length.
3409+
*
3410+
* @param input the base64 input to process
3411+
* @param length the length of the base64 input in bytes
3412+
* @return maximal number of binary bytes
3413+
*/
3414+
simdutf_warn_unused virtual size_t maximal_binary_length_from_base64(const char * input, size_t length) const noexcept = 0;
3415+
3416+
/**
3417+
* Convert a base64 input to a binary output.
3418+
*
3419+
* This function follows the WHATWG forgiving-base64 format, which means that it will
3420+
* ignore any ASCII spaces in the input. You may provide a padded input (with one or two
3421+
* equal signs at the end) or an unpadded input (without any equal signs at the end).
3422+
*
3423+
* See https://infra.spec.whatwg.org/#forgiving-base64-decode
3424+
*
3425+
* This function will fail in case of invalid input. There are two possible reasons for
3426+
* failure: the input contains a number of base64 characters that when divided by 4, leaves
3427+
* a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character
3428+
* that is not a valid base64 character (INVALID_BASE64_CHARACTER).
3429+
*
3430+
* You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long.
3431+
* If you fail to provide that much space, the function may cause a buffer overflow.
3432+
*
3433+
* @param input the base64 string to process
3434+
* @param length the length of the string in bytes
3435+
* @param output the pointer to buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long).
3436+
* @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either the position of the error (in the input, in bytes) if any, or the number of bytes written if successful.
3437+
*/
3438+
simdutf_warn_unused virtual result base64_to_binary(const char * input, size_t length, char* output) const noexcept = 0;
3439+
3440+
/**
3441+
* Provide the base64 length in bytes given the length of a binary input.
3442+
*
3443+
* @param length the length of the input in bytes
3444+
* @return number of base64 bytes
3445+
*/
3446+
simdutf_warn_unused virtual size_t base64_length_from_binary(size_t length) const noexcept = 0;
3447+
3448+
/**
3449+
* Convert a binary input to a base64 output. The output is always padded with equal signs so that it is
3450+
* a multiple of 4 bytes long.
3451+
*
3452+
* This function always succeeds.
3453+
*
3454+
* @param input the binary to process
3455+
* @param length the length of the input in bytes
3456+
* @param output the pointer to buffer that can hold the conversion result (should be at least base64_length_from_binary(length) bytes long)
3457+
* @return number of written bytes, will be equal to base64_length_from_binary(length)
3458+
*/
3459+
virtual size_t binary_to_base64(const char * input, size_t length, char* output) const noexcept = 0;
32853460

32863461

32873462
protected:

0 commit comments

Comments
 (0)
Please sign in to comment.