diff --git a/deps/simdutf/simdutf.cpp b/deps/simdutf/simdutf.cpp index afc7727725b01e..f9c0a649dc1b26 100644 --- a/deps/simdutf/simdutf.cpp +++ b/deps/simdutf/simdutf.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2022-12-15 12:13:17 -0500. Do not edit! */ +/* auto-generated on 2023-01-02 15:43:33 -0500. Do not edit! */ // dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf.cpp /* begin file src/simdutf.cpp */ #include "simdutf.h" @@ -25,6 +25,7 @@ std::string toBinaryString(T b) { } // Implementations +// The best choice should always come first! // dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64.h /* begin file src/simdutf/arm64.h */ #ifndef SIMDUTF_ARM64_H @@ -1104,6 +1105,313 @@ simdutf_really_inline simd16::operator simd16() const { retur #endif // SIMDUTF_ARM64_H /* end file src/simdutf/arm64.h */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake.h +/* begin file src/simdutf/icelake.h */ +#ifndef SIMDUTF_ICELAKE_H +#define SIMDUTF_ICELAKE_H + + + +#ifdef __has_include +// How do we detect that a compiler supports vbmi2? +// For sure if the following header is found, we are ok? +#if __has_include() +#define SIMDUTF_COMPILER_SUPPORTS_VBMI2 1 +#endif +#endif + +#ifdef _MSC_VER +#if _MSC_VER >= 1920 +// Visual Studio 2019 and up support VBMI2 under x64 even if the header +// avx512vbmi2intrin.h is not found. +#define SIMDUTF_COMPILER_SUPPORTS_VBMI2 1 +#endif +#endif + +// We allow icelake on x64 as long as the compiler is known to support VBMI2. +#ifndef SIMDUTF_IMPLEMENTATION_ICELAKE +#define SIMDUTF_IMPLEMENTATION_ICELAKE ((SIMDUTF_IS_X86_64) && (SIMDUTF_COMPILER_SUPPORTS_VBMI2)) +#endif + +// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see +// https://github.com/simdutf/simdutf/issues/1247 +#define SIMDUTF_CAN_ALWAYS_RUN_ICELAKE ((SIMDUTF_IMPLEMENTATION_ICELAKE) && (SIMDUTF_IS_X86_64) && (__AVX2__) && (SIMDUTF_HAS_AVX512F && \ + SIMDUTF_HAS_AVX512DQ && \ + SIMDUTF_HAS_AVX512VL && \ + SIMDUTF_HAS_AVX512VBMI2) && (!SIMDUTF_IS_32BITS)) + +#if SIMDUTF_IMPLEMENTATION_ICELAKE +#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE +#define SIMDUTF_TARGET_ICELAKE +#else +#define SIMDUTF_TARGET_ICELAKE SIMDUTF_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,bmi2,pclmul,lzcnt") +#endif + +namespace simdutf { +namespace icelake { +} // namespace icelake +} // namespace simdutf + + + +// +// These two need to be included outside SIMDUTF_TARGET_REGION +// +// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/intrinsics.h +/* begin file src/simdutf/icelake/intrinsics.h */ +#ifndef SIMDUTF_ICELAKE_INTRINSICS_H +#define SIMDUTF_ICELAKE_INTRINSICS_H + + +#ifdef SIMDUTF_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#include +#else + +#if SIMDUTF_GCC11ORMORE +// We should not get warnings while including yet we do +// under some versions of GCC. +// If the x86intrin.h header has uninitialized values that are problematic, +// it is a GCC issue, we want to ignore these warnigns. +SIMDUTF_DISABLE_GCC_WARNING(-Wuninitialized) +#endif + +#include // elsewhere + + +#if SIMDUTF_GCC11ORMORE +// cancels the suppression of the -Wuninitialized +SIMDUTF_POP_DISABLE_WARNINGS +#endif + +#ifndef _tzcnt_u64 +#define _tzcnt_u64(x) __tzcnt_u64(x) +#endif // _tzcnt_u64 +#endif // SIMDUTF_VISUAL_STUDIO + +#ifdef SIMDUTF_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdutf, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ +#include // for _blsr_u64 +#include // for _pext_u64, _pdep_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// Important: we need the AVX-512 headers: +#include +#include +#include +#include +#include +#include +#include +#include +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDUTF_CLANG_VISUAL_STUDIO + + + +#if defined(__GNUC__) && !defined(__clang__) + +#if __GNUC__ == 8 +#define SIMDUTF_GCC8 1 +#elif __GNUC__ == 9 +#define SIMDUTF_GCC9 1 +#endif // __GNUC__ == 8 || __GNUC__ == 9 + +#endif // defined(__GNUC__) && !defined(__clang__) + +#if SIMDUTF_GCC8 +#pragma GCC push_options +#pragma GCC target("avx512f") +/** + * GCC 8 fails to provide _mm512_set_epi8. We roll our own. + */ +inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { + return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), + uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), + uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), + uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), + uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), + uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), + uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), + uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); +} +#pragma GCC pop_options +#endif // SIMDUTF_GCC8 + +#endif // SIMDUTF_HASWELL_INTRINSICS_H +/* end file src/simdutf/icelake/intrinsics.h */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/implementation.h +/* begin file src/simdutf/icelake/implementation.h */ +#ifndef SIMDUTF_ICELAKE_IMPLEMENTATION_H +#define SIMDUTF_ICELAKE_IMPLEMENTATION_H + + +namespace simdutf { +namespace icelake { + +namespace { +using namespace simdutf; +} + +class implementation final : public simdutf::implementation { +public: + simdutf_really_inline implementation() : simdutf::implementation( + "icelake", + "Intel AVX512 (AVX-512BW, AVX-512CD, AVX-512VL, AVX-512VBMI2 extensions)", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 ) {} + simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept final; + simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; + simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; + simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept final; + simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; + simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final; + simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final; + simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final; + simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final; + simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final; + simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final; + simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final; + simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final; + void change_endianness_utf16(const char16_t * buf, size_t length, char16_t * output) const noexcept final; + simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t length) const noexcept; + simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t length) const noexcept; + simdutf_warn_unused size_t count_utf8(const char * buf, size_t length) const noexcept; + simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept; + simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept; + simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) const noexcept; + simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept; + simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept; + simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept; +}; + +} // namespace icelake +} // namespace simdutf + +#endif // SIMDUTF_ICELAKE_IMPLEMENTATION_H +/* end file src/simdutf/icelake/implementation.h */ + +// +// The rest need to be inside the region +// +// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/begin.h +/* begin file src/simdutf/icelake/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "icelake" +// #define SIMDUTF_IMPLEMENTATION icelake + +#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE +// nothing needed. +#else +SIMDUTF_TARGET_ICELAKE +#endif + +#if SIMDUTF_GCC11ORMORE // workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 +SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) +#endif // end of workaround +/* end file src/simdutf/icelake/begin.h */ +// Declarations +// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/bitmanipulation.h +/* begin file src/simdutf/icelake/bitmanipulation.h */ +#ifndef SIMDUTF_ICELAKE_BITMANIPULATION_H +#define SIMDUTF_ICELAKE_BITMANIPULATION_H + +namespace simdutf { +namespace icelake { +namespace { + +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO +simdutf_really_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdutf_really_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +} // unnamed namespace +} // namespace icelake +} // namespace simdutf + +#endif // SIMDUTF_ICELAKE_BITMANIPULATION_H +/* end file src/simdutf/icelake/bitmanipulation.h */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/end.h +/* begin file src/simdutf/icelake/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE +// nothing needed. +#else +SIMDUTF_UNTARGET_REGION +#endif + + +#if SIMDUTF_GCC11ORMORE // workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 +SIMDUTF_POP_DISABLE_WARNINGS +#endif // end of workaround +/* end file src/simdutf/icelake/end.h */ + + + +#endif // SIMDUTF_IMPLEMENTATION_ICELAKE +#endif // SIMDUTF_ICELAKE_H +/* end file src/simdutf/icelake.h */ // dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell.h /* begin file src/simdutf/haswell.h */ #ifndef SIMDUTF_HASWELL_H @@ -1124,7 +1432,12 @@ simdutf_really_inline simd16::operator simd16() const { retur // You do not want to restrict it like so: SIMDUTF_IS_X86_64 && __AVX2__ // because we want to rely on *runtime dispatch*. // +#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE +#define SIMDUTF_IMPLEMENTATION_HASWELL 0 +#else #define SIMDUTF_IMPLEMENTATION_HASWELL (SIMDUTF_IS_X86_64) +#endif + #endif // To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see // https://github.com/simdutf/simdutf/issues/1247 @@ -1298,8 +1611,12 @@ SIMDUTF_POP_DISABLE_WARNINGS /* begin file src/simdutf/haswell/begin.h */ // redefining SIMDUTF_IMPLEMENTATION to "haswell" // #define SIMDUTF_IMPLEMENTATION haswell -SIMDUTF_TARGET_HASWELL +#if SIMDUTF_CAN_ALWAYS_RUN_HASWELL +// nothing needed. +#else +SIMDUTF_TARGET_HASWELL +#endif #if SIMDUTF_GCC11ORMORE // workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) @@ -2010,10 +2327,15 @@ struct simd16: base16_numeric { // dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell/end.h /* begin file src/simdutf/haswell/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_HASWELL +// nothing needed. +#else SIMDUTF_UNTARGET_REGION +#endif + #if SIMDUTF_GCC11ORMORE // workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 -#pragma GCC diagnostic pop +SIMDUTF_POP_DISABLE_WARNINGS #endif // end of workaround /* end file src/simdutf/haswell/end.h */ @@ -2036,8 +2358,14 @@ SIMDUTF_UNTARGET_REGION // You do not want to set it to (SIMDUTF_IS_X86_64 && !SIMDUTF_REQUIRES_HASWELL) // because you want to rely on runtime dispatch! // +#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE || SIMDUTF_CAN_ALWAYS_RUN_HASWELL +#define SIMDUTF_IMPLEMENTATION_WESTMERE 0 +#else #define SIMDUTF_IMPLEMENTATION_WESTMERE (SIMDUTF_IS_X86_64) #endif + +#endif + #define SIMDUTF_CAN_ALWAYS_RUN_WESTMERE (SIMDUTF_IMPLEMENTATION_WESTMERE && SIMDUTF_IS_X86_64 && __SSE4_2__ && __PCLMUL__) #if SIMDUTF_IMPLEMENTATION_WESTMERE @@ -2186,7 +2514,12 @@ SIMDUTF_POP_DISABLE_WARNINGS /* begin file src/simdutf/westmere/begin.h */ // redefining SIMDUTF_IMPLEMENTATION to "westmere" // #define SIMDUTF_IMPLEMENTATION westmere + +#if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE +// nothing needed. +#else SIMDUTF_TARGET_WESTMERE +#endif /* end file src/simdutf/westmere/begin.h */ // Declarations @@ -2939,7 +3272,12 @@ template // dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere/end.h /* begin file src/simdutf/westmere/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE +// nothing needed. +#else SIMDUTF_UNTARGET_REGION +#endif + /* end file src/simdutf/westmere/end.h */ #endif // SIMDUTF_IMPLEMENTATION_WESTMERE @@ -3582,354 +3920,66 @@ template struct simd8x64 { const simd8 mask = simd8::splat(m); return simd8x64( simd8(this->chunks[0]) >= mask, - simd8(this->chunks[1]) >= mask, - simd8(this->chunks[2]) >= mask, - simd8(this->chunks[3]) >= mask - ).to_bitmask(); - } -}; // struct simd8x64 - -} // namespace simd -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf - -#endif // SIMDUTF_PPC64_SIMD_INPUT_H -/* end file src/simdutf/ppc64/simd.h */ - -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/ppc64/end.h -/* begin file src/simdutf/ppc64/end.h */ -/* end file src/simdutf/ppc64/end.h */ - -#endif // SIMDUTF_IMPLEMENTATION_PPC64 - -#endif // SIMDUTF_PPC64_H -/* end file src/simdutf/ppc64.h */ -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback.h -/* begin file src/simdutf/fallback.h */ -#ifndef SIMDUTF_FALLBACK_H -#define SIMDUTF_FALLBACK_H - - -// Default Fallback to on unless a builtin implementation has already been selected. -#ifndef SIMDUTF_IMPLEMENTATION_FALLBACK -#define SIMDUTF_IMPLEMENTATION_FALLBACK 1 // (!SIMDUTF_CAN_ALWAYS_RUN_ARM64 && !SIMDUTF_CAN_ALWAYS_RUN_HASWELL && !SIMDUTF_CAN_ALWAYS_RUN_WESTMERE && !SIMDUTF_CAN_ALWAYS_RUN_PPC64) -#endif -#define SIMDUTF_CAN_ALWAYS_RUN_FALLBACK SIMDUTF_IMPLEMENTATION_FALLBACK - -#if SIMDUTF_IMPLEMENTATION_FALLBACK - -namespace simdutf { -/** - * Fallback implementation (runs on any machine). - */ -namespace fallback { -} // namespace fallback -} // namespace simdutf - -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback/implementation.h -/* begin file src/simdutf/fallback/implementation.h */ -#ifndef SIMDUTF_FALLBACK_IMPLEMENTATION_H -#define SIMDUTF_FALLBACK_IMPLEMENTATION_H - - -namespace simdutf { -namespace fallback { - -namespace { -using namespace simdutf; -} - -class implementation final : public simdutf::implementation { -public: - simdutf_really_inline implementation() : simdutf::implementation( - "fallback", - "Generic fallback implementation", - 0 - ) {} - simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept final; - simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final; - simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * buf, size_t len, char32_t* utf32_buffer) const noexcept final; - void change_endianness_utf16(const char16_t * buf, size_t length, char16_t * output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t * buf, size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t * buf, size_t length) const noexcept; - simdutf_warn_unused size_t count_utf8(const char * buf, size_t length) const noexcept; - simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept; - simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept; - simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) const noexcept; - simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept; - simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept; -}; - -} // namespace fallback -} // namespace simdutf - -#endif // SIMDUTF_FALLBACK_IMPLEMENTATION_H -/* end file src/simdutf/fallback/implementation.h */ - -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback/begin.h -/* begin file src/simdutf/fallback/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "fallback" -// #define SIMDUTF_IMPLEMENTATION fallback -/* end file src/simdutf/fallback/begin.h */ - -// Declarations -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback/bitmanipulation.h -/* begin file src/simdutf/fallback/bitmanipulation.h */ -#ifndef SIMDUTF_FALLBACK_BITMANIPULATION_H -#define SIMDUTF_FALLBACK_BITMANIPULATION_H - -#include - -namespace simdutf { -namespace fallback { -namespace { - -#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) -static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { - unsigned long x0 = (unsigned long)x, top, bottom; - _BitScanForward(&top, (unsigned long)(x >> 32)); - _BitScanForward(&bottom, x0); - *ret = x0 ? bottom : 32 + top; - return x != 0; -} -static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { - unsigned long x1 = (unsigned long)(x >> 32), top, bottom; - _BitScanReverse(&top, x1); - _BitScanReverse(&bottom, (unsigned long)x); - *ret = x1 ? top + 32 : bottom; - return x != 0; -} -#endif - -} // unnamed namespace -} // namespace fallback -} // namespace simdutf - -#endif // SIMDUTF_FALLBACK_BITMANIPULATION_H -/* end file src/simdutf/fallback/bitmanipulation.h */ - -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback/end.h -/* begin file src/simdutf/fallback/end.h */ -/* end file src/simdutf/fallback/end.h */ - -#endif // SIMDUTF_IMPLEMENTATION_FALLBACK -#endif // SIMDUTF_FALLBACK_H -/* end file src/simdutf/fallback.h */ -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake.h -/* begin file src/simdutf/icelake.h */ -#ifndef SIMDUTF_ICELAKE_H -#define SIMDUTF_ICELAKE_H - - - -#ifdef __has_include -// How do we detect that a compiler supports vbmi2? -// For sure if the following header is found, we are ok? -#if __has_include() -#define SIMDUTF_COMPILER_SUPPORTS_VBMI2 1 -#endif -#endif - -#ifdef _MSC_VER -#if _MSC_VER >= 1920 -// Visual Studio 2019 and up support VBMI2 under x64 even if the header -// avx512vbmi2intrin.h is not found. -#define SIMDUTF_COMPILER_SUPPORTS_VBMI2 1 -#endif -#endif - -// We allow icelake on x64 as long as the compiler is known to support VBMI2. -#ifndef SIMDUTF_IMPLEMENTATION_ICELAKE -#define SIMDUTF_IMPLEMENTATION_ICELAKE ((SIMDUTF_IS_X86_64) && (SIMDUTF_COMPILER_SUPPORTS_VBMI2)) -#endif - -// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see -// https://github.com/simdutf/simdutf/issues/1247 -#define SIMDUTF_CAN_ALWAYS_RUN_ICELAKE ((SIMDUTF_IMPLEMENTATION_ICELAKE) && (SIMDUTF_IS_X86_64) && (__AVX2__) && (SIMDUTF_HAS_AVX512F && \ - SIMDUTF_HAS_AVX512DQ && \ - SIMDUTF_HAS_AVX512VL && \ - SIMDUTF_HAS_AVX512VBMI2)) - -#if SIMDUTF_IMPLEMENTATION_ICELAKE -#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE -#define SIMDUTF_TARGET_ICELAKE -#define SIMDJSON_UNTARGET_ICELAKE -#else -#define SIMDUTF_TARGET_ICELAKE SIMDUTF_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,bmi2,pclmul,lzcnt") -#define SIMDUTF_UNTARGET_ICELAKE SIMDUTF_UNTARGET_REGION -#endif + simd8(this->chunks[1]) >= mask, + simd8(this->chunks[2]) >= mask, + simd8(this->chunks[3]) >= mask + ).to_bitmask(); + } +}; // struct simd8x64 -namespace simdutf { -namespace icelake { -} // namespace icelake +} // namespace simd +} // unnamed namespace +} // namespace ppc64 } // namespace simdutf +#endif // SIMDUTF_PPC64_SIMD_INPUT_H +/* end file src/simdutf/ppc64/simd.h */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/ppc64/end.h +/* begin file src/simdutf/ppc64/end.h */ +/* end file src/simdutf/ppc64/end.h */ -// -// These two need to be included outside SIMDUTF_TARGET_REGION -// -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/intrinsics.h -/* begin file src/simdutf/icelake/intrinsics.h */ -#ifndef SIMDUTF_ICELAKE_INTRINSICS_H -#define SIMDUTF_ICELAKE_INTRINSICS_H - - -#ifdef SIMDUTF_VISUAL_STUDIO -// under clang within visual studio, this will include -#include // visual studio or clang -#include -#else +#endif // SIMDUTF_IMPLEMENTATION_PPC64 -#if SIMDUTF_GCC11ORMORE -// We should not get warnings while including yet we do -// under some versions of GCC. -// If the x86intrin.h header has uninitialized values that are problematic, -// it is a GCC issue, we want to ignore these warnigns. -SIMDUTF_DISABLE_GCC_WARNING(-Wuninitialized) -#endif +#endif // SIMDUTF_PPC64_H +/* end file src/simdutf/ppc64.h */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback.h +/* begin file src/simdutf/fallback.h */ +#ifndef SIMDUTF_FALLBACK_H +#define SIMDUTF_FALLBACK_H -#include // elsewhere +// Note that fallback.h is always imported last. -#if SIMDUTF_GCC11ORMORE -// cancels the suppression of the -Wuninitialized -SIMDUTF_POP_DISABLE_WARNINGS +// Default Fallback to on unless a builtin implementation has already been selected. +#ifndef SIMDUTF_IMPLEMENTATION_FALLBACK +#if SIMDUTF_CAN_ALWAYS_RUN_ARM64 || SIMDUTF_CAN_ALWAYS_RUN_ICELAKE || SIMDUTF_CAN_ALWAYS_RUN_HASWELL || SIMDUTF_CAN_ALWAYS_RUN_WESTMERE || SIMDUTF_CAN_ALWAYS_RUN_PPC64 +#define SIMDUTF_IMPLEMENTATION_FALLBACK 0 +#else +#define SIMDUTF_IMPLEMENTATION_FALLBACK 1 +#endif #endif -#ifndef _tzcnt_u64 -#define _tzcnt_u64(x) __tzcnt_u64(x) -#endif // _tzcnt_u64 -#endif // SIMDUTF_VISUAL_STUDIO - -#ifdef SIMDUTF_CLANG_VISUAL_STUDIO -/** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - * e.g., if __AVX2__ is set... in turn, we normally set these - * macros by compiling against the corresponding architecture - * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole - * software with these advanced instructions. In simdutf, we - * want to compile the whole program for a generic target, - * and only target our specific kernels. As a workaround, - * we directly include the needed headers. These headers would - * normally guard against such usage, but we carefully included - * (or ) before, so the headers - * are fooled. - */ -#include // for _blsr_u64 -#include // for _pext_u64, _pdep_u64 -#include // for __lzcnt64 -#include // for most things (AVX2, AVX512, _popcnt64) -#include -#include -#include -#include -#include // for _mm_clmulepi64_si128 -// Important: we need the AVX-512 headers: -#include -#include -#include -#include -#include -#include -#include -#include -// unfortunately, we may not get _blsr_u64, but, thankfully, clang -// has it as a macro. -#ifndef _blsr_u64 -// we roll our own -#define _blsr_u64(n) ((n - 1) & n) -#endif // _blsr_u64 -#endif // SIMDUTF_CLANG_VISUAL_STUDIO - - - -#if defined(__GNUC__) && !defined(__clang__) - -#if __GNUC__ == 8 -#define SIMDUTF_GCC8 1 -#elif __GNUC__ == 9 -#define SIMDUTF_GCC9 1 -#endif // __GNUC__ == 8 || __GNUC__ == 9 +#define SIMDUTF_CAN_ALWAYS_RUN_FALLBACK (SIMDUTF_IMPLEMENTATION_FALLBACK) -#endif // defined(__GNUC__) && !defined(__clang__) +#if SIMDUTF_IMPLEMENTATION_FALLBACK -#if SIMDUTF_GCC8 -#pragma GCC push_options -#pragma GCC target("avx512f") +namespace simdutf { /** - * GCC 8 fails to provide _mm512_set_epi8. We roll our own. + * Fallback implementation (runs on any machine). */ -inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { - return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), - uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), - uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), - uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), - uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), - uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), - uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), - uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); -} -#pragma GCC pop_options -#endif // SIMDUTF_GCC8 +namespace fallback { +} // namespace fallback +} // namespace simdutf -#endif // SIMDUTF_HASWELL_INTRINSICS_H -/* end file src/simdutf/icelake/intrinsics.h */ -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/implementation.h -/* begin file src/simdutf/icelake/implementation.h */ -#ifndef SIMDUTF_ICELAKE_IMPLEMENTATION_H -#define SIMDUTF_ICELAKE_IMPLEMENTATION_H +// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback/implementation.h +/* begin file src/simdutf/fallback/implementation.h */ +#ifndef SIMDUTF_FALLBACK_IMPLEMENTATION_H +#define SIMDUTF_FALLBACK_IMPLEMENTATION_H namespace simdutf { -namespace icelake { +namespace fallback { namespace { using namespace simdutf; @@ -3938,9 +3988,10 @@ using namespace simdutf; class implementation final : public simdutf::implementation { public: simdutf_really_inline implementation() : simdutf::implementation( - "icelake", - "Intel AVX512 (AVX-512BW, AVX-512CD, AVX-512VL, AVX-512VBMI2 extensions)", - internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 ) {} + "fallback", + "Generic fallback implementation", + 0 + ) {} simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept final; simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; @@ -3996,66 +4047,61 @@ class implementation final : public simdutf::implementation { simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept; }; -} // namespace icelake +} // namespace fallback } // namespace simdutf -#endif // SIMDUTF_ICELAKE_IMPLEMENTATION_H -/* end file src/simdutf/icelake/implementation.h */ +#endif // SIMDUTF_FALLBACK_IMPLEMENTATION_H +/* end file src/simdutf/fallback/implementation.h */ -// -// The rest need to be inside the region -// -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/begin.h -/* begin file src/simdutf/icelake/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "icelake" -// #define SIMDUTF_IMPLEMENTATION icelake -SIMDUTF_TARGET_ICELAKE +// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback/begin.h +/* begin file src/simdutf/fallback/begin.h */ +// redefining SIMDUTF_IMPLEMENTATION to "fallback" +// #define SIMDUTF_IMPLEMENTATION fallback +/* end file src/simdutf/fallback/begin.h */ -#if SIMDUTF_GCC11ORMORE // workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 -SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) -#endif // end of workaround -/* end file src/simdutf/icelake/begin.h */ // Declarations -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/bitmanipulation.h -/* begin file src/simdutf/icelake/bitmanipulation.h */ -#ifndef SIMDUTF_ICELAKE_BITMANIPULATION_H -#define SIMDUTF_ICELAKE_BITMANIPULATION_H +// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback/bitmanipulation.h +/* begin file src/simdutf/fallback/bitmanipulation.h */ +#ifndef SIMDUTF_FALLBACK_BITMANIPULATION_H +#define SIMDUTF_FALLBACK_BITMANIPULATION_H + +#include namespace simdutf { -namespace icelake { +namespace fallback { namespace { -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO -simdutf_really_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows - return __popcnt64(input_num);// Visual Studio wants two underscores +#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) +static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { + unsigned long x0 = (unsigned long)x, top, bottom; + _BitScanForward(&top, (unsigned long)(x >> 32)); + _BitScanForward(&bottom, x0); + *ret = x0 ? bottom : 32 + top; + return x != 0; } -#else -simdutf_really_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); +static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { + unsigned long x1 = (unsigned long)(x >> 32), top, bottom; + _BitScanReverse(&top, x1); + _BitScanReverse(&bottom, (unsigned long)x); + *ret = x1 ? top + 32 : bottom; + return x != 0; } #endif } // unnamed namespace -} // namespace icelake +} // namespace fallback } // namespace simdutf -#endif // SIMDUTF_ICELAKE_BITMANIPULATION_H -/* end file src/simdutf/icelake/bitmanipulation.h */ -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/end.h -/* begin file src/simdutf/icelake/end.h */ -SIMDUTF_UNTARGET_REGION - -#if SIMDUTF_GCC11ORMORE // workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 -SIMDUTF_POP_DISABLE_WARNINGS -#endif // end of workaround -/* end file src/simdutf/icelake/end.h */ - +#endif // SIMDUTF_FALLBACK_BITMANIPULATION_H +/* end file src/simdutf/fallback/bitmanipulation.h */ +// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback/end.h +/* begin file src/simdutf/fallback/end.h */ +/* end file src/simdutf/fallback/end.h */ -#endif // SIMDUTF_IMPLEMENTATION_ICELAKE -#endif // SIMDUTF_ICELAKE_H -/* end file src/simdutf/icelake.h */ +#endif // SIMDUTF_IMPLEMENTATION_FALLBACK +#endif // SIMDUTF_FALLBACK_H +/* end file src/simdutf/fallback.h */ namespace simdutf { bool implementation::supported_by_runtime_system() const { @@ -9890,7 +9936,7 @@ inline simdutf_warn_unused result validate_with_errors(const char *buf, size_t l uint32_t code_point = 0; while (pos < len) { // check of the next 8 bytes are ascii. - uint64_t next_pos = pos + 16; + size_t next_pos = pos + 16; if (next_pos <= len) { // if it is safe to read 8 more bytes, check that they are ascii uint64_t v1; std::memcpy(&v1, data + pos, sizeof(uint64_t)); @@ -15447,7 +15493,12 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * i /* begin file src/simdutf/icelake/begin.h */ // redefining SIMDUTF_IMPLEMENTATION to "icelake" // #define SIMDUTF_IMPLEMENTATION icelake + +#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE +// nothing needed. +#else SIMDUTF_TARGET_ICELAKE +#endif #if SIMDUTF_GCC11ORMORE // workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) @@ -15734,8 +15785,19 @@ simdutf_really_inline bool process_block_utf8_to_utf16(const char *&in, char16_t } // if (tail == SIMDUTF_FULL) { + // In the two-byte/ASCII scenario, we are easily latency bound, so we want + // to increment the input buffer as quickly as possible. + // We process 32 bytes unless the byte at index 32 is a continuation byte, + // in which case we include it as well for a total of 33 bytes. + // Note that if x is an ASCII byte, then the following is false: + // int8_t(x) <= int8_t(0xc0) under two's complement. in += 32; if(int8_t(*in) <= int8_t(0xc0)) in++; + // The alternative is to do + // in += 64 - _lzcnt_u64(_pdep_u64(0xFFFFFFFF, continuation_or_ascii)); + // but it requires loading the input, doing the mask computation, and converting + // back the mask to a general register. It just takes too long, leaving the + // processor likely to be idle. } else { in += 64 - _lzcnt_u64(_pdep_u64(0xFFFFFFFF, continuation_or_ascii)); } @@ -18919,7 +18981,12 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * i // dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/end.h /* begin file src/simdutf/icelake/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE +// nothing needed. +#else SIMDUTF_UNTARGET_REGION +#endif + #if SIMDUTF_GCC11ORMORE // workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 SIMDUTF_POP_DISABLE_WARNINGS @@ -18935,8 +19002,12 @@ SIMDUTF_POP_DISABLE_WARNINGS /* begin file src/simdutf/haswell/begin.h */ // redefining SIMDUTF_IMPLEMENTATION to "haswell" // #define SIMDUTF_IMPLEMENTATION haswell -SIMDUTF_TARGET_HASWELL +#if SIMDUTF_CAN_ALWAYS_RUN_HASWELL +// nothing needed. +#else +SIMDUTF_TARGET_HASWELL +#endif #if SIMDUTF_GCC11ORMORE // workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) @@ -22724,10 +22795,15 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * i // dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell/end.h /* begin file src/simdutf/haswell/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_HASWELL +// nothing needed. +#else SIMDUTF_UNTARGET_REGION +#endif + #if SIMDUTF_GCC11ORMORE // workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 -#pragma GCC diagnostic pop +SIMDUTF_POP_DISABLE_WARNINGS #endif // end of workaround /* end file src/simdutf/haswell/end.h */ /* end file src/haswell/implementation.cpp */ @@ -24223,7 +24299,12 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * i /* begin file src/simdutf/westmere/begin.h */ // redefining SIMDUTF_IMPLEMENTATION to "westmere" // #define SIMDUTF_IMPLEMENTATION westmere + +#if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE +// nothing needed. +#else SIMDUTF_TARGET_WESTMERE +#endif /* end file src/simdutf/westmere/begin.h */ namespace simdutf { namespace westmere { @@ -28036,7 +28117,12 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * i // dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere/end.h /* begin file src/simdutf/westmere/end.h */ +#if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE +// nothing needed. +#else SIMDUTF_UNTARGET_REGION +#endif + /* end file src/simdutf/westmere/end.h */ /* end file src/westmere/implementation.cpp */ #endif diff --git a/deps/simdutf/simdutf.h b/deps/simdutf/simdutf.h index 80fa7c69542c5a..3514f117dd2360 100644 --- a/deps/simdutf/simdutf.h +++ b/deps/simdutf/simdutf.h @@ -1,4 +1,4 @@ -/* auto-generated on 2022-12-15 12:13:17 -0500. Do not edit! */ +/* auto-generated on 2023-01-02 15:43:33 -0500. Do not edit! */ // dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf.h /* begin file include/simdutf.h */ #ifndef SIMDUTF_H @@ -526,7 +526,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS #define SIMDUTF_SIMDUTF_VERSION_H /** The version of simdutf being used (major.minor.revision) */ -#define SIMDUTF_VERSION 2.0.9 +#define SIMDUTF_VERSION "2.1.0" namespace simdutf { enum { @@ -537,11 +537,11 @@ enum { /** * The minor version (major.MINOR.revision) of simdutf being used. */ - SIMDUTF_VERSION_MINOR = 0, + SIMDUTF_VERSION_MINOR = 1, /** * The revision (major.minor.REVISION) of simdutf being used. */ - SIMDUTF_VERSION_REVISION = 9 + SIMDUTF_VERSION_REVISION = 0 }; } // namespace simdutf @@ -795,7 +795,7 @@ namespace simdutf { /** * Autodetect the encoding of the input, a single encoding is recommended. - * E.g., the function might return simdutf::encoding_type::UTF8, + * E.g., the function might return simdutf::encoding_type::UTF8, * simdutf::encoding_type::UTF16_LE, simdutf::encoding_type::UTF16_BE, or * simdutf::encoding_type::UTF32_LE. * @@ -826,7 +826,9 @@ simdutf_really_inline simdutf_warn_unused int detect_encodings(const uint8_t * i /** - * Validate the UTF-8 string. + * Validate the UTF-8 string. This function may be best when you expect + * the input to be almost always valid. Otherwise, consider using + * validate_utf8_with_errors. * * Overridden by each implementation. * @@ -859,7 +861,8 @@ simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept; /** - * Validate the ASCII string and stop on error. + * Validate the ASCII string and stop on error. It might be faster than + * validate_utf8 when an error is expected to occur early. * * Overridden by each implementation. * @@ -870,7 +873,9 @@ simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept; simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) noexcept; /** - * Validate the UTF-16LE string. + * Validate the UTF-16LE string. This function may be best when you expect + * the input to be almost always valid. Otherwise, consider using + * validate_utf16le_with_errors. * * Overridden by each implementation. * @@ -883,7 +888,9 @@ simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t le simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) noexcept; /** - * Validate the UTF-16BE string. + * Validate the UTF-16BE string. This function may be best when you expect + * the input to be almost always valid. Otherwise, consider using + * validate_utf16be_with_errors. * * Overridden by each implementation. * @@ -896,7 +903,8 @@ simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) noexc simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) noexcept; /** - * Validate the UTF-16LE string and stop on error. + * Validate the UTF-16LE string and stop on error. It might be faster than + * validate_utf16le when an error is expected to occur early. * * Overridden by each implementation. * @@ -909,7 +917,8 @@ simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) noexc simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) noexcept; /** - * Validate the UTF-16BE string and stop on error. + * Validate the UTF-16BE string and stop on error. It might be faster than + * validate_utf16be when an error is expected to occur early. * * Overridden by each implementation. * @@ -922,7 +931,9 @@ simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, siz simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) noexcept; /** - * Validate the UTF-32LE string. + * Validate the UTF-32LE string. This function may be best when you expect + * the input to be almost always valid. Otherwise, consider using + * validate_utf32_with_errors. * * Overridden by each implementation. * @@ -935,7 +946,8 @@ simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, siz simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) noexcept; /** - * Validate the UTF-32LE string and stop on error. + * Validate the UTF-32LE string and stop on error. It might be faster than + * validate_utf32 when an error is expected to occur early. * * Overridden by each implementation. * @@ -1635,7 +1647,9 @@ class implementation { simdutf_warn_unused virtual result validate_ascii_with_errors(const char *buf, size_t len) const noexcept = 0; /** - * Validate the UTF-16LE string. + * Validate the UTF-16LE string.This function may be best when you expect + * the input to be almost always valid. Otherwise, consider using + * validate_utf16le_with_errors. * * Overridden by each implementation. * @@ -1648,7 +1662,9 @@ class implementation { simdutf_warn_unused virtual bool validate_utf16le(const char16_t *buf, size_t len) const noexcept = 0; /** - * Validate the UTF-16BE string. + * Validate the UTF-16BE string. This function may be best when you expect + * the input to be almost always valid. Otherwise, consider using + * validate_utf16be_with_errors. * * Overridden by each implementation. * @@ -1661,7 +1677,8 @@ class implementation { simdutf_warn_unused virtual bool validate_utf16be(const char16_t *buf, size_t len) const noexcept = 0; /** - * Validate the UTF-16LE string and stop on error. + * Validate the UTF-16LE string and stop on error. It might be faster than + * validate_utf16le when an error is expected to occur early. * * Overridden by each implementation. * @@ -1674,7 +1691,8 @@ class implementation { simdutf_warn_unused virtual result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept = 0; /** - * Validate the UTF-16BE string and stop on error. + * Validate the UTF-16BE string and stop on error. It might be faster than + * validate_utf16be when an error is expected to occur early. * * Overridden by each implementation. *