Skip to content

Commit

Permalink
[deps] Update is_utf8 to version 1.3.2
Browse files Browse the repository at this point in the history
  • Loading branch information
lpinca committed May 10, 2024
1 parent 8f34cc8 commit e836b85
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 24 deletions.
12 changes: 9 additions & 3 deletions deps/is_utf8/CMakeLists.txt
Expand Up @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.15)
project(is_utf8
DESCRIPTION "Fast UTF-8 Validation"
LANGUAGES CXX
VERSION 1.3.1
VERSION 1.3.2
)

include(GNUInstallDirs)
Expand All @@ -20,12 +20,16 @@ if (NOT CMAKE_BUILD_TYPE)
endif()
endif()

set(CMAKE_CXX_STANDARD 14)
# Tools, tests, etc. are compiled as C++11 by default. Override the standard on
# an individual target if you need a different one.
set(IS_UTF8_CXX_STANDARD 11 CACHE STRING "the C++ standard to use for is_utf8")

set(CMAKE_CXX_STANDARD ${IS_UTF8_CXX_STANDARD})
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_MACOSX_RPATH OFF)

set(IS_UTF8_LIB_VERSION "1.3.1" CACHE STRING "is_utf8 library version")
set(IS_UTF8_LIB_VERSION "1.3.2" CACHE STRING "is_utf8 library version")
set(IS_UTF8_LIB_SOVERSION "1" CACHE STRING "is_utf8 library soversion")

set(IS_UTF8_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
Expand All @@ -40,6 +44,8 @@ endif(BUILD_TESTING)


add_subdirectory(benchmarks)

message(STATUS "Compiling using the C++ standard:" ${CMAKE_CXX_STANDARD})
# ---- Install rules ----
add_library(is_utf8::is_utf8 ALIAS is_utf8)

Expand Down
61 changes: 40 additions & 21 deletions deps/is_utf8/src/is_utf8.cpp
Expand Up @@ -872,8 +872,13 @@ template <typename T> std::string toBinaryString(T b) {
#ifndef IS_UTF8_IMPLEMENTATION_ARM64
#define IS_UTF8_IMPLEMENTATION_ARM64 (IS_UTF8_IS_ARM64)
#endif
#define IS_UTF8_CAN_ALWAYS_RUN_ARM64 \
IS_UTF8_IMPLEMENTATION_ARM64 &&IS_UTF8_IS_ARM64
#if IS_UTF8_IMPLEMENTATION_ARM64 &&IS_UTF8_IS_ARM64
#define IS_UTF8_CAN_ALWAYS_RUN_ARM64 1
#else
#define IS_UTF8_CAN_ALWAYS_RUN_ARM64 0
#endif
#if IS_UTF8_IMPLEMENTATION_ARM64
Expand Down Expand Up @@ -1116,8 +1121,9 @@ template <typename T, typename Mask = simd8<bool>> struct base_u8 {
return *this_cast;
}
is_utf8_really_inline Mask operator==(const simd8<T> other) const {
return vceqq_u8(*this, other);
friend is_utf8_really_inline Mask operator==(const simd8<T> lhs,
const simd8<T> rhs) {
return vceqq_u8(lhs, rhs);
}
template <int N = 1>
Expand Down Expand Up @@ -2172,7 +2178,7 @@ namespace icelake {} // namespace icelake
// We should not get warnings while including <x86intrin.h> yet we do
// under some versions of GCC.
// If the x86intrin.h header has uninitialized values that are problematic,
// it is a GCC issue, we want to ignore these warnigns.
// it is a GCC issue; we want to ignore these warnings.
IS_UTF8_DISABLE_GCC_WARNING(-Wuninitialized)
#endif
Expand Down Expand Up @@ -2342,8 +2348,11 @@ IS_UTF8_POP_DISABLE_WARNINGS
#endif
// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this
// next line, see https://github.com/simdutf/simdutf/issues/1247
#define IS_UTF8_CAN_ALWAYS_RUN_HASWELL \
((IS_UTF8_IMPLEMENTATION_HASWELL) && (IS_UTF8_IS_X86_64) && (__AVX2__))
#if ((IS_UTF8_IMPLEMENTATION_HASWELL) && (IS_UTF8_IS_X86_64) && (__AVX2__))
#define IS_UTF8_CAN_ALWAYS_RUN_HASWELL 1
#else
#define IS_UTF8_CAN_ALWAYS_RUN_HASWELL 0
#endif

#if IS_UTF8_IMPLEMENTATION_HASWELL

Expand Down Expand Up @@ -2398,7 +2407,7 @@ class implementation final : public is_utf8_internals::implementation {
// We should not get warnings while including <x86intrin.h> yet we do
// under some versions of GCC.
// If the x86intrin.h header has uninitialized values that are problematic,
// it is a GCC issue, we want to ignore these warnigns.
// it is a GCC issue; we want to ignore these warnings.
IS_UTF8_DISABLE_GCC_WARNING(-Wuninitialized)
#endif

Expand Down Expand Up @@ -2539,8 +2548,9 @@ struct base8 : base<simd8<T>> {
is_utf8_really_inline T last() const {
return _mm256_extract_epi8(*this, 31);
}
is_utf8_really_inline Mask operator==(const simd8<T> other) const {
return _mm256_cmpeq_epi8(*this, other);
friend is_utf8_really_inline Mask operator==(const simd8<T> lhs,
const simd8<T> rhs) {
return _mm256_cmpeq_epi8(lhs, rhs);
}

static const int SIZE = sizeof(base<T>::value);
Expand Down Expand Up @@ -2965,8 +2975,9 @@ struct base16 : base<simd16<T>> {
is_utf8_really_inline base16(const Pointer *ptr)
: base16(_mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr))) {}

is_utf8_really_inline Mask operator==(const simd16<T> other) const {
return _mm256_cmpeq_epi16(*this, other);
friend is_utf8_really_inline Mask operator==(const simd16<T> lhs,
const simd16<T> rhs) {
return _mm256_cmpeq_epi16(lhs, rhs);
}

/// the size of vector in bytes
Expand Down Expand Up @@ -3340,9 +3351,11 @@ IS_UTF8_UNTARGET_REGION

#endif

#define IS_UTF8_CAN_ALWAYS_RUN_WESTMERE \
(IS_UTF8_IMPLEMENTATION_WESTMERE && IS_UTF8_IS_X86_64 && __SSE4_2__ && \
__PCLMUL__)
#if IS_UTF8_IMPLEMENTATION_WESTMERE && IS_UTF8_IS_X86_64 && __SSE4_2__ && __PCLMUL__
#define IS_UTF8_CAN_ALWAYS_RUN_WESTMERE 1
#else
#define IS_UTF8_CAN_ALWAYS_RUN_WESTMERE 0
#endif

#if IS_UTF8_IMPLEMENTATION_WESTMERE

Expand Down Expand Up @@ -3395,7 +3408,7 @@ class implementation final : public is_utf8_internals::implementation {
// We should not get warnings while including <x86intrin.h> yet we do
// under some versions of GCC.
// If the x86intrin.h header has uninitialized values that are problematic,
// it is a GCC issue, we want to ignore these warnigns.
// it is a GCC issue; we want to ignore these warnings.
IS_UTF8_DISABLE_GCC_WARNING(-Wuninitialized)
#endif

Expand Down Expand Up @@ -3517,8 +3530,9 @@ struct base8 : base<simd8<T>> {
is_utf8_really_inline base8() : base<simd8<T>>() {}
is_utf8_really_inline base8(const __m128i _value) : base<simd8<T>>(_value) {}

is_utf8_really_inline Mask operator==(const simd8<T> other) const {
return _mm_cmpeq_epi8(*this, other);
friend is_utf8_really_inline Mask operator==(const simd8<T> lhs,
const simd8<T> rhs) {
return _mm_cmpeq_epi8(lhs, rhs);
}

static const int SIZE = sizeof(base<simd8<T>>::value);
Expand Down Expand Up @@ -4032,8 +4046,9 @@ struct base16 : base<simd16<T>> {
is_utf8_really_inline base16(const Pointer *ptr)
: base16(_mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr))) {}

is_utf8_really_inline Mask operator==(const simd16<T> other) const {
return _mm_cmpeq_epi16(*this, other);
friend is_utf8_really_inline Mask operator==(const simd16<T> lhs,
const simd16<T> rhs) {
return _mm_cmpeq_epi16(lhs, rhs);
}

static const int SIZE = sizeof(base<simd16<T>>::value);
Expand Down Expand Up @@ -4407,7 +4422,11 @@ IS_UTF8_UNTARGET_REGION
#endif
#endif

#define IS_UTF8_CAN_ALWAYS_RUN_FALLBACK (IS_UTF8_IMPLEMENTATION_FALLBACK)
#if IS_UTF8_IMPLEMENTATION_FALLBACK
#define IS_UTF8_CAN_ALWAYS_RUN_FALLBACK 1
#else
#define IS_UTF8_CAN_ALWAYS_RUN_FALLBACK 0
#endif

#if IS_UTF8_IMPLEMENTATION_FALLBACK

Expand Down

0 comments on commit e836b85

Please sign in to comment.