From 7fdc51bcd395c25f68badc9fcb88ebb361f6c1d7 Mon Sep 17 00:00:00 2001 From: Marc Auberer Date: Mon, 25 Mar 2024 19:53:01 +0100 Subject: [PATCH] [libcxx] Use generic builtins for popcount, clz and ctz Use __builtin_popcountg instead of __builtin_popcount{l|ll} Use __builtin_clzg instead of __builtin_clz{l|ll} Use __builtin_ctzg instead of __builtin_ctz{l|ll} --- libcxx/include/__bit/countl.h | 17 +++++++++++++++++ libcxx/include/__bit/countr.h | 12 ++++++++++++ libcxx/include/__bit/popcount.h | 12 ++++++++++++ libcxx/src/include/ryu/d2s_intrinsics.h | 2 +- libcxx/src/include/ryu/ryu.h | 5 +++-- libcxx/src/ryu/f2s.cpp | 2 +- 6 files changed, 46 insertions(+), 4 deletions(-) diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h index 396cfc2c3f406..ae33c9438bf7b 100644 --- a/libcxx/include/__bit/countl.h +++ b/libcxx/include/__bit/countl.h @@ -25,15 +25,27 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT { +#if __has_builtin(__builtin_clzg) + return __builtin_clzg(__x); +#else return __builtin_clz(__x); +#endif } _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT { +#if __has_builtin(__builtin_clzg) + return __builtin_clzg(__x); +#else return __builtin_clzl(__x); +#endif } _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT { +#if __has_builtin(__builtin_clzg) + return __builtin_clzg(__x); +#else return __builtin_clzll(__x); +#endif } #ifndef _LIBCPP_HAS_NO_INT128 @@ -47,8 +59,13 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) // - Any bits set: // - The number of leading zeros of the input is the number of leading // zeros in the high 64-bits. +# if __has_builtin(__builtin_clzg) + return ((__x >> 64) == 0) ? 
(64 + __builtin_clzg(static_cast<unsigned long long>(__x))) + : __builtin_clzg(static_cast<unsigned long long>(__x >> 64)); +# else return ((__x >> 64) == 0) ? (64 + __builtin_clzll(static_cast<unsigned long long>(__x))) : __builtin_clzll(static_cast<unsigned long long>(__x >> 64)); +# endif } #endif // _LIBCPP_HAS_NO_INT128 diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h index b6b3ac52ca4e4..af34768888873 100644 --- a/libcxx/include/__bit/countr.h +++ b/libcxx/include/__bit/countr.h @@ -24,15 +24,27 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT { +#if __has_builtin(__builtin_ctzg) + return __builtin_ctzg(__x); +#else return __builtin_ctz(__x); +#endif } _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT { +#if __has_builtin(__builtin_ctzg) + return __builtin_ctzg(__x); +#else return __builtin_ctzl(__x); +#endif } _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT { +#if __has_builtin(__builtin_ctzg) + return __builtin_ctzg(__x); +#else return __builtin_ctzll(__x); +#endif } template diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h index b0319cef25189..85ba84a572dd8 100644 --- a/libcxx/include/__bit/popcount.h +++ b/libcxx/include/__bit/popcount.h @@ -24,15 +24,27 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned __x) _NOEXCEPT { +#if __has_builtin(__builtin_popcountg) + return __builtin_popcountg(__x); +#else return __builtin_popcount(__x); +#endif } inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long __x) _NOEXCEPT { +#if __has_builtin(__builtin_popcountg) + return __builtin_popcountg(__x); +#else return __builtin_popcountl(__x); +#endif } inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long long __x) _NOEXCEPT { 
+#if __has_builtin(__builtin_popcountg) + return __builtin_popcountg(__x); +#else return __builtin_popcountll(__x); +#endif } #if _LIBCPP_STD_VER >= 20 diff --git a/libcxx/src/include/ryu/d2s_intrinsics.h b/libcxx/src/include/ryu/d2s_intrinsics.h index be50361fb3b33..afe64649a0be1 100644 --- a/libcxx/src/include/ryu/d2s_intrinsics.h +++ b/libcxx/src/include/ryu/d2s_intrinsics.h @@ -249,7 +249,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint64_t __value, const uint32_t __p) { _LIBCPP_ASSERT_INTERNAL(__value != 0, ""); _LIBCPP_ASSERT_INTERNAL(__p < 64, ""); - // __builtin_ctzll doesn't appear to be faster here. + // __builtin_ctzll/__builtin_ctzg doesn't appear to be faster here. return (__value & ((1ull << __p) - 1)) == 0; } diff --git a/libcxx/src/include/ryu/ryu.h b/libcxx/src/include/ryu/ryu.h index 7b19ecfec5915..85831bed61b21 100644 --- a/libcxx/src/include/ryu/ryu.h +++ b/libcxx/src/include/ryu/ryu.h @@ -43,6 +43,7 @@ // Avoid formatting to keep the changes with the original code minimal. 
// clang-format off +#include <__bit/countr.h> #include <__charconv/chars_format.h> #include <__charconv/to_chars_result.h> #include <__config> @@ -72,7 +73,7 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward64(unsigned long* __in if (__mask == 0) { return false; } - *__index = __builtin_ctzll(__mask); + *__index = __libcpp_ctz(__mask); return true; } @@ -80,7 +81,7 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward(unsigned long* __inde if (__mask == 0) { return false; } - *__index = __builtin_ctz(__mask); + *__index = __libcpp_ctz(__mask); return true; } #endif // !_MSC_VER diff --git a/libcxx/src/ryu/f2s.cpp b/libcxx/src/ryu/f2s.cpp index f42fbd68c91d2..e7b5d39669f99 100644 --- a/libcxx/src/ryu/f2s.cpp +++ b/libcxx/src/ryu/f2s.cpp @@ -107,7 +107,7 @@ inline constexpr uint64_t __FLOAT_POW5_SPLIT[47] = { [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint32_t __value, const uint32_t __p) { _LIBCPP_ASSERT_INTERNAL(__value != 0, ""); _LIBCPP_ASSERT_INTERNAL(__p < 32, ""); - // __builtin_ctz doesn't appear to be faster here. + // __builtin_ctz/__builtin_ctzg doesn't appear to be faster here. return (__value & ((1u << __p) - 1)) == 0; }