From f4ca119746c5f6f41f87a7db4f7b4fbdb223b99a Mon Sep 17 00:00:00 2001 From: Marc Auberer Date: Mon, 25 Mar 2024 19:53:01 +0100 Subject: [PATCH 1/7] [libcxx] Use generic builtins for popcount, clz and ctz Use __builtin_popcountg instead of __builtin_popcount{l|ll} Use __builtin_clzg instead of __builtin_clz{l|ll} Use __builtin_ctzg instead of __builtin_ctz{l|ll} --- libcxx/include/__bit/countl.h | 16 ++++++++++++++++ libcxx/include/__bit/countr.h | 12 ++++++++++++ libcxx/include/__bit/popcount.h | 12 ++++++++++++ libcxx/src/include/ryu/d2s_intrinsics.h | 2 +- libcxx/src/include/ryu/ryu.h | 5 +++-- libcxx/src/ryu/f2s.cpp | 2 +- 6 files changed, 45 insertions(+), 4 deletions(-) diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h index 396cfc2c3f406..bd87f8903787c 100644 --- a/libcxx/include/__bit/countl.h +++ b/libcxx/include/__bit/countl.h @@ -25,15 +25,27 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT { +#if __has_builtin(__builtin_clzg) + return __builtin_clzg(__x); +#else return __builtin_clz(__x); +#endif } _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT { +#if __has_builtin(__builtin_clzg) + return __builtin_clzg(__x); +#else return __builtin_clzl(__x); +#endif } _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT { +#if __has_builtin(__builtin_clzg) + return __builtin_clzg(__x); +#else return __builtin_clzll(__x); +#endif } #ifndef _LIBCPP_HAS_NO_INT128 @@ -47,8 +59,12 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) // - Any bits set: // - The number of leading zeros of the input is the number of leading // zeros in the high 64-bits. +# if __has_builtin(__builtin_clzg) + return __builtin_clzg(__x); +# else return ((__x >> 64) == 0) ? 
(64 + __builtin_clzll(static_cast(__x))) : __builtin_clzll(static_cast(__x >> 64)); +# endif } #endif // _LIBCPP_HAS_NO_INT128 diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h index b6b3ac52ca4e4..af34768888873 100644 --- a/libcxx/include/__bit/countr.h +++ b/libcxx/include/__bit/countr.h @@ -24,15 +24,27 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT { +#if __has_builtin(__builtin_ctzg) + return __builtin_ctzg(__x); +#else return __builtin_ctz(__x); +#endif } _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT { +#if __has_builtin(__builtin_ctzg) + return __builtin_ctzg(__x); +#else return __builtin_ctzl(__x); +#endif } _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT { +#if __has_builtin(__builtin_ctzg) + return __builtin_ctzg(__x); +#else return __builtin_ctzll(__x); +#endif } template diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h index b0319cef25189..85ba84a572dd8 100644 --- a/libcxx/include/__bit/popcount.h +++ b/libcxx/include/__bit/popcount.h @@ -24,15 +24,27 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned __x) _NOEXCEPT { +#if __has_builtin(__builtin_popcountg) + return __builtin_popcountg(__x); +#else return __builtin_popcount(__x); +#endif } inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long __x) _NOEXCEPT { +#if __has_builtin(__builtin_popcountg) + return __builtin_popcountg(__x); +#else return __builtin_popcountl(__x); +#endif } inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long long __x) _NOEXCEPT { +#if __has_builtin(__builtin_popcountg) + return __builtin_popcountg(__x); +#else return __builtin_popcountll(__x); +#endif 
} #if _LIBCPP_STD_VER >= 20 diff --git a/libcxx/src/include/ryu/d2s_intrinsics.h b/libcxx/src/include/ryu/d2s_intrinsics.h index be50361fb3b33..afe64649a0be1 100644 --- a/libcxx/src/include/ryu/d2s_intrinsics.h +++ b/libcxx/src/include/ryu/d2s_intrinsics.h @@ -249,7 +249,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint64_t __value, const uint32_t __p) { _LIBCPP_ASSERT_INTERNAL(__value != 0, ""); _LIBCPP_ASSERT_INTERNAL(__p < 64, ""); - // __builtin_ctzll doesn't appear to be faster here. + // __builtin_ctzll/__builtin_ctzg doesn't appear to be faster here. return (__value & ((1ull << __p) - 1)) == 0; } diff --git a/libcxx/src/include/ryu/ryu.h b/libcxx/src/include/ryu/ryu.h index 7b19ecfec5915..85831bed61b21 100644 --- a/libcxx/src/include/ryu/ryu.h +++ b/libcxx/src/include/ryu/ryu.h @@ -43,6 +43,7 @@ // Avoid formatting to keep the changes with the original code minimal. // clang-format off +#include <__bit/countr.h> #include <__charconv/chars_format.h> #include <__charconv/to_chars_result.h> #include <__config> @@ -72,7 +73,7 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward64(unsigned long* __in if (__mask == 0) { return false; } - *__index = __builtin_ctzll(__mask); + *__index = __libcpp_ctz(__mask); return true; } @@ -80,7 +81,7 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward(unsigned long* __inde if (__mask == 0) { return false; } - *__index = __builtin_ctz(__mask); + *__index = __libcpp_ctz(__mask); return true; } #endif // !_MSC_VER diff --git a/libcxx/src/ryu/f2s.cpp b/libcxx/src/ryu/f2s.cpp index f42fbd68c91d2..e7b5d39669f99 100644 --- a/libcxx/src/ryu/f2s.cpp +++ b/libcxx/src/ryu/f2s.cpp @@ -107,7 +107,7 @@ inline constexpr uint64_t __FLOAT_POW5_SPLIT[47] = { [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint32_t __value, const uint32_t __p) { _LIBCPP_ASSERT_INTERNAL(__value != 0, ""); _LIBCPP_ASSERT_INTERNAL(__p < 32, ""); - // 
__builtin_ctz doesn't appear to be faster here. + // __builtin_ctz/__builtin_ctzg doesn't appear to be faster here. return (__value & ((1u << __p) - 1)) == 0; } From 35355091bf4545384b0750f56d1ee67f7e0f4a81 Mon Sep 17 00:00:00 2001 From: Marc Auberer Date: Mon, 25 Mar 2024 22:30:22 +0100 Subject: [PATCH 2/7] Shortcuts that benefit from generic builtins --- libcxx/include/__bit/countl.h | 4 ++++ libcxx/include/__bit/countr.h | 4 ++++ libcxx/include/__bit/popcount.h | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h index bd87f8903787c..af0409950837b 100644 --- a/libcxx/include/__bit/countl.h +++ b/libcxx/include/__bit/countl.h @@ -74,6 +74,9 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _ if (__t == 0) return numeric_limits<_Tp>::digits; +#if __has_builtin(__builtin_clzg) + return __builtin_clzg(__t) - (numeric_limits::digits - numeric_limits<_Tp>::digits); +#else if (sizeof(_Tp) <= sizeof(unsigned int)) return std::__libcpp_clz(static_cast(__t)) - (numeric_limits::digits - numeric_limits<_Tp>::digits); @@ -95,6 +98,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _ } return __ret + __iter; } +#endif // __has_builtin(__builtin_clzg) } #if _LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h index af34768888873..6c2b2a74ee7ac 100644 --- a/libcxx/include/__bit/countr.h +++ b/libcxx/include/__bit/countr.h @@ -52,6 +52,9 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __coun if (__t == 0) return numeric_limits<_Tp>::digits; +#if __has_builtin(__builtin_ctz) + return __builtin_ctz(__t); +#else if (sizeof(_Tp) <= sizeof(unsigned int)) return std::__libcpp_ctz(static_cast(__t)); else if (sizeof(_Tp) <= sizeof(unsigned long)) @@ -67,6 +70,7 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __coun } return __ret + 
std::__libcpp_ctz(static_cast(__t)); } +#endif // __has_builtin(__builtin_ctz) } #if _LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h index 85ba84a572dd8..63e0aadec0496 100644 --- a/libcxx/include/__bit/popcount.h +++ b/libcxx/include/__bit/popcount.h @@ -51,6 +51,9 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned lo template <__libcpp_unsigned_integer _Tp> _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noexcept { +# if __has_builtin(__builtin_popcount) + return __builtin_popcount(__t); +# else if (sizeof(_Tp) <= sizeof(unsigned int)) return std::__libcpp_popcount(static_cast(__t)); else if (sizeof(_Tp) <= sizeof(unsigned long)) @@ -65,6 +68,7 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noex } return __ret; } +# endif // __has_builtin(__builtin_popcount) } #endif // _LIBCPP_STD_VER >= 20 From 227d89e3f0dd9739a5b367311214c25aefeeadaa Mon Sep 17 00:00:00 2001 From: Marc Auberer Date: Tue, 26 Mar 2024 00:42:27 +0100 Subject: [PATCH 3/7] Simplify --- libcxx/include/__bit/countl.h | 17 ++++++++++++++--- libcxx/include/__bit/countr.h | 16 +++++++++++++--- libcxx/include/__bit/popcount.h | 15 +++++++++++---- 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h index af0409950837b..a1998ba5f27b4 100644 --- a/libcxx/include/__bit/countl.h +++ b/libcxx/include/__bit/countl.h @@ -68,15 +68,25 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) } #endif // _LIBCPP_HAS_NO_INT128 +#if __has_builtin(__builtin_clzg) + template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT { static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countl_zero requires an unsigned integer type"); if (__t == 0) return numeric_limits<_Tp>::digits; -#if __has_builtin(__builtin_clzg) return __builtin_clzg(__t) 
- (numeric_limits::digits - numeric_limits<_Tp>::digits); -#else +} + +#else // __has_builtin(__builtin_clzg) + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT { + static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countl_zero requires an unsigned integer type"); + if (__t == 0) + return numeric_limits<_Tp>::digits; + if (sizeof(_Tp) <= sizeof(unsigned int)) return std::__libcpp_clz(static_cast(__t)) - (numeric_limits::digits - numeric_limits<_Tp>::digits); @@ -98,9 +108,10 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _ } return __ret + __iter; } -#endif // __has_builtin(__builtin_clzg) } +#endif // __has_builtin(__builtin_clzg) + #if _LIBCPP_STD_VER >= 20 template <__libcpp_unsigned_integer _Tp> diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h index 6c2b2a74ee7ac..b2feaf7d97bce 100644 --- a/libcxx/include/__bit/countr.h +++ b/libcxx/include/__bit/countr.h @@ -47,14 +47,23 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ct #endif } +#if __has_builtin(__builtin_ctzg) + template _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT { if (__t == 0) return numeric_limits<_Tp>::digits; -#if __has_builtin(__builtin_ctz) return __builtin_ctz(__t); -#else +} + +#else // __has_builtin(__builtin_ctzg) + +template +_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT { + if (__t == 0) + return numeric_limits<_Tp>::digits; + if (sizeof(_Tp) <= sizeof(unsigned int)) return std::__libcpp_ctz(static_cast(__t)); else if (sizeof(_Tp) <= sizeof(unsigned long)) @@ -70,9 +79,10 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __coun } return __ret + std::__libcpp_ctz(static_cast(__t)); } -#endif // __has_builtin(__builtin_ctz) } +#endif // __has_builtin(__builtin_ctzg) + #if _LIBCPP_STD_VER >= 20 
template <__libcpp_unsigned_integer _Tp> diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h index 63e0aadec0496..68a723bcce63e 100644 --- a/libcxx/include/__bit/popcount.h +++ b/libcxx/include/__bit/popcount.h @@ -49,11 +49,17 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned lo #if _LIBCPP_STD_VER >= 20 +# if __has_builtin(__builtin_popcountg) + +template <__libcpp_unsigned_integer _Tp> +_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noexcept { + return __builtin_popcountg(__t); +} + +# else // __has_builtin(__builtin_popcountg) + template <__libcpp_unsigned_integer _Tp> _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noexcept { -# if __has_builtin(__builtin_popcount) - return __builtin_popcount(__t); -# else if (sizeof(_Tp) <= sizeof(unsigned int)) return std::__libcpp_popcount(static_cast(__t)); else if (sizeof(_Tp) <= sizeof(unsigned long)) @@ -68,9 +74,10 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noex } return __ret; } -# endif // __has_builtin(__builtin_popcount) } +# endif // __has_builtin(__builtin_popcountg) + #endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD From 41eb681ac0b69306c173f9686ce47b7366274e36 Mon Sep 17 00:00:00 2001 From: Marc Auberer Date: Tue, 26 Mar 2024 22:54:17 +0100 Subject: [PATCH 4/7] Do cleanup and remove non-feasible usages --- libcxx/include/__bit/countl.h | 23 +++++++---------------- libcxx/include/__bit/countr.h | 15 +++------------ libcxx/include/__bit/popcount.h | 15 +++------------ libcxx/src/include/ryu/d2s_intrinsics.h | 2 +- libcxx/src/include/ryu/ryu.h | 4 ++-- libcxx/src/ryu/f2s.cpp | 2 +- 6 files changed, 17 insertions(+), 44 deletions(-) diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h index a1998ba5f27b4..21bdde9bbe4c8 100644 --- a/libcxx/include/__bit/countl.h +++ b/libcxx/include/__bit/countl.h @@ -6,6 +6,9 @@ // 
//===----------------------------------------------------------------------===// +// ToDo: __builtin_clzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can +// refactor this code to exclusively use __builtin_clzg. + #ifndef _LIBCPP___BIT_COUNTL_H #define _LIBCPP___BIT_COUNTL_H @@ -25,31 +28,22 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT { -#if __has_builtin(__builtin_clzg) - return __builtin_clzg(__x); -#else return __builtin_clz(__x); -#endif } _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT { -#if __has_builtin(__builtin_clzg) - return __builtin_clzg(__x); -#else return __builtin_clzl(__x); -#endif } _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT { -#if __has_builtin(__builtin_clzg) - return __builtin_clzg(__x); -#else return __builtin_clzll(__x); -#endif } #ifndef _LIBCPP_HAS_NO_INT128 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) _NOEXCEPT { +# if __has_builtin(__builtin_clzg) + return __builtin_clzg(__x); +# else // The function is written in this form due to C++ constexpr limitations. // The algorithm: // - Test whether any bit in the high 64-bits is set @@ -59,9 +53,6 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) // - Any bits set: // - The number of leading zeros of the input is the number of leading // zeros in the high 64-bits. -# if __has_builtin(__builtin_clzg) - return __builtin_clzg(__x); -# else return ((__x >> 64) == 0) ? 
(64 + __builtin_clzll(static_cast(__x))) : __builtin_clzll(static_cast(__x >> 64)); # endif @@ -76,7 +67,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _ if (__t == 0) return numeric_limits<_Tp>::digits; - return __builtin_clzg(__t) - (numeric_limits::digits - numeric_limits<_Tp>::digits); + return __builtin_clzg(__t); } #else // __has_builtin(__builtin_clzg) diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h index b2feaf7d97bce..a01ec96417f08 100644 --- a/libcxx/include/__bit/countr.h +++ b/libcxx/include/__bit/countr.h @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// +// ToDo: __builtin_ctzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can +// refactor this code to exclusively use __builtin_ctzg. + #ifndef _LIBCPP___BIT_COUNTR_H #define _LIBCPP___BIT_COUNTR_H @@ -24,27 +27,15 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT { -#if __has_builtin(__builtin_ctzg) - return __builtin_ctzg(__x); -#else return __builtin_ctz(__x); -#endif } _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT { -#if __has_builtin(__builtin_ctzg) - return __builtin_ctzg(__x); -#else return __builtin_ctzl(__x); -#endif } _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT { -#if __has_builtin(__builtin_ctzg) - return __builtin_ctzg(__x); -#else return __builtin_ctzll(__x); -#endif } #if __has_builtin(__builtin_ctzg) diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h index 68a723bcce63e..88c967d67c60a 100644 --- a/libcxx/include/__bit/popcount.h +++ b/libcxx/include/__bit/popcount.h @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// +// 
ToDo: __builtin_popcountg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can +// refactor this code to exclusively use __builtin_popcountg. + #ifndef _LIBCPP___BIT_POPCOUNT_H #define _LIBCPP___BIT_POPCOUNT_H @@ -24,27 +27,15 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned __x) _NOEXCEPT { -#if __has_builtin(__builtin_popcountg) - return __builtin_popcountg(__x); -#else return __builtin_popcount(__x); -#endif } inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long __x) _NOEXCEPT { -#if __has_builtin(__builtin_popcountg) - return __builtin_popcountg(__x); -#else return __builtin_popcountl(__x); -#endif } inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long long __x) _NOEXCEPT { -#if __has_builtin(__builtin_popcountg) - return __builtin_popcountg(__x); -#else return __builtin_popcountll(__x); -#endif } #if _LIBCPP_STD_VER >= 20 diff --git a/libcxx/src/include/ryu/d2s_intrinsics.h b/libcxx/src/include/ryu/d2s_intrinsics.h index afe64649a0be1..be50361fb3b33 100644 --- a/libcxx/src/include/ryu/d2s_intrinsics.h +++ b/libcxx/src/include/ryu/d2s_intrinsics.h @@ -249,7 +249,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint64_t __value, const uint32_t __p) { _LIBCPP_ASSERT_INTERNAL(__value != 0, ""); _LIBCPP_ASSERT_INTERNAL(__p < 64, ""); - // __builtin_ctzll/__builtin_ctzg doesn't appear to be faster here. + // __builtin_ctzll doesn't appear to be faster here. 
return (__value & ((1ull << __p) - 1)) == 0; } diff --git a/libcxx/src/include/ryu/ryu.h b/libcxx/src/include/ryu/ryu.h index 85831bed61b21..de1744ba20a0e 100644 --- a/libcxx/src/include/ryu/ryu.h +++ b/libcxx/src/include/ryu/ryu.h @@ -73,7 +73,7 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward64(unsigned long* __in if (__mask == 0) { return false; } - *__index = __libcpp_ctz(__mask); + *__index = __builtin_ctzll(__mask); return true; } @@ -81,7 +81,7 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward(unsigned long* __inde if (__mask == 0) { return false; } - *__index = __libcpp_ctz(__mask); + *__index = __builtin_ctz(__mask); return true; } #endif // !_MSC_VER diff --git a/libcxx/src/ryu/f2s.cpp b/libcxx/src/ryu/f2s.cpp index e7b5d39669f99..f42fbd68c91d2 100644 --- a/libcxx/src/ryu/f2s.cpp +++ b/libcxx/src/ryu/f2s.cpp @@ -107,7 +107,7 @@ inline constexpr uint64_t __FLOAT_POW5_SPLIT[47] = { [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint32_t __value, const uint32_t __p) { _LIBCPP_ASSERT_INTERNAL(__value != 0, ""); _LIBCPP_ASSERT_INTERNAL(__p < 32, ""); - // __builtin_ctz/__builtin_ctzg doesn't appear to be faster here. + // __builtin_ctz doesn't appear to be faster here. 
return (__value & ((1u << __p) - 1)) == 0; } From 6d969ba90b9829413c675249a3e187f60625fc15 Mon Sep 17 00:00:00 2001 From: Marc Auberer Date: Wed, 27 Mar 2024 02:17:21 +0100 Subject: [PATCH 5/7] Fix typo --- libcxx/include/__bit/countl.h | 2 +- libcxx/include/__bit/countr.h | 4 ++-- libcxx/include/__bit/popcount.h | 2 +- libcxx/src/include/ryu/ryu.h | 1 - 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h index 21bdde9bbe4c8..5e71deb43fae2 100644 --- a/libcxx/include/__bit/countl.h +++ b/libcxx/include/__bit/countl.h @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ToDo: __builtin_clzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can +// TODO: __builtin_clzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can // refactor this code to exclusively use __builtin_clzg. #ifndef _LIBCPP___BIT_COUNTL_H diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h index a01ec96417f08..8d345d72d1727 100644 --- a/libcxx/include/__bit/countr.h +++ b/libcxx/include/__bit/countr.h @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ToDo: __builtin_ctzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can +// TODO: __builtin_ctzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can // refactor this code to exclusively use __builtin_ctzg. 
#ifndef _LIBCPP___BIT_COUNTR_H @@ -45,7 +45,7 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __coun if (__t == 0) return numeric_limits<_Tp>::digits; - return __builtin_ctz(__t); + return __builtin_ctzg(__t); } #else // __has_builtin(__builtin_ctzg) diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h index 88c967d67c60a..a07e9e6ffc56e 100644 --- a/libcxx/include/__bit/popcount.h +++ b/libcxx/include/__bit/popcount.h @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ToDo: __builtin_popcountg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can +// TODO: __builtin_popcountg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can // refactor this code to exclusively use __builtin_popcountg. #ifndef _LIBCPP___BIT_POPCOUNT_H diff --git a/libcxx/src/include/ryu/ryu.h b/libcxx/src/include/ryu/ryu.h index de1744ba20a0e..7b19ecfec5915 100644 --- a/libcxx/src/include/ryu/ryu.h +++ b/libcxx/src/include/ryu/ryu.h @@ -43,7 +43,6 @@ // Avoid formatting to keep the changes with the original code minimal. 
// clang-format off -#include <__bit/countr.h> #include <__charconv/chars_format.h> #include <__charconv/to_chars_result.h> #include <__config> From bf43981f233b146cb7b259b7af006a951ec912ca Mon Sep 17 00:00:00 2001 From: Marc Auberer Date: Thu, 28 Mar 2024 17:39:31 +0100 Subject: [PATCH 6/7] Avoid duplicate signatures --- libcxx/include/__bit/countl.h | 17 +++-------------- libcxx/include/__bit/countr.h | 16 +++------------- libcxx/include/__bit/popcount.h | 13 +++---------- 3 files changed, 9 insertions(+), 37 deletions(-) diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h index 5e71deb43fae2..e64e435f80a0b 100644 --- a/libcxx/include/__bit/countl.h +++ b/libcxx/include/__bit/countl.h @@ -59,25 +59,15 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) } #endif // _LIBCPP_HAS_NO_INT128 -#if __has_builtin(__builtin_clzg) - template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT { static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countl_zero requires an unsigned integer type"); if (__t == 0) return numeric_limits<_Tp>::digits; +#if __has_builtin(__builtin_clzg) return __builtin_clzg(__t); -} - -#else // __has_builtin(__builtin_clzg) - -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT { - static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countl_zero requires an unsigned integer type"); - if (__t == 0) - return numeric_limits<_Tp>::digits; - +#else // __has_builtin(__builtin_clzg) if (sizeof(_Tp) <= sizeof(unsigned int)) return std::__libcpp_clz(static_cast(__t)) - (numeric_limits::digits - numeric_limits<_Tp>::digits); @@ -99,9 +89,8 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _ } return __ret + __iter; } -} - #endif // __has_builtin(__builtin_clzg) +} #if _LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h index 
8d345d72d1727..5907f6fc9c745 100644 --- a/libcxx/include/__bit/countr.h +++ b/libcxx/include/__bit/countr.h @@ -38,23 +38,14 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ct return __builtin_ctzll(__x); } -#if __has_builtin(__builtin_ctzg) - template _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT { if (__t == 0) return numeric_limits<_Tp>::digits; +#if __has_builtin(__builtin_ctzg) return __builtin_ctzg(__t); -} - -#else // __has_builtin(__builtin_ctzg) - -template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT { - if (__t == 0) - return numeric_limits<_Tp>::digits; - +#else // __has_builtin(__builtin_ctzg) if (sizeof(_Tp) <= sizeof(unsigned int)) return std::__libcpp_ctz(static_cast(__t)); else if (sizeof(_Tp) <= sizeof(unsigned long)) @@ -70,9 +61,8 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __coun } return __ret + std::__libcpp_ctz(static_cast(__t)); } -} - #endif // __has_builtin(__builtin_ctzg) +} #if _LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h index a07e9e6ffc56e..37b3a3e1f3f2b 100644 --- a/libcxx/include/__bit/popcount.h +++ b/libcxx/include/__bit/popcount.h @@ -40,17 +40,11 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned lo #if _LIBCPP_STD_VER >= 20 -# if __has_builtin(__builtin_popcountg) - template <__libcpp_unsigned_integer _Tp> _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noexcept { +# if __has_builtin(__builtin_popcountg) return __builtin_popcountg(__t); -} - -# else // __has_builtin(__builtin_popcountg) - -template <__libcpp_unsigned_integer _Tp> -_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noexcept { +# else // __has_builtin(__builtin_popcountg) if (sizeof(_Tp) <= sizeof(unsigned int)) return 
std::__libcpp_popcount(static_cast(__t)); else if (sizeof(_Tp) <= sizeof(unsigned long)) @@ -65,9 +59,8 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noex } return __ret; } -} - # endif // __has_builtin(__builtin_popcountg) +} #endif // _LIBCPP_STD_VER >= 20 From d3c2847a1f3f1289cbe556ba266f4963547ea9f9 Mon Sep 17 00:00:00 2001 From: Marc Auberer Date: Fri, 29 Mar 2024 18:43:57 +0100 Subject: [PATCH 7/7] Use optional second parameter of c{l|t}z builtins Co-authored-by: Nick Desaulniers --- libcxx/include/__bit/countl.h | 6 +++--- libcxx/include/__bit/countr.h | 7 +++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h index e64e435f80a0b..13df8d4e66c40 100644 --- a/libcxx/include/__bit/countl.h +++ b/libcxx/include/__bit/countl.h @@ -62,12 +62,12 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT { static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countl_zero requires an unsigned integer type"); +#if __has_builtin(__builtin_clzg) + return __builtin_clzg(__t, numeric_limits<_Tp>::digits); +#else // __has_builtin(__builtin_clzg) if (__t == 0) return numeric_limits<_Tp>::digits; -#if __has_builtin(__builtin_clzg) - return __builtin_clzg(__t); -#else // __has_builtin(__builtin_clzg) if (sizeof(_Tp) <= sizeof(unsigned int)) return std::__libcpp_clz(static_cast(__t)) - (numeric_limits::digits - numeric_limits<_Tp>::digits); diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h index 5907f6fc9c745..724a0bc23801c 100644 --- a/libcxx/include/__bit/countr.h +++ b/libcxx/include/__bit/countr.h @@ -40,12 +40,11 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ct template _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT { - 
if (__t == 0) - return numeric_limits<_Tp>::digits; - #if __has_builtin(__builtin_ctzg) - return __builtin_ctzg(__t); + return __builtin_ctzg(__t, numeric_limits<_Tp>::digits); #else // __has_builtin(__builtin_ctzg) + if (__t == 0) + return numeric_limits<_Tp>::digits; if (sizeof(_Tp) <= sizeof(unsigned int)) return std::__libcpp_ctz(static_cast(__t)); else if (sizeof(_Tp) <= sizeof(unsigned long))