Skip to content

Commit

Permalink
[libcxx] Use generic builtins for popcount, clz and ctz
Browse files Browse the repository at this point in the history
Use __builtin_popcountg instead of __builtin_popcount{l|ll}
Use __builtin_clzg instead of __builtin_clz{l|ll}
Use __builtin_ctzg instead of __builtin_ctz{l|ll}
  • Loading branch information
marcauberer committed Mar 25, 2024
1 parent cce18e4 commit 7fdc51b
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 4 deletions.
17 changes: 17 additions & 0 deletions libcxx/include/__bit/countl.h
Expand Up @@ -25,15 +25,27 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD

// Counts the leading zero bits of an `unsigned` value.
//
// Uses the type-generic __builtin_clzg when the compiler provides it and the
// width-specific __builtin_clz otherwise. Per the compiler builtin
// documentation the result for a zero argument is undefined, so callers are
// expected to handle zero themselves.
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT {
#if !__has_builtin(__builtin_clzg)
  // No generic builtin available: use the `unsigned` flavour.
  return __builtin_clz(__x);
#else
  // Generic builtin deduces the operand width from the argument type.
  return __builtin_clzg(__x);
#endif
}

// Counts the leading zero bits of an `unsigned long` value.
//
// Uses the type-generic __builtin_clzg when the compiler provides it and the
// width-specific __builtin_clzl otherwise. Per the compiler builtin
// documentation the result for a zero argument is undefined, so callers are
// expected to handle zero themselves.
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT {
#if !__has_builtin(__builtin_clzg)
  // No generic builtin available: use the `unsigned long` flavour.
  return __builtin_clzl(__x);
#else
  // Generic builtin deduces the operand width from the argument type.
  return __builtin_clzg(__x);
#endif
}

// Counts the leading zero bits of an `unsigned long long` value.
//
// Uses the type-generic __builtin_clzg when the compiler provides it and the
// width-specific __builtin_clzll otherwise. Per the compiler builtin
// documentation the result for a zero argument is undefined, so callers are
// expected to handle zero themselves.
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT {
#if !__has_builtin(__builtin_clzg)
  // No generic builtin available: use the `unsigned long long` flavour.
  return __builtin_clzll(__x);
#else
  // Generic builtin deduces the operand width from the argument type.
  return __builtin_clzg(__x);
#endif
}

#ifndef _LIBCPP_HAS_NO_INT128
Expand All @@ -47,8 +59,13 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x)
// - Any bits set:
// - The number of leading zeros of the input is the number of leading
// zeros in the high 64-bits.
# if __has_builtin(__builtin_clzg)
return ((__x >> 64) == 0) ? (64 + __builtin_clzg(static_cast<unsigned long long>(__x)))
: __builtin_clzg(static_cast<unsigned long long>(__x >> 64));
# else
return ((__x >> 64) == 0) ? (64 + __builtin_clzll(static_cast<unsigned long long>(__x)))
: __builtin_clzll(static_cast<unsigned long long>(__x >> 64));
# endif
}
#endif // _LIBCPP_HAS_NO_INT128

Expand Down
12 changes: 12 additions & 0 deletions libcxx/include/__bit/countr.h
Expand Up @@ -24,15 +24,27 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD

// Counts the trailing zero bits of an `unsigned` value.
//
// Uses the type-generic __builtin_ctzg when the compiler provides it and the
// width-specific __builtin_ctz otherwise. Per the compiler builtin
// documentation the result for a zero argument is undefined, so callers are
// expected to handle zero themselves.
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT {
#if !__has_builtin(__builtin_ctzg)
  // No generic builtin available: use the `unsigned` flavour.
  return __builtin_ctz(__x);
#else
  // Generic builtin deduces the operand width from the argument type.
  return __builtin_ctzg(__x);
#endif
}

// Counts the trailing zero bits of an `unsigned long` value.
//
// Uses the type-generic __builtin_ctzg when the compiler provides it and the
// width-specific __builtin_ctzl otherwise. Per the compiler builtin
// documentation the result for a zero argument is undefined, so callers are
// expected to handle zero themselves.
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT {
#if !__has_builtin(__builtin_ctzg)
  // No generic builtin available: use the `unsigned long` flavour.
  return __builtin_ctzl(__x);
#else
  // Generic builtin deduces the operand width from the argument type.
  return __builtin_ctzg(__x);
#endif
}

// Counts the trailing zero bits of an `unsigned long long` value.
//
// Uses the type-generic __builtin_ctzg when the compiler provides it and the
// width-specific __builtin_ctzll otherwise. Per the compiler builtin
// documentation the result for a zero argument is undefined, so callers are
// expected to handle zero themselves.
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT {
#if !__has_builtin(__builtin_ctzg)
  // No generic builtin available: use the `unsigned long long` flavour.
  return __builtin_ctzll(__x);
#else
  // Generic builtin deduces the operand width from the argument type.
  return __builtin_ctzg(__x);
#endif
}

template <class _Tp>
Expand Down
12 changes: 12 additions & 0 deletions libcxx/include/__bit/popcount.h
Expand Up @@ -24,15 +24,27 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD

// Counts the set (1) bits of an `unsigned` value.
//
// Uses the type-generic __builtin_popcountg when the compiler provides it and
// the width-specific __builtin_popcount otherwise.
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned __x) _NOEXCEPT {
#if !__has_builtin(__builtin_popcountg)
  // No generic builtin available: use the `unsigned` flavour.
  return __builtin_popcount(__x);
#else
  // Generic builtin deduces the operand width from the argument type.
  return __builtin_popcountg(__x);
#endif
}

// Counts the set (1) bits of an `unsigned long` value.
//
// Uses the type-generic __builtin_popcountg when the compiler provides it and
// the width-specific __builtin_popcountl otherwise.
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long __x) _NOEXCEPT {
#if !__has_builtin(__builtin_popcountg)
  // No generic builtin available: use the `unsigned long` flavour.
  return __builtin_popcountl(__x);
#else
  // Generic builtin deduces the operand width from the argument type.
  return __builtin_popcountg(__x);
#endif
}

// Counts the set (1) bits of an `unsigned long long` value.
//
// Uses the type-generic __builtin_popcountg when the compiler provides it and
// the width-specific __builtin_popcountll otherwise.
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long long __x) _NOEXCEPT {
#if !__has_builtin(__builtin_popcountg)
  // No generic builtin available: use the `unsigned long long` flavour.
  return __builtin_popcountll(__x);
#else
  // Generic builtin deduces the operand width from the argument type.
  return __builtin_popcountg(__x);
#endif
}

#if _LIBCPP_STD_VER >= 20
Expand Down
2 changes: 1 addition & 1 deletion libcxx/src/include/ryu/d2s_intrinsics.h
Expand Up @@ -249,7 +249,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
// Returns true when the low __p bits of the 64-bit __value are all zero,
// i.e. when __value is a multiple of 2^__p.
// Internal assertions require __value != 0 and __p < 64.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint64_t __value, const uint32_t __p) {
_LIBCPP_ASSERT_INTERNAL(__value != 0, "");
_LIBCPP_ASSERT_INTERNAL(__p < 64, "");
// __builtin_ctzll doesn't appear to be faster here.
// __builtin_ctzll/__builtin_ctzg doesn't appear to be faster here.
// (1ull << __p) - 1 is a mask of the low __p bits; the test passes when none of them is set.
return (__value & ((1ull << __p) - 1)) == 0;
}

Expand Down
5 changes: 3 additions & 2 deletions libcxx/src/include/ryu/ryu.h
Expand Up @@ -43,6 +43,7 @@
// Avoid formatting to keep the changes with the original code minimal.
// clang-format off

#include <__bit/countr.h>
#include <__charconv/chars_format.h>
#include <__charconv/to_chars_result.h>
#include <__config>
Expand Down Expand Up @@ -72,15 +73,15 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward64(unsigned long* __in
if (__mask == 0) {
return false;
}
*__index = __builtin_ctzll(__mask);
*__index = __libcpp_ctz(__mask);
return true;
}

// Finds the lowest set bit of a 32-bit mask.
// Returns false without writing *__index when __mask is 0; otherwise stores
// the count of trailing zero bits of __mask (the index of its lowest set bit)
// in *__index and returns true.
_LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward(unsigned long* __index, unsigned int __mask) {
// Guard the zero case up front, before any count-trailing-zeros call is made.
if (__mask == 0) {
return false;
}
*__index = __builtin_ctz(__mask);
*__index = __libcpp_ctz(__mask);
return true;
}
#endif // !_MSC_VER
Expand Down
2 changes: 1 addition & 1 deletion libcxx/src/ryu/f2s.cpp
Expand Up @@ -107,7 +107,7 @@ inline constexpr uint64_t __FLOAT_POW5_SPLIT[47] = {
// Returns true when the low __p bits of the 32-bit __value are all zero,
// i.e. when __value is a multiple of 2^__p.
// Internal assertions require __value != 0 and __p < 32.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint32_t __value, const uint32_t __p) {
_LIBCPP_ASSERT_INTERNAL(__value != 0, "");
_LIBCPP_ASSERT_INTERNAL(__p < 32, "");
// __builtin_ctz doesn't appear to be faster here.
// __builtin_ctz/__builtin_ctzg doesn't appear to be faster here.
// (1u << __p) - 1 is a mask of the low __p bits; the test passes when none of them is set.
return (__value & ((1u << __p) - 1)) == 0;
}

Expand Down

0 comments on commit 7fdc51b

Please sign in to comment.