diff --git a/numpy/core/_internal.py b/numpy/core/_internal.py index edfbaa3757a1..46da03fbe50d 100644 --- a/numpy/core/_internal.py +++ b/numpy/core/_internal.py @@ -78,6 +78,7 @@ def _usefields(adict, align): "offsets": offsets, "titles": titles}, align) + _INT_SHIFT = 30 _MASK = (2 ** _INT_SHIFT) @@ -89,7 +90,7 @@ def _get_ob_digit_array(num): num_list = [] while t != 0: # Get remainder from division - small_int = t % _MASK # more efficient bitwise analogue: (t & (MASK-1)) + small_int = t % _MASK num_list.append(small_int) # Get integral part of the division (floor division) @@ -816,9 +817,10 @@ def _popcount64(a): # Refer to npy_math_internal.h.src for more details. a = abs(a) - a -= ((a >> 1) & 0x5555555555555555) + a = a - ((a >> 1) & 0x5555555555555555) a = (a & 0x3333333333333333) + (a >> 2 & 0x3333333333333333) - return (((a + (a >> 4)) & 0xf0f0f0f0f0f0f0f) * 0x101010101010101 >> 56) & 0xff + a = (a + (a >> 4)) & 0xf0f0f0f0f0f0f0f + return (a * 0x101010101010101 >> 56) & 0xff def _bit_count(a): """ Computes the number of 1-bits in a (Python Integer) """ diff --git a/numpy/core/_methods.py b/numpy/core/_methods.py index 1cc65552d824..9c333a7df069 100644 --- a/numpy/core/_methods.py +++ b/numpy/core/_methods.py @@ -52,10 +52,6 @@ def _prod(a, axis=None, dtype=None, out=None, keepdims=False, initial=_NoValue, where=True): return umr_prod(a, axis, dtype, out, keepdims, initial, where) -def _bit_count(a, out=None, where=True, casting='same_kind', - order='K', dtype=None, subok=True): - return umr_bit_count(a, dtype) - def _any(a, axis=None, dtype=None, out=None, keepdims=False, *, where=True): # Parsing keyword arguments is currently fairly slow, so avoid it for now if where is True: @@ -293,3 +289,8 @@ def _dump(self, file, protocol=2): def _dumps(self, protocol=2): return pickle.dumps(self, protocol=protocol) + +def _bit_count(a, out=None, *, where=True, casting='same_kind', + order='K', dtype=None, subok=True): + return umr_bit_count(a, out, where=where, casting=casting, + order=order, dtype=dtype, subok=subok) diff --git a/numpy/core/include/numpy/npy_math.h b/numpy/core/include/numpy/npy_math.h index 53b2ff52cb29..8a8e6a4ce371 100644 --- a/numpy/core/include/numpy/npy_math.h +++ b/numpy/core/include/numpy/npy_math.h @@ -113,10 +113,10 @@ NPY_INLINE static float __npy_nzerof(void) /* Magic binary numbers used by popcount * For type T, the magic numbers are computed as follows: - * Magic[0]: 0b101010101010101... = (T)~(T)0/3 - * Magic[1]: 0b11001100110011... = (T)~(T)0/15 * 3 - * Magic[2]: 0b111100001111... = (T)~(T)0/255 * 15 - * Magic[3]: 0b100000001... = (T)~(T)0/255 + * Magic[0]: 0b01 01 01 01 01 01... = (T)~(T)0/3 + * Magic[1]: 0b0011 0011 0011... = (T)~(T)0/15 * 3 + * Magic[2]: 0b00001111 00001111... = (T)~(T)0/255 * 15 + * Magic[3]: 0b00000001 00000001... = (T)~(T)0/255 */ static const npy_uint8 MAGIC8[] = {0x55, 0x33, 0x0F, 0x01}; static const npy_uint16 MAGIC16[] = {0x5555, 0x3333, 0x0F0F, 0x0101}; diff --git a/numpy/core/src/npymath/npy_math_internal.h.src b/numpy/core/src/npymath/npy_math_internal.h.src index 1552a0e44bb9..172af9bad299 100644 --- a/numpy/core/src/npymath/npy_math_internal.h.src +++ b/numpy/core/src/npymath/npy_math_internal.h.src @@ -854,15 +854,13 @@ npy_rshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b) * #STYPE = BYTE, SHORT, INT, LONG, LONGLONG# * #c = hh, h, , l, ll# */ -#undef MAGIC_ARRAY -#undef WIN_POPCOUNT +#undef TO_BITS_LEN #if 0 /**begin repeat1 * #len = 8, 16, 32, 64# */ #elif NPY_BITSOF_@STYPE@ == @len@ - #define MAGIC_ARRAY MAGIC@len@ - #define WIN_POPCOUNT __popcnt@len@ + #define TO_BITS_LEN(X) X##@len@ /**end repeat1**/ #endif @@ -872,22 +870,22 @@ npy_popcountu@c@(npy_@type@ a) #if ((defined(__clang__) || defined(__GNUC__))) && NPY_BITSOF_@STYPE@ >= 32 return __builtin_popcount@c@(a); #elif defined(_MSC_VER) && NPY_BITSOF_@STYPE@ >= 16 - return WIN_POPCOUNT(a); + return TO_BITS_LEN(__popcnt)(a); #endif /* Counting bits set, in parallel * Based on: http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel * * Generic Algorithm for type T: - * v = v - ((v >> 1) & (T)~(T)0/3); - * v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3); - * v = (v + (v >> 4)) & (T)~(T)0/255*15; - * c = (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * CHAR_BIT; + * a = a - ((a >> 1) & (T)~(T)0/3); + * a = (a & (T)~(T)0/15*3) + ((a >> 2) & (T)~(T)0/15*3); + * a = (a + (a >> 4)) & (T)~(T)0/255*15; + * c = (T)(a * ((T)~(T)0/255)) >> (sizeof(T) - 1) * CHAR_BIT; */ - a = a - ((a >> 1) & (npy_@type@) MAGIC_ARRAY[0]); - a = ((a & (npy_@type@) MAGIC_ARRAY[1])) + ((a >> 2) & (npy_@type@) MAGIC_ARRAY[1]); - a = (a + (a >> 4)) & (npy_@type@) MAGIC_ARRAY[2]; - return (npy_@type@) (a * (npy_@type@) MAGIC_ARRAY[3]) >> ((NPY_SIZEOF_@STYPE@ - 1) * CHAR_BIT); + a = a - ((a >> 1) & (npy_@type@) TO_BITS_LEN(MAGIC)[0]); + a = ((a & (npy_@type@) TO_BITS_LEN(MAGIC)[1])) + ((a >> 2) & (npy_@type@) TO_BITS_LEN(MAGIC)[1]); + a = (a + (a >> 4)) & (npy_@type@) TO_BITS_LEN(MAGIC)[2]; + return (npy_@type@) (a * (npy_@type@) TO_BITS_LEN(MAGIC)[3]) >> ((NPY_SIZEOF_@STYPE@ - 1) * CHAR_BIT); } /**end repeat**/ diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index 0694416f8155..e2f2a256a849 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -1760,8 +1760,9 @@ def test_reduce(self): class TestBitwiseUFuncs: _all_ints_bits = [ - np.dtype(c).itemsize * 8 for c in np.typecodes["AllInteger"]] - bitwise_types = [np.dtype(c) for c in '?' + np.typecodes["AllInteger"] + 'O'] + np.dtype(c).itemsize * 8 for c in np.typecodes["AllInteger"]] + bitwise_types = [ + np.dtype(c) for c in '?' + np.typecodes["AllInteger"] + 'O'] bitwise_bits = [ 2, # boolean type *_all_ints_bits, # All integers