From 14735d4fa04dd210fd87e7e997942c936e876a30 Mon Sep 17 00:00:00 2001
From: Ganesh Kathiresan <ganesh3597@gmail.com>
Date: Sat, 26 Jun 2021 10:50:08 +0530
Subject: [PATCH] ENH: Added countbits (popcount)

ENH, DOC: Added countbits (popcount)

ENH: Popcount implementation

ENH: Add popcount to umath

ENH: Added countbits (popcount) to umath `__all__`

ENH: Refined popcount logic

DOC: Added `bit_count`

Co-authored-by: Eric Wieser <wieser.eric@gmail.com>

MAINT: Renamed `countbits` to `bit_count`

MAINT: Fixed 4 1s magic number

DOC: Added `popcount` to docstring

ENH: Added bit_count annotations

ENH: Added GNU/CLANG popcount

DOC: Added `popcount` language example

ENH, BUG: Moved `bitcount` to npy_math.h as `popcount` | Fixed final right shift

ENH: Enable `popcount` for signed

TST: Tests for `bit_count`

BUG, DOC: (BUG) Added missing typecast causing an unwanted upcast
          (DOC) Added more details on `popcount` implementation

MAINT, BUG: (MAINT) Refined `popcount` TC to use typecode
            (BUG) Fixed ufunc.ntypes to include signed ints

ENH: Added windows builtin support

ENH: Added `popcount` implementation for big python ints natively
[1/2] `popcount` object loop changes

ENH: Object loop for `bit_count`
[2/2] `popcount` object loop changes

TST: Refined `bit_count` tests and added object type

ENH: Added `bit_count` to `np.int*`

DOC: Added `np.bit_count` (#19355)

MAINT: Various linting and minor fixes:
1. Fixed passing all args to _internals umath bitcount.
   Note: We use kwargs here that might hinder performance
2. Fixed linting errors.
3. Improved verbosity of logs
4. Made a generic TO_BITS_LEN macro to accommodate more length based
   functions in future

BENCH: Added bit_count (popcount)

MAINT: Style nits | Added signed case

DOC, MAINT: Improved example

ENH: Added annotations for bit_count

TST: Added annotations tests for bit_count

MAINT: Fixed linting errors

MAINT: Moved Magic constants to npy_math_internal

MAINT: Remove python implementation | Added 3.10 check to tests

DOC: Added abs value usage to doc

MAINT: Resolved merge conflicts
---
 benchmarks/benchmarks/bench_ufunc.py          |  2 +-
 .../upcoming_changes/19355.new_feature.rst    | 12 +++++++
 doc/source/reference/routines.math.rst        |  2 ++
 numpy/__init__.pyi                            | 23 +++++++++++++
 numpy/core/_methods.py                        |  6 ++++
 numpy/core/code_generators/generate_umath.py  |  7 ++++
 .../core/code_generators/ufunc_docstrings.py  | 34 +++++++++++++++++++
 numpy/core/src/multiarray/methods.c           | 10 +++++-
 numpy/core/src/multiarray/scalartypes.c.src   |  5 ++-
 .../core/src/npymath/npy_math_internal.h.src  |  1 -
 numpy/core/src/umath/funcs.inc.src            | 15 ++++++++
 numpy/core/src/umath/loops.c.src              | 10 ++++++
 numpy/core/src/umath/loops.h.src              |  3 ++
 numpy/core/tests/test_umath.py                | 34 ++++++++++++++++++-
 numpy/core/umath.py                           |  2 +-
 numpy/matrixlib/tests/test_defmatrix.py       |  2 +-
 numpy/typing/tests/data/reveal/ufuncs.pyi     | 12 +++++++
 17 files changed, 173 insertions(+), 7 deletions(-)
 create mode 100644 doc/release/upcoming_changes/19355.new_feature.rst

diff --git a/benchmarks/benchmarks/bench_ufunc.py b/benchmarks/benchmarks/bench_ufunc.py
index f3a600a3279a..69c7c6af1df8 100644
--- a/benchmarks/benchmarks/bench_ufunc.py
+++ b/benchmarks/benchmarks/bench_ufunc.py
@@ -7,7 +7,7 @@
 
 
 ufuncs = ['abs', 'absolute', 'add', 'arccos', 'arccosh', 'arcsin', 'arcsinh',
-          'arctan', 'arctan2', 'arctanh', 'bitwise_and', 'bitwise_not',
+          'arctan', 'arctan2', 'arctanh', 'bit_count', 'bitwise_and', 'bitwise_not',
           'bitwise_or', 'bitwise_xor', 'cbrt', 'ceil', 'conj', 'conjugate',
           'copysign', 'cos', 'cosh', 'deg2rad', 'degrees', 'divide', 'divmod',
           'equal', 'exp', 'exp2', 'expm1', 'fabs', 'float_power', 'floor',
diff --git a/doc/release/upcoming_changes/19355.new_feature.rst b/doc/release/upcoming_changes/19355.new_feature.rst
new file mode 100644
index 000000000000..b6b458679ba2
--- /dev/null
+++ b/doc/release/upcoming_changes/19355.new_feature.rst
@@ -0,0 +1,12 @@
+`np.bit_count` to compute the number of 1-bits in an integer
+------------------------------------------------------------
+
+This new function counts the number of 1-bits in a number.
+It works on all the numpy integer types, as well as on Python's
+builtin arbitrary-precision `int` (via ``int.bit_count``, Python 3.10+).
+
+.. code-block:: python
+
+    >>> a = np.array([2**i - 1 for i in range(16)])
+    >>> np.bit_count(a)
+    array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
diff --git a/doc/source/reference/routines.math.rst b/doc/source/reference/routines.math.rst
index 6d80d5ad73fe..0762a601ea76 100644
--- a/doc/source/reference/routines.math.rst
+++ b/doc/source/reference/routines.math.rst
@@ -181,3 +181,5 @@ Miscellaneous
    real_if_close
 
    interp
+
+   bit_count
diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi
index c1a15ea8ee44..5bb50a2809ad 100644
--- a/numpy/__init__.pyi
+++ b/numpy/__init__.pyi
@@ -2482,6 +2482,17 @@ class ndarray(_ArrayOrScalarCommon, Generic[_ShapeType, _DType_co]):
     def __dlpack__(self: NDArray[number[Any]], *, stream: None = ...) -> _PyCapsule: ...
     def __dlpack_device__(self) -> tuple[int, L[0]]: ...
 
+    def bit_count(
+        self,
+        out: None | NDArray[Any] = ...,
+        *,
+        where: _ArrayLikeBool_co = ...,
+        casting: _CastingKind = ...,
+        order: _OrderKACF = ...,
+        dtype: DTypeLike = ...,
+        subok: bool = ...,
+    ) -> NDArray[Any]: ...
+
     # Keep `dtype` at the bottom to avoid name conflicts with `np.dtype`
     @property
     def dtype(self) -> _DType_co: ...
@@ -2626,6 +2637,17 @@ class generic(_ArrayOrScalarCommon):
         self: _ScalarType, *shape: SupportsIndex, order: _OrderACF = ...
     ) -> NDArray[_ScalarType]: ...
 
+    def bit_count(
+        self,
+        out: None | NDArray[Any] = ...,
+        *,
+        where: _ArrayLikeBool_co = ...,
+        casting: _CastingKind = ...,
+        order: _OrderKACF = ...,
+        dtype: DTypeLike = ...,
+        subok: bool = ...,
+    ) -> Any: ...
+
     def squeeze(
         self: _ScalarType, axis: None | L[0] | tuple[()] = ...
     ) -> _ScalarType: ...
@@ -3146,6 +3168,7 @@ arcsinh: _UFunc_Nin1_Nout1[L['arcsinh'], L[8], None]
 arctan2: _UFunc_Nin2_Nout1[L['arctan2'], L[5], None]
 arctan: _UFunc_Nin1_Nout1[L['arctan'], L[8], None]
 arctanh: _UFunc_Nin1_Nout1[L['arctanh'], L[8], None]
+bit_count: _UFunc_Nin1_Nout1[L['bit_count'], L[11], None]
 bitwise_and: _UFunc_Nin2_Nout1[L['bitwise_and'], L[12], L[-1]]
 bitwise_not: _UFunc_Nin1_Nout1[L['invert'], L[12], None]
 bitwise_or: _UFunc_Nin2_Nout1[L['bitwise_or'], L[12], L[0]]
diff --git a/numpy/core/_methods.py b/numpy/core/_methods.py
index 9675f9822aaa..7f3cd7dba0f6 100644
--- a/numpy/core/_methods.py
+++ b/numpy/core/_methods.py
@@ -21,6 +21,7 @@
 umr_minimum = um.minimum.reduce
 umr_sum = um.add.reduce
 umr_prod = um.multiply.reduce
+umr_bit_count = um.bit_count
 umr_any = um.logical_or.reduce
 umr_all = um.logical_and.reduce
 
@@ -236,3 +237,8 @@ def _dump(self, file, protocol=2):
 
 def _dumps(self, protocol=2):
     return pickle.dumps(self, protocol=protocol)
+
+def _bit_count(a, out=None, *, where=True, casting='same_kind',
+          order='K', dtype=None, subok=True):
+    return umr_bit_count(a, out, where=where, casting=casting,
+            order=order, dtype=dtype, subok=subok)
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index 9cb943ac19fe..860da08ee5ed 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -1122,6 +1122,13 @@ def english_upper(s):
           TD(ints),
           TD('O', f='npy_ObjectLCM'),
           ),
+'bit_count':
+    Ufunc(1, 1, None,
+          docstrings.get('numpy.core.umath.bit_count'),
+          None,
+          TD(ints),
+          TD('O', f='npy_ObjectPopCount'),
+          ),
 'matmul' :
     Ufunc(2, 1, None,
           docstrings.get('numpy.core.umath.matmul'),
diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py
index 437901c19470..dff840866bce 100644
--- a/numpy/core/code_generators/ufunc_docstrings.py
+++ b/numpy/core/code_generators/ufunc_docstrings.py
@@ -4214,3 +4214,37 @@ def add_newdoc(place, name, doc):
     array([ 0, 20, 20, 60, 20, 20])
 
     """)
+
+add_newdoc('numpy.core.umath', 'bit_count',
+    """
+    Computes the number of 1-bits in the absolute value of ``x``.
+    Analogous to the builtin `int.bit_count` or ``popcount`` in C++.
+
+    Parameters
+    ----------
+    x : array_like, int
+        Input array of signed or unsigned integers.
+    $PARAMS
+
+    Returns
+    -------
+    y : ndarray
+        The corresponding number of 1-bits in the input.
+        $OUT_SCALAR_1
+
+    References
+    ----------
+    .. [1] https://stackoverflow.com/a/109025/5671364
+
+    .. [2] Wikipedia, "Hamming weight",
+           https://en.wikipedia.org/wiki/Hamming_weight
+
+    Examples
+    --------
+    >>> np.bit_count(1023)
+    10
+    >>> a = np.array([2**i - 1 for i in range(16)])
+    >>> np.bit_count(a)
+    array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
+
+    """)
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index fbb2ecd1090f..9290e20f193f 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -370,6 +370,12 @@ array_ptp(PyArrayObject *self,
     NPY_FORWARD_NDARRAY_METHOD("_ptp");
 }
 
+static PyObject *
+array_bit_count(PyArrayObject *self, PyObject *args, PyObject *kwds)
+{
+    NPY_FORWARD_NDARRAY_METHOD("_bit_count");
+}
+
 
 static PyObject *
 array_swapaxes(PyArrayObject *self, PyObject *args)
@@ -3131,9 +3137,11 @@ NPY_NO_EXPORT PyMethodDef array_methods[] = {
     {"__dlpack__",
         (PyCFunction)array_dlpack,
         METH_FASTCALL | METH_KEYWORDS, NULL},
-
     {"__dlpack_device__",
         (PyCFunction)array_dlpack_device,
         METH_NOARGS, NULL},
+    {"bit_count",
+        (PyCFunction)array_bit_count,
+        METH_VARARGS | METH_KEYWORDS, NULL},
     {NULL, NULL, 0, NULL}           /* sentinel */
 };
diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src
index 5ebca1113d7c..9cce25113626 100644
--- a/numpy/core/src/multiarray/scalartypes.c.src
+++ b/numpy/core/src/multiarray/scalartypes.c.src
@@ -1777,7 +1777,7 @@ gentype_byteswap(PyObject *self, PyObject *args, PyObject *kwds)
  *         std, var, sum, cumsum, prod, cumprod, compress, sort, argsort,
  *         round, argmax, argmin, max, min, ptp, any, all, astype, resize,
  *         reshape, choose, tostring, tobytes, copy, searchsorted, view,
- *         flatten, ravel, squeeze#
+ *         flatten, ravel, squeeze, bit_count#
  */
 static PyObject *
 gentype_@name@(PyObject *self, PyObject *args, PyObject *kwds)
@@ -2389,6 +2389,9 @@ static PyMethodDef gentype_methods[] = {
     {"sum",
         (PyCFunction)gentype_sum,
         METH_VARARGS | METH_KEYWORDS, NULL},
+    {"bit_count",
+        (PyCFunction)gentype_bit_count,
+        METH_VARARGS | METH_KEYWORDS, NULL},
     {"cumsum",
         (PyCFunction)gentype_cumsum,
         METH_VARARGS | METH_KEYWORDS, NULL},
diff --git a/numpy/core/src/npymath/npy_math_internal.h.src b/numpy/core/src/npymath/npy_math_internal.h.src
index c7df5e255ca1..4d428f13718b 100644
--- a/numpy/core/src/npymath/npy_math_internal.h.src
+++ b/numpy/core/src/npymath/npy_math_internal.h.src
@@ -678,7 +678,6 @@ npy_rshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b)
 /**end repeat1**/
 /**end repeat**/
 
-
 #define __popcnt32 __popcnt
 /**begin repeat
  *
diff --git a/numpy/core/src/umath/funcs.inc.src b/numpy/core/src/umath/funcs.inc.src
index efd730ccc65d..a49a92f1f3d9 100644
--- a/numpy/core/src/umath/funcs.inc.src
+++ b/numpy/core/src/umath/funcs.inc.src
@@ -267,6 +267,21 @@ npy_ObjectClip(PyObject *arr, PyObject *min, PyObject *max) {
     return o;
 }
 
+static PyObject *
+npy_ObjectPopCount(PyObject *obj) {
+    /*
+     * Call the object's own `bit_count` method; NULL (error set) on failure.
+     */
+    PyObject *result;
+    PyObject *method = PyObject_GetAttrString(obj, "bit_count");
+    if (method == NULL) {
+        return NULL;
+    }
+    result = PyObject_CallFunction(method, NULL);
+    Py_DECREF(method);  /* release the new reference from GetAttrString */
+    return result;
+}
+
 /*
  *****************************************************************************
  **                           COMPLEX FUNCTIONS                             **
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 139b8c2a48d2..dd28cfc9d4fa 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -438,6 +438,16 @@ NPY_NO_EXPORT void
         *((@type@ *)op1) = 1;
     }
 }
+
+NPY_NO_EXPORT void
+@TYPE@_bit_count(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
+{
+    UNARY_LOOP {
+        const @type@ in1 = *(@type@ *)ip1;
+        *((@type@ *)op1) = npy_popcount@c@(in1);
+    }
+}
+
 /**begin repeat1
  * Arithmetic
  * #kind = add, subtract, multiply, bitwise_and, bitwise_or, bitwise_xor#
diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src
index cce73aff8504..0776000b36af 100644
--- a/numpy/core/src/umath/loops.h.src
+++ b/numpy/core/src/umath/loops.h.src
@@ -208,6 +208,9 @@ NPY_NO_EXPORT void
 @S@@TYPE@_lcm(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
 /**end repeat2**/
 
+NPY_NO_EXPORT void
+@S@@TYPE@_bit_count(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
+
 /**end repeat1**/
 /**end repeat**/
 
diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
index 79869c7c11a1..ec3a6ba3a566 100644
--- a/numpy/core/tests/test_umath.py
+++ b/numpy/core/tests/test_umath.py
@@ -2610,7 +2610,15 @@ def test_reduce(self):
 
 class TestBitwiseUFuncs:
 
-    bitwise_types = [np.dtype(c) for c in '?' + 'bBhHiIlLqQ' + 'O']
+    _all_ints_bits = [
+        np.dtype(c).itemsize * 8 for c in np.typecodes["AllInteger"]]
+    bitwise_types = [
+        np.dtype(c) for c in '?' + np.typecodes["AllInteger"] + 'O']
+    bitwise_bits = [
+        2,  # boolean type
+        *_all_ints_bits,  # All integers
+        max(_all_ints_bits) + 1,  # Object_ type
+    ]
 
     def test_values(self):
         for dt in self.bitwise_types:
@@ -2691,6 +2699,30 @@ def test_reduction(self):
             btype = np.array([True], dtype=object)
             assert_(type(f.reduce(btype)) is bool, msg)
 
+    @pytest.mark.parametrize("input_dtype_obj, bitsize",
+            zip(bitwise_types, bitwise_bits))
+    def test_popcount(self, input_dtype_obj, bitsize):
+        input_dtype = input_dtype_obj.type
+
+        # bit_count is only in-built in 3.10+
+        if sys.version_info < (3, 10) and input_dtype == np.object_:
+            pytest.skip()
+
+        for i in range(1, bitsize):
+            num = 2**i - 1
+            msg = f"bit_count for {num}"
+            assert i == np.bit_count(input_dtype(num)), msg
+            if np.issubdtype(
+                input_dtype, np.signedinteger) or input_dtype == np.object_:
+                assert i == np.bit_count(input_dtype(-num)), msg
+
+        a = np.array([2**i-1 for i in range(1, bitsize)], dtype=input_dtype)
+        bit_count_a = np.bit_count(a)
+        expected = np.arange(1, bitsize, dtype=input_dtype)
+
+        msg = f"array bit_count for {input_dtype}"
+        assert all(bit_count_a == expected), msg
+
 
 class TestInt:
     def test_logical_not(self):
diff --git a/numpy/core/umath.py b/numpy/core/umath.py
index 757bf1e59c9d..088266c5e6d9 100644
--- a/numpy/core/umath.py
+++ b/numpy/core/umath.py
@@ -19,7 +19,7 @@
     'absolute', 'add',
     'arccos', 'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh',
     'bitwise_and', 'bitwise_or', 'bitwise_xor', 'cbrt', 'ceil', 'conj',
-    'conjugate', 'copysign', 'cos', 'cosh', 'deg2rad', 'degrees', 'divide',
+    'conjugate', 'copysign', 'cos', 'cosh', 'bit_count', 'deg2rad', 'degrees', 'divide',
     'divmod', 'e', 'equal', 'euler_gamma', 'exp', 'exp2', 'expm1', 'fabs',
     'floor', 'floor_divide', 'float_power', 'fmax', 'fmin', 'fmod', 'frexp',
     'frompyfunc', 'gcd', 'greater', 'greater_equal', 'heaviside',
diff --git a/numpy/matrixlib/tests/test_defmatrix.py b/numpy/matrixlib/tests/test_defmatrix.py
index 8d5b1524b66d..c97dbb835460 100644
--- a/numpy/matrixlib/tests/test_defmatrix.py
+++ b/numpy/matrixlib/tests/test_defmatrix.py
@@ -285,7 +285,7 @@ def test_instance_methods(self):
             'partition', 'argpartition',
             'take', 'tofile', 'tolist', 'tostring', 'tobytes', 'all', 'any',
             'sum', 'argmax', 'argmin', 'min', 'max', 'mean', 'var', 'ptp',
-            'prod', 'std', 'ctypes', 'itemset',
+            'prod', 'std', 'ctypes', 'itemset', 'bit_count',
             ]
         for attrib in dir(a):
             if attrib.startswith('_') or attrib in excluded_methods:
diff --git a/numpy/typing/tests/data/reveal/ufuncs.pyi b/numpy/typing/tests/data/reveal/ufuncs.pyi
index 5f7d99efd12d..07039d2d4575 100644
--- a/numpy/typing/tests/data/reveal/ufuncs.pyi
+++ b/numpy/typing/tests/data/reveal/ufuncs.pyi
@@ -9,6 +9,7 @@ if sys.version_info >= (3, 11):
 else:
     from typing_extensions import assert_type
 
+i8: np.int64
 f8: np.float64
 AR_f8: npt.NDArray[np.float64]
 AR_i8: npt.NDArray[np.int64]
@@ -74,3 +75,14 @@ assert_type(np.matmul.signature, Literal["(n?,k),(k,m?)->(n?,m?)"])
 assert_type(np.matmul.identity, None)
 assert_type(np.matmul(AR_f8, AR_f8), Any)
 assert_type(np.matmul(AR_f8, AR_f8, axes=[(0, 1), (0, 1), (0, 1)]), Any)
+
+assert_type(np.bit_count.__name__, Literal['bit_count'])
+assert_type(np.bit_count.ntypes, Literal[11])
+assert_type(np.bit_count.identity, None)
+assert_type(np.bit_count.nin, Literal[1])
+assert_type(np.bit_count.nout, Literal[1])
+assert_type(np.bit_count.nargs, Literal[2])
+assert_type(np.bit_count.signature, None)
+assert_type(np.bit_count.identity, None)
+assert_type(np.bit_count(i8), Any)
+assert_type(np.bit_count(AR_i8), Any)