From 1070cf172800363f6da39d72d67c9af43663a939 Mon Sep 17 00:00:00 2001
From: Ganesh Kathiresan <ganesh3597@gmail.com>
Date: Sat, 14 Aug 2021 19:06:29 +0530
Subject: [PATCH 01/16] ENH: Implementation of bit_count (popcount)

---
 numpy/core/include/numpy/npy_math.h           | 11 +++
 .../core/src/npymath/npy_math_internal.h.src  | 71 +++++++++++++++++++
 2 files changed, 82 insertions(+)

diff --git a/numpy/core/include/numpy/npy_math.h b/numpy/core/include/numpy/npy_math.h
index b1e6363e3bed..ec605603cd74 100644
--- a/numpy/core/include/numpy/npy_math.h
+++ b/numpy/core/include/numpy/npy_math.h
@@ -150,6 +150,17 @@ NPY_INPLACE npy_long npy_lshiftl(npy_long a, npy_long b);
 NPY_INPLACE npy_longlong npy_rshiftll(npy_longlong a, npy_longlong b);
 NPY_INPLACE npy_longlong npy_lshiftll(npy_longlong a, npy_longlong b);
 
+NPY_INPLACE npy_ubyte npy_popcountuhh(npy_ubyte a);
+NPY_INPLACE npy_ushort npy_popcountuh(npy_ushort a);
+NPY_INPLACE npy_uint npy_popcountu(npy_uint a);
+NPY_INPLACE npy_ulong npy_popcountul(npy_ulong a);
+NPY_INPLACE npy_ulonglong npy_popcountull(npy_ulonglong a);
+NPY_INPLACE npy_byte npy_popcounthh(npy_byte a);
+NPY_INPLACE npy_short npy_popcounth(npy_short a);
+NPY_INPLACE npy_int npy_popcount(npy_int a);
+NPY_INPLACE npy_long npy_popcountl(npy_long a);
+NPY_INPLACE npy_longlong npy_popcountll(npy_longlong a);
+
 /*
  * C99 double math funcs
  */
diff --git a/numpy/core/src/npymath/npy_math_internal.h.src b/numpy/core/src/npymath/npy_math_internal.h.src
index cae84befe0d6..dfd015f3b7fc 100644
--- a/numpy/core/src/npymath/npy_math_internal.h.src
+++ b/numpy/core/src/npymath/npy_math_internal.h.src
@@ -55,6 +55,30 @@
  */
 #include "npy_math_private.h"
 
+/* Magic binary numbers used by bit_count
+ * For type T, the magic numbers are computed as follows:
+ * Magic[0]: 01 01 01 01 01 01... = (T)~(T)0/3
+ * Magic[1]: 0011 0011 0011...    = (T)~(T)0/15  * 3
+ * Magic[2]: 00001111 00001111... = (T)~(T)0/255 * 15
+ * Magic[3]: 00000001 00000001... = (T)~(T)0/255
+ *
+ * Counting bits set, in parallel
+ * Based on: http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+ *
+ * Generic Algorithm for type T:
+ * a = a - ((a >> 1) & (T)~(T)0/3);
+ * a = (a & (T)~(T)0/15*3) + ((a >> 2) & (T)~(T)0/15*3);
+ * a = (a + (a >> 4)) & (T)~(T)0/255*15;
+ * c = (T)(a * ((T)~(T)0/255)) >> (sizeof(T) - 1) * CHAR_BIT;
+*/
+
+ */
+static const npy_uint8  MAGIC8[]  = {0x55,               0x33,               0x0F,               0x01};
+static const npy_uint16 MAGIC16[] = {0x5555,             0x3333,             0x0F0F,             0x0101};
+static const npy_uint32 MAGIC32[] = {0x55555555,         0x33333333,         0x0F0F0F0F,         0x01010101};
+static const npy_uint64 MAGIC64[] = {0x5555555555555555, 0x3333333333333333, 0x0F0F0F0F0F0F0F0F, 0x0101010101010101};
+
+
 /*
  *****************************************************************************
  **                     BASIC MATH FUNCTIONS                                **
@@ -814,3 +838,50 @@ npy_rshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b)
 }
 /**end repeat1**/
 /**end repeat**/
+
+
+#define __popcnt32 __popcnt
+/**begin repeat
+ *
+ * #type  = ubyte, ushort, uint, ulong, ulonglong#
+ * #STYPE = BYTE,  SHORT,  INT,  LONG,  LONGLONG#
+ * #c     = hh,    h,      ,     l,     ll#
+ */
+#undef TO_BITS_LEN
+#if 0
+/**begin repeat1
+ * #len = 8, 16, 32, 64#
+ */
+#elif NPY_BITSOF_@STYPE@ == @len@
+    #define TO_BITS_LEN(X) X##@len@
+/**end repeat1**/
+#endif
+
+NPY_INPLACE npy_@type@
+npy_popcountu@c@(npy_@type@ a)
+{
+#if ((defined(__clang__) || defined(__GNUC__))) && NPY_BITSOF_@STYPE@ >= 32
+    return __builtin_popcount@c@(a);
+#elif defined(_MSC_VER) && NPY_BITSOF_@STYPE@ >= 16
+    return TO_BITS_LEN(__popcnt)(a);
+#else
+    a = a - ((a >> 1) & (npy_@type@) TO_BITS_LEN(MAGIC)[0]);
+    a = ((a & (npy_@type@) TO_BITS_LEN(MAGIC)[1])) + ((a >> 2) & (npy_@type@) TO_BITS_LEN(MAGIC)[1]);
+    a = (a + (a >> 4)) & (npy_@type@) TO_BITS_LEN(MAGIC)[2];
+    return (npy_@type@) (a * (npy_@type@) TO_BITS_LEN(MAGIC)[3]) >> ((NPY_SIZEOF_@STYPE@ - 1) * CHAR_BIT);
+#endif
+}
+/**end repeat**/
+
+/**begin repeat
+ *
+ * #type = byte, short, int, long, longlong#
+ * #c    = hh,   h,     ,    l,    ll#
+ */
+NPY_INPLACE npy_@type@
+npy_popcount@c@(npy_@type@ a)
+{
+    /* Return popcount of abs(a) */
+    return npy_popcountu@c@(a < 0 ? 0 - (npy_u@type@)a : (npy_u@type@)a);
+}
+/**end repeat**/

From 33f17a241fd89b67be9d73707a6aa4d3c02347a6 Mon Sep 17 00:00:00 2001
From: Ganesh Kathiresan <ganesh3597@gmail.com>
Date: Sat, 14 Aug 2021 19:07:40 +0530
Subject: [PATCH 02/16] ENH: Add bit_count to integer scalar type

---
 numpy/core/src/multiarray/scalartypes.c.src | 43 +++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src
index 93cc9666e19c..d6a4c70950fb 100644
--- a/numpy/core/src/multiarray/scalartypes.c.src
+++ b/numpy/core/src/multiarray/scalartypes.c.src
@@ -208,6 +208,25 @@ gentype_multiply(PyObject *m1, PyObject *m2)
     return PyArray_Type.tp_as_number->nb_multiply(m1, m2);
 }
 
+/**begin repeat
+ * #TYPE    = BYTE, UBYTE, SHORT, USHORT, INT, UINT,
+ *            LONG, ULONG, LONGLONG, ULONGLONG#
+ * #type    = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
+ *            npy_long, npy_ulong, npy_longlong, npy_ulonglong#
+ * #c       = hh, uhh, h, uh,, u, l, ul, ll, ull#
+ * #convert = Long*8, LongLong*2#
+ */
+static PyObject *
+@type@_bit_count(PyObject *self)
+{
+    @type@ scalar = (@type@) PyLong_As@convert@(self);
+    @type@ count = npy_popcount@c@(scalar);
+    PyObject *result = PyLong_From@convert@(count);
+
+    return result;
+}
+/**end repeat**/
+
 /**begin repeat
  *
  * #name = positive, negative, absolute, invert, int, float#
@@ -2318,6 +2337,19 @@ static PyMethodDef @name@type_methods[] = {
 };
 /**end repeat**/
 
+/**begin repeat
+ * #name = byte, ubyte, short, ushort, int, uint,
+ *         long, ulong, longlong, ulonglong#
+ */
+static PyMethodDef @name@type_methods[] = {
+    {"bit_count",
+        (PyCFunction)npy_@name@_bit_count,
+        METH_NOARGS, NULL},
+    {NULL, NULL, 0, NULL} /* sentinel */
+};
+/**end repeat**/
+
+
 /************* As_mapping functions for void array scalar ************/
 
 static Py_ssize_t
@@ -4091,6 +4123,17 @@ initialize_numeric_types(void)
 
     /**end repeat**/
 
+    /**begin repeat
+     * #name = byte, short, int, long, longlong,
+     *         ubyte, ushort, uint, ulong, ulonglong#
+     * #Name = Byte, Short, Int, Long, LongLong,
+     *         UByte, UShort, UInt, ULong, ULongLong#
+     */
+    /* Attach the integer method table so these scalars expose bit_count. */
+    Py@Name@ArrType_Type.tp_methods = @name@type_methods;
+
+    /**end repeat**/
+
     /**begin repeat
      * #name = half, float, double, longdouble#
      * #Name = Half, Float, Double, LongDouble#

From 3ad46833400d8589cb346451c1228e65123eb278 Mon Sep 17 00:00:00 2001
From: Ganesh Kathiresan <ganesh3597@gmail.com>
Date: Sat, 14 Aug 2021 19:08:14 +0530
Subject: [PATCH 03/16] ENH: Annotations for bit_count

---
 numpy/__init__.pyi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi
index c78d48cc6d2f..820325889afd 100644
--- a/numpy/__init__.pyi
+++ b/numpy/__init__.pyi
@@ -2985,6 +2985,7 @@ class integer(number[_NBit1]):  # type: ignore
     ) -> int: ...
     def tolist(self) -> int: ...
     def is_integer(self) -> L[True]: ...
+    def bit_count(self: _ScalarType) -> int
     def __index__(self) -> int: ...
     __truediv__: _IntTrueDiv[_NBit1]
     __rtruediv__: _IntTrueDiv[_NBit1]

From cf6e777e15e93555ffea9c10b04af483cb1939b7 Mon Sep 17 00:00:00 2001
From: Ganesh Kathiresan <ganesh3597@gmail.com>
Date: Sat, 14 Aug 2021 19:08:46 +0530
Subject: [PATCH 04/16] ENH, WIP: Documentation for bit_count

---
 doc/source/reference/routines.math.rst |  2 ++
 numpy/core/_add_newdocs_scalars.py     | 16 ++++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/doc/source/reference/routines.math.rst b/doc/source/reference/routines.math.rst
index 3c2f968306e1..13e3ffd3dbf5 100644
--- a/doc/source/reference/routines.math.rst
+++ b/doc/source/reference/routines.math.rst
@@ -169,3 +169,5 @@ Miscellaneous
    real_if_close
 
    interp
+
+   bit_count
diff --git a/numpy/core/_add_newdocs_scalars.py b/numpy/core/_add_newdocs_scalars.py
index 8773d6c9631d..10eea98a4d79 100644
--- a/numpy/core/_add_newdocs_scalars.py
+++ b/numpy/core/_add_newdocs_scalars.py
@@ -290,3 +290,19 @@ def add_newdoc_for_scalar_type(obj, fixed_aliases, doc):
         >>> np.{float_name}(3.2).is_integer()
         False
         """))
+
+# XXX Neat way to get all names. Same as TODO above.
+for int_name in ('ubyte', 'ushort', 'uint', 'ulonglong',
+                 'byte', 'short', 'int', 'long', 'longlong'):
+    add_newdoc('numpy.core.numerictypes', int_name, ('bit_count',
+        """
+        {int_name}.bit_count() -> int
+
+        Computes the number of 1-bits in the absolute value of the input.
+        Analogous to the builtin `int.bit_count` or ``popcount`` in C++.
+
+        >>> np.{int_name}(1023).bit_count()
+        10
+        >>> np.{int_name}(-1023).bit_count()
+        10
+        """.format(int_name=int_name)))

From ad081a33acfffa2fe21dbdf9b0fffcbc1569ca52 Mon Sep 17 00:00:00 2001
From: Ganesh Kathiresan <ganesh3597@gmail.com>
Date: Sat, 14 Aug 2021 19:12:04 +0530
Subject: [PATCH 05/16] DOC: Added `bit_count` (#19355)

---
 doc/release/upcoming_changes/19355.new_feature.rst | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 doc/release/upcoming_changes/19355.new_feature.rst

diff --git a/doc/release/upcoming_changes/19355.new_feature.rst b/doc/release/upcoming_changes/19355.new_feature.rst
new file mode 100644
index 000000000000..cdeed83a37a9
--- /dev/null
+++ b/doc/release/upcoming_changes/19355.new_feature.rst
@@ -0,0 +1,10 @@
+`bit_count` to compute the number of 1-bits in an integer
+---------------------------------------------------------
+
+This new function counts the number of 1-bits in a number.
+This works on all the numpy integer types.
+
+.. code-block:: python
+
+    >>> a = np.int32(1023).bit_count()
+    10

From f97ab498f31f923dafcd3bf8d5d63ebea02456db Mon Sep 17 00:00:00 2001
From: Ganesh Kathiresan <ganesh3597@gmail.com>
Date: Sat, 14 Aug 2021 19:23:25 +0530
Subject: [PATCH 06/16] BUG: Fixed windows 32 bit issue with no `__popcnt64`

---
 numpy/core/src/npymath/npy_math_internal.h.src | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/numpy/core/src/npymath/npy_math_internal.h.src b/numpy/core/src/npymath/npy_math_internal.h.src
index dfd015f3b7fc..4ff87ca6421c 100644
--- a/numpy/core/src/npymath/npy_math_internal.h.src
+++ b/numpy/core/src/npymath/npy_math_internal.h.src
@@ -72,7 +72,6 @@
  * c = (T)(a * ((T)~(T)0/255)) >> (sizeof(T) - 1) * CHAR_BIT;
 */
 
- */
 static const npy_uint8  MAGIC8[]  = {0x55,               0x33,               0x0F,               0x01};
 static const npy_uint16 MAGIC16[] = {0x5555,             0x3333,             0x0F0F,             0x0101};
 static const npy_uint32 MAGIC32[] = {0x55555555,         0x33333333,         0x0F0F0F0F,         0x01010101};
@@ -863,7 +862,10 @@ npy_popcountu@c@(npy_@type@ a)
 #if ((defined(__clang__) || defined(__GNUC__))) && NPY_BITSOF_@STYPE@ >= 32
     return __builtin_popcount@c@(a);
 #elif defined(_MSC_VER) && NPY_BITSOF_@STYPE@ >= 16
-    return TO_BITS_LEN(__popcnt)(a);
+    /* no builtin __popcnt64 for 32 bits, so use our implementation */
+    #if defined(_WIN64) || (defined(_WIN32) && NPY_BITSOF_@STYPE@ != 64)
+        return TO_BITS_LEN(__popcnt)(a);
+    #endif
 #else
     a = a - ((a >> 1) & (npy_@type@) TO_BITS_LEN(MAGIC)[0]);
     a = ((a & (npy_@type@) TO_BITS_LEN(MAGIC)[1])) + ((a >> 2) & (npy_@type@) TO_BITS_LEN(MAGIC)[1]);

From 5514a5617e87c871aff7b701330644726b082f5d Mon Sep 17 00:00:00 2001
From: Ganesh Kathiresan <ganesh3597@gmail.com>
Date: Sat, 28 Aug 2021 17:22:05 +0530
Subject: [PATCH 07/16] DOC: Refined docstring for bit_count

---
 numpy/core/_add_newdocs_scalars.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/numpy/core/_add_newdocs_scalars.py b/numpy/core/_add_newdocs_scalars.py
index 10eea98a4d79..8a462b2fda00 100644
--- a/numpy/core/_add_newdocs_scalars.py
+++ b/numpy/core/_add_newdocs_scalars.py
@@ -291,9 +291,8 @@ def add_newdoc_for_scalar_type(obj, fixed_aliases, doc):
         False
         """))
 
-# XXX Neat way to get all names. Same as TODO above.
-for int_name in ('ubyte', 'ushort', 'uint', 'ulonglong',
-                 'byte', 'short', 'int', 'long', 'longlong'):
+for int_name in ('int8', 'uint8', 'int16', 'uint16', 'int32', 'uint32',
+        'int64', 'uint64', 'int64', 'uint64', 'int64', 'uint64'):
     add_newdoc('numpy.core.numerictypes', int_name, ('bit_count',
         """
         {int_name}.bit_count() -> int

From b2ba7831f3cef6a61fdd2cf0e40d42a9c91d9275 Mon Sep 17 00:00:00 2001
From: Ganesh Kathiresan <ganesh3597@gmail.com>
Date: Sat, 28 Aug 2021 18:08:27 +0530
Subject: [PATCH 08/16] TST: Tests for bit_count

---
 numpy/core/tests/test_scalar_methods.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/numpy/core/tests/test_scalar_methods.py b/numpy/core/tests/test_scalar_methods.py
index 6077c8f7507a..306d7d861624 100644
--- a/numpy/core/tests/test_scalar_methods.py
+++ b/numpy/core/tests/test_scalar_methods.py
@@ -183,3 +183,21 @@ def test_class_getitem_38(cls: Type[np.number]) -> None:
     match = "Type subscription requires python >= 3.9"
     with pytest.raises(TypeError, match=match):
         cls[Any]
+
+
+class TestBitCount:
+    # derived in part from the cpython test "test_bit_count"
+
+    @pytest.mark.parametrize("itype", np.sctypes['int']+np.sctypes['uint'])
+    def test_small(self, itype):
+        for a in range(max(np.iinfo(itype).min, 0), 128):
+            msg = f"Smoke test for {itype}.bit_count({a})"
+            assert itype(a).bit_count() == bin(a).count("1"), msg
+
+    def test_bit_count(self):
+        for exp in [10, 17, 63]:
+            a = 2**exp
+            assert np.uint64(a).bit_count() == 1
+            assert np.uint64(a - 1).bit_count() == exp
+            assert np.uint64(a ^ 63).bit_count() == 7
+            assert np.uint64((a - 1) ^ 510).bit_count() == exp - 8

From 704526393db64e205c275f728928bb80548d4691 Mon Sep 17 00:00:00 2001
From: Ganesh Kathiresan <ganesh3597@gmail.com>
Date: Sun, 29 Aug 2021 11:46:11 +0530
Subject: [PATCH 09/16] ENH, MAINT: Changed return type to uint8_t | Removed
 extra parentheses and fixed typo

---
 numpy/core/include/numpy/npy_math.h           | 20 +++++++++----------
 numpy/core/src/multiarray/scalartypes.c.src   |  6 ++++--
 .../core/src/npymath/npy_math_internal.h.src  |  6 +++---
 numpy/core/tests/test_scalar_methods.py       |  2 +-
 4 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/numpy/core/include/numpy/npy_math.h b/numpy/core/include/numpy/npy_math.h
index ec605603cd74..bead0dc14064 100644
--- a/numpy/core/include/numpy/npy_math.h
+++ b/numpy/core/include/numpy/npy_math.h
@@ -150,16 +150,16 @@ NPY_INPLACE npy_long npy_lshiftl(npy_long a, npy_long b);
 NPY_INPLACE npy_longlong npy_rshiftll(npy_longlong a, npy_longlong b);
 NPY_INPLACE npy_longlong npy_lshiftll(npy_longlong a, npy_longlong b);
 
-NPY_INPLACE npy_ubyte npy_popcountuhh(npy_ubyte a);
-NPY_INPLACE npy_ushort npy_popcountuh(npy_ushort a);
-NPY_INPLACE npy_uint npy_popcountu(npy_uint a);
-NPY_INPLACE npy_ulong npy_popcountul(npy_ulong a);
-NPY_INPLACE npy_ulonglong npy_popcountull(npy_ulonglong a);
-NPY_INPLACE npy_byte npy_popcounthh(npy_byte a);
-NPY_INPLACE npy_short npy_popcounth(npy_short a);
-NPY_INPLACE npy_int npy_popcount(npy_int a);
-NPY_INPLACE npy_long npy_popcountl(npy_long a);
-NPY_INPLACE npy_longlong npy_popcountll(npy_longlong a);
+NPY_INPLACE uint8_t npy_popcountuhh(npy_ubyte a);
+NPY_INPLACE uint8_t npy_popcountuh(npy_ushort a);
+NPY_INPLACE uint8_t npy_popcountu(npy_uint a);
+NPY_INPLACE uint8_t npy_popcountul(npy_ulong a);
+NPY_INPLACE uint8_t npy_popcountull(npy_ulonglong a);
+NPY_INPLACE uint8_t npy_popcounthh(npy_byte a);
+NPY_INPLACE uint8_t npy_popcounth(npy_short a);
+NPY_INPLACE uint8_t npy_popcount(npy_int a);
+NPY_INPLACE uint8_t npy_popcountl(npy_long a);
+NPY_INPLACE uint8_t npy_popcountll(npy_longlong a);
 
 /*
  * C99 double math funcs
diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src
index d6a4c70950fb..c3866b711043 100644
--- a/numpy/core/src/multiarray/scalartypes.c.src
+++ b/numpy/core/src/multiarray/scalartypes.c.src
@@ -214,13 +214,15 @@ gentype_multiply(PyObject *m1, PyObject *m2)
  * #type    = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
  *            npy_long, npy_ulong, npy_longlong, npy_ulonglong#
  * #c       = hh, uhh, h, uh,, u, l, ul, ll, ull#
+ * #Name    = Byte, UByte, Short, UShort, Int, UInt,
+ *            Long, ULong, LongLong, ULongLong#
  * #convert = Long*8, LongLong*2#
  */
 static PyObject *
 @type@_bit_count(PyObject *self)
 {
-    @type@ scalar = (@type@) PyLong_As@convert@(self);
-    @type@ count = npy_popcount@c@(scalar);
+    @type@ scalar = PyArrayScalar_VAL(self, @Name@);
+    uint8_t count = npy_popcount@c@(scalar);
     PyObject *result = PyLong_From@convert@(count);
 
     return result;
diff --git a/numpy/core/src/npymath/npy_math_internal.h.src b/numpy/core/src/npymath/npy_math_internal.h.src
index 4ff87ca6421c..1d1ade212331 100644
--- a/numpy/core/src/npymath/npy_math_internal.h.src
+++ b/numpy/core/src/npymath/npy_math_internal.h.src
@@ -856,10 +856,10 @@ npy_rshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b)
 /**end repeat1**/
 #endif
 
-NPY_INPLACE npy_@type@
+NPY_INPLACE uint8_t
 npy_popcountu@c@(npy_@type@ a)
 {
-#if ((defined(__clang__) || defined(__GNUC__))) && NPY_BITSOF_@STYPE@ >= 32
+#if (defined(__clang__) || defined(__GNUC__)) && NPY_BITSOF_@STYPE@ >= 32
     return __builtin_popcount@c@(a);
 #elif defined(_MSC_VER) && NPY_BITSOF_@STYPE@ >= 16
     /* no builtin __popcnt64 for 32 bits, so use our implementation */
@@ -880,7 +880,7 @@ npy_popcountu@c@(npy_@type@ a)
  * #type = byte, short, int, long, longlong#
  * #c    = hh,   h,     ,    l,    ll#
  */
-NPY_INPLACE npy_@type@
+NPY_INPLACE uint8_t
 npy_popcount@c@(npy_@type@ a)
 {
     /* Return popcount of abs(a) */
diff --git a/numpy/core/tests/test_scalar_methods.py b/numpy/core/tests/test_scalar_methods.py
index 306d7d861624..eef4c1433910 100644
--- a/numpy/core/tests/test_scalar_methods.py
+++ b/numpy/core/tests/test_scalar_methods.py
@@ -191,7 +191,7 @@ class TestBitCount:
     @pytest.mark.parametrize("itype", np.sctypes['int']+np.sctypes['uint'])
     def test_small(self, itype):
         for a in range(max(np.iinfo(itype).min, 0), 128):
-            msg = f"Smoke test for {itype}.bit_count({a})"
+            msg = f"Smoke test for {itype}({a}).bit_count()"
             assert itype(a).bit_count() == bin(a).count("1"), msg
 
     def test_bit_count(self):

From b10c63b34f46e6be8b24f3631973ce1bb6501e24 Mon Sep 17 00:00:00 2001
From: Ganesh Kathiresan <ganesh3597@gmail.com>
Date: Sun, 5 Sep 2021 10:27:44 +0530
Subject: [PATCH 10/16] BUG: Fixed syntax of bit_count

---
 numpy/__init__.pyi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi
index 820325889afd..973df3a130f2 100644
--- a/numpy/__init__.pyi
+++ b/numpy/__init__.pyi
@@ -2985,7 +2985,7 @@ class integer(number[_NBit1]):  # type: ignore
     ) -> int: ...
     def tolist(self) -> int: ...
     def is_integer(self) -> L[True]: ...
-    def bit_count(self: _ScalarType) -> int
+    def bit_count(self: _ScalarType) -> int: ...
     def __index__(self) -> int: ...
     __truediv__: _IntTrueDiv[_NBit1]
     __rtruediv__: _IntTrueDiv[_NBit1]

From f20739c81cdcd192468096d60289ae64147f9e3d Mon Sep 17 00:00:00 2001
From: Ganesh Kathiresan <ganesh3597@gmail.com>
Date: Mon, 20 Sep 2021 20:38:16 +0530
Subject: [PATCH 11/16] DOC, BUG: Fixed bit_count example

---
 numpy/core/_add_newdocs_scalars.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/numpy/core/_add_newdocs_scalars.py b/numpy/core/_add_newdocs_scalars.py
index 8a462b2fda00..94859a9d556d 100644
--- a/numpy/core/_add_newdocs_scalars.py
+++ b/numpy/core/_add_newdocs_scalars.py
@@ -293,15 +293,19 @@ def add_newdoc_for_scalar_type(obj, fixed_aliases, doc):
 
 for int_name in ('int8', 'uint8', 'int16', 'uint16', 'int32', 'uint32',
         'int64', 'uint64', 'int64', 'uint64', 'int64', 'uint64'):
+    # Add negative examples for signed cases by checking typecode
     add_newdoc('numpy.core.numerictypes', int_name, ('bit_count',
-        """
+        f"""
         {int_name}.bit_count() -> int
 
         Computes the number of 1-bits in the absolute value of the input.
         Analogous to the builtin `int.bit_count` or ``popcount`` in C++.
 
-        >>> np.{int_name}(1023).bit_count()
-        10
-        >>> np.{int_name}(-1023).bit_count()
-        10
-        """.format(int_name=int_name)))
+        Examples
+        --------
+        >>> np.{int_name}(127).bit_count()
+        7""" +
+        (f"""
+        >>> np.{int_name}(-127).bit_count()
+        7
+        """ if dtype(int_name).char.islower() else "")))

From 46de25e947e2f363996b8ddb2971473defa844d9 Mon Sep 17 00:00:00 2001
From: Ganesh Kathiresan <ganesh3597@gmail.com>
Date: Wed, 22 Sep 2021 19:52:25 +0530
Subject: [PATCH 12/16] DOC, BUG: (#19355) Removed bit_count from
 routines.math.rst | Improved release notes

---
 doc/release/upcoming_changes/19355.new_feature.rst | 9 ++++++---
 doc/source/reference/routines.math.rst             | 2 --
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/doc/release/upcoming_changes/19355.new_feature.rst b/doc/release/upcoming_changes/19355.new_feature.rst
index cdeed83a37a9..0f6ce617f6f7 100644
--- a/doc/release/upcoming_changes/19355.new_feature.rst
+++ b/doc/release/upcoming_changes/19355.new_feature.rst
@@ -1,10 +1,13 @@
 `bit_count` to compute the number of 1-bits in an integer
 ---------------------------------------------------------
 
-This new function counts the number of 1-bits in a number.
-This works on all the numpy integer types.
+Computes the number of 1-bits in the absolute value of the input.
+This works on all the numpy integer types. Analogous to the builtin
+`int.bit_count` or `popcount` in C++.
 
 .. code-block:: python
 
-    >>> a = np.int32(1023).bit_count()
+    >>> np.uint32(1023).bit_count()
     10
+    >>> np.int32(-127).bit_count()
+    7
diff --git a/doc/source/reference/routines.math.rst b/doc/source/reference/routines.math.rst
index 13e3ffd3dbf5..3c2f968306e1 100644
--- a/doc/source/reference/routines.math.rst
+++ b/doc/source/reference/routines.math.rst
@@ -169,5 +169,3 @@ Miscellaneous
    real_if_close
 
    interp
-
-   bit_count

From 2c6efec23c84d10fb1b7cee229377b929ed88381 Mon Sep 17 00:00:00 2001
From: Ganesh Kathiresan <ganesh3597@gmail.com>
Date: Sat, 25 Sep 2021 15:59:20 +0530
Subject: [PATCH 13/16] BUG: Added type suffix to magic constants

---
 numpy/core/src/npymath/npy_math_internal.h.src | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/numpy/core/src/npymath/npy_math_internal.h.src b/numpy/core/src/npymath/npy_math_internal.h.src
index 1d1ade212331..72e927f03977 100644
--- a/numpy/core/src/npymath/npy_math_internal.h.src
+++ b/numpy/core/src/npymath/npy_math_internal.h.src
@@ -72,10 +72,10 @@
  * c = (T)(a * ((T)~(T)0/255)) >> (sizeof(T) - 1) * CHAR_BIT;
 */
 
-static const npy_uint8  MAGIC8[]  = {0x55,               0x33,               0x0F,               0x01};
-static const npy_uint16 MAGIC16[] = {0x5555,             0x3333,             0x0F0F,             0x0101};
-static const npy_uint32 MAGIC32[] = {0x55555555,         0x33333333,         0x0F0F0F0F,         0x01010101};
-static const npy_uint64 MAGIC64[] = {0x5555555555555555, 0x3333333333333333, 0x0F0F0F0F0F0F0F0F, 0x0101010101010101};
+static const npy_uint8  MAGIC8[]  = {0x55u,                 0x33u,                 0x0Fu,                 0x01u};
+static const npy_uint16 MAGIC16[] = {0x5555u,               0x3333u,               0x0F0Fu,               0x0101u};
+static const npy_uint32 MAGIC32[] = {0x55555555ul,          0x33333333ul,          0x0F0F0F0Ful,          0x01010101ul};
+static const npy_uint64 MAGIC64[] = {0x5555555555555555ull, 0x3333333333333333ull, 0x0F0F0F0F0F0F0F0Full, 0x0101010101010101ull};
 
 
 /*

From 30b08c8cfb4e5f6dc923569f8f6b0dd376e9579c Mon Sep 17 00:00:00 2001
From: Ganesh Kathiresan <ganesh3597@gmail.com>
Date: Sat, 25 Sep 2021 17:19:10 +0530
Subject: [PATCH 14/16] ENH: Handle 32 bit windows popcount | Refactored
 popcount implementation to new function

---
 .../core/src/npymath/npy_math_internal.h.src  | 23 +++++++++++++++----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/numpy/core/src/npymath/npy_math_internal.h.src b/numpy/core/src/npymath/npy_math_internal.h.src
index 72e927f03977..dd2424db8de5 100644
--- a/numpy/core/src/npymath/npy_math_internal.h.src
+++ b/numpy/core/src/npymath/npy_math_internal.h.src
@@ -856,21 +856,34 @@ npy_rshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b)
 /**end repeat1**/
 #endif
 
+/* Portable popcount fallback: parallel mask-and-add using the MAGIC tables. */
+NPY_INPLACE uint8_t
+npy_popcount_parallel@c@(npy_@type@ a)
+{
+    a = a - ((a >> 1) & (npy_@type@) TO_BITS_LEN(MAGIC)[0]);
+    a = ((a & (npy_@type@) TO_BITS_LEN(MAGIC)[1])) + ((a >> 2) & (npy_@type@) TO_BITS_LEN(MAGIC)[1]);
+    a = (a + (a >> 4)) & (npy_@type@) TO_BITS_LEN(MAGIC)[2];
+    return (npy_@type@) (a * (npy_@type@) TO_BITS_LEN(MAGIC)[3]) >> ((NPY_SIZEOF_@STYPE@ - 1) * CHAR_BIT);
+}
+
 NPY_INPLACE uint8_t
 npy_popcountu@c@(npy_@type@ a)
 {
+/* use built-in popcount if present, else use our implementation */
 #if (defined(__clang__) || defined(__GNUC__)) && NPY_BITSOF_@STYPE@ >= 32
     return __builtin_popcount@c@(a);
 #elif defined(_MSC_VER) && NPY_BITSOF_@STYPE@ >= 16
-    /* no builtin __popcnt64 for 32 bits, so use our implementation */
+    /* no builtin __popcnt64 for 32 bits */
     #if defined(_WIN64) || (defined(_WIN32) && NPY_BITSOF_@STYPE@ != 64)
         return TO_BITS_LEN(__popcnt)(a);
+    /* split 64 bit number into two 32 bit ints and return sum of counts */
+    #elif (defined(_WIN32) && NPY_BITSOF_@STYPE@ == 64)
+        npy_uint32 left  = (npy_uint32) (a>>32);
+        npy_uint32 right = (npy_uint32) a;
+        return __popcnt32(left) + __popcnt32(right);
     #endif
 #else
-    a = a - ((a >> 1) & (npy_@type@) TO_BITS_LEN(MAGIC)[0]);
-    a = ((a & (npy_@type@) TO_BITS_LEN(MAGIC)[1])) + ((a >> 2) & (npy_@type@) TO_BITS_LEN(MAGIC)[1]);
-    a = (a + (a >> 4)) & (npy_@type@) TO_BITS_LEN(MAGIC)[2];
-    return (npy_@type@) (a * (npy_@type@) TO_BITS_LEN(MAGIC)[3]) >> ((NPY_SIZEOF_@STYPE@ - 1) * CHAR_BIT);
+    return npy_popcount_parallel@c@(a);
 #endif
 }
 /**end repeat**/

From 9bb34402efaeaabdb77aff1b11a2b2b39d26f4c6 Mon Sep 17 00:00:00 2001
From: Ganesh Kathiresan <ganesh3597@gmail.com>
Date: Wed, 29 Sep 2021 19:35:50 +0530
Subject: [PATCH 15/16] MAINT: Refactor type_methods, separate integer
 definitions

---
 numpy/core/src/multiarray/scalartypes.c.src | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src
index c3866b711043..524f1554a172 100644
--- a/numpy/core/src/multiarray/scalartypes.c.src
+++ b/numpy/core/src/multiarray/scalartypes.c.src
@@ -2327,8 +2327,7 @@ static PyMethodDef @name@type_methods[] = {
 /**end repeat**/
 
 /**begin repeat
- * #name = byte, short, int, long, longlong, ubyte, ushort,
- *         uint, ulong, ulonglong, timedelta, cdouble#
+ * #name = timedelta, cdouble#
  */
 static PyMethodDef @name@type_methods[] = {
     /* for typing; requires python >= 3.9 */
@@ -2344,6 +2343,10 @@ static PyMethodDef @name@type_methods[] = {
  *         long, ulong, longlong, ulonglong#
  */
 static PyMethodDef @name@type_methods[] = {
+    /* for typing; requires python >= 3.9 */
+    {"__class_getitem__",
+        (PyCFunction)numbertype_class_getitem,
+        METH_CLASS | METH_O, NULL},
     {"bit_count",
         (PyCFunction)npy_@name@_bit_count,
         METH_NOARGS, NULL},

From e0e5a511b349108b649bb7181503737dc1095c35 Mon Sep 17 00:00:00 2001
From: Ganesh Kathiresan <ganesh3597@gmail.com>
Date: Thu, 7 Oct 2021 19:55:12 +0530
Subject: [PATCH 16/16] DOC: Added double-ticks

---
 doc/release/upcoming_changes/19355.new_feature.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/release/upcoming_changes/19355.new_feature.rst b/doc/release/upcoming_changes/19355.new_feature.rst
index 0f6ce617f6f7..cfa50b7a175c 100644
--- a/doc/release/upcoming_changes/19355.new_feature.rst
+++ b/doc/release/upcoming_changes/19355.new_feature.rst
@@ -1,9 +1,9 @@
-`bit_count` to compute the number of 1-bits in an integer
----------------------------------------------------------
+``bit_count`` to compute the number of 1-bits in an integer
+-----------------------------------------------------------
 
 Computes the number of 1-bits in the absolute value of the input.
 This works on all the numpy integer types. Analogous to the builtin
-`int.bit_count` or `popcount` in C++.
+``int.bit_count`` or ``popcount`` in C++.
 
 .. code-block:: python