From 96d5993c4fca186f8c9f0f686c963549a03d0636 Mon Sep 17 00:00:00 2001 From: Bas van Beek <43369155+BvB93@users.noreply.github.com> Date: Sun, 3 Oct 2021 02:21:11 +0200 Subject: [PATCH] BUG,DEP: Allow (arg-)partition to accept `uint64` indices (#20000) * BUG: Allow (arg-)partition to accept `uint64` indices * DEP: Deprecate the use of booleans as (arg-)partition indices * TST,DEP: Add tests for the `parametrize` bool-index-deprecation * TST: Add more dtype-based tests for (arg-)partition * DOC: Add a release note for the (arg-)partition boolean deprecation * DEP: Explicitly mention numpy 1.22 in the (arg-)partition deprecation warning Co-Authored-By: Sebastian Berg Co-authored-by: Sebastian Berg --- .../upcoming_changes/20000.deprecation.rst | 5 ++ numpy/core/_add_newdocs.py | 3 + numpy/core/fromnumeric.py | 6 ++ numpy/core/src/multiarray/item_selection.c | 10 ++- numpy/core/tests/test_deprecations.py | 23 +++++++ numpy/core/tests/test_multiarray.py | 68 +++++++++---------- 6 files changed, 79 insertions(+), 36 deletions(-) create mode 100644 doc/release/upcoming_changes/20000.deprecation.rst diff --git a/doc/release/upcoming_changes/20000.deprecation.rst b/doc/release/upcoming_changes/20000.deprecation.rst new file mode 100644 index 000000000000..e0a56cd47e91 --- /dev/null +++ b/doc/release/upcoming_changes/20000.deprecation.rst @@ -0,0 +1,5 @@ +Passing boolean ``kth`` values to (arg-)partition has been deprecated +--------------------------------------------------------------------- +`~numpy.partition` and `~numpy.argpartition` would previously accept boolean +values for the ``kth`` parameter, which would subsequently be converted into +integers. This behavior has now been deprecated. diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py index bb0c2ea12a65..37f21211f927 100644 --- a/numpy/core/_add_newdocs.py +++ b/numpy/core/_add_newdocs.py @@ -4044,6 +4044,9 @@ The order of all elements in the partitions is undefined. 
If provided with a sequence of kth it will partition all elements indexed by kth of them into their sorted position at once. + + .. deprecated:: 1.22.0 + Passing booleans as ``kth`` is deprecated. axis : int, optional Axis along which to sort. Default is -1, which means sort along the last axis. diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index 5ecb1e6669aa..29d215ea0b31 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -689,6 +689,9 @@ def partition(a, kth, axis=-1, kind='introselect', order=None): it. The order of all elements in the partitions is undefined. If provided with a sequence of k-th it will partition all elements indexed by k-th of them into their sorted position at once. + + .. deprecated:: 1.22.0 + Passing booleans as ``kth`` is deprecated. axis : int or None, optional Axis along which to sort. If None, the array is flattened before sorting. The default is -1, which sorts along the last axis. @@ -781,6 +784,9 @@ def argpartition(a, kth, axis=-1, kind='introselect', order=None): elements in the partitions is undefined. If provided with a sequence of k-th it will partition all of them into their sorted position at once. + + .. deprecated:: 1.22.0 + Passing booleans as ``kth`` is deprecated. axis : int or None, optional Axis along which to sort. The default is -1 (the last axis). If None, the flattened array is used.
diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index ad5478bbf6b8..ee66378a938a 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -1292,7 +1292,15 @@ partition_prep_kth_array(PyArrayObject * ktharray, npy_intp * kth; npy_intp nkth, i; - if (!PyArray_CanCastSafely(PyArray_TYPE(ktharray), NPY_INTP)) { + if (PyArray_ISBOOL(ktharray)) { + /* 2021-09-29, NumPy 1.22 */ + if (DEPRECATE( + "Passing booleans as partition index is deprecated" + " (warning added in NumPy 1.22)") < 0) { + return NULL; + } + } + else if (!PyArray_ISINTEGER(ktharray)) { PyErr_Format(PyExc_TypeError, "Partition index must be integer"); return NULL; } diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index 1d0c5dfac5c2..898ff8075351 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -1192,3 +1192,26 @@ def test_behaviour(self): np.maximum(arr, arr, dtype="m8[ns]") # previously used the "ns" with pytest.warns(DeprecationWarning, match=self.message): np.maximum.reduce(arr, dtype="m8[ns]") # never preserved the "ns" + + +PARTITION_DICT = { + "partition method": np.arange(10).partition, + "argpartition method": np.arange(10).argpartition, + "partition function": lambda kth: np.partition(np.arange(10), kth), + "argpartition function": lambda kth: np.argpartition(np.arange(10), kth), +} + + +@pytest.mark.parametrize("func", PARTITION_DICT.values(), ids=PARTITION_DICT) +class TestPartitionBoolIndex(_DeprecationTestCase): + # Deprecated 2021-09-29, NumPy 1.22 + warning_cls = DeprecationWarning + message = "Passing booleans as partition index is deprecated" + + def test_deprecated(self, func): + self.assert_deprecated(lambda: func(True)) + self.assert_deprecated(lambda: func([False, True])) + + def test_not_deprecated(self, func): + self.assert_not_deprecated(lambda: func(1)) + 
self.assert_not_deprecated(lambda: func([0, 1])) diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index b5f9f8af3c8e..0da36bbeabb2 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -2511,27 +2511,19 @@ class A(np.ndarray): assert_(not isinstance(a.searchsorted(b, 'left', s), A)) assert_(not isinstance(a.searchsorted(b, 'right', s), A)) - def test_argpartition_out_of_range(self): + @pytest.mark.parametrize("dtype", np.typecodes["All"]) + def test_argpartition_out_of_range(self, dtype): # Test out of range values in kth raise an error, gh-5469 - d = np.arange(10) + d = np.arange(10).astype(dtype=dtype) assert_raises(ValueError, d.argpartition, 10) assert_raises(ValueError, d.argpartition, -11) - # Test also for generic type argpartition, which uses sorting - # and used to not bound check kth - d_obj = np.arange(10, dtype=object) - assert_raises(ValueError, d_obj.argpartition, 10) - assert_raises(ValueError, d_obj.argpartition, -11) - def test_partition_out_of_range(self): + @pytest.mark.parametrize("dtype", np.typecodes["All"]) + def test_partition_out_of_range(self, dtype): # Test out of range values in kth raise an error, gh-5469 - d = np.arange(10) + d = np.arange(10).astype(dtype=dtype) assert_raises(ValueError, d.partition, 10) assert_raises(ValueError, d.partition, -11) - # Test also for generic type partition, which uses sorting - # and used to not bound check kth - d_obj = np.arange(10, dtype=object) - assert_raises(ValueError, d_obj.partition, 10) - assert_raises(ValueError, d_obj.partition, -11) def test_argpartition_integer(self): # Test non-integer values in kth raise an error/ @@ -2551,26 +2543,30 @@ def test_partition_integer(self): d_obj = np.arange(10, dtype=object) assert_raises(TypeError, d_obj.partition, 9.) 
- def test_partition_empty_array(self): + @pytest.mark.parametrize("kth_dtype", np.typecodes["AllInteger"]) + def test_partition_empty_array(self, kth_dtype): # check axis handling for multidimensional empty arrays + kth = np.array(0, dtype=kth_dtype)[()] a = np.array([]) a.shape = (3, 2, 1, 0) for axis in range(-a.ndim, a.ndim): msg = 'test empty array partition with axis={0}'.format(axis) - assert_equal(np.partition(a, 0, axis=axis), a, msg) + assert_equal(np.partition(a, kth, axis=axis), a, msg) msg = 'test empty array partition with axis=None' - assert_equal(np.partition(a, 0, axis=None), a.ravel(), msg) + assert_equal(np.partition(a, kth, axis=None), a.ravel(), msg) - def test_argpartition_empty_array(self): + @pytest.mark.parametrize("kth_dtype", np.typecodes["AllInteger"]) + def test_argpartition_empty_array(self, kth_dtype): # check axis handling for multidimensional empty arrays + kth = np.array(0, dtype=kth_dtype)[()] a = np.array([]) a.shape = (3, 2, 1, 0) for axis in range(-a.ndim, a.ndim): msg = 'test empty array argpartition with axis={0}'.format(axis) - assert_equal(np.partition(a, 0, axis=axis), + assert_equal(np.partition(a, kth, axis=axis), np.zeros_like(a, dtype=np.intp), msg) msg = 'test empty array argpartition with axis=None' - assert_equal(np.partition(a, 0, axis=None), + assert_equal(np.partition(a, kth, axis=None), np.zeros_like(a.ravel(), dtype=np.intp), msg) def test_partition(self): @@ -2901,10 +2897,12 @@ def test_partition_fuzz(self): assert_array_equal(np.partition(d, kth)[kth], tgt, err_msg="data: %r\n kth: %r" % (d, kth)) - def test_argpartition_gh5524(self): + @pytest.mark.parametrize("kth_dtype", np.typecodes["AllInteger"]) + def test_argpartition_gh5524(self, kth_dtype): # A test for functionality of argpartition on lists. 
- d = [6,7,3,2,9,0] - p = np.argpartition(d,1) + kth = np.array(1, dtype=kth_dtype)[()] + d = [6, 7, 3, 2, 9, 0] + p = np.argpartition(d, kth) self.assert_partitioned(np.array(d)[p],[1]) def test_flatten(self): @@ -4200,7 +4198,7 @@ class TestArgmaxArgminCommon: (3, 4, 1, 2), (4, 1, 2, 3)] @pytest.mark.parametrize("size, axis", itertools.chain(*[[(size, axis) - for axis in list(range(-len(size), len(size))) + [None]] + for axis in list(range(-len(size), len(size))) + [None]] for size in sizes])) @pytest.mark.parametrize('method', [np.argmax, np.argmin]) def test_np_argmin_argmax_keepdims(self, size, axis, method): @@ -4221,7 +4219,7 @@ def test_np_argmin_argmax_keepdims(self, size, axis, method): assert_equal(res, res_orig) assert_(res.shape == new_shape) outarray = np.empty(res.shape, dtype=res.dtype) - res1 = method(arr, axis=axis, out=outarray, + res1 = method(arr, axis=axis, out=outarray, keepdims=True) assert_(res1 is outarray) assert_equal(res, outarray) @@ -4234,7 +4232,7 @@ def test_np_argmin_argmax_keepdims(self, size, axis, method): wrong_shape[0] = 2 wrong_outarray = np.empty(wrong_shape, dtype=res.dtype) with pytest.raises(ValueError): - method(arr.T, axis=axis, + method(arr.T, axis=axis, out=wrong_outarray, keepdims=True) # non-contiguous arrays @@ -4252,18 +4250,18 @@ def test_np_argmin_argmax_keepdims(self, size, axis, method): assert_(res.shape == new_shape) outarray = np.empty(new_shape[::-1], dtype=res.dtype) outarray = outarray.T - res1 = method(arr.T, axis=axis, out=outarray, + res1 = method(arr.T, axis=axis, out=outarray, keepdims=True) assert_(res1 is outarray) assert_equal(res, outarray) if len(size) > 0: - # one dimension lesser for non-zero sized + # one dimension lesser for non-zero sized # array should raise an error with pytest.raises(ValueError): - method(arr[0], axis=axis, + method(arr[0], axis=axis, out=outarray, keepdims=True) - + if len(size) > 0: wrong_shape = list(new_shape) if axis is not None: @@ -4272,7 +4270,7 @@ def 
test_np_argmin_argmax_keepdims(self, size, axis, method): wrong_shape[0] = 2 wrong_outarray = np.empty(wrong_shape, dtype=res.dtype) with pytest.raises(ValueError): - method(arr.T, axis=axis, + method(arr.T, axis=axis, out=wrong_outarray, keepdims=True) @pytest.mark.parametrize('method', ['max', 'min']) @@ -4287,7 +4285,7 @@ def test_all(self, method): axes.remove(i) assert_(np.all(a_maxmin == aarg_maxmin.choose( *a.transpose(i, *axes)))) - + @pytest.mark.parametrize('method', ['argmax', 'argmin']) def test_output_shape(self, method): # see also gh-616 @@ -4330,7 +4328,7 @@ def test_unicode(self, np_array, method, idx, val): [('argmax', np.argmax), ('argmin', np.argmin)]) def test_np_vs_ndarray(self, arr_method, np_method): - # make sure both ndarray.argmax/argmin and + # make sure both ndarray.argmax/argmin and # numpy.argmax/argmin support out/axis args a = np.random.normal(size=(2, 3)) arg_method = getattr(a, arr_method) @@ -4344,7 +4342,7 @@ def test_np_vs_ndarray(self, arr_method, np_method): # check keyword args out1 = np.zeros(3, dtype=int) out2 = np.zeros(3, dtype=int) - assert_equal(arg_method(out=out1, axis=0), + assert_equal(arg_method(out=out1, axis=0), np_method(a, out=out2, axis=0)) assert_equal(out1, out2) @@ -4438,7 +4436,7 @@ def test_combinations(self, data): assert_equal(np.argmax(arr), pos, err_msg="%r" % arr) assert_equal(arr[np.argmax(arr)], val, err_msg="%r" % arr) - + def test_maximum_signed_integers(self): a = np.array([1, 2**7 - 1, -2**7], dtype=np.int8)