From 172a27fae4829933c4a8a7393aa6d0e71f9cc609 Mon Sep 17 00:00:00 2001 From: Jason Thai Date: Sat, 28 May 2022 00:36:43 -0700 Subject: [PATCH 1/5] TST: Added test for np.unique equal_nans kwarg --- numpy/lib/tests/test_arraysetops.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py index 13385cd2409d..f97fea310dbb 100644 --- a/numpy/lib/tests/test_arraysetops.py +++ b/numpy/lib/tests/test_arraysetops.py @@ -765,3 +765,11 @@ def _run_axis_tests(self, dtype): assert_array_equal(uniq[:, inv], data) msg = "Unique's return_counts=True failed with axis=1" assert_array_equal(cnt, np.array([2, 1, 1]), msg) + + def test_unique_nanequals(self): + # issue 20326 + a = np.array([1, 1, np.nan, np.nan, np.nan]) + unq = np.unique(a) + not_unq = np.unique(a, equal_nans = False) + assert_array_equal(unq, np.array([1, np.nan])) + assert_array_equal(not_unq, np.array([1, np.nan, np.nan, np.nan])) From 6a567fce2d3ef5aa1f325e7efae15b315537a06e Mon Sep 17 00:00:00 2001 From: Jason Thai Date: Fri, 27 May 2022 01:50:40 -0700 Subject: [PATCH 2/5] ENH: Added equal-nans kwarg to np.unique --- numpy/lib/arraysetops.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index d44e1a983ebf..490ff6a872dd 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -131,13 +131,13 @@ def _unpack_tuple(x): def _unique_dispatcher(ar, return_index=None, return_inverse=None, - return_counts=None, axis=None): + return_counts=None, axis=None, *, equal_nans=None): return (ar,) @array_function_dispatch(_unique_dispatcher) def unique(ar, return_index=False, return_inverse=False, - return_counts=False, axis=None): + return_counts=False, axis=None, *, equal_nans=True): """ Find the unique elements of an array. 
@@ -162,8 +162,10 @@ def unique(ar, return_index=False, return_inverse=False, return_counts : bool, optional If True, also return the number of times each unique item appears in `ar`. + equals_nan : bool, optional + If True, collapses multiple NaN values in return array into 1 - .. versionadded:: 1.9.0 + .. versionchanged: NumPy 1.24 axis : int or None, optional The axis to operate on. If None, `ar` will be flattened. If an integer, @@ -269,7 +271,8 @@ def unique(ar, return_index=False, return_inverse=False, """ ar = np.asanyarray(ar) if axis is None: - ret = _unique1d(ar, return_index, return_inverse, return_counts) + ret = _unique1d(ar, return_index, return_inverse, return_counts, + equal_nans) return _unpack_tuple(ret) # axis was specified and not None @@ -312,13 +315,13 @@ def reshape_uniq(uniq): return uniq output = _unique1d(consolidated, return_index, - return_inverse, return_counts) + return_inverse, return_counts, equal_nans) output = (reshape_uniq(output[0]),) + output[1:] return _unpack_tuple(output) def _unique1d(ar, return_index=False, return_inverse=False, - return_counts=False): + return_counts=False, equal_nans=True): """ Find the unique elements of an array, ignoring shape. 
""" @@ -334,7 +337,8 @@ def _unique1d(ar, return_index=False, return_inverse=False, aux = ar mask = np.empty(aux.shape, dtype=np.bool_) mask[:1] = True - if aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and np.isnan(aux[-1]): + if (equal_nans and aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and + np.isnan(aux[-1])): if aux.dtype.kind == "c": # for complex all NaNs are considered equivalent aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left') else: From 345ced482bac1c974d6bc8b70b101293d76a16e1 Mon Sep 17 00:00:00 2001 From: Jason Thai Date: Sat, 28 May 2022 13:06:10 -0700 Subject: [PATCH 3/5] DOC: Added releasenote for 21623 --- doc/release/upcoming_changes/21623.new_feature.rst | 4 ++++ numpy/lib/arraysetops.py | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) create mode 100644 doc/release/upcoming_changes/21623.new_feature.rst diff --git a/doc/release/upcoming_changes/21623.new_feature.rst b/doc/release/upcoming_changes/21623.new_feature.rst new file mode 100644 index 000000000000..33e86ad26fbe --- /dev/null +++ b/doc/release/upcoming_changes/21623.new_feature.rst @@ -0,0 +1,4 @@ +New parameter ``equal_nans`` added to `np.unique` +----------------------------------------------------------------------------------- + +`np.unique` was previously changed to treat NaN values as equal. Now this functionality is decided by setting the ``equal_nans`` kwarg to True or False. True is the default behavior. diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index 490ff6a872dd..28aede464940 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -162,7 +162,7 @@ def unique(ar, return_index=False, return_inverse=False, return_counts : bool, optional If True, also return the number of times each unique item appears in `ar`. - equals_nan : bool, optional + equal_nans : bool, optional If True, collapses multiple NaN values in return array into 1 .. 
versionchanged: NumPy 1.24 @@ -272,7 +272,7 @@ def unique(ar, return_index=False, return_inverse=False, ar = np.asanyarray(ar) if axis is None: ret = _unique1d(ar, return_index, return_inverse, return_counts, - equal_nans) + equal_nans = equal_nans) return _unpack_tuple(ret) # axis was specified and not None @@ -315,13 +315,13 @@ def reshape_uniq(uniq): return uniq output = _unique1d(consolidated, return_index, - return_inverse, return_counts, equal_nans) + return_inverse, return_counts, equal_nans = equal_nans) output = (reshape_uniq(output[0]),) + output[1:] return _unpack_tuple(output) def _unique1d(ar, return_index=False, return_inverse=False, - return_counts=False, equal_nans=True): + return_counts=False, *, equal_nans=True): """ Find the unique elements of an array, ignoring shape. """ From 350b3fe83ff48bb9b87e06e8b1c0be6072141f39 Mon Sep 17 00:00:00 2001 From: Jason Thai Date: Sun, 29 May 2022 00:49:19 -0700 Subject: [PATCH 4/5] DOC: Update 21623 release documentation Co-authored-by: Matti Picus --- doc/release/upcoming_changes/21623.new_feature.rst | 4 +++- numpy/lib/arraysetops.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/release/upcoming_changes/21623.new_feature.rst b/doc/release/upcoming_changes/21623.new_feature.rst index 33e86ad26fbe..fafb2e8d6907 100644 --- a/doc/release/upcoming_changes/21623.new_feature.rst +++ b/doc/release/upcoming_changes/21623.new_feature.rst @@ -1,4 +1,6 @@ New parameter ``equal_nans`` added to `np.unique` ----------------------------------------------------------------------------------- -`np.unique` was previously changed to treat NaN values as equal. Now this functionality is decided by setting the ``equal_nans`` kwarg to True or False. True is the default behavior. +`np.unique` was changed in 1.21 to treat all ``NaN`` values as equal and return +a single ``NaN``. Setting ``equal_nans=False`` will restore pre-1.21 behavior +to treat ``NaNs`` as unique. Defaults to ``True``. 
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index 28aede464940..53930850e32c 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -165,7 +165,7 @@ def unique(ar, return_index=False, return_inverse=False, equal_nans : bool, optional If True, collapses multiple NaN values in return array into 1 - .. versionchanged: NumPy 1.24 + .. versionadded:: 1.24 axis : int or None, optional The axis to operate on. If None, `ar` will be flattened. If an integer, From 7a880a65a2b519995e2a1a4c911380170d38ae1b Mon Sep 17 00:00:00 2001 From: Jason Thai Date: Wed, 1 Jun 2022 00:48:52 -0700 Subject: [PATCH 5/5] MAINT: Update multiline indentations Co-authored-by: Matti Picus --- numpy/lib/arraysetops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index 53930850e32c..6d36fdcbddc9 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -338,7 +338,7 @@ def _unique1d(ar, return_index=False, return_inverse=False, mask = np.empty(aux.shape, dtype=np.bool_) mask[:1] = True if (equal_nans and aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and - np.isnan(aux[-1])): + np.isnan(aux[-1])): if aux.dtype.kind == "c": # for complex all NaNs are considered equivalent aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left') else: