From 45357de4cf83340bd05d279673f68a7fd42b2308 Mon Sep 17 00:00:00 2001 From: "M. Eric Irrgang" Date: Sat, 16 Jul 2022 12:47:07 -0500 Subject: [PATCH 01/11] BUG: Distinguish exact vs. equivalent dtype for C type aliases. For `asarray` and for the `dtype` equality operator, equivalent dtype aliases were considered exact matches. This change ensures that the returned array has a descriptor that exactly matches the requested dtype. Note: Intended behavior of `np.dtype('i') == np.dtype('l')` is to test compatibility, not identity. This change does not affect the behavior of `PyArray_EquivTypes()`, and the `__eq__` operator for `dtype` continues to map to `PyArray_EquivTypes()`. Fixes #1468. --- numpy/core/src/multiarray/multiarraymodule.c | 22 ++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index 16069d619f66..f364b1f57362 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -1627,12 +1627,30 @@ _array_fromobject_generic( goto finish; } } - /* One more chance */ + /* One more chance for faster exit if user specified the dtype. */ oldtype = PyArray_DESCR(oparr); if (PyArray_EquivTypes(oldtype, type)) { if (copy != NPY_COPY_ALWAYS && STRIDING_OK(oparr, order)) { Py_INCREF(op); - ret = oparr; + if (oldtype == type) { + ret = oparr; + } + else { + /* Create a new PyArrayObject from the caller's + * PyArray_Descr. Use the reference `op` as the base + * object. */ + Py_INCREF(type); + ret = PyArray_NewFromDescr( + Py_TYPE(op), + type, + PyArray_NDIM(oparr), + PyArray_DIMS(oparr), + PyArray_STRIDES(oparr), + PyArray_DATA(oparr), + PyArray_FLAGS(oparr), + oparr + ); + } goto finish; } else { From a3eedb0ca339d2d98d5b54698fcf24b2587ac59b Mon Sep 17 00:00:00 2001 From: "M. Eric Irrgang" Date: Sat, 16 Jul 2022 13:48:04 -0500 Subject: [PATCH 02/11] Use the correct pointer type. --- numpy/core/src/multiarray/multiarraymodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index f364b1f57362..027ce374a2a6 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -1648,7 +1648,7 @@ _array_fromobject_generic( PyArray_STRIDES(oparr), PyArray_DATA(oparr), PyArray_FLAGS(oparr), - oparr + op ); } goto finish; From 78cd6b0780ee11b0952988baf688061e931815b0 Mon Sep 17 00:00:00 2001 From: "M. Eric Irrgang" Date: Sat, 16 Jul 2022 13:54:49 -0500 Subject: [PATCH 03/11] Coerce the returned pointer type. --- numpy/core/src/multiarray/multiarraymodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index 027ce374a2a6..fc12d0589eac 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -1640,7 +1640,7 @@ _array_fromobject_generic( * PyArray_Descr. Use the reference `op` as the base * object. */ Py_INCREF(type); - ret = PyArray_NewFromDescr( + ret = (PyArrayObject *)PyArray_NewFromDescr( Py_TYPE(op), type, PyArray_NDIM(oparr), From 45281b8a0f88b39b9444032578559a25e62763ec Mon Sep 17 00:00:00 2001 From: "M. Eric Irrgang" Date: Sat, 16 Jul 2022 15:27:18 -0500 Subject: [PATCH 04/11] Get the base object. Use the correct API call to actually set the base object. --- numpy/core/src/multiarray/multiarraymodule.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index fc12d0589eac..8f651ad80ee9 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -1640,7 +1640,7 @@ _array_fromobject_generic( * PyArray_Descr. Use the reference `op` as the base * object. */ Py_INCREF(type); - ret = (PyArrayObject *)PyArray_NewFromDescr( + ret = (PyArrayObject *)PyArray_NewFromDescrAndBase( Py_TYPE(op), type, PyArray_NDIM(oparr), @@ -1648,6 +1648,7 @@ _array_fromobject_generic( PyArray_STRIDES(oparr), PyArray_DATA(oparr), PyArray_FLAGS(oparr), + op, op ); } From 81b97607339ac68b27cf72ba7923345d58e2895e Mon Sep 17 00:00:00 2001 From: "M. Eric Irrgang" Date: Sat, 16 Jul 2022 15:27:38 -0500 Subject: [PATCH 05/11] Add unit testing. --- numpy/array_api/tests/test_asarray.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 numpy/array_api/tests/test_asarray.py diff --git a/numpy/array_api/tests/test_asarray.py b/numpy/array_api/tests/test_asarray.py new file mode 100644 index 000000000000..d9bbc675b9aa --- /dev/null +++ b/numpy/array_api/tests/test_asarray.py @@ -0,0 +1,24 @@ +import numpy as np + + +def test_fast_return(): + """""" + a = np.array([1, 2, 3], dtype='i') + assert np.asarray(a) is a + assert np.asarray(a, dtype='i') is a + # This may produce a new view or a copy, but is never the same object. + assert np.asarray(a, dtype='l') is not a + + unequal_type = np.dtype('i', metadata={'spam': True}) + b = np.asarray(a, dtype=unequal_type) + assert b is not a + assert b.base is a + + equivalent_requirement = np.dtype('i', metadata={'spam': True}) + c = np.asarray(b, dtype=equivalent_requirement) + # A quirk of the metadata test is that equivalent metadata dicts are still + # separate objects and so don't evaluate as the same array type description. + assert unequal_type == equivalent_requirement + assert unequal_type is not equivalent_requirement + assert c is not b + assert c.dtype is equivalent_requirement From 871a1f9e7ac791acde6664498cf374ed88b0c850 Mon Sep 17 00:00:00 2001 From: "M. Eric Irrgang" Date: Sat, 16 Jul 2022 15:42:03 -0500 Subject: [PATCH 06/11] Don't regenerate the descriptor unnecessarily. Fix a false mismatch. Separate dtype objects, even if equivalent, cause distinct array views to be created. --- numpy/ma/tests/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py index 5b779edcbbca..69ace1948453 100644 --- a/numpy/ma/tests/test_core.py +++ b/numpy/ma/tests/test_core.py @@ -5387,7 +5387,7 @@ def test_ufunc_with_out_varied(): def test_astype_mask_ordering(): - descr = [('v', int, 3), ('x', [('y', float)])] + descr = np.dtype([('v', int, 3), ('x', [('y', float)])]) x = array([ [([1, 2, 3], (1.0,)), ([1, 2, 3], (2.0,))], [([1, 2, 3], (3.0,)), ([1, 2, 3], (4.0,))]], dtype=descr) From 5651445944bce163a2c3f746d6ac1acd9ae76032 Mon Sep 17 00:00:00 2001 From: "M. Eric Irrgang" Date: Sat, 16 Jul 2022 15:51:51 -0500 Subject: [PATCH 07/11] Update comment and obey formatting requirements. --- numpy/array_api/tests/test_asarray.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numpy/array_api/tests/test_asarray.py b/numpy/array_api/tests/test_asarray.py index d9bbc675b9aa..bd1859d71a45 100644 --- a/numpy/array_api/tests/test_asarray.py +++ b/numpy/array_api/tests/test_asarray.py @@ -16,8 +16,8 @@ def test_fast_return(): equivalent_requirement = np.dtype('i', metadata={'spam': True}) c = np.asarray(b, dtype=equivalent_requirement) - # A quirk of the metadata test is that equivalent metadata dicts are still - # separate objects and so don't evaluate as the same array type description. + # The descriptors are equivalent, but we have created + # distinct dtype instances. assert unequal_type == equivalent_requirement assert unequal_type is not equivalent_requirement assert c is not b From e286f461b54c43e2a16b3f6fc6d829936ea28c27 Mon Sep 17 00:00:00 2001 From: "M. Eric Irrgang" Date: Sun, 17 Jul 2022 10:49:56 -0500 Subject: [PATCH 08/11] Expand test_asarray.py. * Improve comments/docs. * Improve descriptiveness of variable names. * Add additional test expressions that would not pass without this patch. --- numpy/array_api/tests/test_asarray.py | 46 +++++++++++++++++++-------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/numpy/array_api/tests/test_asarray.py b/numpy/array_api/tests/test_asarray.py index bd1859d71a45..fdcc77506b62 100644 --- a/numpy/array_api/tests/test_asarray.py +++ b/numpy/array_api/tests/test_asarray.py @@ -1,24 +1,44 @@ import numpy as np -def test_fast_return(): - """""" - a = np.array([1, 2, 3], dtype='i') - assert np.asarray(a) is a - assert np.asarray(a, dtype='i') is a - # This may produce a new view or a copy, but is never the same object. - assert np.asarray(a, dtype='l') is not a +def test_dtype_identity(): + """Confirm the intended behavior for ``asarray`` results. + The result of ``asarray()`` should have the dtype provided through the + keyword argument, when used. This forces unique array handles to be + produced for unique np.dtype objects, but (for equivalent dtypes), the + underlying data (the base object) is shared with the original array object. + + Ref https://github.com/numpy/numpy/issues/1468 + """ + int_array = np.array([1, 2, 3], dtype='i') + assert np.asarray(int_array) is int_array + + # The character code resolves to the singleton dtype object provided + # by the numpy package. + assert np.asarray(int_array, dtype='i') is int_array + + # Derive a dtype from n.dtype('i'), but add a metadata object to force + # the dtype to be distinct. unequal_type = np.dtype('i', metadata={'spam': True}) - b = np.asarray(a, dtype=unequal_type) - assert b is not a - assert b.base is a + annotated_int_array = np.asarray(int_array, dtype=unequal_type) + assert annotated_int_array is not int_array + assert annotated_int_array.base is int_array + + # These ``asarray()`` calls may produce a new view or a copy, + # but never the same object. + long_int_array = np.asarray(int_array, dtype='l') + assert long_int_array is not int_array + assert np.asarray(int_array, dtype='q') is not int_array + assert np.asarray(long_int_array, dtype='q') is not long_int_array + assert np.asarray(int_array, dtype='l') is not np.asarray(int_array, dtype='l') + assert np.asarray(int_array, dtype='l').base is np.asarray(int_array, dtype='l').base equivalent_requirement = np.dtype('i', metadata={'spam': True}) - c = np.asarray(b, dtype=equivalent_requirement) + annotated_int_array_alt = np.asarray(annotated_int_array, dtype=equivalent_requirement) # The descriptors are equivalent, but we have created # distinct dtype instances. assert unequal_type == equivalent_requirement assert unequal_type is not equivalent_requirement - assert c is not b - assert c.dtype is equivalent_requirement + assert annotated_int_array_alt is not annotated_int_array + assert annotated_int_array_alt.dtype is equivalent_requirement From 01438a848b029b4fb3d3509c7fd313bc0588bd38 Mon Sep 17 00:00:00 2001 From: "M. Eric Irrgang" Date: Sun, 17 Jul 2022 10:55:59 -0500 Subject: [PATCH 09/11] Lint. Shorten some lines. --- numpy/array_api/tests/test_asarray.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/numpy/array_api/tests/test_asarray.py b/numpy/array_api/tests/test_asarray.py index fdcc77506b62..4a9dd77a0652 100644 --- a/numpy/array_api/tests/test_asarray.py +++ b/numpy/array_api/tests/test_asarray.py @@ -31,11 +31,12 @@ def test_dtype_identity(): assert long_int_array is not int_array assert np.asarray(int_array, dtype='q') is not int_array assert np.asarray(long_int_array, dtype='q') is not long_int_array - assert np.asarray(int_array, dtype='l') is not np.asarray(int_array, dtype='l') - assert np.asarray(int_array, dtype='l').base is np.asarray(int_array, dtype='l').base + assert long_int_array is not np.asarray(int_array, dtype='l') + assert long_int_array.base is np.asarray(int_array, dtype='l').base equivalent_requirement = np.dtype('i', metadata={'spam': True}) - annotated_int_array_alt = np.asarray(annotated_int_array, dtype=equivalent_requirement) + annotated_int_array_alt = np.asarray(annotated_int_array, + dtype=equivalent_requirement) # The descriptors are equivalent, but we have created # distinct dtype instances. assert unequal_type == equivalent_requirement From b1a8ff8fa73b744416e12cdd4bb70594717b5336 Mon Sep 17 00:00:00 2001 From: "M. Eric Irrgang" Date: Sun, 17 Jul 2022 13:12:31 -0500 Subject: [PATCH 10/11] Add release note and further clarify tests. --- .../upcoming_changes/21995.compatibility.rst | 46 +++++++++++++++++++ numpy/array_api/tests/test_asarray.py | 43 ++++++++++++----- 2 files changed, 77 insertions(+), 12 deletions(-) create mode 100644 doc/release/upcoming_changes/21995.compatibility.rst diff --git a/doc/release/upcoming_changes/21995.compatibility.rst b/doc/release/upcoming_changes/21995.compatibility.rst new file mode 100644 index 000000000000..cc00b50b38d5 --- /dev/null +++ b/doc/release/upcoming_changes/21995.compatibility.rst @@ -0,0 +1,46 @@ +Returned arrays respect uniqueness of dtype kwarg objects +--------------------------------------------------------- +When ``dtype`` keyword argument is used with :py:func:`np.array()` +or :py:func:`asarray()`, the dtype of the returned array has +the same dtype *instance* as provided by the caller. + +If the provided dtype is compatible, but not identically the same +:py:class:`dtype` object, a new array handle is always created with +a reference to the user-provided dtype instance. +If the data type is compatible, and copying is not required, the new +`ndarray` uses the original array as its +`base `__. + +Before this change, for two equivalent but non-identical dtypes, + + assert isinstance(typeA, np.dtype) and isinstance(typeB, np.dtype) + assert typeA == typeB + assert typeA is not typeB + if my_array.dtype is typeA: + assert my_array is np.asarray(my_array, dtype=typeB) + assert np.asarray(my_array, dtype=typeB).dtype is not typeB + +This change allows programs to be able to reliably get the exact dtype +representation they request, regardless of possibly aliased types on the +calling platform. + +However, identity semantics for array results and their +dtype members may require minor updates to calling code. + +After this change, on a system where C ``int`` and C ``long`` are the same +precision, ``np.dtype('i') == np.dtype('l')``, +but ``np.dtype('i') is not np.dtype('l')``. + + assert int_array.dtype is np.dtype('i') + long_int_array = np.asarray(int_array, dtype='l') + assert long_int_array is not int_array + if np.dtype('i') == np.dtype('l'): + assert int_array is long_int_array.base + +New array views are created with each call to `asarray` with non-identical +dtype kwarg, but the underlying data is the same. + + assert int_array.dtype is np.dtype('i') + long_int_array = np.asarray(int_array, dtype='l') + assert long_int_array is not np.asarray(int_array, dtype='l') + assert long_int_array.base is np.asarray(int_array, dtype='l').base diff --git a/numpy/array_api/tests/test_asarray.py b/numpy/array_api/tests/test_asarray.py index 4a9dd77a0652..5c269823f94b 100644 --- a/numpy/array_api/tests/test_asarray.py +++ b/numpy/array_api/tests/test_asarray.py @@ -1,3 +1,5 @@ +import itertools + import numpy as np @@ -24,22 +26,39 @@ def test_dtype_identity(): annotated_int_array = np.asarray(int_array, dtype=unequal_type) assert annotated_int_array is not int_array assert annotated_int_array.base is int_array - - # These ``asarray()`` calls may produce a new view or a copy, - # but never the same object. - long_int_array = np.asarray(int_array, dtype='l') - assert long_int_array is not int_array - assert np.asarray(int_array, dtype='q') is not int_array - assert np.asarray(long_int_array, dtype='q') is not long_int_array - assert long_int_array is not np.asarray(int_array, dtype='l') - assert long_int_array.base is np.asarray(int_array, dtype='l').base - + # Create an equivalent descriptor with a new and distinct dtype instance. equivalent_requirement = np.dtype('i', metadata={'spam': True}) annotated_int_array_alt = np.asarray(annotated_int_array, dtype=equivalent_requirement) - # The descriptors are equivalent, but we have created - # distinct dtype instances. assert unequal_type == equivalent_requirement assert unequal_type is not equivalent_requirement assert annotated_int_array_alt is not annotated_int_array assert annotated_int_array_alt.dtype is equivalent_requirement + + # Check the same logic for a pair of C types whose equivalence may vary + # between computing environments. + # Find an equivalent pair. + integer_type_codes = ('i', 'l', 'q') + integer_dtypes = [np.dtype(code) for code in integer_type_codes] + typeA = None + typeB = None + for typeA, typeB in itertools.permutations(integer_dtypes, r=2): + if typeA == typeB: + assert typeA is not typeB + break + assert isinstance(typeA, np.dtype) and isinstance(typeB, np.dtype) + + # These ``asarray()`` calls may produce a new view or a copy, + # but never the same object. + long_int_array = np.asarray(int_array, dtype='l') + long_long_int_array = np.asarray(int_array, dtype='q') + assert long_int_array is not int_array + assert long_long_int_array is not int_array + assert np.asarray(long_int_array, dtype='q') is not long_int_array + array_a = np.asarray(int_array, dtype=typeA) + assert typeA == typeB + assert typeA is not typeB + assert array_a.dtype is typeA + assert array_a is not np.asarray(array_a, dtype=typeB) + assert np.asarray(array_a, dtype=typeB).dtype is typeB + assert array_a is np.asarray(array_a, dtype=typeB).base From 235b75e939b8233f29a073175d93070fcfc5e8d1 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Sat, 3 Sep 2022 18:19:03 +0200 Subject: [PATCH 11/11] DOC: Take a stab at shortening the release note --- .../upcoming_changes/21995.compatibility.rst | 63 ++++++------------- 1 file changed, 19 insertions(+), 44 deletions(-) diff --git a/doc/release/upcoming_changes/21995.compatibility.rst b/doc/release/upcoming_changes/21995.compatibility.rst index cc00b50b38d5..d3ff9c1bd70d 100644 --- a/doc/release/upcoming_changes/21995.compatibility.rst +++ b/doc/release/upcoming_changes/21995.compatibility.rst @@ -1,46 +1,21 @@ Returned arrays respect uniqueness of dtype kwarg objects --------------------------------------------------------- -When ``dtype`` keyword argument is used with :py:func:`np.array()` -or :py:func:`asarray()`, the dtype of the returned array has -the same dtype *instance* as provided by the caller. - -If the provided dtype is compatible, but not identically the same -:py:class:`dtype` object, a new array handle is always created with -a reference to the user-provided dtype instance. -If the data type is compatible, and copying is not required, the new -`ndarray` uses the original array as its -`base `__. - -Before this change, for two equivalent but non-identical dtypes, - - assert isinstance(typeA, np.dtype) and isinstance(typeB, np.dtype) - assert typeA == typeB - assert typeA is not typeB - if my_array.dtype is typeA: - assert my_array is np.asarray(my_array, dtype=typeB) - assert np.asarray(my_array, dtype=typeB).dtype is not typeB - -This change allows programs to be able to reliably get the exact dtype -representation they request, regardless of possibly aliased types on the -calling platform. - -However, identity semantics for array results and their -dtype members may require minor updates to calling code. - -After this change, on a system where C ``int`` and C ``long`` are the same -precision, ``np.dtype('i') == np.dtype('l')``, -but ``np.dtype('i') is not np.dtype('l')``. - - assert int_array.dtype is np.dtype('i') - long_int_array = np.asarray(int_array, dtype='l') - assert long_int_array is not int_array - if np.dtype('i') == np.dtype('l'): - assert int_array is long_int_array.base - -New array views are created with each call to `asarray` with non-identical -dtype kwarg, but the underlying data is the same. - - assert int_array.dtype is np.dtype('i') - long_int_array = np.asarray(int_array, dtype='l') - assert long_int_array is not np.asarray(int_array, dtype='l') - assert long_int_array.base is np.asarray(int_array, dtype='l').base +When the ``dtype`` keyword argument is used with :py:func:`np.array()` +or :py:func:`asarray()`, the dtype of the returned array now +always exactly matches the dtype provided by the caller. + +In some cases this change means that a *view* rather than the +input array is returned. +The following is an example for this on 64bit Linux where ``long`` +and ``longlong`` are the same precision:: + + >>> arr = np.array([1, 2, 3], dtype="long") + >>> new_dtype = np.dtype("longlong") + >>> new = np.asarray(arr, dtype=new_dtype) + >>> new.dtype is dtype + True + >>> new is arr + False + +Before the change, the ``dtype`` did not match because ``new is arr`` +was true.