From fd6fc0e78afca9f89ebf07790ea5d447d5ede53c Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Tue, 5 Jul 2022 09:35:34 -0700 Subject: [PATCH 1/2] BUG: Fix subarray to object cast ownership details --- numpy/core/src/multiarray/arraytypes.c.src | 11 ++++++++++- numpy/core/src/multiarray/dtype_transfer.c | 1 + numpy/core/tests/test_dtype.py | 23 ++++++++++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src index 7cd80ba9a5af..56ac83cbbcd9 100644 --- a/numpy/core/src/multiarray/arraytypes.c.src +++ b/numpy/core/src/multiarray/arraytypes.c.src @@ -804,7 +804,7 @@ VOID_getitem(void *input, void *vap) * could have special handling. */ PyObject *base = (PyObject *)ap; - while (Py_TYPE(base) == NULL) { + while (base != NULL && Py_TYPE(base) == NULL) { base = PyArray_BASE((PyArrayObject *)base); } ret = (PyArrayObject *)PyArray_NewFromDescrAndBase( @@ -812,6 +812,15 @@ VOID_getitem(void *input, void *vap) shape.len, shape.ptr, NULL, ip, PyArray_FLAGS(ap) & ~NPY_ARRAY_F_CONTIGUOUS, NULL, base); + if (base == NULL) { + /* + * Need to create a copy, or we may point to wrong data. This path + * is taken when no "valid" array is passed. This happens for + * casts. 
+ */ + PyObject *copy = PyArray_FromArray(ret, NULL, NPY_ARRAY_ENSURECOPY); + Py_SETREF(ret, (PyArrayObject *)copy); + } npy_free_cache_dim_obj(shape); return (PyObject *)ret; } diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c index f8458d2d7b42..c588494e7fca 100644 --- a/numpy/core/src/multiarray/dtype_transfer.c +++ b/numpy/core/src/multiarray/dtype_transfer.c @@ -258,6 +258,7 @@ any_to_object_get_loop( data->base.free = &_any_to_object_auxdata_free; data->base.clone = &_any_to_object_auxdata_clone; data->arr_fields.base = NULL; + Py_SET_TYPE(&data->arr_fields, NULL); data->arr_fields.descr = context->descriptors[0]; Py_INCREF(data->arr_fields.descr); data->arr_fields.flags = aligned ? NPY_ARRAY_ALIGNED : 0; diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py index f95f95893e48..9b471a5bfa95 100644 --- a/numpy/core/tests/test_dtype.py +++ b/numpy/core/tests/test_dtype.py @@ -650,6 +650,29 @@ def test_aligned_empty(self): dt = np.dtype({"names": [], "formats": [], "itemsize": 0}, align=True) assert dt == np.dtype([]) + def test_subarray_base_item(self): + arr = np.ones(3, dtype=[("f", "i", 3)]) + # Extracting the field "absorbs" the subarray into a view: + assert arr["f"].base is arr + # Extract the structured item, and then check the tuple component: + item = arr.item(0) + assert type(item) is tuple and len(item) == 1 + assert item[0].base is arr + + def test_subarray_cast_copies(self): + # Older versions of NumPy did NOT copy, but they got the ownership + # wrong (not actually knowing the correct base!). Versions since 1.21 + # (I think) crashed fairly reliably. This defines the correct behavior + # as a copy. 
Keeping the ownership would be possible (but harder) + arr = np.ones(3, dtype=[("f", "i", 3)]) + cast = arr.astype(object) + for fields in cast: + assert type(fields) == tuple and len(fields) == 1 + subarr = fields[0] + assert subarr.base is None + assert subarr.flags.owndata + + def iter_struct_object_dtypes(): """ Iterates over a few complex dtypes and object pattern which From f1f9834d84a3508aa10cc01157011b5349ec3903 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Tue, 12 Jul 2022 13:27:53 -0700 Subject: [PATCH 2/2] DOC: Add release note mentioning change to subarray->object cast --- doc/release/upcoming_changes/21925.compatibility.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 doc/release/upcoming_changes/21925.compatibility.rst diff --git a/doc/release/upcoming_changes/21925.compatibility.rst b/doc/release/upcoming_changes/21925.compatibility.rst new file mode 100644 index 000000000000..af9c47127ae6 --- /dev/null +++ b/doc/release/upcoming_changes/21925.compatibility.rst @@ -0,0 +1,12 @@ +Subarray to object cast now copies +---------------------------------- +Casting a dtype that includes a subarray to an object will now ensure +a copy of the subarray. Previously an unsafe view was returned:: + + arr = np.ones(3, dtype=[("f", "i", 3)]) + subarray_fields = arr.astype(object)[0] + subarray = subarray_fields[0] # "f" field + + np.may_share_memory(subarray, arr) + +Is now always false, while previously it was true for the specific cast.