From 519ce63b8ef9e3af0688936b3b822a8cbcc123f7 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Tue, 20 Sep 2022 12:29:38 +0200 Subject: [PATCH 1/2] ENH: Allow creating structured void scalars by passing dtype Adds an optional `dtype=` kwarg to `np.void`. If given (and not None), this kwarg effectively turns it into: res = np.array(data, dtype=dtype)[()] Thanks for Marten's review and Bas' help with the typing. Reviewed-by: Marten van Kerkwijk Co-authored-by: Bas van Beek <43369155+BvB93@users.noreply.github.com> --- .../upcoming_changes/22316.new_feature.rst | 4 ++ numpy/__init__.pyi | 6 +- numpy/core/_add_newdocs_scalars.py | 48 +++++++++++-- numpy/core/src/multiarray/scalartypes.c.src | 43 +++++++++--- numpy/core/tests/test_scalar_ctors.py | 70 +++++++++++++++++++ numpy/typing/tests/data/fail/scalars.pyi | 5 +- numpy/typing/tests/data/pass/scalars.py | 2 + 7 files changed, 158 insertions(+), 20 deletions(-) create mode 100644 doc/release/upcoming_changes/22316.new_feature.rst diff --git a/doc/release/upcoming_changes/22316.new_feature.rst b/doc/release/upcoming_changes/22316.new_feature.rst new file mode 100644 index 000000000000..f6655eaecfcd --- /dev/null +++ b/doc/release/upcoming_changes/22316.new_feature.rst @@ -0,0 +1,4 @@ +``np.void`` now has a ``dtype`` argument +---------------------------------------- +NumPy now allows constructing structured void scalars directly by +passing the ``dtype`` argument to ``np.void``. diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 992ed908a6eb..22a6309e2c65 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -40,6 +40,7 @@ from numpy._typing import ( # DTypes DTypeLike, _DTypeLike, + _DTypeLikeVoid, _SupportsDType, _VoidDTypeLike, @@ -3058,7 +3059,10 @@ class flexible(generic): ... # type: ignore # depending on whether or not it's used as an opaque bytes sequence # or a structure class void(flexible): - def __init__(self, value: _IntLike_co | bytes, /) -> None: ... + @overload + def __init__(self, value: _IntLike_co | bytes, /, dtype : None = ...) -> None: ... + @overload + def __init__(self, value: Any, /, dtype: _DTypeLikeVoid) -> None: ... @property def real(self: _ArraySelf) -> _ArraySelf: ... @property diff --git a/numpy/core/_add_newdocs_scalars.py b/numpy/core/_add_newdocs_scalars.py index 491052faeae8..24398d8b57f5 100644 --- a/numpy/core/_add_newdocs_scalars.py +++ b/numpy/core/_add_newdocs_scalars.py @@ -225,16 +225,52 @@ def add_newdoc_for_scalar_type(obj, fixed_aliases, doc): add_newdoc_for_scalar_type('void', [], r""" - Either an opaque sequence of bytes, or a structure. + np.void(length_or_data, /, dtype=None) + + Create a new structured or unstructured void scalar. + + Parameters + ---------- + length_or_data : int, array-like, bytes-like, object + One of multiple meanings (see notes). The length or + bytes data of an unstructured void. Or alternatively, + the data to be stored in the new scalar when `dtype` + is provided. + This can be an array-like, in which case an array may + be returned. + dtype : dtype, optional + If provided the dtype of the new scalar. This dtype must + be "void" dtype (i.e. a structured or unstructured + void). + + ..versionadded:: 1.24 + + Notes + ----- + For historical reasons and because void scalars can represent both + arbitrary byte data and structured dtypes, the void constructor + has three calling conventions: + + 1. ``np.void(5)`` creates a ``dtype="V5"`` scalar filled with + ``\0`` bytes. The 5 can be a Python or NumPy integer. + 2. ``np.void(b"bytes-like")`` creates a void scalar from + the byte string. The dtype is chosen based on its length. + 3. When a ``dtype=`` is passed the call is rougly the same as an + array creation. However a void scalar is returned when possible. + + Please see the examples which show all three different conventions. + Examples + -------- + >>> np.void(5) + void(b'\x00\x00\x00\x00\x00') >>> np.void(b'abcd') void(b'\x61\x62\x63\x64') + >>> np.void((5, 3.2, "eggs"), dtype="i,d,S5") + (5, 3.2, b'eggs') # looks like a tuple, but is `np.void` + >>> np.void(3, dtype=[('x', np.int8), ('y', np.int8)]) + (3, 3) # looks like a tuple, but is `np.void` - Structured `void` scalars can only be constructed via extraction from :ref:`structured_arrays`: - - >>> arr = np.array((1, 2), dtype=[('x', np.int8), ('y', np.int8)]) - >>> arr[()] - (1, 2) # looks like a tuple, but is `np.void` """) add_newdoc_for_scalar_type('datetime64', [], diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index e1f23600182f..47e4de4178c4 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -3170,28 +3170,33 @@ static PyObject * void_arrtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyObject *obj, *arr; - PyObject *new = NULL; + PyArray_Descr *descr = NULL; - static char *kwnames[] = {"", NULL}; /* positional-only */ - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:void", kwnames, &obj)) { + static char *kwnames[] = {"", "dtype", NULL}; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O&:void", kwnames, + &obj, &PyArray_DescrConverter2, &descr)) { return NULL; } /* * For a VOID scalar first see if obj is an integer or long * and create new memory of that size (filled with 0) for the scalar */ - if (PyLong_Check(obj) || + if (descr == NULL && ( + PyLong_Check(obj) || PyArray_IsScalar(obj, Integer) || (PyArray_Check(obj) && PyArray_NDIM((PyArrayObject *)obj)==0 && - PyArray_ISINTEGER((PyArrayObject *)obj))) { - new = Py_TYPE(obj)->tp_as_number->nb_int(obj); - } - if (new && PyLong_Check(new)) { + PyArray_ISINTEGER((PyArrayObject *)obj)))) { + + PyObject *length = Py_TYPE(obj)->tp_as_number->nb_int(obj); + if (length == NULL) { + return NULL; + } + PyObject *ret; char *destptr; - npy_ulonglong memu = PyLong_AsUnsignedLongLong(new); - Py_DECREF(new); + npy_ulonglong memu = PyLong_AsUnsignedLongLong(length); + Py_DECREF(length); if (PyErr_Occurred() || (memu > NPY_MAX_INT)) { PyErr_Clear(); PyErr_Format(PyExc_OverflowError, @@ -3226,7 +3231,23 @@ void_arrtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return ret; } - arr = PyArray_FROM_OTF(obj, NPY_VOID, NPY_ARRAY_FORCECAST); + if (descr == NULL) { + /* Use the "size-less" void dtype to discover the size. */ + descr = PyArray_DescrNewFromType(NPY_VOID); + } + else if (descr->type_num != NPY_VOID || PyDataType_HASSUBARRAY(descr)) { + /* we reject subarrays, since subarray scalars do not exist. */ + PyErr_Format(PyExc_TypeError, + "void: descr must be a `void` dtype that is not " + "a subarray dtype (structured or unstructured). " + "Got '%.100R'.", descr); + return NULL; + } + else { + Py_INCREF(descr); + } + + arr = PyArray_FromAny(obj, descr, 0, 0, NPY_ARRAY_FORCECAST, NULL); return PyArray_Return((PyArrayObject *)arr); } diff --git a/numpy/core/tests/test_scalar_ctors.py b/numpy/core/tests/test_scalar_ctors.py index 7e933537dbf3..dccfb08350e0 100644 --- a/numpy/core/tests/test_scalar_ctors.py +++ b/numpy/core/tests/test_scalar_ctors.py @@ -113,3 +113,73 @@ def test_reals(self, t1, t2): @pytest.mark.parametrize('t2', cfloat_types + [None]) def test_complex(self, t1, t2): return self._do_test(t1, t2) + + +@pytest.mark.parametrize("length", + [5, np.int8(5), np.array(5, dtype=np.uint16)]) +def test_void_via_length(length): + res = np.void(length) + assert type(res) is np.void + assert res.item() == b"\0" * 5 + assert res.dtype == "V5" + +@pytest.mark.parametrize("bytes_", + [b"spam", np.array(567.)]) +def test_void_from_byteslike(bytes_): + res = np.void(bytes_) + expected = bytes(bytes_) + assert type(res) is np.void + assert res.item() == expected + + # Passing dtype can extend it (this is how filling works) + res = np.void(bytes_, dtype="V100") + assert type(res) is np.void + assert res.item()[:len(expected)] == expected + assert res.item()[len(expected):] == b"\0" * (res.nbytes - len(expected)) + # As well as shorten: + res = np.void(bytes_, dtype="V4") + assert type(res) is np.void + assert res.item() == expected[:4] + +def test_void_arraylike_trumps_byteslike(): + # The memoryview is converted as an array-like of shape (18,) + # rather than a single bytes-like of that length. + m = memoryview(b"just one mintleaf?") + res = np.void(m) + assert type(res) is np.ndarray + assert res.dtype == "V1" + assert res.shape == (18,) + +def test_void_dtype_arg(): + # Basic test for the dtype argument (positional and keyword) + res = np.void((1, 2), dtype="i,i") + assert res.item() == (1, 2) + res = np.void((2, 3), "i,i") + assert res.item() == (2, 3) + +@pytest.mark.parametrize("data", + [5, np.int8(5), np.array(5, dtype=np.uint16)]) +def test_void_from_integer_with_dtype(data): + # The "length" meaning is ignored, rather data is used: + res = np.void(data, dtype="i,i") + assert type(res) is np.void + assert res.dtype == "i,i" + assert res["f0"] == 5 and res["f1"] == 5 + +def test_void_from_structure(): + dtype = np.dtype([('s', [('f', 'f8'), ('u', 'U1')]), ('i', 'i2')]) + data = np.array(((1., 'a'), 2), dtype=dtype) + res = np.void(data[()], dtype=dtype) + assert type(res) is np.void + assert res.dtype == dtype + assert res == data[()] + +def test_void_bad_dtype(): + with pytest.raises(TypeError, + match="void: descr must be a `void.*int64"): + np.void(4, dtype="i8") + + # Subarray dtype (with shape `(4,)` is rejected): + with pytest.raises(TypeError, + match=r"void: descr must be a `void.*\(4,\)"): + np.void(4, dtype="4i") diff --git a/numpy/typing/tests/data/fail/scalars.pyi b/numpy/typing/tests/data/fail/scalars.pyi index 96447053888e..74a86bbb38b6 100644 --- a/numpy/typing/tests/data/fail/scalars.pyi +++ b/numpy/typing/tests/data/fail/scalars.pyi @@ -47,7 +47,8 @@ np.uint16(A()) # E: incompatible type np.uint32(A()) # E: incompatible type np.uint64(A()) # E: incompatible type -np.void("test") # E: incompatible type +np.void("test") # E: No overload variant +np.void("test", dtype=None) # E: No overload variant np.generic(1) # E: Cannot instantiate abstract class np.number(1) # E: Cannot instantiate abstract class @@ -62,7 +63,7 @@ np.uint64(value=0) # E: Unexpected keyword argument np.complex128(value=0.0j) # E: Unexpected keyword argument np.str_(value='bob') # E: No overload variant np.bytes_(value=b'test') # E: No overload variant -np.void(value=b'test') # E: Unexpected keyword argument +np.void(value=b'test') # E: No overload variant np.bool_(value=True) # E: Unexpected keyword argument np.datetime64(value="2019") # E: No overload variant np.timedelta64(value=0) # E: Unexpected keyword argument diff --git a/numpy/typing/tests/data/pass/scalars.py b/numpy/typing/tests/data/pass/scalars.py index 684d41fad61f..aef9c3584246 100644 --- a/numpy/typing/tests/data/pass/scalars.py +++ b/numpy/typing/tests/data/pass/scalars.py @@ -113,6 +113,8 @@ def __float__(self) -> float: np.void(np.bool_(True)) np.void(b"test") np.void(np.bytes_("test")) +np.void(object(), [("a", "O"), ("b", "O")]) +np.void(object(), dtype=[("a", "O"), ("b", "O")]) # Protocols i8 = np.int64() From 7404cb68f20d8299a0935cb4242f37c41ab58aeb Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Mon, 21 Nov 2022 17:01:53 +0100 Subject: [PATCH 2/2] DOC: Update np.void docs based on Matti's comments Co-authored-by: Matti Picus --- numpy/core/_add_newdocs_scalars.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/numpy/core/_add_newdocs_scalars.py b/numpy/core/_add_newdocs_scalars.py index 24398d8b57f5..15d37522ad68 100644 --- a/numpy/core/_add_newdocs_scalars.py +++ b/numpy/core/_add_newdocs_scalars.py @@ -240,8 +240,8 @@ def add_newdoc_for_scalar_type(obj, fixed_aliases, doc): be returned. dtype : dtype, optional If provided the dtype of the new scalar. This dtype must - be "void" dtype (i.e. a structured or unstructured - void). + be "void" dtype (i.e. a structured or unstructured void, + see also :ref:`defining-structured-types`). ..versionadded:: 1.24 @@ -251,12 +251,12 @@ def add_newdoc_for_scalar_type(obj, fixed_aliases, doc): arbitrary byte data and structured dtypes, the void constructor has three calling conventions: - 1. ``np.void(5)`` creates a ``dtype="V5"`` scalar filled with + 1. ``np.void(5)`` creates a ``dtype="V5"`` scalar filled with five ``\0`` bytes. The 5 can be a Python or NumPy integer. - 2. ``np.void(b"bytes-like")`` creates a void scalar from - the byte string. The dtype is chosen based on its length. + 2. ``np.void(b"bytes-like")`` creates a void scalar from the byte string. + The dtype itemsize will match the byte string length, here ``"V10"``. 3. When a ``dtype=`` is passed the call is rougly the same as an - array creation. However a void scalar is returned when possible. + array creation. However, a void scalar rather than array is returned. Please see the examples which show all three different conventions.