From 37c6a54393fed79bd8cb13a99c22fbbe47a2d658 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Tue, 20 Sep 2022 12:29:38 +0200 Subject: [PATCH] ENH: Allow creating structured void scalars by passing dtype Adds an optional `dtype=` kwarg to `np.void`. If given (and not None), this kwarg effectively turns it into: res = np.array(data, dtype=dtype)[()] The new dtype argument is keyword-only. --- numpy/__init__.pyi | 3 ++ numpy/core/_add_newdocs_scalars.py | 48 +++++++++++++++--- numpy/core/src/multiarray/scalartypes.c.src | 43 +++++++++++----- numpy/core/tests/test_scalar_ctors.py | 54 +++++++++++++++++++++ 4 files changed, 131 insertions(+), 17 deletions(-) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 992ed908a6eb..82516ef04dc0 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -3058,7 +3058,10 @@ class flexible(generic): ... # type: ignore # depending on whether or not it's used as an opaque bytes sequence # or a structure class void(flexible): + @overload def __init__(self, value: _IntLike_co | bytes, /) -> None: ... + @overload + def __init__(self, value: object, *, dtype=DTypeLike) -> None: ... @property def real(self: _ArraySelf) -> _ArraySelf: ... @property diff --git a/numpy/core/_add_newdocs_scalars.py b/numpy/core/_add_newdocs_scalars.py index 491052faeae8..dbc3e522571d 100644 --- a/numpy/core/_add_newdocs_scalars.py +++ b/numpy/core/_add_newdocs_scalars.py @@ -225,16 +225,52 @@ def add_newdoc_for_scalar_type(obj, fixed_aliases, doc): add_newdoc_for_scalar_type('void', [], r""" - Either an opaque sequence of bytes, or a structure. + np.void(length_or_data, *, dtype=None) + + Create a new structured or unstructured void scalar. + + Parameters + ---------- + length_or_data : int, array-like, bytes-like, object + One of multiple meanings (see notes). The length or + bytes data of an unstructured void. Or alternatively, + the data to be stored in the new scalar when `dtype` + is provided. + This can be an array-like, in which case an array may + be returned. + dtype : dtype, optional + If provided the dtype of the new scalar. This dtype must + be "void" dtype (i.e. a structured or unstructured + void). + + ..versionadded:: 1.24 + + Notes + ----- + For historical reasons and because void scalars can represent both + arbitrary byte data and structured dtypes, the void constructor + has three calling conventions: + + 1. ``np.void(5)`` creates a ``dtype="V5"`` scalar filled with + ``\0`` bytes. The 5 can be a Python or NumPy integer. + 2. ``np.void(b"bytes-like")`` creates a void scalar from + the byte string. The dtype is chosen based on its length. + 3. When a ``dtype=`` is passed the call is rougly the same as an + array creation. However a void scalar is returned when possible. + + Please see the examples which show all three different conventions. + Examples + -------- + >>> np.void(5) + void(b'\x00\x00\x00\x00\x00') >>> np.void(b'abcd') void(b'\x61\x62\x63\x64') + >>> np.void((5, 3.2, "eggs"), dtype="i,d,S5") + (5, 3.2, b'eggs') # looks like a tuple, but is `np.void` + >>> np.void(3, dtype=[('x', np.int8), ('y', np.int8)]) + (3, 3) # looks like a tuple, but is `np.void` - Structured `void` scalars can only be constructed via extraction from :ref:`structured_arrays`: - - >>> arr = np.array((1, 2), dtype=[('x', np.int8), ('y', np.int8)]) - >>> arr[()] - (1, 2) # looks like a tuple, but is `np.void` """) add_newdoc_for_scalar_type('datetime64', [], diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index e1f23600182f..851836cc7aaf 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -3170,28 +3170,33 @@ static PyObject * void_arrtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyObject *obj, *arr; - PyObject *new = NULL; + PyArray_Descr *descr = NULL; - static char *kwnames[] = {"", NULL}; /* positional-only */ - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:void", kwnames, &obj)) { + static char *kwnames[] = {"", "dtype", NULL}; /* positional-only */ + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|$O&:void", kwnames, + &obj, &PyArray_DescrConverter2, &descr)) { return NULL; } /* * For a VOID scalar first see if obj is an integer or long * and create new memory of that size (filled with 0) for the scalar */ - if (PyLong_Check(obj) || + if (descr == NULL && ( + PyLong_Check(obj) || PyArray_IsScalar(obj, Integer) || (PyArray_Check(obj) && PyArray_NDIM((PyArrayObject *)obj)==0 && - PyArray_ISINTEGER((PyArrayObject *)obj))) { - new = Py_TYPE(obj)->tp_as_number->nb_int(obj); - } - if (new && PyLong_Check(new)) { + PyArray_ISINTEGER((PyArrayObject *)obj)))) { + + PyObject *length = Py_TYPE(obj)->tp_as_number->nb_int(obj); + if (length == NULL) { + return NULL; + } + PyObject *ret; char *destptr; - npy_ulonglong memu = PyLong_AsUnsignedLongLong(new); - Py_DECREF(new); + npy_ulonglong memu = PyLong_AsUnsignedLongLong(length); + Py_DECREF(length); if (PyErr_Occurred() || (memu > NPY_MAX_INT)) { PyErr_Clear(); PyErr_Format(PyExc_OverflowError, @@ -3226,7 +3231,23 @@ void_arrtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return ret; } - arr = PyArray_FROM_OTF(obj, NPY_VOID, NPY_ARRAY_FORCECAST); + if (descr == NULL) { + /* Use the "size-less" void dtype to discover the size. */ + descr = PyArray_DescrNewFromType(NPY_VOID); + } + else if (descr->type_num != NPY_VOID || PyDataType_HASSUBARRAY(descr)) { + /* we reject subarrays, since subarray scalars do not exist. */ + PyErr_Format(PyExc_TypeError, + "void: descr must be a `void` dtype that is not " + "a subarray dtype (structured or unstructured). " + "Got '%.100R'.", descr); + return NULL; + } + else { + Py_INCREF(descr); + } + + arr = PyArray_FromAny(obj, descr, 0, 0, NPY_ARRAY_FORCECAST, NULL); return PyArray_Return((PyArrayObject *)arr); } diff --git a/numpy/core/tests/test_scalar_ctors.py b/numpy/core/tests/test_scalar_ctors.py index 7e933537dbf3..588841612b65 100644 --- a/numpy/core/tests/test_scalar_ctors.py +++ b/numpy/core/tests/test_scalar_ctors.py @@ -113,3 +113,57 @@ def test_reals(self, t1, t2): @pytest.mark.parametrize('t2', cfloat_types + [None]) def test_complex(self, t1, t2): return self._do_test(t1, t2) + + +@pytest.mark.parametrize("length", + [5, np.int8(5), np.array(5, dtype=np.uint16)]) +def test_void_via_length(length): + res = np.void(length) + assert type(res) is np.void + assert res.item() == b"\0" * 5 + assert res.dtype == "V5" + +@pytest.mark.parametrize("bytes_", + [b"spam", np.array(567.)]) +def test_void_from_byteslike(bytes_): + res = np.void(bytes_) + expected = bytes(bytes_) + assert type(res) is np.void + assert res.item() == expected + + # Passing dtype can extend it (this is how filling works) + res = np.void(bytes_, dtype="V100") + assert type(res) is np.void + assert res.item()[:len(expected)] == expected + assert res.item()[len(expected):] == b"\0" * (res.nbytes - len(expected)) + # As well as shorten: + res = np.void(bytes_, dtype="V4") + assert type(res) is np.void + assert res.item() == expected[:4] + +def test_void_arraylike_trumps_byteslike(): + # The memoryview is converted as an array-like of shape (18,) + # rather than a single bytes-like of that length. + m = memoryview(b"just one mintleaf?") + res = np.void(m) + assert type(res) is np.ndarray + assert res.dtype == "V1" + assert res.shape == (18,) + +@pytest.mark.parametrize("data", + [5, np.int8(5), np.array(5, dtype=np.uint16)]) +def test_void_from_integer_with_dtype(data): + # The "length" meaning is ignored, rather data is used: + res = np.void(data, dtype="i,i") + assert type(res) is np.void + assert res.dtype == "i,i" + assert res["f0"] == 5 and res["f1"] == 5 + +def test_void_bad_dtype(): + with pytest.raises(TypeError, + match="void: descr must be a `void.*int64"): + np.void(4, dtype="i8") + + dtype = np.dtype("4i") + with pytest.raises(TypeError): + np.void(4, dtype="i(4)")