diff --git a/doc/release/upcoming_changes/22316.new_feature.rst b/doc/release/upcoming_changes/22316.new_feature.rst new file mode 100644 index 000000000000..f6655eaecfcd --- /dev/null +++ b/doc/release/upcoming_changes/22316.new_feature.rst @@ -0,0 +1,4 @@ +``np.void`` now has a ``dtype`` argument +---------------------------------------- +NumPy now allows constructing structured void scalars directly by +passing the ``dtype`` argument to ``np.void``. diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 0a3bbcdb36d7..8019976d0da9 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -42,6 +42,7 @@ from numpy._typing import ( # DTypes DTypeLike, _DTypeLike, + _DTypeLikeVoid, _SupportsDType, _VoidDTypeLike, @@ -3065,7 +3066,10 @@ class flexible(generic): ... # type: ignore # depending on whether or not it's used as an opaque bytes sequence # or a structure class void(flexible): - def __init__(self, value: _IntLike_co | bytes, /) -> None: ... + @overload + def __init__(self, value: _IntLike_co | bytes, /, dtype : None = ...) -> None: ... + @overload + def __init__(self, value: Any, /, dtype: _DTypeLikeVoid) -> None: ... @property def real(self: _ArraySelf) -> _ArraySelf: ... @property diff --git a/numpy/core/_add_newdocs_scalars.py b/numpy/core/_add_newdocs_scalars.py index 491052faeae8..15d37522ad68 100644 --- a/numpy/core/_add_newdocs_scalars.py +++ b/numpy/core/_add_newdocs_scalars.py @@ -225,16 +225,52 @@ def add_newdoc_for_scalar_type(obj, fixed_aliases, doc): add_newdoc_for_scalar_type('void', [], r""" - Either an opaque sequence of bytes, or a structure. + np.void(length_or_data, /, dtype=None) + + Create a new structured or unstructured void scalar. + + Parameters + ---------- + length_or_data : int, array-like, bytes-like, object + One of multiple meanings (see notes). The length or + bytes data of an unstructured void. Or alternatively, + the data to be stored in the new scalar when `dtype` + is provided. 
+ This can be an array-like, in which case an array may + be returned. + dtype : dtype, optional + If provided, the dtype of the new scalar. This dtype must + be a "void" dtype (i.e. a structured or unstructured void, + see also :ref:`defining-structured-types`). + + .. versionadded:: 1.24 + + Notes + ----- + For historical reasons and because void scalars can represent both + arbitrary byte data and structured dtypes, the void constructor + has three calling conventions: + + 1. ``np.void(5)`` creates a ``dtype="V5"`` scalar filled with five + ``\0`` bytes. The 5 can be a Python or NumPy integer. + 2. ``np.void(b"bytes-like")`` creates a void scalar from the byte string. + The dtype itemsize will match the byte string length, here ``"V10"``. + 3. When a ``dtype=`` is passed the call is roughly the same as an + array creation. However, a void scalar rather than array is returned. + + Please see the examples which show all three different conventions. + Examples + -------- + >>> np.void(5) + void(b'\x00\x00\x00\x00\x00') >>> np.void(b'abcd') void(b'\x61\x62\x63\x64') + >>> np.void((5, 3.2, "eggs"), dtype="i,d,S5") + (5, 3.2, b'eggs') # looks like a tuple, but is `np.void` + >>> np.void(3, dtype=[('x', np.int8), ('y', np.int8)]) + (3, 3) # looks like a tuple, but is `np.void` - Structured `void` scalars can only be constructed via extraction from :ref:`structured_arrays`: - - >>> arr = np.array((1, 2), dtype=[('x', np.int8), ('y', np.int8)]) - >>> arr[()] - (1, 2) # looks like a tuple, but is `np.void` """) add_newdoc_for_scalar_type('datetime64', [], diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index 55559ef021fd..32dc60e06329 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -3175,28 +3175,33 @@ static PyObject * void_arrtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyObject *obj, *arr; - PyObject *new = NULL; + PyArray_Descr *descr = 
NULL; - static char *kwnames[] = {"", NULL}; /* positional-only */ - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:void", kwnames, &obj)) { + static char *kwnames[] = {"", "dtype", NULL}; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O&:void", kwnames, + &obj, &PyArray_DescrConverter2, &descr)) { return NULL; } /* * For a VOID scalar first see if obj is an integer or long * and create new memory of that size (filled with 0) for the scalar */ - if (PyLong_Check(obj) || + if (descr == NULL && ( + PyLong_Check(obj) || PyArray_IsScalar(obj, Integer) || (PyArray_Check(obj) && PyArray_NDIM((PyArrayObject *)obj)==0 && - PyArray_ISINTEGER((PyArrayObject *)obj))) { - new = Py_TYPE(obj)->tp_as_number->nb_int(obj); - } - if (new && PyLong_Check(new)) { + PyArray_ISINTEGER((PyArrayObject *)obj)))) { + + PyObject *length = Py_TYPE(obj)->tp_as_number->nb_int(obj); + if (length == NULL) { + return NULL; + } + PyObject *ret; char *destptr; - npy_ulonglong memu = PyLong_AsUnsignedLongLong(new); - Py_DECREF(new); + npy_ulonglong memu = PyLong_AsUnsignedLongLong(length); + Py_DECREF(length); if (PyErr_Occurred() || (memu > NPY_MAX_INT)) { PyErr_Clear(); PyErr_Format(PyExc_OverflowError, @@ -3231,7 +3236,26 @@ void_arrtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return ret; } - arr = PyArray_FROM_OTF(obj, NPY_VOID, NPY_ARRAY_FORCECAST); + if (descr == NULL) { + /* Use the "size-less" void dtype to discover the size. */ + descr = PyArray_DescrNewFromType(NPY_VOID); + if (descr == NULL) { + return NULL; + } + } + else if (descr->type_num != NPY_VOID || PyDataType_HASSUBARRAY(descr)) { + /* we reject subarrays, since subarray scalars do not exist. */ + PyErr_Format(PyExc_TypeError, + "void: descr must be a `void` dtype that is not " + "a subarray dtype (structured or unstructured). 
" + "Got '%.100R'.", descr); + /* The dtype converter returned a new reference; release it. */ + Py_DECREF(descr); + return NULL; + } + + /* We own `descr` here and PyArray_FromAny steals that reference. */ + arr = PyArray_FromAny(obj, descr, 0, 0, NPY_ARRAY_FORCECAST, NULL); return PyArray_Return((PyArrayObject *)arr); } diff --git a/numpy/core/tests/test_scalar_ctors.py b/numpy/core/tests/test_scalar_ctors.py index 17aca3fb8254..da976d64fd7c 100644 --- a/numpy/core/tests/test_scalar_ctors.py +++ b/numpy/core/tests/test_scalar_ctors.py @@ -114,3 +114,73 @@ def test_reals(self, t1, t2): @pytest.mark.parametrize('t2', cfloat_types + [None]) def test_complex(self, t1, t2): return self._do_test(t1, t2) + + +@pytest.mark.parametrize("length", + [5, np.int8(5), np.array(5, dtype=np.uint16)]) +def test_void_via_length(length): + res = np.void(length) + assert type(res) is np.void + assert res.item() == b"\0" * 5 + assert res.dtype == "V5" + +@pytest.mark.parametrize("bytes_", + [b"spam", np.array(567.)]) +def test_void_from_byteslike(bytes_): + res = np.void(bytes_) + expected = bytes(bytes_) + assert type(res) is np.void + assert res.item() == expected + + # Passing dtype can extend it (this is how filling works) + res = np.void(bytes_, dtype="V100") + assert type(res) is np.void + assert res.item()[:len(expected)] == expected + assert res.item()[len(expected):] == b"\0" * (res.nbytes - len(expected)) + # As well as shorten: + res = np.void(bytes_, dtype="V4") + assert type(res) is np.void + assert res.item() == expected[:4] + +def test_void_arraylike_trumps_byteslike(): + # The memoryview is converted as an array-like of shape (18,) + # rather than a single bytes-like of that length. 
+ m = memoryview(b"just one mintleaf?") + res = np.void(m) + assert type(res) is np.ndarray + assert res.dtype == "V1" + assert res.shape == (18,) + +def test_void_dtype_arg(): + # Basic test for the dtype argument (positional and keyword) + res = np.void((1, 2), dtype="i,i") + assert res.item() == (1, 2) + res = np.void((2, 3), "i,i") + assert res.item() == (2, 3) + +@pytest.mark.parametrize("data", + [5, np.int8(5), np.array(5, dtype=np.uint16)]) +def test_void_from_integer_with_dtype(data): + # The "length" meaning is ignored, rather data is used: + res = np.void(data, dtype="i,i") + assert type(res) is np.void + assert res.dtype == "i,i" + assert res["f0"] == 5 and res["f1"] == 5 + +def test_void_from_structure(): + dtype = np.dtype([('s', [('f', 'f8'), ('u', 'U1')]), ('i', 'i2')]) + data = np.array(((1., 'a'), 2), dtype=dtype) + res = np.void(data[()], dtype=dtype) + assert type(res) is np.void + assert res.dtype == dtype + assert res == data[()] + +def test_void_bad_dtype(): + with pytest.raises(TypeError, + match="void: descr must be a `void.*int64"): + np.void(4, dtype="i8") + + # Subarray dtype (with shape `(4,)`) is rejected: + with pytest.raises(TypeError, + match=r"void: descr must be a `void.*\(4,\)"): + np.void(4, dtype="4i") diff --git a/numpy/typing/tests/data/fail/scalars.pyi b/numpy/typing/tests/data/fail/scalars.pyi index c24f9e479eeb..2a6c2c7addfc 100644 --- a/numpy/typing/tests/data/fail/scalars.pyi +++ b/numpy/typing/tests/data/fail/scalars.pyi @@ -47,7 +47,8 @@ np.uint16(A()) # E: incompatible type np.uint32(A()) # E: incompatible type np.uint64(A()) # E: incompatible type -np.void("test") # E: incompatible type +np.void("test") # E: No overload variant +np.void("test", dtype=None) # E: No overload variant np.generic(1) # E: Cannot instantiate abstract class np.number(1) # E: Cannot instantiate abstract class @@ -62,7 +63,7 @@ np.uint64(value=0) # E: Unexpected keyword argument np.complex128(value=0.0j) # E: Unexpected keyword argument 
np.str_(value='bob') # E: No overload variant np.bytes_(value=b'test') # E: No overload variant -np.void(value=b'test') # E: Unexpected keyword argument +np.void(value=b'test') # E: No overload variant np.bool_(value=True) # E: Unexpected keyword argument np.datetime64(value="2019") # E: No overload variant np.timedelta64(value=0) # E: Unexpected keyword argument diff --git a/numpy/typing/tests/data/pass/scalars.py b/numpy/typing/tests/data/pass/scalars.py index 124681bcb32a..a5c6f96e9fa2 100644 --- a/numpy/typing/tests/data/pass/scalars.py +++ b/numpy/typing/tests/data/pass/scalars.py @@ -113,6 +113,8 @@ def __float__(self) -> float: np.void(np.bool_(True)) np.void(b"test") np.void(np.bytes_("test")) +np.void(object(), [("a", "O"), ("b", "O")]) +np.void(object(), dtype=[("a", "O"), ("b", "O")]) # Protocols i8 = np.int64()