Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Allow creating structured void scalars by passing dtype #22316

Merged
merged 2 commits into from Nov 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/release/upcoming_changes/22316.new_feature.rst
@@ -0,0 +1,4 @@
``np.void`` now has a ``dtype`` argument
----------------------------------------
NumPy now allows constructing structured void scalars directly by
passing the ``dtype`` argument to ``np.void``.
6 changes: 5 additions & 1 deletion numpy/__init__.pyi
Expand Up @@ -40,6 +40,7 @@ from numpy._typing import (
# DTypes
DTypeLike,
_DTypeLike,
_DTypeLikeVoid,
_SupportsDType,
_VoidDTypeLike,

Expand Down Expand Up @@ -3058,7 +3059,10 @@ class flexible(generic): ... # type: ignore
# depending on whether or not it's used as an opaque bytes sequence
# or a structure
class void(flexible):
def __init__(self, value: _IntLike_co | bytes, /) -> None: ...
@overload
def __init__(self, value: _IntLike_co | bytes, /, dtype : None = ...) -> None: ...
@overload
def __init__(self, value: Any, /, dtype: _DTypeLikeVoid) -> None: ...
@property
def real(self: _ArraySelf) -> _ArraySelf: ...
@property
Expand Down
48 changes: 42 additions & 6 deletions numpy/core/_add_newdocs_scalars.py
Expand Up @@ -225,16 +225,52 @@ def add_newdoc_for_scalar_type(obj, fixed_aliases, doc):

add_newdoc_for_scalar_type('void', [],
r"""
Either an opaque sequence of bytes, or a structure.
np.void(length_or_data, /, dtype=None)

Create a new structured or unstructured void scalar.

Parameters
----------
length_or_data : int, array-like, bytes-like, object
One of multiple meanings (see notes). The length or
bytes data of an unstructured void. Or alternatively,
the data to be stored in the new scalar when `dtype`
is provided.
This can be an array-like, in which case an array may
be returned.
dtype : dtype, optional
If provided, the dtype of the new scalar. This dtype must
be a "void" dtype (i.e. a structured or unstructured void,
see also :ref:`defining-structured-types`).

.. versionadded:: 1.24

Notes
-----
For historical reasons and because void scalars can represent both
arbitrary byte data and structured dtypes, the void constructor
has three calling conventions:

1. ``np.void(5)`` creates a ``dtype="V5"`` scalar filled with five
``\0`` bytes. The 5 can be a Python or NumPy integer.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How many \0 bytes?

2. ``np.void(b"bytes-like")`` creates a void scalar from the byte string.
The dtype itemsize will match the byte string length, here ``"V10"``.
3. When a ``dtype=`` is passed the call is roughly the same as an
array creation. However, a void scalar rather than array is returned.

Please see the examples which show all three different conventions.

Examples
--------
>>> np.void(5)
void(b'\x00\x00\x00\x00\x00')
>>> np.void(b'abcd')
void(b'\x61\x62\x63\x64')
>>> np.void((5, 3.2, "eggs"), dtype="i,d,S5")
(5, 3.2, b'eggs') # looks like a tuple, but is `np.void`
>>> np.void(3, dtype=[('x', np.int8), ('y', np.int8)])
(3, 3) # looks like a tuple, but is `np.void`
seberg marked this conversation as resolved.
Show resolved Hide resolved

Structured `void` scalars can only be constructed via extraction from :ref:`structured_arrays`:

>>> arr = np.array((1, 2), dtype=[('x', np.int8), ('y', np.int8)])
>>> arr[()]
(1, 2) # looks like a tuple, but is `np.void`
""")

add_newdoc_for_scalar_type('datetime64', [],
Expand Down
43 changes: 32 additions & 11 deletions numpy/core/src/multiarray/scalartypes.c.src
Expand Up @@ -3170,28 +3170,33 @@ static PyObject *
void_arrtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
PyObject *obj, *arr;
PyObject *new = NULL;
PyArray_Descr *descr = NULL;

static char *kwnames[] = {"", NULL}; /* positional-only */
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:void", kwnames, &obj)) {
static char *kwnames[] = {"", "dtype", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O&:void", kwnames,
&obj, &PyArray_DescrConverter2, &descr)) {
return NULL;
}
/*
* For a VOID scalar first see if obj is an integer or long
* and create new memory of that size (filled with 0) for the scalar
*/
if (PyLong_Check(obj) ||
if (descr == NULL && (
PyLong_Check(obj) ||
PyArray_IsScalar(obj, Integer) ||
(PyArray_Check(obj) &&
PyArray_NDIM((PyArrayObject *)obj)==0 &&
PyArray_ISINTEGER((PyArrayObject *)obj))) {
new = Py_TYPE(obj)->tp_as_number->nb_int(obj);
}
if (new && PyLong_Check(new)) {
PyArray_ISINTEGER((PyArrayObject *)obj)))) {

PyObject *length = Py_TYPE(obj)->tp_as_number->nb_int(obj);
if (length == NULL) {
return NULL;
}

PyObject *ret;
char *destptr;
npy_ulonglong memu = PyLong_AsUnsignedLongLong(new);
Py_DECREF(new);
npy_ulonglong memu = PyLong_AsUnsignedLongLong(length);
Py_DECREF(length);
if (PyErr_Occurred() || (memu > NPY_MAX_INT)) {
PyErr_Clear();
PyErr_Format(PyExc_OverflowError,
Expand Down Expand Up @@ -3226,7 +3231,23 @@ void_arrtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return ret;
}

arr = PyArray_FROM_OTF(obj, NPY_VOID, NPY_ARRAY_FORCECAST);
if (descr == NULL) {
/* Use the "size-less" void dtype to discover the size. */
descr = PyArray_DescrNewFromType(NPY_VOID);
}
else if (descr->type_num != NPY_VOID || PyDataType_HASSUBARRAY(descr)) {
/* we reject subarrays, since subarray scalars do not exist. */
PyErr_Format(PyExc_TypeError,
"void: descr must be a `void` dtype that is not "
"a subarray dtype (structured or unstructured). "
"Got '%.100R'.", descr);
return NULL;
}
else {
Py_INCREF(descr);
}

arr = PyArray_FromAny(obj, descr, 0, 0, NPY_ARRAY_FORCECAST, NULL);
return PyArray_Return((PyArrayObject *)arr);
}

Expand Down
70 changes: 70 additions & 0 deletions numpy/core/tests/test_scalar_ctors.py
Expand Up @@ -113,3 +113,73 @@ def test_reals(self, t1, t2):
@pytest.mark.parametrize('t2', cfloat_types + [None])
def test_complex(self, t1, t2):
return self._do_test(t1, t2)


@pytest.mark.parametrize("length",
[5, np.int8(5), np.array(5, dtype=np.uint16)])
def test_void_via_length(length):
res = np.void(length)
assert type(res) is np.void
assert res.item() == b"\0" * 5
assert res.dtype == "V5"

@pytest.mark.parametrize("bytes_",
[b"spam", np.array(567.)])
def test_void_from_byteslike(bytes_):
res = np.void(bytes_)
expected = bytes(bytes_)
assert type(res) is np.void
assert res.item() == expected

# Passing dtype can extend it (this is how filling works)
res = np.void(bytes_, dtype="V100")
assert type(res) is np.void
assert res.item()[:len(expected)] == expected
assert res.item()[len(expected):] == b"\0" * (res.nbytes - len(expected))
# As well as shorten:
res = np.void(bytes_, dtype="V4")
assert type(res) is np.void
assert res.item() == expected[:4]

def test_void_arraylike_trumps_byteslike():
# The memoryview is converted as an array-like of shape (18,)
# rather than a single bytes-like of that length.
m = memoryview(b"just one mintleaf?")
res = np.void(m)
assert type(res) is np.ndarray
assert res.dtype == "V1"
assert res.shape == (18,)

def test_void_dtype_arg():
# Basic test for the dtype argument (positional and keyword)
res = np.void((1, 2), dtype="i,i")
assert res.item() == (1, 2)
res = np.void((2, 3), "i,i")
assert res.item() == (2, 3)

@pytest.mark.parametrize("data",
mhvk marked this conversation as resolved.
Show resolved Hide resolved
[5, np.int8(5), np.array(5, dtype=np.uint16)])
def test_void_from_integer_with_dtype(data):
# The "length" meaning is ignored, rather data is used:
res = np.void(data, dtype="i,i")
assert type(res) is np.void
assert res.dtype == "i,i"
assert res["f0"] == 5 and res["f1"] == 5

def test_void_from_structure():
dtype = np.dtype([('s', [('f', 'f8'), ('u', 'U1')]), ('i', 'i2')])
data = np.array(((1., 'a'), 2), dtype=dtype)
res = np.void(data[()], dtype=dtype)
assert type(res) is np.void
assert res.dtype == dtype
assert res == data[()]

def test_void_bad_dtype():
with pytest.raises(TypeError,
match="void: descr must be a `void.*int64"):
np.void(4, dtype="i8")

# Subarray dtype (with shape `(4,)`) is rejected:
with pytest.raises(TypeError,
match=r"void: descr must be a `void.*\(4,\)"):
np.void(4, dtype="4i")
5 changes: 3 additions & 2 deletions numpy/typing/tests/data/fail/scalars.pyi
Expand Up @@ -47,7 +47,8 @@ np.uint16(A()) # E: incompatible type
np.uint32(A()) # E: incompatible type
np.uint64(A()) # E: incompatible type

np.void("test") # E: incompatible type
np.void("test") # E: No overload variant
np.void("test", dtype=None) # E: No overload variant

np.generic(1) # E: Cannot instantiate abstract class
np.number(1) # E: Cannot instantiate abstract class
Expand All @@ -62,7 +63,7 @@ np.uint64(value=0) # E: Unexpected keyword argument
np.complex128(value=0.0j) # E: Unexpected keyword argument
np.str_(value='bob') # E: No overload variant
np.bytes_(value=b'test') # E: No overload variant
np.void(value=b'test') # E: Unexpected keyword argument
np.void(value=b'test') # E: No overload variant
seberg marked this conversation as resolved.
Show resolved Hide resolved
np.bool_(value=True) # E: Unexpected keyword argument
np.datetime64(value="2019") # E: No overload variant
np.timedelta64(value=0) # E: Unexpected keyword argument
Expand Down
2 changes: 2 additions & 0 deletions numpy/typing/tests/data/pass/scalars.py
Expand Up @@ -113,6 +113,8 @@ def __float__(self) -> float:
np.void(np.bool_(True))
np.void(b"test")
np.void(np.bytes_("test"))
np.void(object(), [("a", "O"), ("b", "O")])
np.void(object(), dtype=[("a", "O"), ("b", "O")])

# Protocols
i8 = np.int64()
Expand Down