Skip to content

Commit

Permalink
ENH: Allow creating structured void scalars by passing dtype
Browse files Browse the repository at this point in the history
Adds an optional `dtype=` kwarg to `np.void`.  If given (and not None),
this kwarg effectively turns it into:

     res = np.array(data, dtype=dtype)[()]

The new dtype argument is keyword-only.
  • Loading branch information
seberg committed Sep 20, 2022
1 parent 2524a53 commit 37c6a54
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 17 deletions.
3 changes: 3 additions & 0 deletions numpy/__init__.pyi
Expand Up @@ -3058,7 +3058,10 @@ class flexible(generic): ... # type: ignore
# depending on whether or not it's used as an opaque bytes sequence
# or a structure
class void(flexible):
# Unstructured void: `value` is either a byte count (integer-like) or the
# raw bytes to store; no dtype is passed in this form.
@overload
def __init__(self, value: _IntLike_co | bytes, /) -> None: ...
# With the keyword-only `dtype`, `value` is arbitrary data to be stored in
# the new scalar.  `dtype` must be *annotated* as `DTypeLike`; writing
# `dtype=DTypeLike` would make the type alias the default value and leave
# the parameter untyped.
@overload
def __init__(self, value: object, *, dtype: DTypeLike = ...) -> None: ...
@property
def real(self: _ArraySelf) -> _ArraySelf: ...
@property
Expand Down
48 changes: 42 additions & 6 deletions numpy/core/_add_newdocs_scalars.py
Expand Up @@ -225,16 +225,52 @@ def add_newdoc_for_scalar_type(obj, fixed_aliases, doc):

add_newdoc_for_scalar_type('void', [],
r"""
Either an opaque sequence of bytes, or a structure.
np.void(length_or_data, *, dtype=None)
Create a new structured or unstructured void scalar.
Parameters
----------
length_or_data : int, array-like, bytes-like, object
One of multiple meanings (see notes). The length or
bytes data of an unstructured void. Or alternatively,
the data to be stored in the new scalar when `dtype`
is provided.
This can be an array-like, in which case an array may
be returned.
dtype : dtype, optional
If provided the dtype of the new scalar. This dtype must
be "void" dtype (i.e. a structured or unstructured
void).
.. versionadded:: 1.24
Notes
-----
For historical reasons and because void scalars can represent both
arbitrary byte data and structured dtypes, the void constructor
has three calling conventions:
1. ``np.void(5)`` creates a ``dtype="V5"`` scalar filled with
``\0`` bytes. The 5 can be a Python or NumPy integer.
2. ``np.void(b"bytes-like")`` creates a void scalar from
the byte string. The dtype is chosen based on its length.
3. When a ``dtype=`` is passed the call is roughly the same as an
array creation. However a void scalar is returned when possible.
Please see the examples which show all three different conventions.
Examples
--------
>>> np.void(5)
void(b'\x00\x00\x00\x00\x00')
>>> np.void(b'abcd')
void(b'\x61\x62\x63\x64')
>>> np.void((5, 3.2, "eggs"), dtype="i,d,S5")
(5, 3.2, b'eggs') # looks like a tuple, but is `np.void`
>>> np.void(3, dtype=[('x', np.int8), ('y', np.int8)])
(3, 3) # looks like a tuple, but is `np.void`
Structured `void` scalars can also be obtained via extraction from :ref:`structured_arrays`:
>>> arr = np.array((1, 2), dtype=[('x', np.int8), ('y', np.int8)])
>>> arr[()]
(1, 2) # looks like a tuple, but is `np.void`
""")

add_newdoc_for_scalar_type('datetime64', [],
Expand Down
43 changes: 32 additions & 11 deletions numpy/core/src/multiarray/scalartypes.c.src
Expand Up @@ -3170,28 +3170,33 @@ static PyObject *
void_arrtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
PyObject *obj, *arr;
PyObject *new = NULL;
PyArray_Descr *descr = NULL;

static char *kwnames[] = {"", NULL}; /* positional-only */
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:void", kwnames, &obj)) {
static char *kwnames[] = {"", "dtype", NULL}; /* positional-only */
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|$O&:void", kwnames,
&obj, &PyArray_DescrConverter2, &descr)) {
return NULL;
}
/*
* For a VOID scalar first see if obj is an integer or long
* and create new memory of that size (filled with 0) for the scalar
*/
if (PyLong_Check(obj) ||
if (descr == NULL && (
PyLong_Check(obj) ||
PyArray_IsScalar(obj, Integer) ||
(PyArray_Check(obj) &&
PyArray_NDIM((PyArrayObject *)obj)==0 &&
PyArray_ISINTEGER((PyArrayObject *)obj))) {
new = Py_TYPE(obj)->tp_as_number->nb_int(obj);
}
if (new && PyLong_Check(new)) {
PyArray_ISINTEGER((PyArrayObject *)obj)))) {

PyObject *length = Py_TYPE(obj)->tp_as_number->nb_int(obj);
if (length == NULL) {
return NULL;
}

PyObject *ret;
char *destptr;
npy_ulonglong memu = PyLong_AsUnsignedLongLong(new);
Py_DECREF(new);
npy_ulonglong memu = PyLong_AsUnsignedLongLong(length);
Py_DECREF(length);
if (PyErr_Occurred() || (memu > NPY_MAX_INT)) {
PyErr_Clear();
PyErr_Format(PyExc_OverflowError,
Expand Down Expand Up @@ -3226,7 +3231,23 @@ void_arrtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return ret;
}

arr = PyArray_FROM_OTF(obj, NPY_VOID, NPY_ARRAY_FORCECAST);
if (descr == NULL) {
/* Use the "size-less" void dtype to discover the size. */
descr = PyArray_DescrNewFromType(NPY_VOID);
}
else if (descr->type_num != NPY_VOID || PyDataType_HASSUBARRAY(descr)) {
/* we reject subarrays, since subarray scalars do not exist. */
PyErr_Format(PyExc_TypeError,
"void: descr must be a `void` dtype that is not "
"a subarray dtype (structured or unstructured). "
"Got '%.100R'.", descr);
return NULL;
}
else {
Py_INCREF(descr);
}

arr = PyArray_FromAny(obj, descr, 0, 0, NPY_ARRAY_FORCECAST, NULL);
return PyArray_Return((PyArrayObject *)arr);
}

Expand Down
54 changes: 54 additions & 0 deletions numpy/core/tests/test_scalar_ctors.py
Expand Up @@ -113,3 +113,57 @@ def test_reals(self, t1, t2):
@pytest.mark.parametrize('t2', cfloat_types + [None])
def test_complex(self, t1, t2):
return self._do_test(t1, t2)


@pytest.mark.parametrize(
    "length", [5, np.int8(5), np.array(5, dtype=np.uint16)])
def test_void_via_length(length):
    """An integer-like argument without `dtype` is used as a byte count."""
    scalar = np.void(length)
    # The result is a genuine void scalar ...
    assert type(scalar) is np.void
    # ... holding five zero bytes ...
    zeros = b"\0" * 5
    assert scalar.item() == zeros
    # ... and therefore has the matching unstructured dtype.
    assert scalar.dtype == "V5"

@pytest.mark.parametrize("bytes_", [b"spam", np.array(567.)])
def test_void_from_byteslike(bytes_):
    """Bytes-like input is stored verbatim; an explicit dtype pads or cuts."""
    plain = np.void(bytes_)
    raw = bytes(bytes_)
    assert type(plain) is np.void
    assert plain.item() == raw

    n_raw = len(raw)

    # A larger explicit dtype extends the data (this is how filling works):
    # the original bytes come first, followed by zero bytes.
    padded = np.void(bytes_, dtype="V100")
    assert type(padded) is np.void
    assert padded.item()[:n_raw] == raw
    assert padded.item()[n_raw:] == b"\0" * (padded.nbytes - n_raw)

    # A smaller explicit dtype shortens the data instead.
    truncated = np.void(bytes_, dtype="V4")
    assert type(truncated) is np.void
    assert truncated.item() == raw[:4]

def test_void_arraylike_trumps_byteslike():
    """A memoryview is treated as an array-like, not as one bytes blob."""
    # The memoryview is converted as an array-like of shape (18,)
    # rather than a single bytes-like of that length.
    view = memoryview(b"just one mintleaf?")
    converted = np.void(view)
    assert type(converted) is np.ndarray
    assert converted.dtype == "V1"
    assert converted.shape == (18,)

@pytest.mark.parametrize(
    "data", [5, np.int8(5), np.array(5, dtype=np.uint16)])
def test_void_from_integer_with_dtype(data):
    """With `dtype` given, an integer is data to store, not a length."""
    # The "length" meaning is ignored, rather data is used:
    scalar = np.void(data, dtype="i,i")
    assert type(scalar) is np.void
    assert scalar.dtype == "i,i"
    # Both fields are filled from the single integer.
    assert scalar["f0"] == 5
    assert scalar["f1"] == 5

def test_void_bad_dtype():
    """`np.void` rejects dtypes that cannot describe a void scalar."""
    # A non-void dtype is rejected, and the message names the offending dtype.
    with pytest.raises(TypeError,
            match="void: descr must be a `void.*int64"):
        np.void(4, dtype="i8")

    # A subarray dtype (here with shape `(4,)`) is rejected as well, since
    # subarray scalars do not exist.  The dtype string must be a valid
    # subarray spec so that the error comes from `np.void` itself and not
    # from dtype parsing.
    with pytest.raises(TypeError,
            match=r"void: descr must be a `void.*\(4,\)"):
        np.void(4, dtype="4i")

0 comments on commit 37c6a54

Please sign in to comment.