Merge pull request #20924 from seberg/cleanup-scalar-fill

MAINT: Simplify element setting and use it for filling

mattip committed Jun 13, 2022
2 parents 7d21a8d + 20b75ea commit d4baf36
Showing 9 changed files with 154 additions and 230 deletions.
13 changes: 13 additions & 0 deletions doc/release/upcoming_changes/20924.compatibility.rst
@@ -0,0 +1,13 @@
``array.fill(scalar)`` may behave slightly differently
-------------------------------------------------------
`~numpy.ndarray.fill` may in some cases behave slightly differently now,
because the logic is aligned with item assignment::

arr = np.array([1]) # with any dtype/value
arr.fill(scalar)
# is now identical to:
arr[0] = scalar

Previously, casting may have produced slightly different answers when using
values that could not be represented in the target ``dtype`` or when the
target had ``object`` dtype.
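
As a concrete illustration (mirroring the example added to the ``fill``
docstring in this change), the difference is most visible for ``object``
dtype, where a 0-D array passed to ``fill`` is now stored as-is per element,
exactly like item assignment::

    import numpy as np

    a = np.array([None, None], dtype=object)
    a.fill(np.array(3))        # each element becomes the 0-D array itself
    # a is now: array([array(3), array(3)], dtype=object)

    b = np.array([None, None], dtype=object)
    b[0] = np.array(3)         # item assignment behaves identically per element
    # b is now: array([array(3), None], dtype=object)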
18 changes: 18 additions & 0 deletions numpy/core/_add_newdocs.py
@@ -3437,6 +3437,24 @@
>>> a
array([1., 1.])

Fill expects a scalar value and always behaves the same as assigning
to a single array element. The following is a rare example where this
distinction is important:

>>> a = np.array([None, None], dtype=object)
>>> a[0] = np.array(3)
>>> a
array([array(3), None], dtype=object)
>>> a.fill(np.array(3))
>>> a
array([array(3), array(3)], dtype=object)

Where other forms of assignment will unpack the array being assigned:

>>> a[...] = np.array(3)
>>> a
array([3, 3], dtype=object)
"""))


112 changes: 75 additions & 37 deletions numpy/core/src/multiarray/array_coercion.c
@@ -378,6 +378,34 @@ find_scalar_descriptor(
}


/*
* Helper function for casting a raw value from one descriptor to another.
* This helper uses the normal casting machinery but does not check cast
* safety.
*/
static int
cast_raw_scalar_item(
PyArray_Descr *from_descr, char *from_item,
PyArray_Descr *to_descr, char *to_item)
{
int needs_api = 0;
NPY_cast_info cast_info;
if (PyArray_GetDTypeTransferFunction(
0, 0, 0, from_descr, to_descr, 0, &cast_info,
&needs_api) == NPY_FAIL) {
return -1;
}
char *args[2] = {from_item, to_item};
const npy_intp strides[2] = {0, 0};
const npy_intp length = 1;
int res = cast_info.func(&cast_info.context,
args, &length, strides, cast_info.auxdata);

NPY_cast_info_xfree(&cast_info);
return res;
}
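
For intuition, a rough Python-level analogue of what this helper does (an
illustrative sketch under the description above, not the actual
implementation): cast a single value between dtypes without a cast-safety
check:

    import numpy as np

    def cast_scalar_item(value, from_dtype, to_dtype):
        # Wrap the value as a 0-D array of the source dtype, cast it to the
        # target dtype with unsafe casting (no safety check), and return the
        # resulting scalar.
        src = np.array(value, dtype=from_dtype)
        return src.astype(to_dtype, casting="unsafe")[()]

    cast_scalar_item(3.9, np.float64, np.int64)   # -> 3 (unsafe truncation)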


/**
* Assign a single element in an array from a python value.
*
@@ -388,26 +416,35 @@ find_scalar_descriptor(
* This function handles the cast, which is for example hit when assigning
* a float128 to complex128.
*
* At this time, this function does not support arrays (historically we
* mainly supported arrays through `__float__()`, etc.). Such support should
* possibly be added (although when called from `PyArray_AssignFromCache`
* the input cannot be an array).
* Note that this is also problematic for some array-likes, such as
* `astropy.units.Quantity` and `np.ma.masked`. These rely on us calling
* `__float__`/`__int__` for 0-D instances in many cases.
* Eventually, we may want to define this as wrong: They must use DTypes
* instead of (only) subclasses. Until then, here as well as in
* `PyArray_AssignFromCache` (which already does this), we need to special
* case 0-D array-likes to behave like arbitrary (unknown!) Python objects.
* TODO: This function probably needs to be passed an "owner" for the sake of
* future HPy (non CPython) support
*
* NOTE: We do support 0-D exact NumPy arrays correctly via casting here.
* There be dragons, because we must NOT support generic array-likes.
* The problem is that some (e.g. astropy's Quantity and our masked
* arrays) have divergent behaviour for `__array__` as opposed to
* `__float__`. And they rely on that.
* That is arguably bad as it limits the things that work seamlessly
* because `__float__`, etc. cannot even begin to cover all of casting.
* However, we have no choice. We simply CANNOT support array-likes
* here without finding a solution for this first.
* And the only plausible one I see currently is expanding protocols
* in some form, either to indicate that we want a scalar or to indicate
* that we want the unsafe version that `__array__` currently gives
* for both objects.
*
* If we ever figure out how to expand this to other array-likes, care
* may need to be taken. `PyArray_FromAny`/`PyArray_AssignFromCache`
* uses this function but knows whether the input is an array, array-like,
* or scalar. Relaxing things here should be OK, but it looks a bit
* like possible recursion, so it may make sense to make a "scalars only"
* version of this function.
*
* @param descr The descriptor of the destination element (target dtype).
* @param item Pointer to the memory of the element being assigned.
* @param value The Python object to convert and store.
* @return 0 on success, -1 on failure.
*/
/*
* TODO: This function should possibly be public API.
*/
NPY_NO_EXPORT int
PyArray_Pack(PyArray_Descr *descr, char *item, PyObject *value)
{
Expand All @@ -433,6 +470,29 @@ PyArray_Pack(PyArray_Descr *descr, char *item, PyObject *value)
if (DType == NULL) {
return -1;
}
if (DType == (PyArray_DTypeMeta *)Py_None && PyArray_CheckExact(value)
&& PyArray_NDIM((PyArrayObject *)value) == 0) {
/*
* WARNING: Do NOT relax the above `PyArray_CheckExact` unless you have
* read the function doc NOTE carefully and understood it.
*
* NOTE: The ndim == 0 check should probably be an error, but
* unfortunately `arr.__float__()` works for 1-element arrays,
* so in some contexts we need to let it be handled like a scalar.
* (If we manage to deprecate the above, we can do that.)
*/
Py_DECREF(DType);

PyArrayObject *arr = (PyArrayObject *)value;
if (PyArray_DESCR(arr) == descr && !PyDataType_REFCHK(descr)) {
/* light-weight fast path for when the descrs obviously match */
memcpy(item, PyArray_BYTES(arr), descr->elsize);
return 0; /* success (it was an array-like) */
}
return cast_raw_scalar_item(
PyArray_DESCR(arr), PyArray_BYTES(arr), descr, item);

}
if (DType == NPY_DTYPE(descr) || DType == (PyArray_DTypeMeta *)Py_None) {
/* We can set the element directly (or at least will try to) */
Py_XDECREF(DType);
@@ -461,30 +521,8 @@ PyArray_Pack(PyArray_Descr *descr, char *item, PyObject *value)
Py_DECREF(tmp_descr);
return -1;
}
if (PyDataType_REFCHK(tmp_descr)) {
/* We could probably use move-references above */
PyArray_Item_INCREF(data, tmp_descr);
}

int res = 0;
int needs_api = 0;
NPY_cast_info cast_info;
if (PyArray_GetDTypeTransferFunction(
0, 0, 0, tmp_descr, descr, 0, &cast_info,
&needs_api) == NPY_FAIL) {
res = -1;
goto finish;
}
char *args[2] = {data, item};
const npy_intp strides[2] = {0, 0};
const npy_intp length = 1;
if (cast_info.func(&cast_info.context,
args, &length, strides, cast_info.auxdata) < 0) {
res = -1;
}
NPY_cast_info_xfree(&cast_info);
int res = cast_raw_scalar_item(tmp_descr, data, descr, item);

finish:
if (PyDataType_REFCHK(tmp_descr)) {
/* We could probably use move-references above */
PyArray_Item_XDECREF(data, tmp_descr);
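
At the Python level, the net effect of the new 0-D exact-array branch in
`PyArray_Pack` can be sketched roughly as follows (an illustration of the
behaviour documented above, not a definitive spec): a 0-D `ndarray` assigned
into a non-object element has its raw value cast, while for `object` dtype the
0-D array is stored as-is:

    import numpy as np

    out = np.zeros(3, dtype=np.float64)
    out[0] = np.array(np.float32(1.5))   # 0-D exact ndarray: raw value is cast
    out.fill(np.array(2.5))              # fill() goes through the same setter
    # out is now: array([2.5, 2.5, 2.5])

    obj = np.array([None], dtype=object)
    obj[0] = np.array(3)                 # object dtype: stored without unpacking
    # obj is now: array([array(3)], dtype=object)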
166 changes: 29 additions & 137 deletions numpy/core/src/multiarray/convert.c
@@ -20,6 +20,7 @@
#include "array_assign.h"

#include "convert.h"
#include "array_coercion.h"

int
fallocate(int fd, int mode, off_t offset, off_t len);
@@ -358,151 +359,42 @@ PyArray_ToString(PyArrayObject *self, NPY_ORDER order)
NPY_NO_EXPORT int
PyArray_FillWithScalar(PyArrayObject *arr, PyObject *obj)
{
PyArray_Descr *dtype = NULL;
npy_longlong value_buffer[4];
char *value = NULL;
int retcode = 0;

/*
* If 'arr' is an object array, copy the object as is unless
* 'obj' is a zero-dimensional array, in which case we copy
* the element in that array instead.
* If we knew that the output array had at least one element, we would
* not actually need a helping buffer; we always null it, just in case.
*
* (The longlong here should help with alignment.)
*/
if (PyArray_DESCR(arr)->type_num == NPY_OBJECT &&
!(PyArray_Check(obj) &&
PyArray_NDIM((PyArrayObject *)obj) == 0)) {
value = (char *)&obj;

dtype = PyArray_DescrFromType(NPY_OBJECT);
if (dtype == NULL) {
return -1;
}
}
/* NumPy scalar */
else if (PyArray_IsScalar(obj, Generic)) {
dtype = PyArray_DescrFromScalar(obj);
if (dtype == NULL) {
return -1;
}
value = scalar_value(obj, dtype);
if (value == NULL) {
Py_DECREF(dtype);
return -1;
}
}
/* Python boolean */
else if (PyBool_Check(obj)) {
value = (char *)value_buffer;
*value = (obj == Py_True);

dtype = PyArray_DescrFromType(NPY_BOOL);
if (dtype == NULL) {
return -1;
}
}
/* Python integer */
else if (PyLong_Check(obj)) {
/* Try long long before unsigned long long */
npy_longlong ll_v = PyLong_AsLongLong(obj);
if (error_converting(ll_v)) {
/* Long long failed, try unsigned long long */
npy_ulonglong ull_v;
PyErr_Clear();
ull_v = PyLong_AsUnsignedLongLong(obj);
if (ull_v == (unsigned long long)-1 && PyErr_Occurred()) {
return -1;
}
value = (char *)value_buffer;
*(npy_ulonglong *)value = ull_v;

dtype = PyArray_DescrFromType(NPY_ULONGLONG);
if (dtype == NULL) {
return -1;
}
}
else {
/* Long long succeeded */
value = (char *)value_buffer;
*(npy_longlong *)value = ll_v;

dtype = PyArray_DescrFromType(NPY_LONGLONG);
if (dtype == NULL) {
return -1;
}
}
}
/* Python float */
else if (PyFloat_Check(obj)) {
npy_double v = PyFloat_AsDouble(obj);
if (error_converting(v)) {
return -1;
}
value = (char *)value_buffer;
*(npy_double *)value = v;

dtype = PyArray_DescrFromType(NPY_DOUBLE);
if (dtype == NULL) {
npy_longlong value_buffer_stack[4] = {0};
char *value_buffer_heap = NULL;
char *value = (char *)value_buffer_stack;
PyArray_Descr *descr = PyArray_DESCR(arr);

if (descr->elsize > sizeof(value_buffer_stack)) {
/* We need a large temporary buffer... */
value_buffer_heap = PyObject_Calloc(1, descr->elsize);
if (value_buffer_heap == NULL) {
PyErr_NoMemory();
return -1;
}
value = value_buffer_heap;
}
/* Python complex */
else if (PyComplex_Check(obj)) {
npy_double re, im;

re = PyComplex_RealAsDouble(obj);
if (error_converting(re)) {
return -1;
}
im = PyComplex_ImagAsDouble(obj);
if (error_converting(im)) {
return -1;
}
value = (char *)value_buffer;
((npy_double *)value)[0] = re;
((npy_double *)value)[1] = im;

dtype = PyArray_DescrFromType(NPY_CDOUBLE);
if (dtype == NULL) {
return -1;
}
}

/* Use the value pointer we got if possible */
if (value != NULL) {
/* TODO: switch to SAME_KIND casting */
retcode = PyArray_AssignRawScalar(arr, dtype, value,
NULL, NPY_UNSAFE_CASTING);
Py_DECREF(dtype);
return retcode;
if (PyArray_Pack(descr, value, obj) < 0) {
PyMem_FREE(value_buffer_heap);
return -1;
}
/* Otherwise convert to an array to do the assignment */
else {
PyArrayObject *src_arr;

/**
* The dtype of the destination is used when converting
* from the pyobject, so that for example a tuple gets
* recognized as a struct scalar of the required type.
*/
Py_INCREF(PyArray_DTYPE(arr));
src_arr = (PyArrayObject *)PyArray_FromAny(obj,
PyArray_DTYPE(arr), 0, 0, 0, NULL);
if (src_arr == NULL) {
return -1;
}

if (PyArray_NDIM(src_arr) != 0) {
PyErr_SetString(PyExc_ValueError,
"Input object to FillWithScalar is not a scalar");
Py_DECREF(src_arr);
return -1;
}

retcode = PyArray_CopyInto(arr, src_arr);
/*
* There is no cast anymore; the above already coerced using scalar
* coercion rules.
*/
int retcode = raw_array_assign_scalar(
PyArray_NDIM(arr), PyArray_DIMS(arr), descr,
PyArray_BYTES(arr), PyArray_STRIDES(arr),
descr, value);

Py_DECREF(src_arr);
return retcode;
}
PyMem_FREE(value_buffer_heap);
return retcode;
}

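
A hedged sketch of the user-visible outcome: `fill` now coerces its argument
through the same element-setting path as single-item assignment, and a wide
dtype whose element size exceeds the small stack buffer above should take the
heap-buffer branch with the same result:

    import numpy as np

    a = np.empty(4, dtype=np.complex128)
    a.fill(1 + 2j)
    b = np.empty(4, dtype=np.complex128)
    for i in range(4):
        b[i] = 1 + 2j            # same per-element coercion as fill()
    assert np.array_equal(a, b)

    # '<U16' elements are 64 bytes, larger than the 4 * sizeof(npy_longlong)
    # stack buffer, so this fill presumably uses the heap-allocated buffer.
    wide = np.empty(3, dtype="U16")
    wide.fill("hello")
    assert all(s == "hello" for s in wide)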
