Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEP: Deprecate conversion of out-of-bound Python integers #22385

Merged
merged 9 commits into from Oct 11, 2022
17 changes: 17 additions & 0 deletions doc/release/upcoming_changes/22393.deprecation.rst
@@ -0,0 +1,17 @@
Conversion of out-of-bound Python integers
------------------------------------------
Attempting a conversion from a Python integer to a NumPy
value will now always check whether the result can be
represented by NumPy. This means the following examples will
fail in the future and give a ``DeprecationWarning`` now::

np.uint8(-1)
np.array([3000], dtype=np.int8)

Many of these did succeed before. Such code was mainly
useful for unsigned integers with negative values such as
``np.uint8(-1)`` giving ``np.iinfo(np.uint8).max``.

Note that conversion between NumPy integers is unaffected,
so that ``np.array(-1).astype(np.uint8)`` continues to work
and uses C integer overflow logic.
133 changes: 123 additions & 10 deletions numpy/core/src/multiarray/arraytypes.c.src
Expand Up @@ -21,6 +21,7 @@
#include "npy_sort.h"
#include "common.h"
#include "ctors.h"
#include "convert_datatype.h"
#include "dtypemeta.h"
#include "lowlevel_strided_loops.h"
#include "usertypes.h"
Expand Down Expand Up @@ -174,6 +175,13 @@ MyPyLong_As@Type@ (PyObject *obj)
return ret;
}

/*
 * Signed counterpart of the *WithWrap conversion helpers.  A signed
 * PyLong conversion cannot wrap around in this path, so the flag is
 * simply cleared before delegating to the plain converter.
 */
static @type@
MyPyLong_As@Type@WithWrap(PyObject *obj, int *wraparound)
{
    @type@ converted;

    /* Wraparound never happens for the signed conversion. */
    *wraparound = 0;
    converted = MyPyLong_As@Type@(obj);
    return converted;
}

/**end repeat**/

/**begin repeat
Expand All @@ -182,9 +190,10 @@ MyPyLong_As@Type@ (PyObject *obj)
* #type = npy_ulong, npy_ulonglong#
*/
static @type@
MyPyLong_AsUnsigned@Type@ (PyObject *obj)
MyPyLong_AsUnsigned@Type@WithWrap(PyObject *obj, int *wraparound)
{
@type@ ret;
*wraparound = 0;
PyObject *num = PyNumber_Long(obj);

if (num == NULL) {
Expand All @@ -193,12 +202,21 @@ MyPyLong_AsUnsigned@Type@ (PyObject *obj)
ret = PyLong_AsUnsigned@Type@(num);
if (PyErr_Occurred()) {
PyErr_Clear();
*wraparound = 1; /* negative wrapped to positive */
ret = PyLong_As@Type@(num);
}
Py_DECREF(num);
return ret;
}

/*
 * Convenience wrapper performing the unsigned conversion while
 * discarding the wraparound information.
 */
static @type@
MyPyLong_AsUnsigned@Type@(PyObject *obj)
{
    int ignored;

    return MyPyLong_AsUnsigned@Type@WithWrap(obj, &ignored);
}


/**end repeat**/

/*
Expand All @@ -217,6 +235,88 @@ MyPyLong_AsUnsigned@Type@ (PyObject *obj)
#endif


/**begin repeat
*
* #type = npy_byte, npy_short, npy_int, npy_long, npy_longlong,
* npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong#
* #TYPE = BYTE, SHORT, INT, LONG, LONGLONG,
* UBYTE, USHORT, UINT, ULONG, ULONGLONG#
* #STYPE = BYTE, SHORT, INT, LONG, LONGLONG,
* BYTE, SHORT, INT, LONG, LONGLONG#
* #conv_type = npy_long*4, npy_longlong, npy_ulong*4, npy_ulonglong#
* #CSTYPE = LONG*4, LONGLONG, LONG*4, LONGLONG#
* #func = MyPyLong_AsLong*4, MyPyLong_AsLongLong,
* MyPyLong_AsLong*2, MyPyLong_AsUnsignedLong*2,
* MyPyLong_AsUnsignedLongLong#
*/

/*
* Helper for conversion from Python integers. This uses the same conversion
* function as below for compatibility (which may seem strange).
* However, it adds more strict integer overflow checks to prevent mainly
* conversion of negative integers. These are considered deprecated, which is
* related to NEP 50 (but somewhat independent).
*/
/*
 * Strict setitem helper for Python integers.  Performs the conversion with
 * the same function used historically (for compatibility), but additionally
 * rejects values that wrapped around (negative -> unsigned) or that do not
 * fit into the target integer type.  Out-of-bound values currently emit a
 * DeprecationWarning under legacy promotion and raise an OverflowError
 * under NEP 50 weak promotion.
 *
 * Returns 0 on success (``*result`` filled in) and -1 with an exception set
 * on failure.
 */
static int
@TYPE@_safe_pyint_setitem(PyObject *obj, @type@ *result)
{
    /* Input is guaranteed to be a Python integer */
    assert(PyLong_Check(obj));
    int wraparound;
    @conv_type@ value = @func@WithWrap(obj, &wraparound);
    if (value == (@conv_type@)-1 && PyErr_Occurred()) {
        return -1;
    }
    *result = (@type@)value;

    if (wraparound
#if NPY_SIZEOF_@STYPE@ < NPY_SIZEOF_@CSTYPE@
            || *result != value
#endif
            ) {
        PyArray_Descr *descr = PyArray_DescrFromType(NPY_@TYPE@);

        if (npy_promotion_state == NPY_USE_LEGACY_PROMOTION || (
                npy_promotion_state == NPY_USE_WEAK_PROMOTION_AND_WARN
                    && !npy_give_promotion_warnings())) {
            /*
             * This path will be taken both for the "promotion" case such as
             * `uint8_arr + 123` as well as the assignment case.
             * The "legacy" path should only ever be taken for assignment
             * (legacy promotion will prevent overflows by promoting up)
             * so a normal deprecation makes sense.
             * When weak promotion is active, we use "future" behavior unless
             * warnings were explicitly opt-in.
             */
            /* NOTE: fixed a stray unmatched backtick in the message below */
            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
                    "NumPy will stop allowing conversion of out-of-bound "
                    "Python integers to integer arrays. The conversion "
                    "of %.100R to %S will fail in the future.\n"
                    "For the old behavior, usually:\n"
                    "    np.array(value).astype(dtype)\n"
                    "will give the desired result (the cast overflows).",
                    obj, descr) < 0) {
                Py_DECREF(descr);
                return -1;
            }
            Py_DECREF(descr);
            return 0;
        }
        else {
            /* Live in the future, outright error: */
            PyErr_Format(PyExc_OverflowError,
                    "Python int %R too large to convert to %S", obj, descr);
            Py_DECREF(descr);
            return -1;
        }
        assert(0);  /* unreachable: both branches above return */
    }
    return 0;
}

/**end repeat**/


/**begin repeat
*
* #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, LONG, UINT, ULONG,
Expand All @@ -235,7 +335,8 @@ MyPyLong_AsUnsigned@Type@ (PyObject *obj)
* npy_half, npy_float, npy_double#
* #kind = Bool, Byte, UByte, Short, UShort, Int, Long, UInt, ULong,
* LongLong, ULongLong, Half, Float, Double#
*/
* #is_int = 0,1*10,0*3#
*/
static PyObject *
@TYPE@_getitem(void *input, void *vap)
{
Expand All @@ -253,12 +354,26 @@ static PyObject *
}
}

static int
NPY_NO_EXPORT int
@TYPE@_setitem(PyObject *op, void *ov, void *vap)
{
PyArrayObject *ap = vap;
@type@ temp; /* ensures alignment */

#if @is_int@
if (PyLong_Check(op)) {
/*
* When weak promotion is enabled (using NEP 50) we also use more
* strict parsing of integers: All out-of-bound Python integer
* parsing fails.
*/
if (@TYPE@_safe_pyint_setitem(op, &temp) < 0) {
return -1;
}
}
else /* continue with if below */
#endif

if (PyArray_IsScalar(op, @kind@)) {
temp = PyArrayScalar_VAL(op, @kind@);
}
Expand Down Expand Up @@ -291,6 +406,7 @@ static int

/**end repeat**/


/**begin repeat
*
* #TYPE = CFLOAT, CDOUBLE#
Expand Down Expand Up @@ -328,13 +444,12 @@ static PyObject *
* #ftype = npy_float, npy_double, npy_longdouble#
* #kind = CFloat, CDouble, CLongDouble#
*/
static int
NPY_NO_EXPORT int
@NAME@_setitem(PyObject *op, void *ov, void *vap)
{
PyArrayObject *ap = vap;
Py_complex oop;
@type@ temp;
int rsize;

if (PyArray_IsZeroDim(op)) {
return convert_to_scalar_and_retry(op, ov, vap, @NAME@_setitem);
Expand Down Expand Up @@ -401,12 +516,10 @@ static int
#endif
}

memcpy(ov, &temp, PyArray_DESCR(ap)->elsize);
if (PyArray_ISBYTESWAPPED(ap)) {
memcpy(ov, &temp, NPY_SIZEOF_@NAME@);
if (ap != NULL && PyArray_ISBYTESWAPPED(ap)) {
byte_swap_vector(ov, 2, sizeof(@ftype@));
}
rsize = sizeof(@ftype@);
copy_and_swap(ov, &temp, rsize, 2, rsize, PyArray_ISBYTESWAPPED(ap));
return 0;
}

Expand Down Expand Up @@ -487,7 +600,7 @@ LONGDOUBLE_getitem(void *ip, void *ap)
return PyArray_Scalar(ip, PyArray_DESCR((PyArrayObject *)ap), NULL);
}

static int
NPY_NO_EXPORT int
LONGDOUBLE_setitem(PyObject *op, void *ov, void *vap)
{
PyArrayObject *ap = vap;
Expand Down
17 changes: 17 additions & 0 deletions numpy/core/src/multiarray/arraytypes.h.src
Expand Up @@ -28,6 +28,23 @@ small_correlate(const char * d_, npy_intp dstride,
npy_intp nk, enum NPY_TYPES ktype,
char * out_, npy_intp ostride);

/**begin repeat
* #TYPE = BYTE, UBYTE, SHORT, USHORT, INT, UINT,
* LONG, ULONG, LONGLONG, ULONGLONG,
* HALF, FLOAT, DOUBLE, LONGDOUBLE,
* CFLOAT, CDOUBLE, CLONGDOUBLE#
*/
/*
* The setitem functions are currently directly used in certain branches
* of the scalar-math code. (Yes, this would be nice to refactor...)
*/

NPY_NO_EXPORT int
@TYPE@_setitem(PyObject *obj, void *data_ptr, void *arr);

/**end repeat**/


#ifndef NPY_DISABLE_OPTIMIZATION
#include "argfunc.dispatch.h"
#endif
Expand Down
42 changes: 31 additions & 11 deletions numpy/core/src/umath/ufunc_object.c
Expand Up @@ -2958,6 +2958,7 @@ PyUFunc_Reduce(PyUFuncObject *ufunc,
int iaxes, ndim;
npy_bool reorderable;
npy_bool axis_flags[NPY_MAXDIMS];
PyArrayObject *result = NULL;
PyObject *identity;
const char *ufunc_name = ufunc_get_name_cstr(ufunc);
/* These parameters come from a TLS global */
Expand All @@ -2983,11 +2984,21 @@ PyUFunc_Reduce(PyUFuncObject *ufunc,
return NULL;
}

/*
* Promote and fetch ufuncimpl (currently needed to fix up the identity).
*/
PyArray_Descr *descrs[3];
PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc,
arr, out, signature, NPY_FALSE, descrs, "reduce");
if (ufuncimpl == NULL) {
return NULL;
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There was a request from Numba to expose the ufunc loop used for a dtype. They did not explicitly ask for reductions, but I imagine that will be the next request. Will it be sufficient to expose reducelike_promote_and_resolve or will we need more logic?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, I have to think about the API. We would probably consider a single function/entrypoint, but with reduction=True as a kwarg?
There are some weirder things in how we do reduction promotion and type resolution right now, but I imagine that is fine.

Such new API might also just always live in a NEP 50 future.

I don't think it matters much for this PR. To me the decision here seems mainly whether we prefer this churn, over the churn of having a special (internal for now) API for Python integers. (And maybe the timing of both)

/* Get the identity */
/* TODO: Both of these should be provided by the ArrayMethod! */
identity = _get_identity(ufunc, &reorderable);
if (identity == NULL) {
return NULL;
goto finish;
}

/* Get the initial value */
Expand All @@ -3003,33 +3014,42 @@ PyUFunc_Reduce(PyUFuncObject *ufunc,
initial = Py_None;
Py_INCREF(initial);
}
else if (PyTypeNum_ISUNSIGNED(descrs[2]->type_num)
&& PyLong_CheckExact(initial)) {
/*
* This is a bit of a hack until we have truly loop specific
* identities. Python -1 cannot be cast to unsigned so convert
* it to a NumPy scalar, but we use -1 for bitwise functions to
* signal all 1s.
* (A builtin identity would not overflow here, although we may
* unnecessary convert 0 and 1.)
*/
Py_SETREF(initial, PyObject_CallFunctionObjArgs(
(PyObject *)&PyLongArrType_Type, initial, NULL));
if (initial == NULL) {
goto finish;
}
}
} else {
Py_DECREF(identity);
Py_INCREF(initial); /* match the reference count in the if above */
}

PyArray_Descr *descrs[3];
PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc,
arr, out, signature, NPY_FALSE, descrs, "reduce");
if (ufuncimpl == NULL) {
Py_DECREF(initial);
return NULL;
}

PyArrayMethod_Context context = {
.caller = (PyObject *)ufunc,
.method = ufuncimpl,
.descriptors = descrs,
};

PyArrayObject *result = PyUFunc_ReduceWrapper(&context,
result = PyUFunc_ReduceWrapper(&context,
arr, out, wheremask, axis_flags, reorderable, keepdims,
initial, reduce_loop, ufunc, buffersize, ufunc_name, errormask);

finish:
for (int i = 0; i < 3; i++) {
Py_DECREF(descrs[i]);
}
Py_DECREF(initial);
Py_XDECREF(initial);
return result;
}

Expand Down
2 changes: 1 addition & 1 deletion numpy/core/tests/test_array_coercion.py
Expand Up @@ -375,7 +375,7 @@ def test_default_dtype_instance(self, dtype_char):
@pytest.mark.parametrize("dtype", np.typecodes["Integer"])
@pytest.mark.parametrize(["scalar", "error"],
[(np.float64(np.nan), ValueError),
(np.ulonglong(-1), OverflowError)])
(np.array(-1).astype(np.ulonglong)[()], OverflowError)])
def test_scalar_to_int_coerce_does_not_cast(self, dtype, scalar, error):
"""
Signed integers are currently different in that they do not cast other
Expand Down
6 changes: 6 additions & 0 deletions numpy/core/tests/test_casting_unittests.py
Expand Up @@ -169,6 +169,9 @@ def get_data(self, dtype1, dtype2):

for i, value in enumerate(values):
# Use item assignment to ensure this is not using casting:
if value < 0 and dtype1.kind == "u":
# Manually rollover unsigned integers (-1 -> int.max)
value = value + np.iinfo(dtype1).max + 1
arr1[i] = value

if dtype2 is None:
Expand All @@ -185,6 +188,9 @@ def get_data(self, dtype1, dtype2):

for i, value in enumerate(values):
# Use item assignment to ensure this is not using casting:
if value < 0 and dtype2.kind == "u":
# Manually rollover unsigned integers (-1 -> int.max)
value = value + np.iinfo(dtype2).max + 1
arr2[i] = value

return arr1, arr2, values
Expand Down