From 508722f65995844a130fd6f287ae0503ea60f76e Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Wed, 5 Oct 2022 11:59:25 +0200 Subject: [PATCH 1/9] DEP: Deprecate out-of-bound Python integer conversions Any conversion from a Python integer (or subclass) that is stored into a NumPy dtype but does not fit should raise an error in the future. Note that casts between NumPy types (or assignments of them) are explicitly not affected by this. There are certain use-cases for allowing such casts, even if technically undefined behavior in C. They just work out well in practice in many cases since e.g. -1 is all 1's in binary representation (two's complement). --- numpy/core/src/multiarray/arraytypes.c.src | 130 +++++++++++++++++++-- numpy/core/src/multiarray/arraytypes.h.src | 17 +++ 2 files changed, 137 insertions(+), 10 deletions(-) diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src index f06875cae4e2..81c0862d7422 100644 --- a/numpy/core/src/multiarray/arraytypes.c.src +++ b/numpy/core/src/multiarray/arraytypes.c.src @@ -21,6 +21,7 @@ #include "npy_sort.h" #include "common.h" #include "ctors.h" +#include "convert_datatype.h" #include "dtypemeta.h" #include "lowlevel_strided_loops.h" #include "usertypes.h" @@ -174,6 +175,13 @@ MyPyLong_As@Type@ (PyObject *obj) return ret; } +static @type@ +MyPyLong_As@Type@WithWrap(PyObject *obj, int *wraparound) +{ + *wraparound = 0; /* Never happens within the function */ + return MyPyLong_As@Type@(obj); +} + /**end repeat**/ /**begin repeat @@ -182,9 +190,10 @@ MyPyLong_As@Type@ (PyObject *obj) * #type = npy_ulong, npy_ulonglong# */ static @type@ -MyPyLong_AsUnsigned@Type@ (PyObject *obj) +MyPyLong_AsUnsigned@Type@WithWrap(PyObject *obj, int *wraparound) { @type@ ret; + *wraparound = 0; PyObject *num = PyNumber_Long(obj); if (num == NULL) { @@ -193,12 +202,21 @@ MyPyLong_AsUnsigned@Type@ (PyObject *obj) ret = PyLong_AsUnsigned@Type@(num); if (PyErr_Occurred()) { PyErr_Clear(); + 
*wraparound = 1; /* negative wrapped to positive */ ret = PyLong_As@Type@(num); } Py_DECREF(num); return ret; } +static @type@ +MyPyLong_AsUnsigned@Type@(PyObject *obj) +{ + int wraparound; + return MyPyLong_AsUnsigned@Type@WithWrap(obj, &wraparound); +} + + /**end repeat**/ /* @@ -217,6 +235,85 @@ MyPyLong_AsUnsigned@Type@ (PyObject *obj) #endif +/**begin repeat + * + * #type = npy_byte, npy_short, npy_int, npy_long, npy_longlong, + * npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong# + * #TYPE = BYTE, SHORT, INT, LONG, LONGLONG, + * UBYTE, USHORT, UINT, ULONG, ULONGLONG# + * #STYPE = BYTE, SHORT, INT, LONG, LONGLONG, + * BYTE, SHORT, INT, LONG, LONGLONG# + * #conv_type = npy_long*4, npy_longlong, npy_ulong*4, npy_ulonglong# + * #CSTYPE = LONG*4, LONGLONG, LONG*4, LONGLONG# + * #func = MyPyLong_AsLong*4, MyPyLong_AsLongLong, + * MyPyLong_AsLong*2, MyPyLong_AsUnsignedLong*2, + * MyPyLong_AsUnsignedLongLong# + */ + +/* + * Helper for conversion from Python integers. This uses the same conversion + * function as below for compatibility (which may seem strange). + * However, it adds more strict integer overflow checks to prevent mainly + * conversion of negative integers. These are considered deprecated, which is + * related to NEP 50 (but somewhat independent). 
+ */ +static int +@TYPE@_safe_pyint_setitem(PyObject *obj, @type@ *result) +{ + /* Input is guaranteed to be a Python integer */ + assert(PyLong_Check(obj)); + int wraparound; + @conv_type@ value = @func@WithWrap(obj, &wraparound); + if (value == (@conv_type@)-1 && PyErr_Occurred()) { + return -1; + } + *result = (@type@)value; + + if (wraparound +#if NPY_SIZEOF_@STYPE@ < NPY_SIZEOF_@CSTYPE@ + || *result != value +#endif + ) { + PyArray_Descr *descr = PyArray_DescrFromType(NPY_@TYPE@); + + if (npy_promotion_state == NPY_USE_LEGACY_PROMOTION || ( + npy_promotion_state == NPY_USE_WEAK_PROMOTION_AND_WARN + && !npy_give_promotion_warnings())) { + /* + * This path will be taken both for the "promotion" case such as + * `uint8_arr + 123` as well as the assignment case. + * The "legacy" path should only ever be taken for assignment + * (legacy promotion will prevent overflows by promoting up) + * so a normal deprecation makes sense. + * When weak promotion is active, we use "future" behavior unless + * warnings were explicitly opt-in. + */ + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "NumPy will stop allowing conversion of out-of-bound " + "Python integers to integer arrays. 
The conversion " + "of %.100R to %S will fail in the future.", + obj, descr) < 0) { + Py_DECREF(descr); + return -1; + } + Py_DECREF(descr); + return 0; + } + else { + /* Live in the future, outright error: */ + PyErr_Format(PyExc_OverflowError, + "Python int %R too large to convert to %S", obj, descr); + Py_DECREF(descr); + return -1; + } + assert(0); + } + return 0; +} + +/**end repeat**/ + + /**begin repeat * * #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, LONG, UINT, ULONG, @@ -235,7 +332,8 @@ MyPyLong_AsUnsigned@Type@ (PyObject *obj) * npy_half, npy_float, npy_double# * #kind = Bool, Byte, UByte, Short, UShort, Int, Long, UInt, ULong, * LongLong, ULongLong, Half, Float, Double# -*/ + * #is_int = 0,1*10,0*3# + */ static PyObject * @TYPE@_getitem(void *input, void *vap) { @@ -253,12 +351,26 @@ static PyObject * } } -static int +NPY_NO_EXPORT int @TYPE@_setitem(PyObject *op, void *ov, void *vap) { PyArrayObject *ap = vap; @type@ temp; /* ensures alignment */ +#if @is_int@ + if (PyLong_Check(op)) { + /* + * When weak promotion is enabled (using NEP 50) we also use more + * strict parsing of integers: All out-of-bound Python integer + * parsing fails. 
+ */ + if (@TYPE@_safe_pyint_setitem(op, &temp) < 0) { + return -1; + } + } + else /* continue with if below */ +#endif + if (PyArray_IsScalar(op, @kind@)) { temp = PyArrayScalar_VAL(op, @kind@); } @@ -291,6 +403,7 @@ static int /**end repeat**/ + /**begin repeat * * #TYPE = CFLOAT, CDOUBLE# @@ -328,13 +441,12 @@ static PyObject * * #ftype = npy_float, npy_double, npy_longdouble# * #kind = CFloat, CDouble, CLongDouble# */ -static int +NPY_NO_EXPORT int @NAME@_setitem(PyObject *op, void *ov, void *vap) { PyArrayObject *ap = vap; Py_complex oop; @type@ temp; - int rsize; if (PyArray_IsZeroDim(op)) { return convert_to_scalar_and_retry(op, ov, vap, @NAME@_setitem); @@ -401,12 +513,10 @@ static int #endif } - memcpy(ov, &temp, PyArray_DESCR(ap)->elsize); - if (PyArray_ISBYTESWAPPED(ap)) { + memcpy(ov, &temp, NPY_SIZEOF_@NAME@); + if (ap != NULL && PyArray_ISBYTESWAPPED(ap)) { byte_swap_vector(ov, 2, sizeof(@ftype@)); } - rsize = sizeof(@ftype@); - copy_and_swap(ov, &temp, rsize, 2, rsize, PyArray_ISBYTESWAPPED(ap)); return 0; } @@ -487,7 +597,7 @@ LONGDOUBLE_getitem(void *ip, void *ap) return PyArray_Scalar(ip, PyArray_DESCR((PyArrayObject *)ap), NULL); } -static int +NPY_NO_EXPORT int LONGDOUBLE_setitem(PyObject *op, void *ov, void *vap) { PyArrayObject *ap = vap; diff --git a/numpy/core/src/multiarray/arraytypes.h.src b/numpy/core/src/multiarray/arraytypes.h.src index 4c7487189b5a..aad464ccf95d 100644 --- a/numpy/core/src/multiarray/arraytypes.h.src +++ b/numpy/core/src/multiarray/arraytypes.h.src @@ -28,6 +28,23 @@ small_correlate(const char * d_, npy_intp dstride, npy_intp nk, enum NPY_TYPES ktype, char * out_, npy_intp ostride); +/**begin repeat + * #TYPE = BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE# + */ +/* + * The setitem functions are currently directly used in certain branches + * of the scalar-math code. (Yes, this would be nice to refactor...) 
+ */ + +NPY_NO_EXPORT int +@TYPE@_setitem(PyObject *obj, void *data_ptr, void *arr); + +/**end repeat**/ + + #ifndef NPY_DISABLE_OPTIMIZATION #include "argfunc.dispatch.h" #endif From 0a821c1c1d46247c29e02570000cc6196ee2884c Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Wed, 5 Oct 2022 17:44:46 +0200 Subject: [PATCH 2/9] ENH: Workaround -1 reduce identity for unsigned integers This is a temporary solution until such a time where we can have loop specific identities which can handle this more gracefully. --- numpy/core/src/umath/ufunc_object.c | 42 +++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index b7e390abba5b..5485c2006887 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -2958,6 +2958,7 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, int iaxes, ndim; npy_bool reorderable; npy_bool axis_flags[NPY_MAXDIMS]; + PyArrayObject *result = NULL; PyObject *identity; const char *ufunc_name = ufunc_get_name_cstr(ufunc); /* These parameters come from a TLS global */ @@ -2983,11 +2984,21 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, return NULL; } + /* + * Promote and fetch ufuncimpl (currently needed to fix up the identity). + */ + PyArray_Descr *descrs[3]; + PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc, + arr, out, signature, NPY_FALSE, descrs, "reduce"); + if (ufuncimpl == NULL) { + return NULL; + } + /* Get the identity */ /* TODO: Both of these should be provided by the ArrayMethod! */ identity = _get_identity(ufunc, &reorderable); if (identity == NULL) { - return NULL; + goto finish; } /* Get the initial value */ @@ -3003,33 +3014,42 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, initial = Py_None; Py_INCREF(initial); } + else if (PyTypeNum_ISUNSIGNED(descrs[2]->type_num) + && PyLong_CheckExact(initial)) { + /* + * This is a bit of a hack until we have truly loop specific + * identities. 
Python -1 cannot be cast to unsigned so convert + * it to a NumPy scalar, but we use -1 for bitwise functions to + * signal all 1s. + * (A builtin identity would not overflow here, although we may + * unnecessarily convert 0 and 1.) + */ + Py_SETREF(initial, PyObject_CallFunctionObjArgs( + (PyObject *)&PyLongArrType_Type, initial, NULL)); + if (initial == NULL) { + goto finish; + } + } } else { Py_DECREF(identity); Py_INCREF(initial); /* match the reference count in the if above */ } - PyArray_Descr *descrs[3]; - PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc, - arr, out, signature, NPY_FALSE, descrs, "reduce"); - if (ufuncimpl == NULL) { - Py_DECREF(initial); - return NULL; - } - PyArrayMethod_Context context = { .caller = (PyObject *)ufunc, .method = ufuncimpl, .descriptors = descrs, }; - PyArrayObject *result = PyUFunc_ReduceWrapper(&context, + result = PyUFunc_ReduceWrapper(&context, arr, out, wheremask, axis_flags, reorderable, keepdims, initial, reduce_loop, ufunc, buffersize, ufunc_name, errormask); + finish: for (int i = 0; i < 3; i++) { Py_DECREF(descrs[i]); } - Py_DECREF(initial); + Py_XDECREF(initial); return result; } From afcc5608dadb17d7df565f95313cdbc1bd3a399b Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Wed, 5 Oct 2022 17:45:42 +0200 Subject: [PATCH 3/9] MAINT: Structured MA fill value workaround by adding array cast This wraps the fill value into an array, the default fill value for all integers is 99999 which doesn't work for many integer dtypes. Note that this might still subtly change the behavior in other code paths where we cannot avoid this. Plus, the DeprecationWarning may show up (and in fact be a "in the future will use the default fill value" warning). 
--- numpy/ma/core.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/numpy/ma/core.py b/numpy/ma/core.py index d3cbb33e54f1..a2f7c9e5c9f2 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -201,7 +201,13 @@ def _recursive_fill_value(dtype, f): Recursively produce a fill value for `dtype`, calling f on scalar dtypes """ if dtype.names is not None: - vals = tuple(_recursive_fill_value(dtype[name], f) for name in dtype.names) + # We wrap into `array` here, which ensures we use NumPy cast rules + # for integer casts, this allows the use of 99999 as a fill value + # for int8. + # TODO: This is probably a mess, but should best preserve behavior? + vals = tuple( + np.array(_recursive_fill_value(dtype[name], f)) + for name in dtype.names) return np.array(vals, dtype=dtype)[()] # decay to void scalar from 0d elif dtype.subdtype: subtype, shape = dtype.subdtype From eec99b48df9ce782090157b657bf14f3b79c4981 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Wed, 5 Oct 2022 17:47:46 +0200 Subject: [PATCH 4/9] TST: Fixup tests for strict Python integer conversions --- numpy/core/tests/test_array_coercion.py | 2 +- numpy/core/tests/test_casting_unittests.py | 6 +++++ numpy/core/tests/test_dtype.py | 4 ++-- numpy/core/tests/test_einsum.py | 7 ++++-- numpy/core/tests/test_getlimits.py | 4 +++- numpy/core/tests/test_multiarray.py | 7 +++--- numpy/core/tests/test_numeric.py | 11 +++++---- numpy/core/tests/test_regression.py | 6 ++--- numpy/core/tests/test_scalar_ctors.py | 3 ++- numpy/core/tests/test_scalarmath.py | 7 +++--- numpy/core/tests/test_umath.py | 26 +++++++++++----------- numpy/f2py/tests/test_return_complex.py | 2 +- numpy/f2py/tests/test_return_real.py | 2 +- 13 files changed, 50 insertions(+), 37 deletions(-) diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py index 3a074a2b5c17..fade572928c9 100644 --- a/numpy/core/tests/test_array_coercion.py +++ b/numpy/core/tests/test_array_coercion.py @@ 
-375,7 +375,7 @@ def test_default_dtype_instance(self, dtype_char): @pytest.mark.parametrize("dtype", np.typecodes["Integer"]) @pytest.mark.parametrize(["scalar", "error"], [(np.float64(np.nan), ValueError), - (np.ulonglong(-1), OverflowError)]) + (np.array(-1).astype(np.ulonglong)[()], OverflowError)]) def test_scalar_to_int_coerce_does_not_cast(self, dtype, scalar, error): """ Signed integers are currently different in that they do not cast other diff --git a/numpy/core/tests/test_casting_unittests.py b/numpy/core/tests/test_casting_unittests.py index 16ecb1943706..a49d876d410b 100644 --- a/numpy/core/tests/test_casting_unittests.py +++ b/numpy/core/tests/test_casting_unittests.py @@ -169,6 +169,9 @@ def get_data(self, dtype1, dtype2): for i, value in enumerate(values): # Use item assignment to ensure this is not using casting: + if value < 0 and dtype1.kind == "u": + # Manually rollover unsigned integers (-1 -> int.max) + value = value + np.iinfo(dtype1).max + 1 arr1[i] = value if dtype2 is None: @@ -185,6 +188,9 @@ def get_data(self, dtype1, dtype2): for i, value in enumerate(values): # Use item assignment to ensure this is not using casting: + if value < 0 and dtype2.kind == "u": + # Manually rollover unsigned integers (-1 -> int.max) + value = value + np.iinfo(dtype2).max + 1 arr2[i] = value return arr1, arr2, values diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py index 9b471a5bfa95..f7819e83b687 100644 --- a/numpy/core/tests/test_dtype.py +++ b/numpy/core/tests/test_dtype.py @@ -328,8 +328,8 @@ def test_union_struct(self): dt2 = np.dtype({'names':['f2', 'f0', 'f1'], 'formats':['i", a, b, c, dtype='?', casting='unsafe', optimize=do_opt), np.logical_and(np.logical_and(a != 0, b != 0), c != 0)) diff --git a/numpy/core/tests/test_getlimits.py b/numpy/core/tests/test_getlimits.py index c5148db2c715..b8aaba386e98 100644 --- a/numpy/core/tests/test_getlimits.py +++ b/numpy/core/tests/test_getlimits.py @@ -69,7 +69,9 @@ def 
test_basic(self): def test_unsigned_max(self): types = np.sctypes['uint'] for T in types: - assert_equal(iinfo(T).max, T(-1)) + with np.errstate(over="ignore"): + max_calculated = T(0) - T(1) + assert_equal(iinfo(T).max, max_calculated) class TestRepr: def test_iinfo_repr(self): diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 87317cc926fc..a9a21e8c31c9 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -5020,6 +5020,8 @@ def test_ip_types(self): for types in np.sctypes.values(): for T in types: if T not in unchecked_types: + if val < 0 and np.dtype(T).kind == "u": + val = np.iinfo(T).max - 99 self.tst_basic(x.copy().astype(T), T, mask, val) # Also test string of a length which uses an untypical length @@ -7234,9 +7236,8 @@ def test_3d_tensor(self): [2630, 2910, 3190]], [[2198, 2542, 2886], - [3230, 3574, 3918]]]], - dtype=dt - ) + [3230, 3574, 3918]]]] + ).astype(dt) assert_equal(np.inner(a, b), desired) assert_equal(np.inner(b, a).transpose(2,3,0,1), desired) diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index 21bf91a356b7..bc4ba9f72b7a 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -2824,12 +2824,11 @@ def setup(self): def compare_array_value(self, dz, value, fill_value): if value is not None: if fill_value: - try: - z = dz.dtype.type(value) - except OverflowError: - pass - else: - assert_(np.all(dz == z)) + # Conversion is close to what np.full_like uses + # but we may want to convert directly in the future + # which may result in errors (where this does not). 
+ z = np.array(value).astype(dz.dtype) + assert_(np.all(dz == z)) else: assert_(np.all(dz == value)) diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py index 4538c825db64..2f2d115a4336 100644 --- a/numpy/core/tests/test_regression.py +++ b/numpy/core/tests/test_regression.py @@ -1922,7 +1922,7 @@ def test_pickle_string_overwrite(self): # Check that loads does not clobber interned strings s = re.sub("a(.)", "\x01\\1", "a_") assert_equal(s[0], "\x01") - data[0] = 0xbb + data[0] = 0x6a s = re.sub("a(.)", "\x01\\1", "a_") assert_equal(s[0], "\x01") @@ -1930,7 +1930,7 @@ def test_pickle_bytes_overwrite(self): for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): data = np.array([1], dtype='b') data = pickle.loads(pickle.dumps(data, protocol=proto)) - data[0] = 0xdd + data[0] = 0x7d bytestring = "\x01 ".encode('ascii') assert_equal(bytestring[0:1], '\x01'.encode('ascii')) @@ -1945,7 +1945,7 @@ def test_pickle_py2_array_latin1_hack(self): b"p13\ntp14\nb.") # This should work: result = pickle.loads(data, encoding='latin1') - assert_array_equal(result, np.array([129], dtype='b')) + assert_array_equal(result, np.array([129]).astype('b')) # Should not segfault: assert_raises(Exception, pickle.loads, data, encoding='koi8-r') diff --git a/numpy/core/tests/test_scalar_ctors.py b/numpy/core/tests/test_scalar_ctors.py index 7e933537dbf3..17aca3fb8254 100644 --- a/numpy/core/tests/test_scalar_ctors.py +++ b/numpy/core/tests/test_scalar_ctors.py @@ -78,7 +78,8 @@ def test_intp(self): assert_equal(1024, np.intp(1024)) def test_uint64_from_negative(self): - assert_equal(np.uint64(-2), np.uint64(18446744073709551614)) + with pytest.warns(DeprecationWarning): + assert_equal(np.uint64(-2), np.uint64(18446744073709551614)) int_types = [np.byte, np.short, np.intc, np.int_, np.longlong] diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py index 3830ec0c7403..6d9f0a46ddfc 100644 --- a/numpy/core/tests/test_scalarmath.py +++ 
b/numpy/core/tests/test_scalarmath.py @@ -442,7 +442,8 @@ def test_int_from_long(self): def test_iinfo_long_values(self): for code in 'bBhH': - res = np.array(np.iinfo(code).max + 1, dtype=code) + with pytest.warns(DeprecationWarning): + res = np.array(np.iinfo(code).max + 1, dtype=code) tgt = np.iinfo(code).min assert_(res == tgt) @@ -767,7 +768,7 @@ def test_shift_all_bits(self, type_code, op): nbits = dt.itemsize * 8 for val in [5, -5]: for shift in [nbits, nbits + 4]: - val_scl = dt.type(val) + val_scl = np.array(val).astype(dt)[()] shift_scl = dt.type(shift) res_scl = op(val_scl, shift_scl) if val_scl < 0 and op is operator.rshift: @@ -777,7 +778,7 @@ def test_shift_all_bits(self, type_code, op): assert_equal(res_scl, 0) # Result on scalars should be the same as on arrays - val_arr = np.array([val]*32, dtype=dt) + val_arr = np.array([val_scl]*32, dtype=dt) shift_arr = np.array([shift]*32, dtype=dt) res_arr = op(val_arr, shift_arr) assert_equal(res_arr, res_scl) diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index afe42b56a48d..ff2b6a0f297c 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -366,24 +366,24 @@ def test_division_int(self): np.sctypes['int'] + np.sctypes['uint'], ( ( # dividend - "np.arange(fo.max-lsize, fo.max, dtype=dtype)," + "np.array(range(fo.max-lsize, fo.max)).astype(dtype)," # divisors - "np.arange(lsize, dtype=dtype)," + "np.arange(lsize).astype(dtype)," # scalar divisors "range(15)" ), ( # dividend - "np.arange(fo.min, fo.min+lsize, dtype=dtype)," + "np.arange(fo.min, fo.min+lsize).astype(dtype)," # divisors - "np.arange(lsize//-2, lsize//2, dtype=dtype)," + "np.arange(lsize//-2, lsize//2).astype(dtype)," # scalar divisors "range(fo.min, fo.min + 15)" ), ( # dividend - "np.arange(fo.max-lsize, fo.max, dtype=dtype)," + "np.array(range(fo.max-lsize, fo.max)).astype(dtype)," # divisors - "np.arange(lsize, dtype=dtype)," + "np.arange(lsize).astype(dtype)," # scalar divisors 
"[1,3,9,13,neg, fo.min+1, fo.min//2, fo.max//3, fo.max//4]" ) @@ -450,9 +450,9 @@ def test_division_int_boundary(self, dtype, ex_val): @pytest.mark.parametrize("dtype,ex_val", itertools.product( np.sctypes['int'] + np.sctypes['uint'], ( "np.array([fo.max, 1, 2, 1, 1, 2, 3], dtype=dtype)", - "np.array([fo.min, 1, -2, 1, 1, 2, -3], dtype=dtype)", + "np.array([fo.min, 1, -2, 1, 1, 2, -3]).astype(dtype)", "np.arange(fo.min, fo.min+(100*10), 10, dtype=dtype)", - "np.arange(fo.max-(100*7), fo.max, 7, dtype=dtype)", + "np.array(range(fo.max-(100*7), fo.max, 7)).astype(dtype)", ) )) def test_division_int_reduce(self, dtype, ex_val): @@ -472,7 +472,7 @@ def test_division_int_reduce(self, dtype, ex_val): with np.errstate(divide='raise', over='raise'): with pytest.raises(FloatingPointError, match="divide by zero encountered in reduce"): - np.floor_divide.reduce(np.arange(-100, 100, dtype=dtype)) + np.floor_divide.reduce(np.arange(-100, 100).astype(dtype)) if fo.min: with pytest.raises(FloatingPointError, match='overflow encountered in reduce'): @@ -2328,7 +2328,7 @@ class TestBitwiseUFuncs: def test_values(self): for dt in self.bitwise_types: zeros = np.array([0], dtype=dt) - ones = np.array([-1], dtype=dt) + ones = np.array([-1]).astype(dt) msg = "dt = '%s'" % dt.char assert_equal(np.bitwise_not(zeros), ones, err_msg=msg) @@ -2352,7 +2352,7 @@ def test_values(self): def test_types(self): for dt in self.bitwise_types: zeros = np.array([0], dtype=dt) - ones = np.array([-1], dtype=dt) + ones = np.array([-1]).astype(dt) msg = "dt = '%s'" % dt.char assert_(np.bitwise_not(zeros).dtype == dt, msg) @@ -2370,7 +2370,7 @@ def test_reduction(self): for dt in self.bitwise_types: zeros = np.array([0], dtype=dt) - ones = np.array([-1], dtype=dt) + ones = np.array([-1]).astype(dt) for f in binary_funcs: msg = "dt: '%s', f: '%s'" % (dt, f) assert_equal(f.reduce(zeros), zeros, err_msg=msg) @@ -2382,7 +2382,7 @@ def test_reduction(self): empty = np.array([], dtype=dt) for f in binary_funcs: 
msg = "dt: '%s', f: '%s'" % (dt, f) - tgt = np.array(f.identity, dtype=dt) + tgt = np.array(f.identity).astype(dt) res = f.reduce(empty) assert_equal(res, tgt, err_msg=msg) assert_(res.dtype == tgt.dtype, msg) diff --git a/numpy/f2py/tests/test_return_complex.py b/numpy/f2py/tests/test_return_complex.py index dc5592899860..9df79632dd40 100644 --- a/numpy/f2py/tests/test_return_complex.py +++ b/numpy/f2py/tests/test_return_complex.py @@ -23,7 +23,7 @@ def check_function(self, t, tname): assert abs(t(array(23 + 4j, "F")) - (23 + 4j)) <= err assert abs(t(array([234])) - 234.0) <= err assert abs(t(array([[234]])) - 234.0) <= err - assert abs(t(array([234], "b")) + 22.0) <= err + assert abs(t(array([234]).astype("b")) + 22.0) <= err assert abs(t(array([234], "h")) - 234.0) <= err assert abs(t(array([234], "i")) - 234.0) <= err assert abs(t(array([234], "l")) - 234.0) <= err diff --git a/numpy/f2py/tests/test_return_real.py b/numpy/f2py/tests/test_return_real.py index 7705a11229bb..9e76c151e88e 100644 --- a/numpy/f2py/tests/test_return_real.py +++ b/numpy/f2py/tests/test_return_real.py @@ -22,7 +22,7 @@ def check_function(self, t, tname): assert abs(t(array(234)) - 234.0) <= err assert abs(t(array([234])) - 234.0) <= err assert abs(t(array([[234]])) - 234.0) <= err - assert abs(t(array([234], "b")) + 22) <= err + assert abs(t(array([234]).astype("b")) + 22) <= err assert abs(t(array([234], "h")) - 234.0) <= err assert abs(t(array([234], "i")) - 234.0) <= err assert abs(t(array([234], "l")) - 234.0) <= err From 7a951f9d114a574ecfbe049dfb7f58a91884eba8 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Wed, 5 Oct 2022 22:00:04 +0200 Subject: [PATCH 5/9] TST: Add deprecation tests for out-of-bound pyint conversion deprecation --- numpy/core/tests/test_deprecations.py | 34 +++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index 5c13fcd4fbc7..164e504bf5d2 100644 --- 
a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -1196,3 +1196,37 @@ def test_deprecated_raised(self, dtype): np.loadtxt(["10.5"], dtype=dtype) except ValueError as e: assert isinstance(e.__cause__, DeprecationWarning) + + +class TestPyIntConversion(_DeprecationTestCase): + message = r".*stop allowing conversion of out-of-bound.*" + + @pytest.mark.parametrize("dtype", np.typecodes["AllInteger"]) + def test_deprecated_scalar(self, dtype): + dtype = np.dtype(dtype) + info = np.iinfo(dtype) + + # Cover the most common creation paths (all end up in the + # same place): + def scalar(value, dtype): + dtype.type(value) + + def assign(value, dtype): + arr = np.array([0, 0, 0], dtype=dtype) + arr[2] = value + + def create(value, dtype): + np.array([value], dtype=dtype) + + for creation_func in [scalar, assign, create]: + try: + self.assert_deprecated( + lambda: creation_func(info.min - 1, dtype)) + except OverflowError: + pass # OverflowErrors always happened also before and are OK. + + try: + self.assert_deprecated( + lambda: creation_func(info.max + 1, dtype)) + except OverflowError: + pass # OverflowErrors always happened also before and are OK. 
From 0ca3a6d11f27eb1561811c0b72ce1998655e1ee5 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 6 Oct 2022 17:26:28 +0200 Subject: [PATCH 6/9] DOC: Add release note about deprecation of out-of-bound python integer conv --- .../upcoming_changes/22393.deprecation.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 doc/release/upcoming_changes/22393.deprecation.rst diff --git a/doc/release/upcoming_changes/22393.deprecation.rst b/doc/release/upcoming_changes/22393.deprecation.rst new file mode 100644 index 000000000000..227463233f65 --- /dev/null +++ b/doc/release/upcoming_changes/22393.deprecation.rst @@ -0,0 +1,17 @@ +Conversion of out-of-bound Python integers +------------------------------------------ +Attempting a conversion from a Python integer to a NumPy +value will now always check whether the result can be +represented by NumPy. This means the following examples will +fail in the future and give a ``DeprecationWarning`` now:: + + np.uint8(-1) + np.array([3000], dtype=np.int8) + +While many of these did succeed before. Such code was mainly +useful for unsigned integers with negative values such as +`np.uint8(-1)` giving `np.iinfo(np.uint8).max`. + +Note that conversion between NumPy integers is unaffected, +so that `np.array(-1).astype(np.uint8)` continues to work +and use C integer overflow logic. 
From fb44bd1eb505181a9dffecd4b578e8b5494a298f Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 6 Oct 2022 17:26:59 +0200 Subject: [PATCH 7/9] DOC: Extend out-of-bound python integer deprecation warning --- numpy/core/src/multiarray/arraytypes.c.src | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src index 81c0862d7422..e135fee1df70 100644 --- a/numpy/core/src/multiarray/arraytypes.c.src +++ b/numpy/core/src/multiarray/arraytypes.c.src @@ -291,7 +291,10 @@ static int if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "NumPy will stop allowing conversion of out-of-bound " "Python integers to integer arrays. The conversion " - "of %.100R to %S will fail in the future.", + "of %.100R to %S will fail in the future.\n" + "For the old behavior, usually:\n" + " np.array(value).astype(dtype)`\n" + "will give the desired result (the cast overflows).", obj, descr) < 0) { Py_DECREF(descr); return -1; From 94340814c1c32db54f719dc5e825a544ebc6a137 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 6 Oct 2022 17:27:19 +0200 Subject: [PATCH 8/9] TST: Further test fixup for python integer conversion warning --- numpy/f2py/tests/test_return_logical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/f2py/tests/test_return_logical.py b/numpy/f2py/tests/test_return_logical.py index 6f64745ee481..92fb902af4dd 100644 --- a/numpy/f2py/tests/test_return_logical.py +++ b/numpy/f2py/tests/test_return_logical.py @@ -30,7 +30,7 @@ def check_function(self, t): assert t(array(234)) == 1 assert t(array([234])) == 1 assert t(array([[234]])) == 1 - assert t(array([234], "b")) == 1 + assert t(array([127], "b")) == 1 assert t(array([234], "h")) == 1 assert t(array([234], "i")) == 1 assert t(array([234], "l")) == 1 From 5f1102429b0602e9ed6ad644295dc8f3f7a43b95 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Tue, 11 Oct 2022 13:55:00 +0200 Subject: [PATCH 
9/9] Update doc/release/upcoming_changes/22393.deprecation.rst Co-authored-by: Matti Picus --- doc/release/upcoming_changes/22393.deprecation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release/upcoming_changes/22393.deprecation.rst b/doc/release/upcoming_changes/22393.deprecation.rst index 227463233f65..52099506c7b3 100644 --- a/doc/release/upcoming_changes/22393.deprecation.rst +++ b/doc/release/upcoming_changes/22393.deprecation.rst @@ -8,7 +8,7 @@ fail in the future and give a ``DeprecationWarning`` now:: np.uint8(-1) np.array([3000], dtype=np.int8) -While many of these did succeed before. Such code was mainly +Many of these did succeed before. Such code was mainly useful for unsigned integers with negative values such as `np.uint8(-1)` giving `np.iinfo(np.uint8).max`.