Merge pull request #21875 from seberg/weak-scalars-safe-ints

ENH: Implement correct scalar and integer overflow errors for NEP 50
numpy · Oct 18, 2022 · aed648c · aed648c
2 parents ac39f38 + ee3c20b
commit aed648c
Show file tree

Hide file tree

Showing 8 changed files with 232 additions and 22 deletions.
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
@@ -305,7 +305,7 @@ static int
         else {
             /* Live in the future, outright error: */
             PyErr_Format(PyExc_OverflowError,
-                    "Python int %R too large to convert to %S", obj, descr);
+                    "Python integer %R out of bounds for %S", obj, descr);
             Py_DECREF(descr);
             return -1;
             }

diff --git a/numpy/core/src/multiarray/dtypemeta.h b/numpy/core/src/multiarray/dtypemeta.h
@@ -51,7 +51,6 @@ typedef struct {
     ensure_canonical_function *ensure_canonical;
     /*
      * Currently only used for experimental user DTypes.
-     * Typing as `void *` until NumPy itself uses these (directly).
      */
     setitemfunction *setitem;
     getitemfunction *getitem;
@@ -105,6 +104,7 @@ typedef struct {
 #define NPY_DT_CALL_setitem(descr, value, data_ptr)  \
     NPY_DT_SLOTS(NPY_DTYPE(descr))->setitem(descr, value, data_ptr)
 
+
 /*
  * This function will hopefully be phased out or replaced, but was convenient
  * for incremental implementation of new DTypes based on DTypeMeta.

diff --git a/numpy/core/src/umath/scalarmath.c.src b/numpy/core/src/umath/scalarmath.c.src
@@ -26,11 +26,14 @@
 #include "binop_override.h"
 #include "npy_longdouble.h"
 
+#include "arraytypes.h"
 #include "array_coercion.h"
 #include "common.h"
 #include "can_cast_table.h"
 #include "umathmodule.h"
 
+#include "convert_datatype.h"
+
 
 /* TODO: Used for some functions, should possibly move these to npy_math.h */
 #include "loops.h"
@@ -792,7 +795,12 @@ typedef enum {
      */
     CONVERSION_SUCCESS,
     /*
-     * Other object is an unknown scalar or array-like, we (typically) use
+     * We use the normal conversion (setitem) function when coercing from
+     * Python scalars.
+     */
+    CONVERT_PYSCALAR,
+    /*
+     * Other object is an unknown scalar or array-like, we (typically) use
      * the generic path, which normally ends up in the ufunc machinery.
      */
     OTHER_IS_UNKNOWN_OBJECT,
@@ -956,7 +964,15 @@ convert_to_@name@(PyObject *value, @type@ *result, npy_bool *may_need_deferring)
             *may_need_deferring = NPY_TRUE;
         }
         if (!IS_SAFE(NPY_DOUBLE, NPY_@TYPE@)) {
-            return PROMOTION_REQUIRED;
+            if (npy_promotion_state != NPY_USE_WEAK_PROMOTION) {
+                /* Legacy promotion and weak-and-warn not handled here */
+                return PROMOTION_REQUIRED;
+            }
+            /* Weak promotion is used when self is float or complex: */
+            if (!PyTypeNum_ISFLOAT(NPY_@TYPE@) && !PyTypeNum_ISCOMPLEX(NPY_@TYPE@)) {
+                return PROMOTION_REQUIRED;
+            }
+            return CONVERT_PYSCALAR;
         }
         CONVERT_TO_RESULT(PyFloat_AS_DOUBLE(value));
         return CONVERSION_SUCCESS;
@@ -968,15 +984,23 @@ convert_to_@name@(PyObject *value, @type@ *result, npy_bool *may_need_deferring)
         }
         if (!IS_SAFE(NPY_LONG, NPY_@TYPE@)) {
             /*
-             * long -> (c)longdouble is safe, so `THER_IS_UNKNOWN_OBJECT` will
+             * long -> (c)longdouble is safe, so `OTHER_IS_UNKNOWN_OBJECT` will
              * be returned below for huge integers.
              */
-            return PROMOTION_REQUIRED;
+            if (npy_promotion_state != NPY_USE_WEAK_PROMOTION) {
+                /* Legacy promotion and weak-and-warn not handled here */
+                return PROMOTION_REQUIRED;
+            }
+            return CONVERT_PYSCALAR;
         }
         int overflow;
         long val = PyLong_AsLongAndOverflow(value, &overflow);
         if (overflow) {
-            return OTHER_IS_UNKNOWN_OBJECT;  /* handle as if arbitrary object */
+            /* handle as if "unsafe" */
+            if (npy_promotion_state != NPY_USE_WEAK_PROMOTION) {
+                return PROMOTION_REQUIRED;
+            }
+            return CONVERT_PYSCALAR;
         }
         if (error_converting(val)) {
             return CONVERSION_ERROR;  /* should not be possible */
@@ -995,7 +1019,15 @@ convert_to_@name@(PyObject *value, @type@ *result, npy_bool *may_need_deferring)
             *may_need_deferring = NPY_TRUE;
         }
         if (!IS_SAFE(NPY_CDOUBLE, NPY_@TYPE@)) {
-            return PROMOTION_REQUIRED;
+            if (npy_promotion_state != NPY_USE_WEAK_PROMOTION) {
+                /* Legacy promotion and weak-and-warn not handled here */
+                return PROMOTION_REQUIRED;
+            }
+            /* Weak promotion is used only when self is complex: */
+            if (!PyTypeNum_ISCOMPLEX(NPY_@TYPE@)) {
+                return PROMOTION_REQUIRED;
+            }
+            return CONVERT_PYSCALAR;
         }
 #if defined(IS_CFLOAT) || defined(IS_CDOUBLE) || defined(IS_CLONGDOUBLE)
         Py_complex val = PyComplex_AsCComplex(value);
@@ -1164,12 +1196,24 @@ convert_to_@name@(PyObject *value, @type@ *result, npy_bool *may_need_deferring)
  *         (npy_half, npy_float, npy_double, npy_longdouble,
  *             npy_cfloat, npy_cdouble, npy_clongdouble)*4,
  *         (npy_half, npy_float, npy_double, npy_longdouble)*3#
+ * #oname = (byte, ubyte, short, ushort, int, uint,
+ *              long, ulong, longlong, ulonglong)*11,
+ *          double*10,
+ *          (half, float, double, longdouble,
+ *              cfloat, cdouble, clongdouble)*4,
+ *          (half, float, double, longdouble)*3#
  * #OName = (Byte, UByte, Short, UShort, Int, UInt,
  *              Long, ULong, LongLong, ULongLong)*11,
  *          Double*10,
  *          (Half, Float, Double, LongDouble,
  *              CFloat, CDouble, CLongDouble)*4,
  *          (Half, Float, Double, LongDouble)*3#
+ * #ONAME = (BYTE, UBYTE, SHORT, USHORT, INT, UINT,
+ *              LONG, ULONG, LONGLONG, ULONGLONG)*11,
+ *          DOUBLE*10,
+ *          (HALF, FLOAT, DOUBLE, LONGDOUBLE,
+ *              CFLOAT, CDOUBLE, CLONGDOUBLE)*4,
+ *          (HALF, FLOAT, DOUBLE, LONGDOUBLE)*3#
  */
 #define IS_@name@
 /* drop the "true_" from "true_divide" for floating point warnings: */
@@ -1179,13 +1223,12 @@ convert_to_@name@(PyObject *value, @type@ *result, npy_bool *may_need_deferring)
 #else
     #define OP_NAME "@oper@"
 #endif
-#undef IS_@oper@
 
 static PyObject *
 @name@_@oper@(PyObject *a, PyObject *b)
 {
     PyObject *ret;
-    @type@ arg1, arg2, other_val;
+    @otype@ arg1, arg2, other_val;
 
     /*
      * Check if this operation may be considered forward.  Note `is_forward`
@@ -1214,7 +1257,7 @@ static PyObject *
     PyObject *other = is_forward ? b : a;
 
     npy_bool may_need_deferring;
-    conversion_result res = convert_to_@name@(
+    conversion_result res = convert_to_@oname@(
             other, &other_val, &may_need_deferring);
     if (res == CONVERSION_ERROR) {
         return NULL;  /* an error occurred (should never happen) */
@@ -1255,6 +1298,11 @@ static PyObject *
              *       correctly.  (e.g. `uint8 * int8` cannot warn).
              */
             return PyGenericArrType_Type.tp_as_number->nb_@oper@(a,b);
+        case CONVERT_PYSCALAR:
+            if (@ONAME@_setitem(other, (char *)&other_val, NULL) < 0) {
+                return NULL;
+            }
+            break;
         default:
             assert(0);  /* error was checked already, impossible to reach */
             return NULL;
@@ -1291,7 +1339,7 @@ static PyObject *
 #if @twoout@
     int retstatus = @name@_ctype_@oper@(arg1, arg2, &out, &out2);
 #else
-    int retstatus = @name@_ctype_@oper@(arg1, arg2, &out);
+    int retstatus = @oname@_ctype_@oper@(arg1, arg2, &out);
 #endif
 
 #if @fperr@
@@ -1336,6 +1384,7 @@ static PyObject *
 
 
 #undef OP_NAME
+#undef IS_@oper@
 #undef IS_@name@
 
 /**end repeat**/
@@ -1358,6 +1407,10 @@ static PyObject *
  *         Long, ULong, LongLong, ULongLong,
  *         Half, Float, Double, LongDouble,
  *         CFloat, CDouble, CLongDouble#
+ * #NAME = BYTE, UBYTE, SHORT, USHORT, INT, UINT,
+ *         LONG, ULONG, LONGLONG, ULONGLONG,
+ *         HALF, FLOAT, DOUBLE, LONGDOUBLE,
+ *         CFLOAT, CDOUBLE, CLONGDOUBLE#
  *
  * #isint = 1*10,0*7#
  * #isuint = (0,1)*5,0*7#
@@ -1417,6 +1470,11 @@ static PyObject *
 #endif
         case PROMOTION_REQUIRED:
             return PyGenericArrType_Type.tp_as_number->nb_power(a, b, modulo);
+        case CONVERT_PYSCALAR:
+            if (@NAME@_setitem(other, (char *)&other_val, NULL) < 0) {
+                return NULL;
+            }
+            break;
         default:
             assert(0);  /* error was checked already, impossible to reach */
             return NULL;
@@ -1759,6 +1817,10 @@ static PyObject *
  *         Long, ULong, LongLong, ULongLong,
  *         Half, Float, Double, LongDouble,
  *         CFloat, CDouble, CLongDouble#
+ * #NAME = BYTE, UBYTE, SHORT, USHORT, INT, UINT,
+ *         LONG, ULONG, LONGLONG, ULONGLONG,
+ *         HALF, FLOAT, DOUBLE, LONGDOUBLE,
+ *         CFLOAT, CDOUBLE, CLONGDOUBLE#
  * #simp = def*10, def_half, def*3, cmplx*3#
  */
 #define IS_@name@
@@ -1791,6 +1853,11 @@ static PyObject*
 #endif
         case PROMOTION_REQUIRED:
             return PyGenericArrType_Type.tp_richcompare(self, other, cmp_op);
+        case CONVERT_PYSCALAR:
+            if (@NAME@_setitem(other, (char *)&arg2, NULL) < 0) {
+                return NULL;
+            }
+            break;
         default:
             assert(0);  /* error was checked already, impossible to reach */
             return NULL;

diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
@@ -4947,6 +4947,41 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc,
         goto fail;
     }
 
+    if (promoting_pyscalars) {
+        /*
+         * Python integers need to be cast specially.  For other python
+         * scalars it does not hurt either.  It would be nice to never create
+         * the array in this case, but that is difficult until value-based
+         * promotion rules are gone.  (After that, we may get away with using
+         * dummy arrays rather than real arrays for the legacy resolvers.)
+         */
+        for (int i = 0; i < nin; i++) {
+            int orig_flags = PyArray_FLAGS(operands[i]);
+            if (!(orig_flags & NPY_ARRAY_WAS_PYTHON_LITERAL)) {
+                continue;
+            }
+            /* If the descriptor matches, no need to worry about conversion */
+            if (PyArray_EquivTypes(
+                    PyArray_DESCR(operands[i]), operation_descrs[i])) {
+                continue;
+            }
+            /* Otherwise, replace the operand with a new array */
+            PyArray_Descr *descr = operation_descrs[i];
+            Py_INCREF(descr);
+            PyArrayObject *new = (PyArrayObject *)PyArray_NewFromDescr(
+                    &PyArray_Type, descr, 0, NULL, NULL, NULL, 0, NULL);
+            Py_SETREF(operands[i], new);
+            if (operands[i] == NULL) {
+                goto fail;
+            }
+
+            PyObject *value = PyTuple_GET_ITEM(full_args.in, i);
+            if (PyArray_SETITEM(new, PyArray_BYTES(operands[i]), value) < 0) {
+                goto fail;
+            }
+        }
+    }
+
     if (subok) {
         _find_array_prepare(full_args, output_array_prepare, nout);
     }

diff --git a/numpy/core/tests/test_mem_overlap.py b/numpy/core/tests/test_mem_overlap.py
@@ -105,7 +105,7 @@ def test_diophantine_fuzz():
                       for j in range(ndim))
 
             b_ub = min(max_int-2, sum(a*ub for a, ub in zip(A, U)))
-            b = rng.randint(-1, b_ub+2, dtype=np.intp)
+            b = int(rng.randint(-1, b_ub+2, dtype=np.intp))
 
             if ndim == 0 and feasible_count < min_count:
                 b = 0