From 8735980bd44446a789aa00b9e05cfffe4d583dae Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Fri, 1 Apr 2022 11:46:41 -0700
Subject: [PATCH 01/20] ENH: Check floating point error flags for all casts

To achieve this, I first need to propagate the ArrayMethod information
that FPE flags _may_ be given for a specific cast.
This requires quite a bit of changes (which is arguably the more annoying
part of the PR/commit).
Note that for some "legacy" casts, we do not modify this and assume that
the only interesting information is `needs_api`.
---
 .../core/src/common/lowlevel_strided_loops.h  |   8 +-
 numpy/core/src/common/umathmodule.h           |   6 +
 .../core/src/multiarray/array_assign_array.c  |  49 ++++++--
 .../core/src/multiarray/array_assign_scalar.c |  49 ++++++--
 numpy/core/src/multiarray/array_coercion.c    |  29 ++++-
 numpy/core/src/multiarray/array_method.h      |  20 ++-
 numpy/core/src/multiarray/arraytypes.c.src    |   1 +
 numpy/core/src/multiarray/convert_datatype.c  |  26 ++--
 numpy/core/src/multiarray/ctors.c             |  27 +++-
 numpy/core/src/multiarray/dtype_transfer.c    | 115 ++++++++++++------
 .../multiarray/lowlevel_strided_loops.c.src   |  19 ++-
 numpy/core/src/multiarray/mapping.c           |  30 ++++-
 numpy/core/src/multiarray/nditer_constr.c     |  25 ++--
 numpy/core/src/umath/extobj.c                 |  27 ++++
 14 files changed, 331 insertions(+), 100 deletions(-)

diff --git a/numpy/core/src/common/lowlevel_strided_loops.h b/numpy/core/src/common/lowlevel_strided_loops.h
index 118ce9cb1e0b..c06cd4551ab0 100644
--- a/numpy/core/src/common/lowlevel_strided_loops.h
+++ b/numpy/core/src/common/lowlevel_strided_loops.h
@@ -196,7 +196,7 @@ PyArray_GetDTypeTransferFunction(int aligned,
                             PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
                             int move_references,
                             NPY_cast_info *cast_info,
-                            int *out_needs_api);
+                            NPY_ARRAYMETHOD_FLAGS *out_flags);
 
 NPY_NO_EXPORT int
 get_fields_transfer_function(int aligned,
@@ -205,7 +205,7 @@ get_fields_transfer_function(int aligned,
         int move_references,
         PyArrayMethod_StridedLoop **out_stransfer,
         NpyAuxData **out_transferdata,
-        int *out_needs_api);
+        NPY_ARRAYMETHOD_FLAGS *out_flags);
 
 NPY_NO_EXPORT int
 get_subarray_transfer_function(int aligned,
@@ -214,7 +214,7 @@ get_subarray_transfer_function(int aligned,
         int move_references,
         PyArrayMethod_StridedLoop **out_stransfer,
         NpyAuxData **out_transferdata,
-        int *out_needs_api);
+        NPY_ARRAYMETHOD_FLAGS *out_flags);
 
 /*
  * This is identical to PyArray_GetDTypeTransferFunction, but returns a
@@ -241,7 +241,7 @@ PyArray_GetMaskedDTypeTransferFunction(int aligned,
                             PyArray_Descr *mask_dtype,
                             int move_references,
                             NPY_cast_info *cast_info,
-                            int *out_needs_api);
+                            NPY_ARRAYMETHOD_FLAGS *out_flags);
 
 /*
  * Casts the specified number of elements from 'src' with data type
diff --git a/numpy/core/src/common/umathmodule.h b/numpy/core/src/common/umathmodule.h
index fe44fe403783..0c69f8f545b4 100644
--- a/numpy/core/src/common/umathmodule.h
+++ b/numpy/core/src/common/umathmodule.h
@@ -7,8 +7,14 @@
 NPY_NO_EXPORT PyObject *
 get_sfloat_dtype(PyObject *NPY_UNUSED(mod), PyObject *NPY_UNUSED(args));
 
+/* Defined in umath/extobj.c */
+NPY_NO_EXPORT int
+PyUFunc_GiveFloatingpointErrors(const char *name, int fpe_errors);
+
 PyObject * add_newdoc_ufunc(PyObject *NPY_UNUSED(dummy), PyObject *args);
 PyObject * ufunc_frompyfunc(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *NPY_UNUSED(kwds));
+
+
 int initumath(PyObject *m);
 
 #endif  /* NUMPY_CORE_SRC_COMMON_UMATHMODULE_H_ */
diff --git a/numpy/core/src/multiarray/array_assign_array.c b/numpy/core/src/multiarray/array_assign_array.c
index 020a7f29a615..9d5bf687576d 100644
--- a/numpy/core/src/multiarray/array_assign_array.c
+++ b/numpy/core/src/multiarray/array_assign_array.c
@@ -8,11 +8,13 @@
  */
 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
 #define _MULTIARRAYMODULE
+#define _UMATHMODULE
 
 #define PY_SSIZE_T_CLEAN
 #include <Python.h>
 
 #include "numpy/ndarraytypes.h"
+#include "numpy/npy_math.h"
 
 #include "npy_config.h"
 #include "npy_pycompat.h"
@@ -25,6 +27,8 @@
 #include "array_assign.h"
 #include "dtype_transfer.h"
 
+#include "umathmodule.h"
+
 /*
  * Check that array data is both uint-aligned and true-aligned for all array
  * elements, as required by the copy/casting code in lowlevel_strided_loops.c
@@ -83,7 +87,7 @@ raw_array_assign_array(int ndim, npy_intp const *shape,
     npy_intp src_strides_it[NPY_MAXDIMS];
     npy_intp coord[NPY_MAXDIMS];
 
-    int aligned, needs_api = 0;
+    int aligned;
 
     NPY_BEGIN_THREADS_DEF;
 
@@ -116,15 +120,19 @@ raw_array_assign_array(int ndim, npy_intp const *shape,
 
     /* Get the function to do the casting */
     NPY_cast_info cast_info;
+    NPY_ARRAYMETHOD_FLAGS flags;
     if (PyArray_GetDTypeTransferFunction(aligned,
                         src_strides_it[0], dst_strides_it[0],
                         src_dtype, dst_dtype,
                         0,
-                        &cast_info, &needs_api) != NPY_SUCCEED) {
+                        &cast_info, &flags) != NPY_SUCCEED) {
         return -1;
     }
 
-    if (!needs_api) {
+    if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+        npy_clear_floatstatus_barrier(src_data);
+    }
+    if (!(flags & NPY_METH_REQUIRES_PYAPI)) {
         NPY_BEGIN_THREADS;
     }
 
@@ -143,6 +151,14 @@ raw_array_assign_array(int ndim, npy_intp const *shape,
 
     NPY_END_THREADS;
     NPY_cast_info_xfree(&cast_info);
+
+    if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+        int fpes = npy_get_floatstatus_barrier(src_data);
+        if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
+            return -1;
+        }
+    }
+
     return 0;
 fail:
     NPY_END_THREADS;
@@ -170,7 +186,7 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp const *shape,
     npy_intp wheremask_strides_it[NPY_MAXDIMS];
     npy_intp coord[NPY_MAXDIMS];
 
-    int aligned, needs_api = 0;
+    int aligned;
 
     NPY_BEGIN_THREADS_DEF;
 
@@ -207,17 +223,21 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp const *shape,
 
     /* Get the function to do the casting */
     NPY_cast_info cast_info;
+    NPY_ARRAYMETHOD_FLAGS flags;
     if (PyArray_GetMaskedDTypeTransferFunction(aligned,
                         src_strides_it[0],
                         dst_strides_it[0],
                         wheremask_strides_it[0],
                         src_dtype, dst_dtype, wheremask_dtype,
                         0,
-                        &cast_info, &needs_api) != NPY_SUCCEED) {
+                        &cast_info, &flags) != NPY_SUCCEED) {
         return -1;
     }
 
-    if (!needs_api) {
+    if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+        npy_clear_floatstatus_barrier(src_data);
+    }
+    if (!(flags & NPY_METH_REQUIRES_PYAPI)) {
         NPY_BEGIN_THREADS;
     }
     npy_intp strides[2] = {src_strides_it[0], dst_strides_it[0]};
@@ -232,7 +252,7 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp const *shape,
                 args, &shape_it[0], strides,
                 (npy_bool *)wheremask_data, wheremask_strides_it[0],
                 cast_info.auxdata) < 0) {
-            break;
+            goto fail;
         }
     } NPY_RAW_ITER_THREE_NEXT(idim, ndim, coord, shape_it,
                             dst_data, dst_strides_it,
@@ -241,7 +261,20 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp const *shape,
 
     NPY_END_THREADS;
     NPY_cast_info_xfree(&cast_info);
-    return (needs_api && PyErr_Occurred()) ? -1 : 0;
+
+    if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+        int fpes = npy_get_floatstatus_barrier(src_data);
+        if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
+            return -1;
+        }
+    }
+
+    return 0;
+
+fail:
+    NPY_END_THREADS;
+    NPY_cast_info_xfree(&cast_info);
+    return -1;
 }
 
 /*
diff --git a/numpy/core/src/multiarray/array_assign_scalar.c b/numpy/core/src/multiarray/array_assign_scalar.c
index 4ffef7ecc96e..ba964b86d3a9 100644
--- a/numpy/core/src/multiarray/array_assign_scalar.c
+++ b/numpy/core/src/multiarray/array_assign_scalar.c
@@ -8,11 +8,13 @@
  */
 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
 #define _MULTIARRAYMODULE
+#define _UMATHMODULE
 
 #define PY_SSIZE_T_CLEAN
 #include <Python.h>
 
 #include <numpy/ndarraytypes.h>
+#include "numpy/npy_math.h"
 
 #include "npy_config.h"
 #include "npy_pycompat.h"
@@ -25,6 +27,8 @@
 #include "array_assign.h"
 #include "dtype_transfer.h"
 
+#include "umathmodule.h"
+
 /*
  * Assigns the scalar value to every element of the destination raw array.
  *
@@ -39,7 +43,7 @@ raw_array_assign_scalar(int ndim, npy_intp const *shape,
     npy_intp shape_it[NPY_MAXDIMS], dst_strides_it[NPY_MAXDIMS];
     npy_intp coord[NPY_MAXDIMS];
 
-    int aligned, needs_api = 0;
+    int aligned;
 
     NPY_BEGIN_THREADS_DEF;
 
@@ -62,15 +66,19 @@ raw_array_assign_scalar(int ndim, npy_intp const *shape,
 
     /* Get the function to do the casting */
     NPY_cast_info cast_info;
+    NPY_ARRAYMETHOD_FLAGS flags;
     if (PyArray_GetDTypeTransferFunction(aligned,
                         0, dst_strides_it[0],
                         src_dtype, dst_dtype,
                         0,
-                        &cast_info, &needs_api) != NPY_SUCCEED) {
+                        &cast_info, &flags) != NPY_SUCCEED) {
         return -1;
     }
 
-    if (!needs_api) {
+    if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+        npy_clear_floatstatus_barrier(src_data);
+    }
+    if (!(flags & NPY_METH_REQUIRES_PYAPI)) {
         npy_intp nitems = 1, i;
         for (i = 0; i < ndim; i++) {
             nitems *= shape_it[i];
@@ -92,6 +100,14 @@ raw_array_assign_scalar(int ndim, npy_intp const *shape,
 
     NPY_END_THREADS;
     NPY_cast_info_xfree(&cast_info);
+
+    if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+        int fpes = npy_get_floatstatus_barrier(src_data);
+        if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
+            return -1;
+        }
+    }
+
     return 0;
 fail:
     NPY_END_THREADS;
@@ -117,7 +133,7 @@ raw_array_wheremasked_assign_scalar(int ndim, npy_intp const *shape,
     npy_intp wheremask_strides_it[NPY_MAXDIMS];
     npy_intp coord[NPY_MAXDIMS];
 
-    int aligned, needs_api = 0;
+    int aligned;
 
     NPY_BEGIN_THREADS_DEF;
 
@@ -142,15 +158,19 @@ raw_array_wheremasked_assign_scalar(int ndim, npy_intp const *shape,
 
     /* Get the function to do the casting */
     NPY_cast_info cast_info;
+    NPY_ARRAYMETHOD_FLAGS flags;
     if (PyArray_GetMaskedDTypeTransferFunction(aligned,
                         0, dst_strides_it[0], wheremask_strides_it[0],
                         src_dtype, dst_dtype, wheremask_dtype,
                         0,
-                        &cast_info, &needs_api) != NPY_SUCCEED) {
+                        &cast_info, &flags) != NPY_SUCCEED) {
         return -1;
     }
 
-    if (!needs_api) {
+    if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+        npy_clear_floatstatus_barrier(src_data);
+    }
+    if (!(flags & NPY_METH_REQUIRES_PYAPI)) {
         npy_intp nitems = 1, i;
         for (i = 0; i < ndim; i++) {
             nitems *= shape_it[i];
@@ -170,7 +190,7 @@ raw_array_wheremasked_assign_scalar(int ndim, npy_intp const *shape,
                 args, &shape_it[0], strides,
                 (npy_bool *)wheremask_data, wheremask_strides_it[0],
                 cast_info.auxdata) < 0) {
-            break;
+            goto fail;
         }
     } NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape_it,
                             dst_data, dst_strides_it,
@@ -178,7 +198,20 @@ raw_array_wheremasked_assign_scalar(int ndim, npy_intp const *shape,
 
     NPY_END_THREADS;
     NPY_cast_info_xfree(&cast_info);
-    return (needs_api && PyErr_Occurred()) ? -1 : 0;
+
+    if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+        int fpes = npy_get_floatstatus_barrier(src_data);
+        if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
+            return -1;
+        }
+    }
+
+    return 0;
+
+fail:
+    NPY_END_THREADS;
+    NPY_cast_info_xfree(&cast_info);
+    return -1;
 }
 
 /*
diff --git a/numpy/core/src/multiarray/array_coercion.c b/numpy/core/src/multiarray/array_coercion.c
index 0886d2d0b403..e703e73820b4 100644
--- a/numpy/core/src/multiarray/array_coercion.c
+++ b/numpy/core/src/multiarray/array_coercion.c
@@ -9,6 +9,7 @@
 
 #include "lowlevel_strided_loops.h"
 #include "numpy/arrayobject.h"
+#include "numpy/npy_math.h"
 
 #include "descriptor.h"
 #include "convert_datatype.h"
@@ -22,6 +23,7 @@
 #include "_datetime.h"
 #include "npy_import.h"
 
+#include "umathmodule.h"
 
 /*
  * This file defines helpers for some of the ctors.c functions which
@@ -388,21 +390,36 @@ cast_raw_scalar_item(
         PyArray_Descr *from_descr, char *from_item,
         PyArray_Descr *to_descr, char *to_item)
 {
-    int needs_api = 0;
     NPY_cast_info cast_info;
+    NPY_ARRAYMETHOD_FLAGS flags;
     if (PyArray_GetDTypeTransferFunction(
             0, 0, 0, from_descr, to_descr, 0, &cast_info,
-            &needs_api) == NPY_FAIL) {
+            &flags) == NPY_FAIL) {
         return -1;
     }
+
+    if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+        npy_clear_floatstatus_barrier(from_item);
+    }
+
     char *args[2] = {from_item, to_item};
     const npy_intp strides[2] = {0, 0};
     const npy_intp length = 1;
-    int res = cast_info.func(&cast_info.context,
-            args, &length, strides, cast_info.auxdata);
-
+    if (cast_info.func(&cast_info.context,
+            args, &length, strides, cast_info.auxdata) < 0) {
+        NPY_cast_info_xfree(&cast_info);
+        return -1;
+    }
     NPY_cast_info_xfree(&cast_info);
-    return res;
+
+    if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+        int fpes = npy_get_floatstatus_barrier(to_item);
+        if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
+            return -1;
+        }
+    }
+
+    return 0;
 }
 
 
diff --git a/numpy/core/src/multiarray/array_method.h b/numpy/core/src/multiarray/array_method.h
index 6e6a026bc963..c9ec8903d1bb 100644
--- a/numpy/core/src/multiarray/array_method.h
+++ b/numpy/core/src/multiarray/array_method.h
@@ -20,7 +20,11 @@ typedef enum {
      * setup/check. No function should set error flags and ignore them
      * since it would interfere with chaining operations (e.g. casting).
      */
-    /* TODO: Change this into a positive flag */
+    /*
+     * TODO: Change this into a positive flag?  That would make "combing"
+     *       multiple methods easier. OTOH, if we add more flags, the default
+     *       would be 0 just like it is here.
+     */
     NPY_METH_NO_FLOATINGPOINT_ERRORS = 1 << 2,
     /* Whether the method supports unaligned access (not runtime) */
     NPY_METH_SUPPORTS_UNALIGNED = 1 << 3,
@@ -43,6 +47,20 @@ typedef enum {
 } NPY_ARRAYMETHOD_FLAGS;
 
 
+/*
+ * It would be nice to just | flags, but in general it seems that 0 bits
+ * probably should indicate "default".
+ * And that is not necessarily compatible with `|`.
+ *
+ * NOTE: If made public, should maybe be a function to easier add flags?
+ */
+#define PyArrayMethod_MINIMAL_FLAGS NPY_METH_NO_FLOATINGPOINT_ERRORS
+#define PyArrayMethod_COMBINED_FLAGS(flags1, flags2)  \
+        ((NPY_ARRAYMETHOD_FLAGS)(  \
+            ((flags1 | flags2) & ~PyArrayMethod_MINIMAL_FLAGS)  \
+            | (flags1 & flags2)))
+
+
 struct PyArrayMethodObject_tag;
 
 /*
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index ee4f5f312bf0..d7d54a2c6e9f 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -238,6 +238,7 @@ static int
     }
     else {
         temp = (@type@)@func2@(op);
+        // TODO: Need to check for inf, if input is not inf (for float)!
     }
     if (PyErr_Occurred()) {
         PyObject *type, *value, *traceback;
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c
index 8d0a4cd56cae..ce2c73a7a4e2 100644
--- a/numpy/core/src/multiarray/convert_datatype.c
+++ b/numpy/core/src/multiarray/convert_datatype.c
@@ -3042,26 +3042,22 @@ nonstructured_to_structured_get_loop(
         NPY_ARRAYMETHOD_FLAGS *flags)
 {
     if (context->descriptors[1]->names != NULL) {
-        int needs_api = 0;
         if (get_fields_transfer_function(
                 aligned, strides[0], strides[1],
                 context->descriptors[0], context->descriptors[1],
                 move_references, out_loop, out_transferdata,
-                &needs_api) == NPY_FAIL) {
+                flags) == NPY_FAIL) {
             return -1;
         }
-        *flags = needs_api ? NPY_METH_REQUIRES_PYAPI : 0;
     }
     else if (context->descriptors[1]->subarray != NULL) {
-        int needs_api = 0;
         if (get_subarray_transfer_function(
                 aligned, strides[0], strides[1],
                 context->descriptors[0], context->descriptors[1],
                 move_references, out_loop, out_transferdata,
-                &needs_api) == NPY_FAIL) {
+                flags) == NPY_FAIL) {
             return -1;
         }
-        *flags = needs_api ? NPY_METH_REQUIRES_PYAPI : 0;
     }
     else {
         /*
@@ -3204,26 +3200,22 @@ structured_to_nonstructured_get_loop(
         NPY_ARRAYMETHOD_FLAGS *flags)
 {
     if (context->descriptors[0]->names != NULL) {
-        int needs_api = 0;
         if (get_fields_transfer_function(
                 aligned, strides[0], strides[1],
                 context->descriptors[0], context->descriptors[1],
                 move_references, out_loop, out_transferdata,
-                &needs_api) == NPY_FAIL) {
+                flags) == NPY_FAIL) {
             return -1;
         }
-        *flags = needs_api ? NPY_METH_REQUIRES_PYAPI : 0;
     }
     else if (context->descriptors[0]->subarray != NULL) {
-        int needs_api = 0;
         if (get_subarray_transfer_function(
                 aligned, strides[0], strides[1],
                 context->descriptors[0], context->descriptors[1],
                 move_references, out_loop, out_transferdata,
-                &needs_api) == NPY_FAIL) {
+                flags) == NPY_FAIL) {
             return -1;
         }
-        *flags = needs_api ? NPY_METH_REQUIRES_PYAPI : 0;
     }
     else {
         /*
@@ -3513,27 +3505,23 @@ void_to_void_get_loop(
 {
     if (context->descriptors[0]->names != NULL ||
             context->descriptors[1]->names != NULL) {
-        int needs_api = 0;
         if (get_fields_transfer_function(
                 aligned, strides[0], strides[1],
                 context->descriptors[0], context->descriptors[1],
                 move_references, out_loop, out_transferdata,
-                &needs_api) == NPY_FAIL) {
+                flags) == NPY_FAIL) {
             return -1;
         }
-        *flags = needs_api ? NPY_METH_REQUIRES_PYAPI : 0;
     }
     else if (context->descriptors[0]->subarray != NULL ||
              context->descriptors[1]->subarray != NULL) {
-        int needs_api = 0;
         if (get_subarray_transfer_function(
                 aligned, strides[0], strides[1],
                 context->descriptors[0], context->descriptors[1],
                 move_references, out_loop, out_transferdata,
-                &needs_api) == NPY_FAIL) {
+                flags) == NPY_FAIL) {
             return -1;
         }
-        *flags = needs_api ? NPY_METH_REQUIRES_PYAPI : 0;
     }
     else {
         /*
@@ -3546,7 +3534,7 @@ void_to_void_get_loop(
                 out_loop, out_transferdata) == NPY_FAIL) {
             return -1;
         }
-        *flags = 0;
+        *flags = PyArrayMethod_MINIMAL_FLAGS;
     }
     return 0;
 }
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 95a3bd84d82b..c3d66dd6bd64 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -1,5 +1,6 @@
 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
 #define _MULTIARRAYMODULE
+#define _UMATHMODULE
 
 #define PY_SSIZE_T_CLEAN
 #include <Python.h>
@@ -33,6 +34,8 @@
 #include "get_attr_string.h"
 #include "array_coercion.h"
 
+#include "umathmodule.h"
+
 /*
  * Reading from a file or a string.
  *
@@ -2741,18 +2744,22 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order)
      * contiguous strides, etc.
      */
     NPY_cast_info cast_info;
+    NPY_ARRAYMETHOD_FLAGS flags;
     if (PyArray_GetDTypeTransferFunction(
                     IsUintAligned(src) && IsAligned(src) &&
                     IsUintAligned(dst) && IsAligned(dst),
                     src_stride, dst_stride,
                     PyArray_DESCR(src), PyArray_DESCR(dst),
                     0,
-                    &cast_info, &needs_api) != NPY_SUCCEED) {
+                    &cast_info, &flags) != NPY_SUCCEED) {
         NpyIter_Deallocate(dst_iter);
         NpyIter_Deallocate(src_iter);
         return -1;
     }
-
+    needs_api |= (flags & NPY_METH_REQUIRES_PYAPI) != 0;
+    if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+        npy_clear_floatstatus_barrier((char *)src_iter);
+    }
     if (!needs_api) {
         NPY_BEGIN_THREADS;
     }
@@ -2804,8 +2811,20 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order)
     NPY_END_THREADS;
 
     NPY_cast_info_xfree(&cast_info);
-    NpyIter_Deallocate(dst_iter);
-    NpyIter_Deallocate(src_iter);
+    if (!NpyIter_Deallocate(dst_iter)) {
+        res = -1;
+    }
+    if (!NpyIter_Deallocate(src_iter)) {
+        res = -1;
+    }
+
+    if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+        int fpes = npy_get_floatstatus_barrier((char *)src_iter);
+        if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
+            return -1;
+        }
+    }
+
     return res;
 }
 
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index 18de5d1321d2..f8458d2d7b42 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -11,12 +11,14 @@
  */
 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
 #define _MULTIARRAYMODULE
+#define _UMATHMODULE
 
 #define PY_SSIZE_T_CLEAN
 #include <Python.h>
 #include <structmember.h>
 
 #include "numpy/arrayobject.h"
+#include "numpy/npy_math.h"
 
 #include "lowlevel_strided_loops.h"
 #include "npy_pycompat.h"
@@ -35,6 +37,8 @@
 #include "array_method.h"
 #include "array_coercion.h"
 
+#include "umathmodule.h"
+
 #define NPY_LOWLEVEL_BUFFER_BLOCKSIZE  128
 
 /********** PRINTF DEBUG TRACING **************/
@@ -1506,7 +1510,7 @@ get_one_to_n_transfer_function(int aligned,
                             npy_intp N,
                             PyArrayMethod_StridedLoop **out_stransfer,
                             NpyAuxData **out_transferdata,
-                            int *out_needs_api)
+                            NPY_ARRAYMETHOD_FLAGS *out_flags)
 {
     _one_to_n_data *data = PyMem_Malloc(sizeof(_one_to_n_data));
     if (data == NULL) {
@@ -1530,18 +1534,19 @@ get_one_to_n_transfer_function(int aligned,
                     src_dtype, dst_dtype,
                     0,
                     &data->wrapped,
-                    out_needs_api) != NPY_SUCCEED) {
+                    out_flags) != NPY_SUCCEED) {
         NPY_AUXDATA_FREE((NpyAuxData *)data);
         return NPY_FAIL;
     }
 
     /* If the src object will need a DECREF, set src_dtype */
     if (move_references && PyDataType_REFCHK(src_dtype)) {
+        *out_flags |= NPY_METH_REQUIRES_PYAPI;
         if (get_decref_transfer_function(aligned,
                             src_stride,
                             src_dtype,
                             &data->decref_src,
-                            out_needs_api) != NPY_SUCCEED) {
+                            NULL) != NPY_SUCCEED) {
             NPY_AUXDATA_FREE((NpyAuxData *)data);
             return NPY_FAIL;
         }
@@ -1667,7 +1672,7 @@ get_n_to_n_transfer_function(int aligned,
                             npy_intp N,
                             PyArrayMethod_StridedLoop **out_stransfer,
                             NpyAuxData **out_transferdata,
-                            int *out_needs_api)
+                            NPY_ARRAYMETHOD_FLAGS *out_flags)
 {
     _n_to_n_data *data = PyMem_Malloc(sizeof(_n_to_n_data));
     if (data == NULL) {
@@ -1699,7 +1704,7 @@ get_n_to_n_transfer_function(int aligned,
                     src_dtype, dst_dtype,
                     move_references,
                     &data->wrapped,
-                    out_needs_api) != NPY_SUCCEED) {
+                    out_flags) != NPY_SUCCEED) {
         NPY_AUXDATA_FREE((NpyAuxData *)data);
         return NPY_FAIL;
     }
@@ -1913,7 +1918,7 @@ get_subarray_broadcast_transfer_function(int aligned,
                             int move_references,
                             PyArrayMethod_StridedLoop **out_stransfer,
                             NpyAuxData **out_transferdata,
-                            int *out_needs_api)
+                            NPY_ARRAYMETHOD_FLAGS *out_flags)
 {
     _subarray_broadcast_data *data;
     npy_intp structsize, loop_index, run, run_size,
@@ -1946,7 +1951,7 @@ get_subarray_broadcast_transfer_function(int aligned,
                     src_dtype, dst_dtype,
                     0,
                     &data->wrapped,
-                    out_needs_api) != NPY_SUCCEED) {
+                    out_flags) != NPY_SUCCEED) {
         NPY_AUXDATA_FREE((NpyAuxData *)data);
         return NPY_FAIL;
     }
@@ -1958,7 +1963,7 @@ get_subarray_broadcast_transfer_function(int aligned,
                         src_dtype, NULL,
                         1,
                         &data->decref_src,
-                        out_needs_api) != NPY_SUCCEED) {
+                        out_flags) != NPY_SUCCEED) {
             NPY_AUXDATA_FREE((NpyAuxData *)data);
             return NPY_FAIL;
         }
@@ -1971,7 +1976,7 @@ get_subarray_broadcast_transfer_function(int aligned,
                         dst_dtype, NULL,
                         1,
                         &data->decref_dst,
-                        out_needs_api) != NPY_SUCCEED) {
+                        out_flags) != NPY_SUCCEED) {
             NPY_AUXDATA_FREE((NpyAuxData *)data);
             return NPY_FAIL;
         }
@@ -2087,7 +2092,7 @@ get_subarray_transfer_function(int aligned,
                             int move_references,
                             PyArrayMethod_StridedLoop **out_stransfer,
                             NpyAuxData **out_transferdata,
-                            int *out_needs_api)
+                            NPY_ARRAYMETHOD_FLAGS *out_flags)
 {
     PyArray_Dims src_shape = {NULL, -1}, dst_shape = {NULL, -1};
     npy_intp src_size = 1, dst_size = 1;
@@ -2132,7 +2137,7 @@ get_subarray_transfer_function(int aligned,
                         move_references,
                         src_size,
                         out_stransfer, out_transferdata,
-                        out_needs_api);
+                        out_flags);
     }
     /* Copy the src value to all the dst values */
     else if (src_size == 1) {
@@ -2145,7 +2150,7 @@ get_subarray_transfer_function(int aligned,
                 move_references,
                 dst_size,
                 out_stransfer, out_transferdata,
-                out_needs_api);
+                out_flags);
     }
     /*
      * Copy the subarray with broadcasting, truncating, and zero-padding
@@ -2159,7 +2164,7 @@ get_subarray_transfer_function(int aligned,
                         src_shape, dst_shape,
                         move_references,
                         out_stransfer, out_transferdata,
-                        out_needs_api);
+                        out_flags);
 
         npy_free_cache_dim_obj(src_shape);
         npy_free_cache_dim_obj(dst_shape);
@@ -2277,7 +2282,7 @@ get_fields_transfer_function(int NPY_UNUSED(aligned),
                             int move_references,
                             PyArrayMethod_StridedLoop **out_stransfer,
                             NpyAuxData **out_transferdata,
-                            int *out_needs_api)
+                            NPY_ARRAYMETHOD_FLAGS *out_flags)
 {
     PyObject *key, *tup, *title;
     PyArray_Descr *src_fld_dtype, *dst_fld_dtype;
@@ -2308,6 +2313,7 @@ get_fields_transfer_function(int NPY_UNUSED(aligned),
         data->base.clone = &_field_transfer_data_clone;
         data->field_count = 0;
 
+        *out_flags = PyArrayMethod_MINIMAL_FLAGS;
         for (i = 0; i < field_count; ++i) {
             key = PyTuple_GET_ITEM(dst_dtype->names, i);
             tup = PyDict_GetItem(dst_dtype->fields, key);
@@ -2316,15 +2322,17 @@ get_fields_transfer_function(int NPY_UNUSED(aligned),
                 PyMem_Free(data);
                 return NPY_FAIL;
             }
+            NPY_ARRAYMETHOD_FLAGS field_flags;
             if (PyArray_GetDTypeTransferFunction(0,
                                     src_stride, dst_stride,
                                     src_dtype, dst_fld_dtype,
                                     0,
                                     &data->fields[i].info,
-                                    out_needs_api) != NPY_SUCCEED) {
+                                    &field_flags) != NPY_SUCCEED) {
                 NPY_AUXDATA_FREE((NpyAuxData *)data);
                 return NPY_FAIL;
             }
+            *out_flags = PyArrayMethod_COMBINED_FLAGS(*out_flags, field_flags);
             data->fields[i].src_offset = 0;
             data->fields[i].dst_offset = dst_offset;
             data->field_count++;
@@ -2336,11 +2344,12 @@ get_fields_transfer_function(int NPY_UNUSED(aligned),
          * input, the second one (normally output) just does not matter here.
          */
         if (move_references && PyDataType_REFCHK(src_dtype)) {
+            *out_flags |= NPY_METH_REQUIRES_PYAPI;
             if (get_decref_transfer_function(0,
                                     src_stride,
                                     src_dtype,
                                     &data->fields[field_count].info,
-                                    out_needs_api) != NPY_SUCCEED) {
+                                    NULL) != NPY_SUCCEED) {
                 NPY_AUXDATA_FREE((NpyAuxData *)data);
                 return NPY_FAIL;
             }
@@ -2388,7 +2397,7 @@ get_fields_transfer_function(int NPY_UNUSED(aligned),
                                              src_fld_dtype, dst_dtype,
                                              move_references,
                                              &data->fields[0].info,
-                                             out_needs_api) != NPY_SUCCEED) {
+                                             out_flags) != NPY_SUCCEED) {
             PyMem_Free(data);
             return NPY_FAIL;
         }
@@ -2423,6 +2432,7 @@ get_fields_transfer_function(int NPY_UNUSED(aligned),
     data->base.clone = &_field_transfer_data_clone;
     data->field_count = 0;
 
+    *out_flags = PyArrayMethod_MINIMAL_FLAGS;
     /* set up the transfer function for each field */
     for (i = 0; i < field_count; ++i) {
         key = PyTuple_GET_ITEM(dst_dtype->names, i);
@@ -2440,15 +2450,17 @@ get_fields_transfer_function(int NPY_UNUSED(aligned),
             return NPY_FAIL;
         }
 
+        NPY_ARRAYMETHOD_FLAGS field_flags;
         if (PyArray_GetDTypeTransferFunction(0,
                                              src_stride, dst_stride,
                                              src_fld_dtype, dst_fld_dtype,
                                              move_references,
                                              &data->fields[i].info,
-                                             out_needs_api) != NPY_SUCCEED) {
+                                             &field_flags) != NPY_SUCCEED) {
             NPY_AUXDATA_FREE((NpyAuxData *)data);
             return NPY_FAIL;
         }
+        *out_flags = PyArrayMethod_COMBINED_FLAGS(*out_flags, field_flags);
         data->fields[i].src_offset = src_offset;
         data->fields[i].dst_offset = dst_offset;
         data->field_count++;
@@ -2748,11 +2760,12 @@ get_decref_transfer_function(int aligned,
         src_size = PyArray_MultiplyList(src_shape.ptr, src_shape.len);
         npy_free_cache_dim_obj(src_shape);
 
+        NPY_ARRAYMETHOD_FLAGS ignored_flags;
         if (get_n_to_n_transfer_function(aligned,
                 src_stride, 0,
                 src_dtype->subarray->base, NULL, 1, src_size,
                 &cast_info->func, &cast_info->auxdata,
-                out_needs_api) != NPY_SUCCEED) {
+                &ignored_flags) != NPY_SUCCEED) {
             return NPY_FAIL;
         }
 
@@ -3098,7 +3111,7 @@ define_cast_for_descrs(
         npy_intp src_stride, npy_intp dst_stride,
         PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
         int move_references,
-        NPY_cast_info *cast_info, int *out_needs_api)
+        NPY_cast_info *cast_info, NPY_ARRAYMETHOD_FLAGS *out_flags)
 {
     /* Storage for all cast info in case multi-step casting is necessary */
     _multistep_castdata castdata;
@@ -3109,6 +3122,7 @@ define_cast_for_descrs(
     /* `view_offset` passed to `init_cast_info` but unused for the main cast */
     npy_intp view_offset = NPY_MIN_INTP;
     NPY_CASTING casting = -1;
+    *out_flags = PyArrayMethod_MINIMAL_FLAGS;
 
     if (init_cast_info(
             cast_info, &casting, &view_offset, src_dtype, dst_dtype, 1) < 0) {
@@ -3159,7 +3173,7 @@ define_cast_for_descrs(
             }
             assert(castdata.from.func != NULL);
 
-            *out_needs_api |= (flags & NPY_METH_REQUIRES_PYAPI) != 0;
+            *out_flags = PyArrayMethod_COMBINED_FLAGS(*out_flags, flags);
             /* The main cast now uses a buffered input: */
             src_stride = strides[1];
             move_references = 1;  /* main cast has to clear the buffer */
@@ -3198,7 +3212,7 @@ define_cast_for_descrs(
             }
             assert(castdata.to.func != NULL);
 
-            *out_needs_api |= (flags & NPY_METH_REQUIRES_PYAPI) != 0;
+            *out_flags = PyArrayMethod_COMBINED_FLAGS(*out_flags, flags);
             /* The main cast now uses a buffered input: */
             dst_stride = strides[0];
             if (castdata.from.func != NULL) {
@@ -3219,7 +3233,7 @@ define_cast_for_descrs(
         goto fail;
     }
 
-    *out_needs_api |= (flags & NPY_METH_REQUIRES_PYAPI) != 0;
+    *out_flags = PyArrayMethod_COMBINED_FLAGS(*out_flags, flags);
 
     if (castdata.from.func == NULL && castdata.to.func == NULL) {
         /* Most of the time, there will be only one step required. */
@@ -3256,7 +3270,7 @@ PyArray_GetDTypeTransferFunction(int aligned,
                             PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
                             int move_references,
                             NPY_cast_info *cast_info,
-                            int *out_needs_api)
+                            NPY_ARRAYMETHOD_FLAGS *out_flags)
 {
     assert(src_dtype != NULL);
 
@@ -3271,17 +3285,24 @@ PyArray_GetDTypeTransferFunction(int aligned,
      */
     if (dst_dtype == NULL) {
         assert(move_references);
-        return get_decref_transfer_function(aligned,
+        int needs_api = 0;
+        int res = get_decref_transfer_function(aligned,
                                 src_dtype->elsize,
                                 src_dtype,
                                 cast_info,
-                                out_needs_api);
+                                &needs_api);
+        /* decref'ing never creates floating point errors, so just ignore it */
+        *out_flags = PyArrayMethod_MINIMAL_FLAGS;
+        if (needs_api) {
+            *out_flags |= NPY_METH_REQUIRES_PYAPI;
+        }
+        return res;
     }
 
     if (define_cast_for_descrs(aligned,
             src_stride, dst_stride,
             src_dtype, dst_dtype, move_references,
-            cast_info, out_needs_api) < 0) {
+            cast_info, out_flags) < 0) {
         return NPY_FAIL;
     }
 
@@ -3353,21 +3374,29 @@ wrap_aligned_transferfunction(
      *       have an explicit implementation instead if we want performance.
      */
     if (must_wrap || src_wrapped_dtype != src_dtype) {
+        NPY_ARRAYMETHOD_FLAGS flags;
         if (PyArray_GetDTypeTransferFunction(aligned,
                 src_stride, castdata.main.descriptors[0]->elsize,
                 src_dtype, castdata.main.descriptors[0], 0,
-                &castdata.from, out_needs_api) != NPY_SUCCEED) {
+                &castdata.from, &flags) != NPY_SUCCEED) {
             goto fail;
         }
+        if (flags & NPY_METH_REQUIRES_PYAPI) {
+            *out_needs_api = 1;
+        }
     }
     if (must_wrap || dst_wrapped_dtype != dst_dtype) {
+        NPY_ARRAYMETHOD_FLAGS flags;
         if (PyArray_GetDTypeTransferFunction(aligned,
                 castdata.main.descriptors[1]->elsize, dst_stride,
                 castdata.main.descriptors[1], dst_dtype,
                 1,  /* clear buffer if it includes references */
-                &castdata.to, out_needs_api) != NPY_SUCCEED) {
+                &castdata.to, &flags) != NPY_SUCCEED) {
             goto fail;
         }
+        if (flags & NPY_METH_REQUIRES_PYAPI) {
+            *out_needs_api = 1;
+        }
     }
 
     *out_transferdata = _multistep_cast_auxdata_clone_int(&castdata, 1);
@@ -3492,7 +3521,7 @@ PyArray_GetMaskedDTypeTransferFunction(int aligned,
                             PyArray_Descr *mask_dtype,
                             int move_references,
                             NPY_cast_info *cast_info,
-                            int *out_needs_api)
+                            NPY_ARRAYMETHOD_FLAGS *out_flags)
 {
     NPY_cast_info_init(cast_info);
 
@@ -3520,18 +3549,19 @@ PyArray_GetMaskedDTypeTransferFunction(int aligned,
                                 src_dtype, dst_dtype,
                                 move_references,
                                 &data->wrapped,
-                                out_needs_api) != NPY_SUCCEED) {
+                                out_flags) != NPY_SUCCEED) {
         PyMem_Free(data);
         return NPY_FAIL;
     }
 
     /* If the src object will need a DECREF, get a function to handle that */
     if (move_references && PyDataType_REFCHK(src_dtype)) {
+        *out_flags |= NPY_METH_REQUIRES_PYAPI;
         if (get_decref_transfer_function(aligned,
                             src_stride,
                             src_dtype,
                             &data->decref_src,
-                            out_needs_api) != NPY_SUCCEED) {
+                            NULL) != NPY_SUCCEED) {
             NPY_AUXDATA_FREE((NpyAuxData *)data);
             return NPY_FAIL;
         }
@@ -3562,7 +3592,7 @@ PyArray_CastRawArrays(npy_intp count,
                       PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
                       int move_references)
 {
-    int aligned = 1, needs_api = 0;
+    int aligned;
 
     /* Make sure the copy is reasonable */
     if (dst_stride == 0 && count > 1) {
@@ -3586,15 +3616,20 @@ PyArray_CastRawArrays(npy_intp count,
 
     /* Get the function to do the casting */
     NPY_cast_info cast_info;
+    NPY_ARRAYMETHOD_FLAGS flags;
     if (PyArray_GetDTypeTransferFunction(aligned,
                         src_stride, dst_stride,
                         src_dtype, dst_dtype,
                         move_references,
                         &cast_info,
-                        &needs_api) != NPY_SUCCEED) {
+                        &flags) != NPY_SUCCEED) {
         return NPY_FAIL;
     }
 
+    if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+        npy_clear_floatstatus_barrier((char*)&cast_info);
+    }
+
     /* Cast */
     char *args[2] = {src, dst};
     npy_intp strides[2] = {src_stride, dst_stride};
@@ -3603,8 +3638,16 @@ PyArray_CastRawArrays(npy_intp count,
     /* Cleanup */
     NPY_cast_info_xfree(&cast_info);
 
-    /* If needs_api was set to 1, it may have raised a Python exception */
-    return (needs_api && PyErr_Occurred()) ? NPY_FAIL : NPY_SUCCEED;
+    if (flags & NPY_METH_REQUIRES_PYAPI && PyErr_Occurred()) {
+        return NPY_FAIL;
+    }
+    if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+        int fpes = npy_get_floatstatus_barrier(*args);
+        if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
+            return NPY_FAIL;
+        }
+    }
+    return NPY_SUCCEED;
 }
 
 /*
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
index e313d244768d..a9f95886f24a 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -13,6 +13,7 @@
 
 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
 #define _MULTIARRAYMODULE
+#define _UMATHMODULE
 #include <numpy/arrayobject.h>
 #include <numpy/npy_cpu.h>
 #include <numpy/halffloat.h>
@@ -22,6 +23,7 @@
 #include "array_method.h"
 #include "usertypes.h"
 
+#include "umathmodule.h"
 
 /*
  * x86 platform works with unaligned access but the compiler is allowed to
@@ -1725,6 +1727,7 @@ mapiter_@name@(PyArrayMapIterObject *mit)
         NpyIter_GetInnerFixedStrideArray(mit->subspace_iter, fixed_strides);
 
         NPY_cast_info cast_info;
+        NPY_ARRAYMETHOD_FLAGS flags;
         if (PyArray_GetDTypeTransferFunction(is_aligned,
 #if @isget@
                         fixed_strides[0], fixed_strides[1],
@@ -1735,9 +1738,13 @@ mapiter_@name@(PyArrayMapIterObject *mit)
 #endif
                         0,
                         &cast_info,
-                        &needs_api) != NPY_SUCCEED) {
+                        &flags) != NPY_SUCCEED) {
             return -1;
         }
+        /* Note: it may make sense to refactor `needs_api` out in this branch */
+        if (flags & NPY_METH_REQUIRES_PYAPI) {
+            needs_api = 1;
+        }
 
         counter = NpyIter_GetInnerLoopSizePtr(mit->subspace_iter);
         if (*counter == PyArray_SIZE(mit->subspace)) {
@@ -1761,6 +1768,9 @@ mapiter_@name@(PyArrayMapIterObject *mit)
             if (!needs_api) {
                 NPY_BEGIN_THREADS;
             }
+            if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+                npy_clear_floatstatus_barrier((char *)mit);
+            }
 
             /* Outer iteration (safe because mit->size != 0) */
             do {
@@ -1868,6 +1878,13 @@ mapiter_@name@(PyArrayMapIterObject *mit)
 /**end repeat1**/
 
         NPY_cast_info_xfree(&cast_info);
+
+        if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+            int fpes = npy_get_floatstatus_barrier((char *)mit);
+            if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
+                return -1;
+            }
+        }
     }
     return 0;
 }
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 1a2ade11b93b..6cf5eab962f9 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -1,11 +1,14 @@
 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
 #define _MULTIARRAYMODULE
+#define _UMATHMODULE
 
 #define PY_SSIZE_T_CLEAN
 #include <Python.h>
 #include <structmember.h>
 
 #include "numpy/arrayobject.h"
+#include "numpy/npy_math.h"
+
 #include "arrayobject.h"
 
 #include "npy_config.h"
@@ -24,6 +27,8 @@
 #include "array_assign.h"
 #include "array_coercion.h"
 
+#include "umathmodule.h"
+
 
 #define HAS_INTEGER 1
 #define HAS_NEWAXIS 2
@@ -914,7 +919,6 @@ array_boolean_subscript(PyArrayObject *self,
     char *ret_data;
     PyArray_Descr *dtype;
     PyArrayObject *ret;
-    int needs_api = 0;
 
     size = count_boolean_trues(PyArray_NDIM(bmask), PyArray_DATA(bmask),
                                 PyArray_DIMS(bmask), PyArray_STRIDES(bmask));
@@ -962,13 +966,18 @@ array_boolean_subscript(PyArrayObject *self,
         /* Get a dtype transfer function */
         NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
         NPY_cast_info cast_info;
+        /*
+         * TODO: Ignoring cast flags, since this is only ever a copy. In
+         *       principle that may not be quite right in some future?
+         */
+        NPY_ARRAYMETHOD_FLAGS cast_flags;
         if (PyArray_GetDTypeTransferFunction(
                         IsUintAligned(self) && IsAligned(self),
                         fixed_strides[0], itemsize,
                         dtype, dtype,
                         0,
                         &cast_info,
-                        &needs_api) != NPY_SUCCEED) {
+                        &cast_flags) != NPY_SUCCEED) {
             Py_DECREF(ret);
             NpyIter_Deallocate(iter);
             return NULL;
@@ -1068,7 +1077,6 @@ array_assign_boolean_subscript(PyArrayObject *self,
 {
     npy_intp size, v_stride;
     char *v_data;
-    int needs_api = 0;
     npy_intp bmask_size;
 
     if (PyArray_DESCR(bmask)->type_num != NPY_BOOL) {
@@ -1164,6 +1172,7 @@ array_assign_boolean_subscript(PyArrayObject *self,
         /* Get a dtype transfer function */
         NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
         NPY_cast_info cast_info;
+        NPY_ARRAYMETHOD_FLAGS cast_flags;
         if (PyArray_GetDTypeTransferFunction(
                  IsUintAligned(self) && IsAligned(self) &&
                         IsUintAligned(v) && IsAligned(v),
@@ -1171,14 +1180,17 @@ array_assign_boolean_subscript(PyArrayObject *self,
                         PyArray_DESCR(v), PyArray_DESCR(self),
                         0,
                         &cast_info,
-                        &needs_api) != NPY_SUCCEED) {
+                        &cast_flags) != NPY_SUCCEED) {
             NpyIter_Deallocate(iter);
             return -1;
         }
 
-        if (!needs_api) {
+        if (!(cast_flags & NPY_METH_REQUIRES_PYAPI)) {
             NPY_BEGIN_THREADS_NDITER(iter);
         }
+        if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+            npy_clear_floatstatus_barrier((char *)iter);
+        }
 
         npy_intp strides[2] = {v_stride, self_stride};
 
@@ -1209,7 +1221,7 @@ array_assign_boolean_subscript(PyArrayObject *self,
             }
         } while (iternext(iter));
 
-        if (!needs_api) {
+        if (!(cast_flags & NPY_METH_REQUIRES_PYAPI)) {
             NPY_END_THREADS;
         }
 
@@ -1217,6 +1229,12 @@ array_assign_boolean_subscript(PyArrayObject *self,
         if (!NpyIter_Deallocate(iter)) {
             res = -1;
         }
+        if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+            int fpes = npy_get_floatstatus_barrier((char*)iter);
+            if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
+                return -1;
+            }
+        }
     }
 
     return res;
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index f82a9624eeb0..bd0763b195f8 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -3141,7 +3141,9 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
     npy_intp *strides = NAD_STRIDES(axisdata), op_stride;
     NpyIter_TransferInfo *transferinfo = NBF_TRANSFERINFO(bufferdata);
 
-    int needs_api = 0;
+    /* combined cast flags, the new cast flags for each cast: */
+    NPY_ARRAYMETHOD_FLAGS cflags = PyArrayMethod_MINIMAL_FLAGS;
+    NPY_ARRAYMETHOD_FLAGS nc_flags;
 
     for (iop = 0; iop < nop; ++iop) {
         npyiter_opitflags flags = op_itflags[iop];
@@ -3167,10 +3169,11 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
                                         op_dtype[iop],
                                         move_references,
                                         &transferinfo[iop].read,
-                                        &needs_api) != NPY_SUCCEED) {
+                                        &nc_flags) != NPY_SUCCEED) {
                     iop -= 1;  /* This one cannot be cleaned up yet. */
                     goto fail;
                 }
+                cflags = PyArrayMethod_COMBINED_FLAGS(cflags, nc_flags);
             }
             else {
                 transferinfo[iop].read.func = NULL;
@@ -3199,9 +3202,10 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
                             mask_dtype,
                             move_references,
                             &transferinfo[iop].write,
-                            &needs_api) != NPY_SUCCEED) {
+                            &nc_flags) != NPY_SUCCEED) {
                         goto fail;
                     }
+                    cflags = PyArrayMethod_COMBINED_FLAGS(cflags, nc_flags);
                 }
                 else {
                     if (PyArray_GetDTypeTransferFunction(
@@ -3212,9 +3216,10 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
                             PyArray_DESCR(op[iop]),
                             move_references,
                             &transferinfo[iop].write,
-                            &needs_api) != NPY_SUCCEED) {
+                            &nc_flags) != NPY_SUCCEED) {
                         goto fail;
                     }
+                    cflags = PyArrayMethod_COMBINED_FLAGS(cflags, nc_flags);
                 }
             }
             /* If no write back but there are references make a decref fn */
@@ -3230,9 +3235,10 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
                         op_dtype[iop], NULL,
                         1,
                         &transferinfo[iop].write,
-                        &needs_api) != NPY_SUCCEED) {
+                        &nc_flags) != NPY_SUCCEED) {
                     goto fail;
                 }
+                cflags = PyArrayMethod_COMBINED_FLAGS(cflags, nc_flags);
             }
             else {
                 transferinfo[iop].write.func = NULL;
@@ -3244,8 +3250,13 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
         }
     }
 
-    /* If any of the dtype transfer functions needed the API, flag it */
-    if (needs_api) {
+    /*
+     * If any of the dtype transfer functions needed the API, flag it.
+     * TODO: We ignore other flags (currently only if floating point errors are
+     *       possible).  This is incorrect, the combined flags should be
+     *       exposed explicitly.
+     */
+    if (cflags & NPY_METH_REQUIRES_PYAPI) {
         NIT_ITFLAGS(iter) |= NPY_ITFLAG_NEEDSAPI;
     }
 
diff --git a/numpy/core/src/umath/extobj.c b/numpy/core/src/umath/extobj.c
index 6b9a27e2621a..893429107f77 100644
--- a/numpy/core/src/umath/extobj.c
+++ b/numpy/core/src/umath/extobj.c
@@ -266,6 +266,33 @@ _extract_pyvals(PyObject *ref, const char *name, int *bufsize,
     return 0;
 }
 
+/*
+ * Handler which uses the default `np.errstate` given that `fpe_errors` is
+ * already set.  `fpe_errors` is typically the (nonzero) result of
+ * `npy_get_floatstatus_barrier`.
+ *
+ * Returns -1 on failure (an error was raised) and 0 on success.
+ */
+NPY_NO_EXPORT int
+PyUFunc_GiveFloatingpointErrors(const char *name, int fpe_errors)
+{
+    int bufsize, errmask;
+    PyObject *errobj;
+
+    if (PyUFunc_GetPyValues((char *)name, &bufsize, &errmask,
+                            &errobj) < 0) {
+        return -1;
+    }
+    int first = 1;
+    if (PyUFunc_handlefperr(errmask, errobj, fpe_errors, &first)) {
+        Py_XDECREF(errobj);
+        return -1;
+    }
+    Py_XDECREF(errobj);
+    return 0;
+}
+
+
 /*
  * check the floating point status
  *  - errmask: mask of status to check

From 341d9885120123675737ab83e69b9642b28aeb17 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Fri, 1 Apr 2022 14:51:33 -0700
Subject: [PATCH 02/20] BUG: Avoid FPE for float NaN to datetime/timedelta
 casts

---
 numpy/core/src/multiarray/arraytypes.c.src | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index d7d54a2c6e9f..62a96e37173e 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -1153,12 +1153,17 @@ static void
 
     while (n--) {
         @fromtype@ f = *ip++;
-        @totype@ t = (@totype@)f;
 #if @supports_nat@ && @floatingpoint@
-        /* Avoid undefined behaviour for NaN -> NaT */
+        @totype@ t;
+        /* Avoid undefined behaviour and warning for NaN -> NaT */
         if (npy_isnan(f)) {
             t = (@totype@)NPY_DATETIME_NAT;
         }
+        else {
+            t = (@totype@)f;
+        }
+#else
+        @totype@ t = (@totype@)f;
 #endif
         *op++ = t;
     }
@@ -1179,12 +1184,17 @@ static void
 
     while (n--) {
         @fromtype@ f = *ip;
-        @totype@ t = (@totype@)f;
 #if @supports_nat@
-        /* Avoid undefined behaviour for NaN -> NaT */
+        @totype@ t;
+        /* Avoid undefined behaviour and warning for NaN -> NaT */
         if (npy_isnan(f)) {
             t = (@totype@)NPY_DATETIME_NAT;
         }
+        else {
+            t = (@totype@)f;
+        }
+#else
+        @totype@ t = (@totype@)f;
 #endif
         *op++ = t;
         ip += 2;

From 84fd4a5a0e064d1f6be7bc0c96663690faa0353b Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Sat, 2 Apr 2022 20:28:00 -0700
Subject: [PATCH 03/20] TST: Fixup tests that cause FPEs during casts

---
 numpy/core/tests/test_array_coercion.py | 25 +++++++++++++------------
 numpy/core/tests/test_half.py           |  6 ++++--
 numpy/core/tests/test_numeric.py        |  4 +++-
 numpy/core/tests/test_regression.py     | 18 +++++++++---------
 numpy/core/tests/test_ufunc.py          |  5 +++--
 numpy/ma/tests/test_core.py             |  5 ++++-
 6 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py
index b8b8ef187668..ed3ef7e678ec 100644
--- a/numpy/core/tests/test_array_coercion.py
+++ b/numpy/core/tests/test_array_coercion.py
@@ -373,28 +373,29 @@ def test_default_dtype_instance(self, dtype_char):
         assert discovered_dtype.itemsize == dtype.itemsize
 
     @pytest.mark.parametrize("dtype", np.typecodes["Integer"])
-    def test_scalar_to_int_coerce_does_not_cast(self, dtype):
+    @pytest.mark.parametrize(["scalar", "error"],
+            [(np.float64(np.nan), ValueError),
+             (np.ulonglong(-1), OverflowError)])
+    def test_scalar_to_int_coerce_does_not_cast(self, dtype, scalar, error):
         """
         Signed integers are currently different in that they do not cast other
         NumPy scalar, but instead use scalar.__int__(). The hardcoded
         exception to this rule is `np.array(scalar, dtype=integer)`.
         """
         dtype = np.dtype(dtype)
-        invalid_int = np.ulonglong(-1)
 
-        float_nan = np.float64(np.nan)
-
-        for scalar in [float_nan, invalid_int]:
-            # This is a special case using casting logic and thus not failing:
+        # This is a special case using casting logic.  It warns for the NaN
+        # but allows the cast (giving undefined behaviour).
+        with np.errstate(invalid="ignore"):
             coerced = np.array(scalar, dtype=dtype)
             cast = np.array(scalar).astype(dtype)
-            assert_array_equal(coerced, cast)
+        assert_array_equal(coerced, cast)
 
-            # However these fail:
-            with pytest.raises((ValueError, OverflowError)):
-                np.array([scalar], dtype=dtype)
-            with pytest.raises((ValueError, OverflowError)):
-                cast[()] = scalar
+        # However these fail:
+        with pytest.raises(error):
+            np.array([scalar], dtype=dtype)
+        with pytest.raises(error):
+            cast[()] = scalar
 
 
 class TestTimeScalars:
diff --git a/numpy/core/tests/test_half.py b/numpy/core/tests/test_half.py
index 1b6fd21e14bb..b60f7ba9c2e7 100644
--- a/numpy/core/tests/test_half.py
+++ b/numpy/core/tests/test_half.py
@@ -233,12 +233,14 @@ def test_half_rounding(self):
                    np.inf]
 
         # Check float64->float16 rounding
-        b = np.array(a, dtype=float16)
+        with np.errstate(over="ignore"):
+            b = np.array(a, dtype=float16)
         assert_equal(b, rounded)
 
         # Check float32->float16 rounding
         a = np.array(a, dtype=float32)
-        b = np.array(a, dtype=float16)
+        with np.errstate(over="ignore"):
+            b = np.array(a, dtype=float16)
         assert_equal(b, rounded)
 
     def test_half_correctness(self):
diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
index 0b03c65760f4..5b15e29b46ce 100644
--- a/numpy/core/tests/test_numeric.py
+++ b/numpy/core/tests/test_numeric.py
@@ -2939,7 +2939,9 @@ def test_filled_like(self):
         self.check_like_function(np.full_like, 1, True)
         self.check_like_function(np.full_like, 1000, True)
         self.check_like_function(np.full_like, 123.456, True)
-        self.check_like_function(np.full_like, np.inf, True)
+        # Inf to integer casts cause invalid-value errors: ignore them.
+        with np.errstate(invalid="ignore"):
+            self.check_like_function(np.full_like, np.inf, True)
 
     @pytest.mark.parametrize('likefunc', [np.empty_like, np.full_like,
                                           np.zeros_like, np.ones_like])
diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py
index 4388023dc1d9..4538c825db64 100644
--- a/numpy/core/tests/test_regression.py
+++ b/numpy/core/tests/test_regression.py
@@ -326,20 +326,20 @@ def bfb():
         assert_raises(ValueError, bfa)
         assert_raises(ValueError, bfb)
 
-    def test_nonarray_assignment(self):
+    @pytest.mark.parametrize("index",
+            [np.ones(10, dtype=bool), np.arange(10)],
+            ids=["boolean-arr-index", "integer-arr-index"])
+    def test_nonarray_assignment(self, index):
         # See also Issue gh-2870, test for non-array assignment
         # and equivalent unsafe casted array assignment
         a = np.arange(10)
-        b = np.ones(10, dtype=bool)
-        r = np.arange(10)
 
-        def assign(a, b, c):
-            a[b] = c
+        with pytest.raises(ValueError):
+            a[index] = np.nan
 
-        assert_raises(ValueError, assign, a, b, np.nan)
-        a[b] = np.array(np.nan)  # but not this.
-        assert_raises(ValueError, assign, a, r, np.nan)
-        a[r] = np.array(np.nan)
+        with np.errstate(invalid="warn"):
+            with pytest.warns(RuntimeWarning, match="invalid value"):
+                a[index] = np.array(np.nan)  # Only warns
 
     def test_unpickle_dtype_with_object(self):
         # Implemented in r2840
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index 852044d32fcc..1142d66f51dd 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -620,8 +620,9 @@ def test_true_divide(self):
                                 atol = max(np.finfo(dtout).tiny, 3e-308)
                             else:
                                 atol = 3e-308
-                        # Some test values result in invalid for float16.
-                        with np.errstate(invalid='ignore'):
+                        # Some test values result in invalid for float16
+                        # and the cast to it may overflow to inf.
+                        with np.errstate(invalid='ignore', over='ignore'):
                             res = np.true_divide(x, y, dtype=dtout)
                         if not np.isfinite(res) and tcout == 'e':
                             continue
diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py
index 679ed2090859..04540bc70e1c 100644
--- a/numpy/ma/tests/test_core.py
+++ b/numpy/ma/tests/test_core.py
@@ -4340,7 +4340,10 @@ def test_where(self):
         tmp[(xm <= 2).filled(True)] = True
         assert_equal(d._mask, tmp)
 
-        ixm = xm.astype(int)
+        with np.errstate(invalid="warn"):
+            # The fill value is 1e20, it cannot be converted to `int`:
+            with pytest.warns(RuntimeWarning, match="invalid value"):
+                ixm = xm.astype(int)
         d = where(ixm > 2, ixm, masked)
         assert_equal(d, [-9, -9, -9, -9, -9, 4, -9, -9, 10, -9, -9, 3])
         assert_equal(d.dtype, ixm.dtype)

From 35dae708788106361a73ffd141d77f91151f483d Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Mon, 4 Apr 2022 11:49:19 -0700
Subject: [PATCH 04/20] ENH: Add overflow check to float setitem

---
 numpy/core/src/multiarray/arraytypes.c.src | 51 ++++++++++++++++++++--
 numpy/core/tests/test_half.py              |  4 +-
 numpy/core/tests/test_ufunc.py             | 18 ++++----
 numpy/ma/tests/test_core.py                |  6 ++-
 4 files changed, 65 insertions(+), 14 deletions(-)

diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index 62a96e37173e..ca06643cebd6 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -7,6 +7,7 @@
 
 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
 #define _MULTIARRAYMODULE
+#define _UMATHMODULE
 #define _NPY_NO_DEPRECATIONS /* for NPY_CHAR */
 
 #include "numpy/npy_common.h"
@@ -37,6 +38,9 @@
 #include "npy_buffer.h"
 
 #include "arraytypes.h"
+
+#include "umathmodule.h"
+
 /*
  * Define a stack allocated dummy array with only the minimum information set:
  *   1. The descr, the main field interesting here.
@@ -96,10 +100,32 @@ MyPyFloat_AsDouble(PyObject *obj)
     return ret;
 }
 
+
+static float
+MyPyFloat_AsFloat(PyObject *obj)
+{
+    double d_val = MyPyFloat_AsDouble(obj);
+    float res = (float)d_val;
+    if (NPY_UNLIKELY(npy_isinf(res) && !npy_isinf(d_val))) {
+        if (PyUFunc_GiveFloatingpointErrors("cast", NPY_FPE_OVERFLOW) < 0) {
+            return -1;
+        }
+    }
+    return res;
+}
+
+
 static npy_half
 MyPyFloat_AsHalf(PyObject *obj)
 {
-    return npy_double_to_half(MyPyFloat_AsDouble(obj));
+    double d_val = MyPyFloat_AsDouble(obj);
+    npy_half res = npy_double_to_half(d_val);
+    if (NPY_UNLIKELY(npy_half_isinf(res) && !npy_isinf(d_val))) {
+        if (PyUFunc_GiveFloatingpointErrors("cast", NPY_FPE_OVERFLOW) < 0) {
+            return npy_double_to_half(-1.);
+        }
+    }
+    return res;
 }
 
 static PyObject *
@@ -200,7 +226,7 @@ MyPyLong_AsUnsigned@Type@ (PyObject *obj)
  *          MyPyFloat_FromHalf, PyFloat_FromDouble*2#
  * #func2 = PyObject_IsTrue, MyPyLong_AsLong*6, MyPyLong_AsUnsignedLong*2,
  *          MyPyLong_AsLongLong, MyPyLong_AsUnsignedLongLong,
- *          MyPyFloat_AsHalf, MyPyFloat_AsDouble*2#
+ *          MyPyFloat_AsHalf, MyPyFloat_AsFloat, MyPyFloat_AsDouble#
  * #type = npy_bool,
  *         npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int,
  *         npy_long, npy_uint, npy_ulong, npy_longlong, npy_ulonglong,
@@ -238,7 +264,6 @@ static int
     }
     else {
         temp = (@type@)@func2@(op);
-        // TODO: Need to check for inf, if input is not inf (for float)!
     }
     if (PyErr_Occurred()) {
         PyObject *type, *value, *traceback;
@@ -364,6 +389,26 @@ static int
         }
         temp.real = (@ftype@) oop.real;
         temp.imag = (@ftype@) oop.imag;
+
+#if NPY_SIZEOF_@NAME@ < NPY_SIZEOF_CDOUBLE  /* really just float... */
+        /* Overflow could have occured converting double to float */
+        if (NPY_UNLIKELY((npy_isinf(temp.real) && !npy_isinf(oop.real)) ||
+                         (npy_isinf(temp.imag) && !npy_isinf(oop.imag)))) {
+            int bufsize, errmask;
+            PyObject *errobj;
+
+            if (PyUFunc_GetPyValues("assignment", &bufsize, &errmask,
+                    &errobj) < 0) {
+                return -1;
+            }
+            int first = 1;
+            if (PyUFunc_handlefperr(errmask, errobj, NPY_FPE_OVERFLOW, &first)) {
+                Py_XDECREF(errobj);
+                return -1;
+            }
+            Py_XDECREF(errobj);
+        }
+#endif
     }
 
     memcpy(ov, &temp, PyArray_DESCR(ap)->elsize);
diff --git a/numpy/core/tests/test_half.py b/numpy/core/tests/test_half.py
index b60f7ba9c2e7..6743dfb51208 100644
--- a/numpy/core/tests/test_half.py
+++ b/numpy/core/tests/test_half.py
@@ -104,9 +104,9 @@ def test_half_conversion_rounding(self, float_t, shift, offset):
 
         # Increase the float by a minimal value:
         if offset == "up":
-            f16s_float = np.nextafter(f16s_float, float_t(1e50))
+            f16s_float = np.nextafter(f16s_float, float_t(np.inf))
         elif offset == "down":
-            f16s_float = np.nextafter(f16s_float, float_t(-1e50))
+            f16s_float = np.nextafter(f16s_float, float_t(-np.inf))
 
         # Convert back to float16 and its bit pattern:
         res_patterns = f16s_float.astype(np.float16).view(np.uint16)
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index 1142d66f51dd..56ca7f4bd252 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -666,20 +666,22 @@ def test_sum(self):
         for dt in (int, np.float16, np.float32, np.float64, np.longdouble):
             for v in (0, 1, 2, 7, 8, 9, 15, 16, 19, 127,
                       128, 1024, 1235):
-                tgt = dt(v * (v + 1) / 2)
-                d = np.arange(1, v + 1, dtype=dt)
-
                 # warning if sum overflows, which it does in float16
-                overflow = not np.isfinite(tgt)
-
                 with warnings.catch_warnings(record=True) as w:
-                    warnings.simplefilter("always")
-                    assert_almost_equal(np.sum(d), tgt)
+                    warnings.simplefilter("always", RuntimeWarning)
+
+                    tgt = dt(v * (v + 1) / 2)
+                    overflow = not np.isfinite(tgt)
                     assert_equal(len(w), 1 * overflow)
 
-                    assert_almost_equal(np.sum(d[::-1]), tgt)
+                    d = np.arange(1, v + 1, dtype=dt)
+
+                    assert_almost_equal(np.sum(d), tgt)
                     assert_equal(len(w), 2 * overflow)
 
+                    assert_almost_equal(np.sum(d[::-1]), tgt)
+                    assert_equal(len(w), 3 * overflow)
+
             d = np.ones(500, dtype=dt)
             assert_almost_equal(np.sum(d[::2]), 250.)
             assert_almost_equal(np.sum(d[1::2]), 250.)
diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py
index 04540bc70e1c..4fac897de314 100644
--- a/numpy/ma/tests/test_core.py
+++ b/numpy/ma/tests/test_core.py
@@ -4164,7 +4164,11 @@ def test_masked_where_structured(self):
         # test that masked_where on a structured array sets a structured
         # mask (see issue #2972)
         a = np.zeros(10, dtype=[("A", "<f2"), ("B", "<f4")])
-        am = np.ma.masked_where(a["A"] < 5, a)
+        with np.errstate(over="ignore"):
+            # NOTE: The float16 "uses" 1e20 as mask, which overflows to inf
+            #       and warns.  Unrelated to this test, but probably undesired.
+            #       But NumPy previously did not warn for this overflow.
+            am = np.ma.masked_where(a["A"] < 5, a)
         assert_equal(am.mask.dtype.names, am.dtype.names)
         assert_equal(am["A"],
                     np.ma.masked_array(np.zeros(10), np.ones(10)))

From 54d6ac5bf42d671ce7aa79e13dbdda1b9b2175a8 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Mon, 4 Apr 2022 19:01:28 -0700
Subject: [PATCH 05/20] WIP,TST: Add exhaustive test for FPEs in casts

unfortunately, I had to realize that float -> integer casts are not
well defined when values are out of range for the integer.
This is a problem with C, but means that neither the warnings seem
to be particularly well defined...
(I suspect, on my CPU it either warns OR gives "valid" integer overflow
results, but the question is if that is even universally true...)
---
 .../test_casting_floatingpoint_errors.py      | 135 ++++++++++++++++++
 1 file changed, 135 insertions(+)
 create mode 100644 numpy/core/tests/test_casting_floatingpoint_errors.py

diff --git a/numpy/core/tests/test_casting_floatingpoint_errors.py b/numpy/core/tests/test_casting_floatingpoint_errors.py
new file mode 100644
index 000000000000..797dc3387e09
--- /dev/null
+++ b/numpy/core/tests/test_casting_floatingpoint_errors.py
@@ -0,0 +1,135 @@
+import pytest
+from pytest import param
+
+import numpy as np
+
+
+def values_and_dtypes():
+    """
+    Generate value+dtype pairs that generate floating point errors during
+    casts.  The invalid casts to integers will generate "invalid" value
+    warnings, the float casts all generate "overflow".
+
+    (The Python int/float paths don't need to get tested in all the same
+    situations, but it does not hurt.)
+    """
+    # Casting to float16:
+    yield param(70000, "float16", id="int-to-f2")
+    yield param("70000", "float16", id="str-to-f2")
+    yield param(70000.0, "float16", id="float-to-f2")
+    yield param(np.longdouble(70000.), "float16", id="longdouble-to-f2")
+    yield param(np.float64(70000.), "float16", id="double-to-f2")
+    yield param(np.float32(70000.), "float16", id="float-to-f2")
+    # Casting to float32:
+    yield param(10**100, "float32", id="int-to-f4")
+    yield param(1e100, "float32", id="float-to-f2")
+    yield param(np.longdouble(1e300), "float32", id="longdouble-to-f2")
+    yield param(np.float64(1e300), "float32", id="double-to-f2")
+    # Casting to float64:
+    if np.finfo(np.longdouble).max > np.finfo(np.float64).max:
+        yield param(np.finfo(np.longdouble).max, "float64",
+                    id="longdouble-to-f4")
+
+    # Invalid float to integer casts:
+    with np.errstate(over="ignore"):
+        for to_dt in np.typecodes["AllInteger"]:
+            for value in [np.inf, np.nan]:
+                for from_dt in np.typecodes["AllFloat"]:
+                    from_dt = np.dtype(from_dt)
+                    from_val = from_dt.type(value)
+
+                    yield param(from_val, to_dt, id=f"{from_val}-to-{to_dt}")
+
+
+def check_operations(dtype, value):
+    """
+    There are many dedicated paths in NumPy which cast and should check for
+    floating point errors which occurred during those casts.
+    """
+    if dtype.kind != 'i':
+        def assignment():
+            arr = np.empty(3, dtype=dtype)
+            arr[0] = value
+
+        yield assignment
+
+        def fill():
+            arr = np.empty(3, dtype=dtype)
+            arr.fill(value)
+
+        yield fill
+
+    def copyto_scalar():
+        arr = np.empty(3, dtype=dtype)
+        np.copyto(arr, value, casting="unsafe")
+
+    yield copyto_scalar
+
+    def copyto():
+        arr = np.empty(3, dtype=dtype)
+        np.copyto(arr, np.array([value, value, value]), casting="unsafe")
+
+    yield copyto
+
+    def copyto_scalar_masked():
+        arr = np.empty(3, dtype=dtype)
+        np.copyto(arr, value, casting="unsafe",
+                  where=[True, False, True])
+
+    yield copyto_scalar_masked
+
+    def copyto_masked():
+        arr = np.empty(3, dtype=dtype)
+        np.copyto(arr, np.array([value, value, value]), casting="unsafe",
+                  where=[True, False, True])
+
+    yield copyto_masked
+
+    def direct_cast():
+        np.array([value, value, value]).astype(dtype)
+
+    yield direct_cast
+
+    def direct_cast_nd_strided():
+        arr = np.full((5, 5, 5), fill_value=value)[:, ::2, :]
+        arr.astype(dtype)
+
+    yield direct_cast_nd_strided
+
+    def boolean_array_assignment():
+        arr = np.empty(3, dtype=dtype)
+        arr[[True, False, True]] = np.array([value, value])
+
+    yield boolean_array_assignment
+
+    def integer_array_assignment():
+        arr = np.empty(3, dtype=dtype)
+        values = np.array([value, value])
+
+        arr[[0, 1]] = values
+
+    #yield integer_array_assignment
+
+    def integer_array_assignment_with_subspace():
+        arr = np.empty((5, 3), dtype=dtype)
+        values = np.array([value, value, value])
+
+        arr[[0, 2]] = values
+
+    yield integer_array_assignment_with_subspace
+
+
+@pytest.mark.parametrize(["value", "dtype"], values_and_dtypes())
+@pytest.mark.filterwarnings("ignore::numpy.ComplexWarning")
+def test_floatingpoint_errors_casting(dtype, value):
+    dtype = np.dtype(dtype)
+    for operation in check_operations(dtype, value):
+        dtype = np.dtype(dtype)
+
+        match = "invalid" if dtype.kind in 'iu' else "overflow"
+        with pytest.warns(RuntimeWarning, match=match):
+            operation()
+
+        with np.errstate(all="raise"):
+            with pytest.raises(FloatingPointError, match=match):
+                operation()

From d53fe91e551f6318e004665995456b838429e4ab Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Wed, 6 Apr 2022 13:54:30 -0700
Subject: [PATCH 06/20] WIP: Threads things into nditer for ufuncs and advanced
 indexing

---
 .../multiarray/lowlevel_strided_loops.c.src   | 24 -----------
 numpy/core/src/multiarray/mapping.c           | 35 ++++++++++++++--
 numpy/core/src/multiarray/nditer_api.c        | 22 ++++++++++
 numpy/core/src/multiarray/nditer_constr.c     | 11 +++--
 numpy/core/src/multiarray/nditer_impl.h       | 41 ++++++++++++-------
 numpy/core/src/umath/ufunc_object.c           | 23 +++++++----
 6 files changed, 102 insertions(+), 54 deletions(-)

diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
index a9f95886f24a..c46f733f38c6 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -1717,30 +1717,6 @@ mapiter_@name@(PyArrayMapIterObject *mit)
         int is_subiter_trivial = 0; /* has three states */
         npy_intp reset_offsets[2] = {0, 0};
 
-        /* Use strided transfer functions for the inner loop */
-        npy_intp fixed_strides[2];
-
-        /*
-         * Get a dtype transfer function, since there are no
-         * buffers, this is safe.
-         */
-        NpyIter_GetInnerFixedStrideArray(mit->subspace_iter, fixed_strides);
-
-        NPY_cast_info cast_info;
-        NPY_ARRAYMETHOD_FLAGS flags;
-        if (PyArray_GetDTypeTransferFunction(is_aligned,
-#if @isget@
-                        fixed_strides[0], fixed_strides[1],
-                        PyArray_DESCR(array), PyArray_DESCR(mit->extra_op),
-#else
-                        fixed_strides[1], fixed_strides[0],
-                         PyArray_DESCR(mit->extra_op), PyArray_DESCR(array),
-#endif
-                        0,
-                        &cast_info,
-                        &flags) != NPY_SUCCEED) {
-            return -1;
-        }
         /* Note: it may make sense to refactor `needs_api` out in this branch */
         if (flags & NPY_METH_REQUIRES_PYAPI) {
             needs_api = 1;
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 6cf5eab962f9..55884422793d 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -2610,13 +2610,14 @@ PyArray_MapIterNew(npy_index_info *indices , int index_num, int index_type,
     }
 
     /* create new MapIter object */
-    mit = (PyArrayMapIterObject *)PyArray_malloc(sizeof(PyArrayMapIterObject));
+    mit = (PyArrayMapIterObject *)PyArray_malloc(
+            sizeof(PyArrayMapIterObject) + sizeof(NPY_cast_info));
     if (mit == NULL) {
         Py_DECREF(intp_descr);
         return NULL;
     }
     /* set all attributes of mapiter to zero */
-    memset(mit, 0, sizeof(PyArrayMapIterObject));
+    memset(mit, 0, sizeof(PyArrayMapIterObject) + sizeof(NPY_cast_info));
     PyObject_Init((PyObject *)mit, &PyArrayMapIter_Type);
 
     Py_INCREF(arr);
@@ -3079,7 +3080,34 @@ PyArray_MapIterNew(npy_index_info *indices , int index_num, int index_type,
     mit->subspace_ptrs = NpyIter_GetDataPtrArray(mit->subspace_iter);
     mit->subspace_strides = NpyIter_GetInnerStrideArray(mit->subspace_iter);
 
-    if (NpyIter_IterationNeedsAPI(mit->outer)) {
+    NPY_cast_info *cast_info = (NPY_cast_info *)&mit->subspace_castinfo;
+    NPY_ARRAYMETHOD_FLAGS flags;
+    npy_intp fixed_strides[2];
+
+    /*
+     * Get a dtype transfer function, since there are no
+     * buffers, this is safe.
+     */
+    NpyIter_GetInnerFixedStrideArray(mit->subspace_iter, fixed_strides);
+
+    if (PyArray_GetDTypeTransferFunction(is_aligned,
+#if @isget@
+    fixed_strides[0], fixed_strides[1],
+                        PyArray_DESCR(array), PyArray_DESCR(mit->extra_op),
+#else
+            fixed_strides[1], fixed_strides[0],
+            PyArray_DESCR(mit->extra_op), PyArray_DESCR(array),
+#endif
+            0,
+            &cast_info,
+            &flags) != NPY_SUCCEED) {
+        return -1;
+    }
+
+
+
+
+    if (NpyIter_IterationNeedsAPI(mit->subspace_iter)) {
         mit->needs_api = 1;
         /*
          * NOTE: In this case, need to call PyErr_Occurred() after
@@ -3286,6 +3314,7 @@ arraymapiter_dealloc(PyArrayMapIterObject *mit)
     }
     if (mit->subspace_iter != NULL) {
         NpyIter_Deallocate(mit->subspace_iter);
+        NPY_cast_info_xfree((NPY_cast_info *)(&(mit->subspace_castinfo)));
     }
     if (mit->extra_op_iter != NULL) {
         NpyIter_Deallocate(mit->extra_op_iter);
diff --git a/numpy/core/src/multiarray/nditer_api.c b/numpy/core/src/multiarray/nditer_api.c
index 860c8c1f65fa..b80312e06770 100644
--- a/numpy/core/src/multiarray/nditer_api.c
+++ b/numpy/core/src/multiarray/nditer_api.c
@@ -857,6 +857,13 @@ NpyIter_RequiresBuffering(NpyIter *iter)
  * Whether the iteration loop, and in particular the iternext()
  * function, needs API access.  If this is true, the GIL must
  * be retained while iterating.
+ *
+ * NOTE: Internally (currently), `NpyIter_GetTransferFlags` will
+ *       additionally provide information on whether floating point errors
+ *       may be given during casts.  The flags only require the API use
+ *       necessary for buffering though.  So an iterate which does not require
+ *       buffering may indicate `NpyIter_IterationNeedsAPI`, but not include
+ *       the flag in `NpyIter_GetTransferFlags`.
  */
 NPY_NO_EXPORT npy_bool
 NpyIter_IterationNeedsAPI(NpyIter *iter)
@@ -864,6 +871,21 @@ NpyIter_IterationNeedsAPI(NpyIter *iter)
     return (NIT_ITFLAGS(iter)&NPY_ITFLAG_NEEDSAPI) != 0;
 }
 
+
+/*
+ * Fetch the ArrayMethod (runtime) flags for all "transfer functions' (i.e.
+ * copy to buffer/casts).
+ *
+ * TODO: This should be public API, but that only makes sense when the
+ *       ArrayMethod API is made public.
+ */
+NPY_NO_EXPORT int
+NpyIter_GetTransferFlags(NpyIter *iter)
+{
+    return NIT_ITFLAGS(iter) >> NPY_ITFLAG_TRANSFERFLAGS_SHIFT;
+}
+
+
 /*NUMPY_API
  * Gets the number of dimensions being iterated
  */
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index bd0763b195f8..a383c63e8d5b 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -3250,12 +3250,11 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
         }
     }
 
-    /*
-     * If any of the dtype transfer functions needed the API, flag it.
-     * TODO: We ignore other flags (currently only if floating point errors are
-     *       possible).  This is incorrect, the combined flags should be
-     *       exposed explicitly.
-     */
+    /* Store the combined transfer flags on the iterator */
+    NIT_ITFLAGS(iter) |= cflags << NPY_ITFLAG_TRANSFERFLAGS_SHIFT;
+    assert(NIT_ITFLAGS(iter) >> NPY_ITFLAG_TRANSFERFLAGS_SHIFT == cflags);
+
+    /* If any of the dtype transfer functions needed the API, flag it. */
     if (cflags & NPY_METH_REQUIRES_PYAPI) {
         NIT_ITFLAGS(iter) |= NPY_ITFLAG_NEEDSAPI;
     }
diff --git a/numpy/core/src/multiarray/nditer_impl.h b/numpy/core/src/multiarray/nditer_impl.h
index 2a82b7e5410d..459675ea8574 100644
--- a/numpy/core/src/multiarray/nditer_impl.h
+++ b/numpy/core/src/multiarray/nditer_impl.h
@@ -76,33 +76,38 @@
 /* Internal iterator flags */
 
 /* The perm is the identity */
-#define NPY_ITFLAG_IDENTPERM    0x0001
+#define NPY_ITFLAG_IDENTPERM    (1 << 0)
 /* The perm has negative entries (indicating flipped axes) */
-#define NPY_ITFLAG_NEGPERM      0x0002
+#define NPY_ITFLAG_NEGPERM      (1 << 1)
 /* The iterator is tracking an index */
-#define NPY_ITFLAG_HASINDEX     0x0004
+#define NPY_ITFLAG_HASINDEX     (1 << 2)
 /* The iterator is tracking a multi-index */
-#define NPY_ITFLAG_HASMULTIINDEX    0x0008
+#define NPY_ITFLAG_HASMULTIINDEX    (1 << 3)
 /* The iteration order was forced on construction */
-#define NPY_ITFLAG_FORCEDORDER  0x0010
+#define NPY_ITFLAG_FORCEDORDER  (1 << 4)
 /* The inner loop is handled outside the iterator */
-#define NPY_ITFLAG_EXLOOP      0x0020
+#define NPY_ITFLAG_EXLOOP      (1 << 5)
 /* The iterator is ranged */
-#define NPY_ITFLAG_RANGE        0x0040
+#define NPY_ITFLAG_RANGE        (1 << 6)
 /* The iterator is buffered */
-#define NPY_ITFLAG_BUFFER       0x0080
+#define NPY_ITFLAG_BUFFER       (1 << 7)
 /* The iterator should grow the buffered inner loop when possible */
-#define NPY_ITFLAG_GROWINNER    0x0100
+#define NPY_ITFLAG_GROWINNER    (1 << 8)
 /* There is just one iteration, can specialize iternext for that */
-#define NPY_ITFLAG_ONEITERATION 0x0200
+#define NPY_ITFLAG_ONEITERATION (1 << 9)
 /* Delay buffer allocation until first Reset* call */
-#define NPY_ITFLAG_DELAYBUF     0x0400
+#define NPY_ITFLAG_DELAYBUF     (1 << 10)
 /* Iteration needs API access during iternext */
-#define NPY_ITFLAG_NEEDSAPI     0x0800
+#define NPY_ITFLAG_NEEDSAPI     (1 << 11)
 /* Iteration includes one or more operands being reduced */
-#define NPY_ITFLAG_REDUCE       0x1000
+#define NPY_ITFLAG_REDUCE       (1 << 12)
 /* Reduce iteration doesn't need to recalculate reduce loops next time */
-#define NPY_ITFLAG_REUSE_REDUCE_LOOPS 0x2000
+#define NPY_ITFLAG_REUSE_REDUCE_LOOPS (1 << 13)
+/*
+ * Offset of (combined) ArrayMethod flags for all transfer functions.
+ * For now, we use the top 8 bits.
+ */
+#define NPY_ITFLAG_TRANSFERFLAGS_SHIFT 24
 
 /* Internal iterator per-operand iterator flags */
 
@@ -356,4 +361,12 @@ npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs);
 NPY_NO_EXPORT void
 npyiter_clear_buffers(NpyIter *iter);
 
+/*
+ * Function to get the ArrayMethod flags of the transfer functions.
+ * TODO: This function should be public and removed from `nditer_impl.h`, but
+ *       this requires making the ArrayMethod flags public API first.
+ */
+NPY_NO_EXPORT int
+NpyIter_GetTransferFlags(NpyIter *iter);
+
 #endif  /* NUMPY_CORE_SRC_MULTIARRAY_NDITER_IMPL_H_ */
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index fce7d61deb58..2636396d35cb 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -57,6 +57,10 @@
 #include "legacy_array_method.h"
 #include "abstractdtypes.h"
 
+/* TODO: Only for `NpyIter_GetTransferFlags` until it is public */
+#define NPY_ITERATOR_IMPLEMENTATION_CODE
+#include "nditer_impl.h"
+
 /********** PRINTF DEBUG TRACING **************/
 #define NPY_UF_DBG_TRACING 0
 
@@ -1544,10 +1548,6 @@ execute_ufunc_loop(PyArrayMethod_Context *context, int masked,
     if (masked) {
         baseptrs[nop] = PyArray_BYTES(op_it[nop]);
     }
-    if (NpyIter_ResetBasePointers(iter, baseptrs, NULL) != NPY_SUCCEED) {
-        NpyIter_Deallocate(iter);
-        return -1;
-    }
 
     /*
      * Get the inner loop, with the possibility of specialization
@@ -1584,17 +1584,25 @@ execute_ufunc_loop(PyArrayMethod_Context *context, int masked,
     char **dataptr = NpyIter_GetDataPtrArray(iter);
     npy_intp *strides = NpyIter_GetInnerStrideArray(iter);
     npy_intp *countptr = NpyIter_GetInnerLoopSizePtr(iter);
-    int needs_api = NpyIter_IterationNeedsAPI(iter);
 
     NPY_BEGIN_THREADS_DEF;
 
+    flags = PyArrayMethod_COMBINED_FLAGS(flags, NpyIter_GetTransferFlags(iter));
+
     if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
         npy_clear_floatstatus_barrier((char *)context);
     }
-    if (!needs_api && !(flags & NPY_METH_REQUIRES_PYAPI)) {
+    if (!(flags & NPY_METH_REQUIRES_PYAPI)) {
         NPY_BEGIN_THREADS_THRESHOLDED(full_size);
     }
 
+    /* The reset may copy the first buffer chunk, which could cause FPEs */
+    if (NpyIter_ResetBasePointers(iter, baseptrs, NULL) != NPY_SUCCEED) {
+        NPY_AUXDATA_FREE(auxdata);
+        NpyIter_Deallocate(iter);
+        return -1;
+    }
+
     NPY_UF_DBG_PRINT("Actual inner loop:\n");
     /* Execute the loop */
     int res;
@@ -2388,7 +2396,8 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc,
                  NPY_ITER_MULTI_INDEX |
                  NPY_ITER_REFS_OK |
                  NPY_ITER_ZEROSIZE_OK |
-                 NPY_ITER_COPY_IF_OVERLAP;
+                 NPY_ITER_COPY_IF_OVERLAP |
+                 NPY_ITER_DELAY_BUFALLOC;
 
     /* Create the iterator */
     iter = NpyIter_AdvancedNew(nop, op, iter_flags,

From 4c2a3d0030c244e152eb77365c8973dc07621186 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Sun, 17 Apr 2022 03:27:15 -0700
Subject: [PATCH 07/20] fixups

---
 numpy/core/include/numpy/ndarraytypes.h | 5 ++++-
 numpy/core/src/multiarray/mapping.c     | 5 +----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h
index c295f34bb51a..97e0f4e2a470 100644
--- a/numpy/core/include/numpy/ndarraytypes.h
+++ b/numpy/core/include/numpy/ndarraytypes.h
@@ -1380,7 +1380,10 @@ typedef struct {
         int                   nd_fancy;
         npy_intp              fancy_dims[NPY_MAXDIMS];
 
-        /* Whether the iterator (any of the iterators) requires API */
+        /*
+         * Whether the iterator (any of the iterators) requires API.  This is
+         * unused by NumPy itself; ArrayMethod flags are more precise.
+         */
         int                   needs_api;
 
         /*
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 55884422793d..646a570ddc2c 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -3101,12 +3101,9 @@ PyArray_MapIterNew(npy_index_info *indices , int index_num, int index_type,
             0,
             &cast_info,
             &flags) != NPY_SUCCEED) {
-        return -1;
+        goto fail;
     }
 
-
-
-
     if (NpyIter_IterationNeedsAPI(mit->subspace_iter)) {
         mit->needs_api = 1;
         /*

From a5bf28b0167356098e6ed1ad69e3bc232dac7a26 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Thu, 21 Apr 2022 10:34:54 +0200
Subject: [PATCH 08/20] ENH: Add floating point error handling to advanced
 indexing

---
 .../core/src/common/lowlevel_strided_loops.h  |   8 +-
 .../multiarray/lowlevel_strided_loops.c.src   |  40 +----
 numpy/core/src/multiarray/mapping.c           | 156 +++++++++++++-----
 numpy/core/src/multiarray/mapping.h           |   2 +-
 .../test_casting_floatingpoint_errors.py      |  13 +-
 5 files changed, 142 insertions(+), 77 deletions(-)

diff --git a/numpy/core/src/common/lowlevel_strided_loops.h b/numpy/core/src/common/lowlevel_strided_loops.h
index c06cd4551ab0..924a34db5ee3 100644
--- a/numpy/core/src/common/lowlevel_strided_loops.h
+++ b/numpy/core/src/common/lowlevel_strided_loops.h
@@ -336,10 +336,14 @@ mapiter_trivial_set(PyArrayObject *self, PyArrayObject *ind,
                        PyArrayObject *result);
 
 NPY_NO_EXPORT int
-mapiter_get(PyArrayMapIterObject *mit);
+mapiter_get(
+        PyArrayMapIterObject *mit, NPY_cast_info *cast_info,
+        NPY_ARRAYMETHOD_FLAGS flags, int is_aligned);
 
 NPY_NO_EXPORT int
-mapiter_set(PyArrayMapIterObject *mit);
+mapiter_set(
+        PyArrayMapIterObject *mit, NPY_cast_info *cast_info,
+        NPY_ARRAYMETHOD_FLAGS flags, int is_aligned);
 
 /*
  * Prepares shape and strides for a simple raw array iteration.
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
index c46f733f38c6..827f0b615a5b 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -1559,14 +1559,16 @@ mapiter_trivial_@name@(PyArrayObject *self, PyArrayObject *ind,
  * General advanced indexing iteration.
  */
 NPY_NO_EXPORT int
-mapiter_@name@(PyArrayMapIterObject *mit)
+mapiter_@name@(
+        PyArrayMapIterObject *mit, NPY_cast_info *cast_info,
+        NPY_ARRAYMETHOD_FLAGS flags, int is_aligned)
 {
     npy_intp *counter, count;
-    int i, is_aligned;
+    int i;
 
     /* Cached mit info */
     int numiter = mit->numiter;
-    int needs_api = mit->needs_api;
+    int needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0;
     /* Constant information */
     npy_intp fancy_dims[NPY_MAXDIMS];
     npy_intp fancy_strides[NPY_MAXDIMS];
@@ -1588,13 +1590,6 @@ mapiter_@name@(PyArrayMapIterObject *mit)
         fancy_strides[i] = mit->fancy_strides[i];
     }
 
-    /*
-     * Alignment information (swapping is never needed, since we buffer),
-     * could also check extra_op is buffered, but it should rarely matter.
-     */
-
-    is_aligned = IsUintAligned(array) && IsUintAligned(mit->extra_op);
-
     if (mit->size == 0) {
        return 0;
     }
@@ -1744,9 +1739,6 @@ mapiter_@name@(PyArrayMapIterObject *mit)
             if (!needs_api) {
                 NPY_BEGIN_THREADS;
             }
-            if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
-                npy_clear_floatstatus_barrier((char *)mit);
-            }
 
             /* Outer iteration (safe because mit->size != 0) */
             do {
@@ -1757,7 +1749,6 @@ mapiter_@name@(PyArrayMapIterObject *mit)
 #if @isget@ && @one_iter@
                     if (check_and_adjust_index(&indval, fancy_dims[i],
                                                iteraxis, _save) < 0 ) {
-                        NPY_cast_info_xfree(&cast_info);
                         return -1;
                     }
 #else
@@ -1789,7 +1780,6 @@ mapiter_@name@(PyArrayMapIterObject *mit)
                                                    &errmsg)) {
                         NPY_END_THREADS;
                         PyErr_SetString(PyExc_ValueError, errmsg);
-                        NPY_cast_info_xfree(&cast_info);
                         return -1;
                     }
                     if (is_subiter_trivial != 0) {
@@ -1819,7 +1809,6 @@ mapiter_@name@(PyArrayMapIterObject *mit)
                  *       not at all...
                  */
                 if (needs_api && PyErr_Occurred()) {
-                    NPY_cast_info_xfree(&cast_info);
                     return -1;
                 }
 #endif
@@ -1827,21 +1816,19 @@ mapiter_@name@(PyArrayMapIterObject *mit)
                 do {
 
 #if @isget@
-                    if (NPY_UNLIKELY(cast_info.func(&cast_info.context,
+                    if (NPY_UNLIKELY(cast_info->func(&cast_info->context,
                             subspace_ptrs, counter, subspace_strides,
-                            cast_info.auxdata) < 0)) {
+                            cast_info->auxdata) < 0)) {
                         NPY_END_THREADS;
-                        NPY_cast_info_xfree(&cast_info);
                         return -1;
                     }
 #else
                     /* The operand order is reversed here */
                     char *args[2] = {subspace_ptrs[1], subspace_ptrs[0]};
                     npy_intp strides[2] = {subspace_strides[1], subspace_strides[0]};
-                    if (NPY_UNLIKELY(cast_info.func(&cast_info.context,
-                            args, counter, strides, cast_info.auxdata) < 0)) {
+                    if (NPY_UNLIKELY(cast_info->func(&cast_info->context,
+                            args, counter, strides, cast_info->auxdata) < 0)) {
                         NPY_END_THREADS;
-                        NPY_cast_info_xfree(&cast_info);
                         return -1;
                     }
 #endif
@@ -1852,15 +1839,6 @@ mapiter_@name@(PyArrayMapIterObject *mit)
             NPY_END_THREADS;
         }
 /**end repeat1**/
-
-        NPY_cast_info_xfree(&cast_info);
-
-        if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
-            int fpes = npy_get_floatstatus_barrier((char *)mit);
-            if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
-                return -1;
-            }
-        }
     }
     return 0;
 }
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 646a570ddc2c..2f3b2795f2a4 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -26,6 +26,9 @@
 #include "mem_overlap.h"
 #include "array_assign.h"
 #include "array_coercion.h"
+/* TODO: Only for `NpyIter_GetTransferFlags` until it is public */
+#define NPY_ITERATOR_IMPLEMENTATION_CODE
+#include "nditer_impl.h"
 
 #include "umathmodule.h"
 
@@ -1432,6 +1435,8 @@ array_subscript(PyArrayObject *self, PyObject *op)
     int index_type;
     int index_num;
     int i, ndim, fancy_ndim;
+    NPY_cast_info cast_info = {.func = NULL};
+
     /*
      * Index info array. We can have twice as many indices as dimensions
      * (because of None). The + 1 is to not need to check as much.
@@ -1597,7 +1602,43 @@ array_subscript(PyArrayObject *self, PyObject *op)
         goto finish;
     }
 
-    if (mapiter_get(mit) < 0) {
+    /*
+     * Alignment information (swapping is never needed, since we buffer),
+     * could also check extra_op is buffered, but it should rarely matter.
+     */
+    int is_aligned = IsUintAligned(self) && IsUintAligned(mit->extra_op);
+    /*
+     * NOTE: Getting never actually casts, so we currently do not bother to do
+     *       the full checks (floating point errors) here (unlike assignment).
+     */
+    int meth_flags = NpyIter_GetTransferFlags(mit->outer);
+    if (mit->extra_op_iter) {
+        int extra_op_flags = NpyIter_GetTransferFlags(mit->extra_op_iter);
+        meth_flags = PyArrayMethod_COMBINED_FLAGS(meth_flags, extra_op_flags);
+    }
+
+    if (mit->subspace_iter != NULL) {
+        int extra_op_flags = NpyIter_GetTransferFlags(mit->subspace_iter);
+        meth_flags = PyArrayMethod_COMBINED_FLAGS(meth_flags, extra_op_flags);
+
+        NPY_ARRAYMETHOD_FLAGS transfer_flags;
+        npy_intp fixed_strides[2];
+        /*
+         * Get a dtype transfer function, since there are no
+         * buffers, this is safe.
+         */
+        NpyIter_GetInnerFixedStrideArray(mit->subspace_iter, fixed_strides);
+
+        if (PyArray_GetDTypeTransferFunction(is_aligned,
+                fixed_strides[0], fixed_strides[1],
+                PyArray_DESCR(self), PyArray_DESCR(mit->extra_op),
+                0, &cast_info, &transfer_flags) != NPY_SUCCEED) {
+            goto finish;
+        }
+        meth_flags = PyArrayMethod_COMBINED_FLAGS(meth_flags, transfer_flags);
+    }
+
+    if (mapiter_get(mit, &cast_info, meth_flags, is_aligned) < 0) {
         goto finish;
     }
 
@@ -1632,6 +1673,7 @@ array_subscript(PyArrayObject *self, PyObject *op)
     }
 
   finish:
+    NPY_cast_info_xfree(&cast_info);
     Py_XDECREF(mit);
     Py_XDECREF(view);
     /* Clean up indices */
@@ -1717,6 +1759,9 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
 
     PyArrayMapIterObject *mit = NULL;
 
+    /* When a subspace is used, casting is done manually. */
+    NPY_cast_info cast_info = {.func = NULL};
+
     if (op == NULL) {
         PyErr_SetString(PyExc_ValueError,
                         "cannot delete array elements");
@@ -1953,12 +1998,50 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
         }
     }
 
-    /* Can now reset the outer iterator (delayed bufalloc) */
-    if (NpyIter_Reset(mit->outer, NULL) < 0) {
+    if (PyArray_MapIterCheckIndices(mit) < 0) {
         goto fail;
     }
 
-    if (PyArray_MapIterCheckIndices(mit) < 0) {
+    /*
+     * Alignment information (swapping is never needed, since we buffer),
+     * could also check extra_op is buffered, but it should rarely matter.
+     */
+    int is_aligned = IsUintAligned(self) && IsUintAligned(mit->extra_op);
+    int meth_flags = NpyIter_GetTransferFlags(mit->outer);
+
+    if (mit->extra_op_iter) {
+        int extra_op_flags = NpyIter_GetTransferFlags(mit->extra_op_iter);
+        meth_flags = PyArrayMethod_COMBINED_FLAGS(meth_flags, extra_op_flags);
+    }
+
+    if (mit->subspace_iter != NULL) {
+        int extra_op_flags = NpyIter_GetTransferFlags(mit->subspace_iter);
+        meth_flags = PyArrayMethod_COMBINED_FLAGS(meth_flags, extra_op_flags);
+
+        NPY_ARRAYMETHOD_FLAGS transfer_flags;
+        npy_intp fixed_strides[2];
+
+        /*
+         * Get a dtype transfer function, since there are no
+         * buffers, this is safe.
+         */
+        NpyIter_GetInnerFixedStrideArray(mit->subspace_iter, fixed_strides);
+
+        if (PyArray_GetDTypeTransferFunction(is_aligned,
+                fixed_strides[1], fixed_strides[0],
+                PyArray_DESCR(mit->extra_op), PyArray_DESCR(self),
+                0, &cast_info, &transfer_flags) != NPY_SUCCEED) {
+            goto fail;
+        }
+        meth_flags = PyArrayMethod_COMBINED_FLAGS(meth_flags, transfer_flags);
+    }
+
+    if (!(meth_flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+        npy_clear_floatstatus_barrier((char *)mit);
+    }
+
+    /* Can now reset the outer iterator (delayed bufalloc) */
+    if (NpyIter_Reset(mit->outer, NULL) < 0) {
         goto fail;
     }
 
@@ -1966,11 +2049,17 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
      * Could add a casting check, but apparently most assignments do
      * not care about safe casting.
      */
-
-    if (mapiter_set(mit) < 0) {
+    if (mapiter_set(mit, &cast_info, meth_flags, is_aligned) < 0) {
         goto fail;
     }
 
+    if (!(meth_flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+        int fpes = npy_get_floatstatus_barrier((char *)mit);
+        if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
+            goto fail;
+        }
+    }
+
     Py_DECREF(mit);
     goto success;
 
@@ -1979,6 +2068,8 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
     Py_XDECREF((PyObject *)view);
     Py_XDECREF((PyObject *)tmp_arr);
     Py_XDECREF((PyObject *)mit);
+    NPY_cast_info_xfree(&cast_info);
+
     for (i=0; i < index_num; i++) {
         Py_XDECREF(indices[i].object);
     }
@@ -1987,6 +2078,8 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
   success:
     Py_XDECREF((PyObject *)view);
     Py_XDECREF((PyObject *)tmp_arr);
+    NPY_cast_info_xfree(&cast_info);
+
     for (i=0; i < index_num; i++) {
         Py_XDECREF(indices[i].object);
     }
@@ -2107,7 +2200,7 @@ _nonzero_indices(PyObject *myBool, PyArrayObject **arrays)
 
 
 /* Reset the map iterator to the beginning */
-NPY_NO_EXPORT void
+NPY_NO_EXPORT int
 PyArray_MapIterReset(PyArrayMapIterObject *mit)
 {
     npy_intp indval;
@@ -2115,12 +2208,16 @@ PyArray_MapIterReset(PyArrayMapIterObject *mit)
     int i;
 
     if (mit->size == 0) {
-        return;
+        return 0;
     }
 
-    NpyIter_Reset(mit->outer, NULL);
+    if (!NpyIter_Reset(mit->outer, NULL)) {
+        return -1;
+    }
     if (mit->extra_op_iter) {
-        NpyIter_Reset(mit->extra_op_iter, NULL);
+        if (!NpyIter_Reset(mit->extra_op_iter, NULL)) {
+            return -1;
+        }
 
         baseptrs[1] = mit->extra_op_ptrs[0];
     }
@@ -2137,14 +2234,16 @@ PyArray_MapIterReset(PyArrayMapIterObject *mit)
     mit->dataptr = baseptrs[0];
 
     if (mit->subspace_iter) {
-        NpyIter_ResetBasePointers(mit->subspace_iter, baseptrs, NULL);
+        if (!NpyIter_ResetBasePointers(mit->subspace_iter, baseptrs, NULL)) {
+            return -1;
+        }
         mit->iter_count = *NpyIter_GetInnerLoopSizePtr(mit->subspace_iter);
     }
     else {
         mit->iter_count = *NpyIter_GetInnerLoopSizePtr(mit->outer);
     }
 
-    return;
+    return 0;
 }
 
 
@@ -2981,7 +3080,8 @@ PyArray_MapIterNew(npy_index_info *indices , int index_num, int index_type,
         mit->extra_op_iter = NpyIter_AdvancedNew(1, &extra_op,
                                                  NPY_ITER_ZEROSIZE_OK |
                                                  NPY_ITER_REFS_OK |
-                                                 NPY_ITER_GROWINNER,
+                                                 NPY_ITER_GROWINNER |
+                                                 NPY_ITER_DELAY_BUFALLOC,
                                                  NPY_CORDER,
                                                  NPY_NO_CASTING,
                                                  &extra_op_flags,
@@ -3080,30 +3180,6 @@ PyArray_MapIterNew(npy_index_info *indices , int index_num, int index_type,
     mit->subspace_ptrs = NpyIter_GetDataPtrArray(mit->subspace_iter);
     mit->subspace_strides = NpyIter_GetInnerStrideArray(mit->subspace_iter);
 
-    NPY_cast_info *cast_info = (NPY_cast_info *)&mit->subspace_castinfo;
-    NPY_ARRAYMETHOD_FLAGS flags;
-    npy_intp fixed_strides[2];
-
-    /*
-     * Get a dtype transfer function, since there are no
-     * buffers, this is safe.
-     */
-    NpyIter_GetInnerFixedStrideArray(mit->subspace_iter, fixed_strides);
-
-    if (PyArray_GetDTypeTransferFunction(is_aligned,
-#if @isget@
-    fixed_strides[0], fixed_strides[1],
-                        PyArray_DESCR(array), PyArray_DESCR(mit->extra_op),
-#else
-            fixed_strides[1], fixed_strides[0],
-            PyArray_DESCR(mit->extra_op), PyArray_DESCR(array),
-#endif
-            0,
-            &cast_info,
-            &flags) != NPY_SUCCEED) {
-        goto fail;
-    }
-
     if (NpyIter_IterationNeedsAPI(mit->subspace_iter)) {
         mit->needs_api = 1;
         /*
@@ -3255,9 +3331,12 @@ PyArray_MapIterArrayCopyIfOverlap(PyArrayObject * a, PyObject * index,
         goto fail;
     }
 
+    if (PyArray_MapIterReset(mit) < 0) {
+        goto fail;
+    }
+
     Py_XDECREF(a_copy);
     Py_XDECREF(subspace);
-    PyArray_MapIterReset(mit);
 
     for (i=0; i < index_num; i++) {
         Py_XDECREF(indices[i].object);
@@ -3311,7 +3390,6 @@ arraymapiter_dealloc(PyArrayMapIterObject *mit)
     }
     if (mit->subspace_iter != NULL) {
         NpyIter_Deallocate(mit->subspace_iter);
-        NPY_cast_info_xfree((NPY_cast_info *)(&(mit->subspace_castinfo)));
     }
     if (mit->extra_op_iter != NULL) {
         NpyIter_Deallocate(mit->extra_op_iter);
diff --git a/numpy/core/src/multiarray/mapping.h b/numpy/core/src/multiarray/mapping.h
index e929b8b3f729..4e5d06238795 100644
--- a/numpy/core/src/multiarray/mapping.h
+++ b/numpy/core/src/multiarray/mapping.h
@@ -51,7 +51,7 @@ array_assign_item(PyArrayObject *self, Py_ssize_t i, PyObject *v);
  * Prototypes for Mapping calls --- not part of the C-API
  * because only useful as part of a getitem call.
  */
-NPY_NO_EXPORT void
+NPY_NO_EXPORT int
 PyArray_MapIterReset(PyArrayMapIterObject *mit);
 
 NPY_NO_EXPORT void
diff --git a/numpy/core/tests/test_casting_floatingpoint_errors.py b/numpy/core/tests/test_casting_floatingpoint_errors.py
index 797dc3387e09..70a59a72ba64 100644
--- a/numpy/core/tests/test_casting_floatingpoint_errors.py
+++ b/numpy/core/tests/test_casting_floatingpoint_errors.py
@@ -47,17 +47,21 @@ def check_operations(dtype, value):
     floating point errors which occurred during those casts.
     """
     if dtype.kind != 'i':
+        # These assignments use the stricter setitem logic:
         def assignment():
             arr = np.empty(3, dtype=dtype)
             arr[0] = value
 
         yield assignment
 
-        def fill():
-            arr = np.empty(3, dtype=dtype)
-            arr.fill(value)
+        # TODO: This constraint is a bug in arr.fill() and should be removed
+        #       e.g. by gh-20924
+        if value != 10**100:
+            def fill():
+                arr = np.empty(3, dtype=dtype)
+                arr.fill(value)
 
-        yield fill
+            yield fill
 
     def copyto_scalar():
         arr = np.empty(3, dtype=dtype)
@@ -133,3 +137,4 @@ def test_floatingpoint_errors_casting(dtype, value):
         with np.errstate(all="raise"):
             with pytest.raises(FloatingPointError, match=match):
                 operation()
+

From 96fd8ba3f51c6fe7ecfc797c4c33d7ae4dc2821a Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Wed, 4 May 2022 09:01:08 +0200
Subject: [PATCH 09/20] BUG: Fix invalid access due to `iter` already being
 cleaned up

---
 numpy/core/src/multiarray/mapping.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 2f3b2795f2a4..d378dc8e586d 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -1192,7 +1192,7 @@ array_assign_boolean_subscript(PyArrayObject *self,
             NPY_BEGIN_THREADS_NDITER(iter);
         }
         if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
-            npy_clear_floatstatus_barrier((char *)iter);
+            npy_clear_floatstatus_barrier((char *)self);
         }
 
         npy_intp strides[2] = {v_stride, self_stride};
@@ -1233,7 +1233,7 @@ array_assign_boolean_subscript(PyArrayObject *self,
             res = -1;
         }
         if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
-            int fpes = npy_get_floatstatus_barrier((char*)iter);
+            int fpes = npy_get_floatstatus_barrier((char *)self);
             if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
                 return -1;
             }

From fc865d8c6212b51bb03190f2cd732f1d484539dd Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Wed, 4 May 2022 09:47:13 +0200
Subject: [PATCH 10/20] TST: Fix the weird boolean+advanced indexing test

The test meant to assert shapes, but didn't actually do it.
---
 numpy/core/tests/test_indexing.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/numpy/core/tests/test_indexing.py b/numpy/core/tests/test_indexing.py
index efcb92c2e6d1..9ef30eae2652 100644
--- a/numpy/core/tests/test_indexing.py
+++ b/numpy/core/tests/test_indexing.py
@@ -1297,11 +1297,10 @@ def test_bool_as_int_argument_errors(self):
     def test_boolean_indexing_weirdness(self):
         # Weird boolean indexing things
         a = np.ones((2, 3, 4))
-        a[False, True, ...].shape == (0, 2, 3, 4)
-        a[True, [0, 1], True, True, [1], [[2]]] == (1, 2)
+        assert a[False, True, ...].shape == (0, 2, 3, 4)
+        assert a[True, [0, 1], True, True, [1], [[2]]].shape == (1, 2)
         assert_raises(IndexError, lambda: a[False, [0, 1], ...])
 
-
     def test_boolean_indexing_fast_path(self):
         # These used to either give the wrong error, or incorrectly give no
         # error.

From 219697afb8bc0dfe3b0332a9a6bf3efd8a558e45 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Wed, 4 May 2022 13:08:48 +0200
Subject: [PATCH 11/20] TST: Fixup test name and check for ppc64le

---
 numpy/core/tests/test_casting_floatingpoint_errors.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/numpy/core/tests/test_casting_floatingpoint_errors.py b/numpy/core/tests/test_casting_floatingpoint_errors.py
index 70a59a72ba64..669a0d174e26 100644
--- a/numpy/core/tests/test_casting_floatingpoint_errors.py
+++ b/numpy/core/tests/test_casting_floatingpoint_errors.py
@@ -26,9 +26,13 @@ def values_and_dtypes():
     yield param(np.longdouble(1e300), "float32", id="longdouble-to-f2")
     yield param(np.float64(1e300), "float32", id="double-to-f2")
     # Casting to float64:
-    if np.finfo(np.longdouble).max > np.finfo(np.float64).max:
+    # If longdouble is double-double, its max can be rounded down to the double
+    # max.  So we correct the double spacing (a bit weird, admittedly):
+    max_ld = np.finfo(np.longdouble).max
+    spacing = np.spacing(np.nextafter(np.finfo("f8").max, 0))
+    if max_ld - spacing > np.finfo("f8").max:
         yield param(np.finfo(np.longdouble).max, "float64",
-                    id="longdouble-to-f4")
+                    id="longdouble-to-f8")
 
     # Invalid float to integer casts:
     with np.errstate(over="ignore"):

From 9c74efa571dc0ac5fd18799105fd678cd8042282 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Wed, 4 May 2022 14:48:22 +0200
Subject: [PATCH 12/20] BUG: Fix needs_api for avanced assignments without
 subspace and remove unnecessary delay

The delay is not necessary, because the iterator does not cast in any case, so we do not
actually care about the possibility of errors being raised.
---
 numpy/core/src/multiarray/lowlevel_strided_loops.c.src | 6 ++++--
 numpy/core/src/multiarray/mapping.c                    | 4 +---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
index 827f0b615a5b..8e3afd3cc658 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -1597,9 +1597,11 @@ mapiter_@name@(
     if (mit->subspace_iter == NULL) {
         /*
          * Item by item copy situation, the operand is buffered
-         * so use copyswap.
+         * so use copyswap.  The iterator may not do any transfers, so may
+         * not have set `needs_api` yet, set it if necessary:
          */
-         PyArray_CopySwapFunc *copyswap = PyArray_DESCR(array)->f->copyswap;
+        needs_api |= PyDataType_REFCHK(PyArray_DESCR(array));
+        PyArray_CopySwapFunc *copyswap = PyArray_DESCR(array)->f->copyswap;
 
         /* We have only one iterator handling everything */
         counter = NpyIter_GetInnerLoopSizePtr(mit->outer);
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index d378dc8e586d..78a2335f2eff 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -1934,7 +1934,6 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
             index_num == 1 && tmp_arr) {
         /* The array being indexed has one dimension and it is a fancy index */
         PyArrayObject *ind = (PyArrayObject*)indices[0].object;
-
         /* Check if the type is equivalent */
         if (PyArray_EquivTypes(PyArray_DESCR(self),
                                    PyArray_DESCR(tmp_arr)) &&
@@ -3080,8 +3079,7 @@ PyArray_MapIterNew(npy_index_info *indices , int index_num, int index_type,
         mit->extra_op_iter = NpyIter_AdvancedNew(1, &extra_op,
                                                  NPY_ITER_ZEROSIZE_OK |
                                                  NPY_ITER_REFS_OK |
-                                                 NPY_ITER_GROWINNER |
-                                                 NPY_ITER_DELAY_BUFALLOC,
+                                                 NPY_ITER_GROWINNER,
                                                  NPY_CORDER,
                                                  NPY_NO_CASTING,
                                                  &extra_op_flags,

From d41f1de03794bee3f722e6921bc5fce5c04cce04 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Wed, 4 May 2022 17:18:29 +0200
Subject: [PATCH 13/20] BUG: Add volatile to float -> datetime casts to work
 around compiler deficiencies

---
 numpy/core/src/multiarray/arraytypes.c.src | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index ca06643cebd6..a9f8dfdd2d40 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -1197,8 +1197,12 @@ static void
     @totype@ *op = output;
 
     while (n--) {
-        @fromtype@ f = *ip++;
 #if @supports_nat@ && @floatingpoint@
+        /*
+         * volatile works around clang (and gcc sometimes) not branching
+         * correctly, leading to floating point errors in the test suite.
+         */
+        volatile @fromtype@ f = *ip++;
         @totype@ t;
         /* Avoid undefined behaviour and warning for NaN -> NaT */
         if (npy_isnan(f)) {
@@ -1208,7 +1212,7 @@ static void
             t = (@totype@)f;
         }
 #else
-        @totype@ t = (@totype@)f;
+        @totype@ t = (@totype@)*ip++;
 #endif
         *op++ = t;
     }
@@ -1228,8 +1232,12 @@ static void
     @totype@ *op = output;
 
     while (n--) {
-        @fromtype@ f = *ip;
 #if @supports_nat@
+        /*
+         * volatile works around clang (and gcc sometimes) not branching
+         * correctly, leading to floating point errors in the test suite.
+         */
+        volatile @fromtype@ f = *ip;
         @totype@ t;
         /* Avoid undefined behaviour and warning for NaN -> NaT */
         if (npy_isnan(f)) {
@@ -1239,7 +1247,7 @@ static void
             t = (@totype@)f;
         }
 #else
-        @totype@ t = (@totype@)f;
+        @totype@ t = (@totype@)*ip;
 #endif
         *op++ = t;
         ip += 2;

From 8d2d9a58cc824c6bad6b17e552fbd79f19403756 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Wed, 4 May 2022 19:59:28 +0200
Subject: [PATCH 14/20] TST: Add type check to not trigger an invalid FPE
 within PyPy

---
 numpy/core/tests/test_casting_floatingpoint_errors.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/numpy/core/tests/test_casting_floatingpoint_errors.py b/numpy/core/tests/test_casting_floatingpoint_errors.py
index 669a0d174e26..ea348e65890c 100644
--- a/numpy/core/tests/test_casting_floatingpoint_errors.py
+++ b/numpy/core/tests/test_casting_floatingpoint_errors.py
@@ -59,8 +59,10 @@ def assignment():
         yield assignment
 
         # TODO: This constraint is a bug in arr.fill() and should be removed
-        #       e.g. by gh-20924
-        if value != 10**100:
+        #       e.g. by gh-20924.  The type check works around the fact that
+        #       PyPy seems to create an "invalid" error itself, and we see it
+        #       due to gh-21416.
+        if type(value) is int and value != 10**100:
             def fill():
                 arr = np.empty(3, dtype=dtype)
                 arr.fill(value)

From b463a7fcee642eb75ec7fd5faa33f3fabbcfb258 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Wed, 1 Jun 2022 08:23:08 -0700
Subject: [PATCH 15/20] TST: Improve test coverage for complex FPE cast errors

---
 numpy/core/tests/test_casting_floatingpoint_errors.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/numpy/core/tests/test_casting_floatingpoint_errors.py b/numpy/core/tests/test_casting_floatingpoint_errors.py
index ea348e65890c..7248e7a0899b 100644
--- a/numpy/core/tests/test_casting_floatingpoint_errors.py
+++ b/numpy/core/tests/test_casting_floatingpoint_errors.py
@@ -34,6 +34,12 @@ def values_and_dtypes():
         yield param(np.finfo(np.longdouble).max, "float64",
                     id="longdouble-to-f8")
 
+    # Cast to complex32:
+    yield param(2e300, "complex64", id="float-to-c8")
+    yield param(2e300+0j, "complex64", id="complex-to-c8")
+    yield param(2e300j, "complex64", id="complex-to-c8")
+    yield param(np.longdouble(2e300), "complex64", id="longdouble-to-c8")
+
     # Invalid float to integer casts:
     with np.errstate(over="ignore"):
         for to_dt in np.typecodes["AllInteger"]:

From 63fa74a6150b7cea0497e232a60bee9d10a87e8d Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Wed, 1 Jun 2022 19:09:35 -0700
Subject: [PATCH 16/20] BUG: Avoid `NpyIter_EnableExternalLoop` as it resets
 the buffer

Resetting the buffer will copy it, which may cause casting errors.
And we want to prefer/catch those later ideally.
---
 numpy/core/src/multiarray/mapping.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 78a2335f2eff..98c2d7edaa27 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -2991,6 +2991,11 @@ PyArray_MapIterNew(npy_index_info *indices , int index_num, int index_type,
 
     /* If external array is iterated, and no subspace is needed */
     nops = mit->numiter;
+
+    if (!uses_subspace) {
+        outer_flags |= NPY_ITER_EXTERNAL_LOOP;
+    }
+
     if (extra_op_flags && !uses_subspace) {
         /*
          * NOTE: This small limitation should practically not matter.
@@ -3038,9 +3043,6 @@ PyArray_MapIterNew(npy_index_info *indices , int index_num, int index_type,
     if (mit->outer == NULL) {
         goto fail;
     }
-    if (!uses_subspace) {
-        NpyIter_EnableExternalLoop(mit->outer);
-    }
 
     mit->outer_next = NpyIter_GetIterNext(mit->outer, NULL);
     if (mit->outer_next == NULL) {

From a7e58a7e22ce95d05648987a49227d8cadf3182b Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Wed, 1 Jun 2022 19:10:29 -0700
Subject: [PATCH 17/20] TST: Enable inteer array assignment test and add flat
 test

---
 numpy/core/tests/test_casting_floatingpoint_errors.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/numpy/core/tests/test_casting_floatingpoint_errors.py b/numpy/core/tests/test_casting_floatingpoint_errors.py
index 7248e7a0899b..87ebc194bebc 100644
--- a/numpy/core/tests/test_casting_floatingpoint_errors.py
+++ b/numpy/core/tests/test_casting_floatingpoint_errors.py
@@ -124,7 +124,7 @@ def integer_array_assignment():
 
         arr[[0, 1]] = values
 
-    #yield integer_array_assignment
+    yield integer_array_assignment
 
     def integer_array_assignment_with_subspace():
         arr = np.empty((5, 3), dtype=dtype)
@@ -134,6 +134,12 @@ def integer_array_assignment_with_subspace():
 
     yield integer_array_assignment_with_subspace
 
+    def flat_assignment():
+        arr = np.empty((3,), dtype=dtype)
+        values = np.array([value, value, value])
+        arr.flat[:] = values
+
+    yield flat_assignment
 
 @pytest.mark.parametrize(["value", "dtype"], values_and_dtypes())
 @pytest.mark.filterwarnings("ignore::numpy.ComplexWarning")

From f7b0493d51198b1cbb204409795398d7c8b5b059 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Mon, 6 Jun 2022 11:42:03 -0700
Subject: [PATCH 18/20] TST: Add final set of cast (and FPE in cast) test to
 ufuncs

The remaining uncovered paths seem to me like code that should be
effectively unreachable.
(I do actually wonder if `PyArray_MapIterReset` should be removed.)
---
 numpy/core/tests/test_ufunc.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index 56ca7f4bd252..3466178a33dc 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -2457,7 +2457,7 @@ def test_ufunc_warn_with_nan(ufunc):
 
 
 @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts")
-def test_ufunc_casterrors():
+def test_ufunc_out_casterrors():
     # Tests that casting errors are correctly reported and buffers are
     # cleared.
     # The following array can be added to itself as an object array, but
@@ -2488,6 +2488,28 @@ def test_ufunc_casterrors():
     assert out[-1] == 1
 
 
+@pytest.mark.parametrize("bad_offset", [0, int(np.BUFSIZE * 1.5)])
+def test_ufunc_input_casterrors(bad_offset):
+    value = 123
+    arr = np.array([value] * bad_offset +
+                   ["string"] +
+                   [value] * int(1.5 * np.BUFSIZE), dtype=object)
+    with pytest.raises(ValueError):
+        # Force cast inputs, but the buffered cast of `arr` to intp fails:
+        np.add(arr, arr, dtype=np.intp, casting="unsafe")
+
+
+@pytest.mark.parametrize("bad_offset", [0, int(np.BUFSIZE * 1.5)])
+def test_ufunc_input_floatingpoint_error(bad_offset):
+    value = 123
+    arr = np.array([value] * bad_offset +
+                   [np.nan] +
+                   [value] * int(1.5 * np.BUFSIZE))
+    with np.errstate(invalid="raise"), pytest.raises(FloatingPointError):
+        # Force cast inputs, but the buffered cast of `arr` to intp fails:
+        np.add(arr, arr, dtype=np.intp, casting="unsafe")
+
+
 def test_trivial_loop_invalid_cast():
     # This tests the fast-path "invalid cast", see gh-19904.
     with pytest.raises(TypeError,

From 7e6d8ef3be68265540b04130dd26ef6dfdf741e1 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Mon, 6 Jun 2022 11:55:19 -0700
Subject: [PATCH 19/20] DOC: Add a release note on the floating point errors in
 cast change

---
 .../upcoming_changes/21437.improvement.rst    | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 doc/release/upcoming_changes/21437.improvement.rst

diff --git a/doc/release/upcoming_changes/21437.improvement.rst b/doc/release/upcoming_changes/21437.improvement.rst
new file mode 100644
index 000000000000..256bfd600288
--- /dev/null
+++ b/doc/release/upcoming_changes/21437.improvement.rst
@@ -0,0 +1,33 @@
+NumPy now gives floating point errors in casts
+----------------------------------------------
+
+In most cases, NumPy previously did not give floating point
+warnings or errors when these happened during casts.
+For examples, casts like::
+
+    np.array([2e300]).astype(np.float32)  # overflow for float32
+    np.array([np.inf]).astype(np.int64)
+
+Should now generally give floating point warnings.  These warnings
+should warn that floating point overflow occurred.
+For errors when converting floating point values to integers users
+should expect invalid value warnings.
+
+Users can modify the behavior of these warnings using `np.errstate`.
+
+Note that for float to int casts, the exact warnings that are given may
+be platform dependend.  For example::
+
+    arr = np.full(100, value=1000, dtype=np.float64)
+    arr.astype(np.int8)
+
+May give a result equivalent to (the intermediat means no warning is given)::
+
+    arr.astype(np.int64).astype(np.int8)
+
+May may return an undefined result, with a warning set::
+
+    RuntimeWarning: invalid value encountered in cast
+
+The precise behavior if subject to the C99 standard and its implementation
+in both software and hardware.

From a546ee19523f9b97db985afa8d161af0dee1fafa Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Mon, 13 Jun 2022 09:38:09 -0700
Subject: [PATCH 20/20] TST: Remove FPE `fill` special case after rebase

---
 .../tests/test_casting_floatingpoint_errors.py    | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/numpy/core/tests/test_casting_floatingpoint_errors.py b/numpy/core/tests/test_casting_floatingpoint_errors.py
index 87ebc194bebc..4fafc4ed8091 100644
--- a/numpy/core/tests/test_casting_floatingpoint_errors.py
+++ b/numpy/core/tests/test_casting_floatingpoint_errors.py
@@ -64,16 +64,11 @@ def assignment():
 
         yield assignment
 
-        # TODO: This constraint is a bug in arr.fill() and should be removed
-        #       e.g. by gh-20924.  The type check works around the fact that
-        #       PyPy seems to create an "invalid" error itself, and we see it
-        #       due to gh-21416.
-        if type(value) is int and value != 10**100:
-            def fill():
-                arr = np.empty(3, dtype=dtype)
-                arr.fill(value)
-
-            yield fill
+        def fill():
+            arr = np.empty(3, dtype=dtype)
+            arr.fill(value)
+
+        yield fill
 
     def copyto_scalar():
         arr = np.empty(3, dtype=dtype)