From 9c6081ef884d0d8cdc740c2b5f4f571d1fdf944c Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Tue, 15 Jun 2021 20:08:39 -0500 Subject: [PATCH 1/3] MAINT: Align masked with normal ufunc loops This removes the ability to specialize masked inner loops (for now) as was already noted in NEP 41 and NEP 43. The mask array is now passed in as the last argument to use the identical signature and avoid duplicating the code unnecessarily. This is part of the longer process to refactor ufuncs to NEP 43 and is split out to keep the diffs shorter (or at least easier to read). --- doc/release/upcoming_changes/19259.c_api.rst | 14 + numpy/core/include/numpy/ufuncobject.h | 35 +-- numpy/core/src/umath/ufunc_object.c | 276 ++++++------------- numpy/core/src/umath/ufunc_type_resolution.c | 122 ++++---- numpy/core/src/umath/ufunc_type_resolution.h | 6 +- numpy/core/tests/test_umath.py | 10 +- 6 files changed, 158 insertions(+), 305 deletions(-) create mode 100644 doc/release/upcoming_changes/19259.c_api.rst diff --git a/doc/release/upcoming_changes/19259.c_api.rst b/doc/release/upcoming_changes/19259.c_api.rst new file mode 100644 index 000000000000..0fbc3d0bf07d --- /dev/null +++ b/doc/release/upcoming_changes/19259.c_api.rst @@ -0,0 +1,14 @@ +Masked inner-loops cannot be customized anymore +----------------------------------------------- +The masked inner-loop selector is now never used. A warning +will be given in the unlikely event that it was customized. + +We do not expect that any code uses this. If you do use it, +you must unset unset the selector on newer NumPy version. +Please also contact the NumPy developers, we do anticipate +providing a new, more specific, mechanism. + +This change will not affect the results of operations, since +the fallback (which is always used internally) will handle +the operation equivalently, the customization was a planned +feature to allow for faster masked operation. 
diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h index 333a326ee60e..0f3b8529aaa3 100644 --- a/numpy/core/include/numpy/ufuncobject.h +++ b/numpy/core/include/numpy/ufuncobject.h @@ -66,27 +66,14 @@ typedef int (PyUFunc_TypeResolutionFunc)( PyArray_Descr **out_dtypes); /* - * Given an array of DTypes as returned by the PyUFunc_TypeResolutionFunc, - * and an array of fixed strides (the array will contain NPY_MAX_INTP for - * strides which are not necessarily fixed), returns an inner loop - * with associated auxiliary data. - * - * For backwards compatibility, there is a variant of the inner loop - * selection which returns an inner loop irrespective of the strides, - * and with a void* static auxiliary data instead of an NpyAuxData * - * dynamically allocatable auxiliary data. + * Legacy loop selector. (This should NOT normally be used and we can expect + * that only the `PyUFunc_DefaultLegacyInnerLoopSelector` is ever set). + * However, unlike the masked version, it probably still works. * * ufunc: The ufunc object. * dtypes: An array which has been populated with dtypes, * in most cases by the type resolution function * for the same ufunc. - * fixed_strides: For each input/output, either the stride that - * will be used every time the function is called - * or NPY_MAX_INTP if the stride might change or - * is not known ahead of time. The loop selection - * function may use this stride to pick inner loops - * which are optimized for contiguous or 0-stride - * cases. * out_innerloop: Should be populated with the correct ufunc inner * loop for the given type. 
* out_innerloopdata: Should be populated with the void* data to @@ -101,15 +88,7 @@ typedef int (PyUFunc_LegacyInnerLoopSelectionFunc)( PyUFuncGenericFunction *out_innerloop, void **out_innerloopdata, int *out_needs_api); -typedef int (PyUFunc_MaskedInnerLoopSelectionFunc)( - struct _tagPyUFuncObject *ufunc, - PyArray_Descr **dtypes, - PyArray_Descr *mask_dtype, - npy_intp *fixed_strides, - npy_intp fixed_mask_stride, - PyUFunc_MaskedStridedInnerLoopFunc **out_innerloop, - NpyAuxData **out_innerloopdata, - int *out_needs_api); + typedef struct _tagPyUFuncObject { PyObject_HEAD @@ -199,10 +178,8 @@ typedef struct _tagPyUFuncObject { #else void *reserved2; #endif - /* - * A function which returns a masked inner loop for the ufunc. - */ - PyUFunc_MaskedInnerLoopSelectionFunc *masked_inner_loop_selector; + /* Was previously the `PyUFunc_MaskedInnerLoopSelectionFunc` */ + void *_always_null_previously_masked_innerloop_selector; /* * List of flags for each operand when ufunc is called by nditer object. 
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 37e297ed50aa..070978675bd4 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -1123,8 +1123,7 @@ prepare_ufunc_output(PyUFuncObject *ufunc, static NPY_INLINE int try_trivial_single_output_loop(PyUFuncObject *ufunc, PyArrayObject *op[], PyArray_Descr *dtypes[], - NPY_ORDER order, PyObject *arr_prep[], ufunc_full_args full_args, - PyUFuncGenericFunction innerloop, void *innerloopdata) + NPY_ORDER order, PyObject *arr_prep[], ufunc_full_args full_args) { int nin = ufunc->nin; int nop = nin + 1; @@ -1235,6 +1234,13 @@ try_trivial_single_output_loop(PyUFuncObject *ufunc, int needs_api = 0; NPY_BEGIN_THREADS_DEF; + PyUFuncGenericFunction innerloop; + void *innerloopdata = NULL; + if (ufunc->legacy_inner_loop_selector(ufunc, dtypes, + &innerloop, &innerloopdata, &needs_api) < 0) { + return -1; + } + for (int iop = 0; iop < nop; iop++) { data[iop] = PyArray_BYTES(op[iop]); needs_api |= PyDataType_REFCHK(dtypes[iop]); @@ -1252,20 +1258,49 @@ try_trivial_single_output_loop(PyUFuncObject *ufunc, static int -iterator_loop(PyUFuncObject *ufunc, +execute_ufunc_loop(PyUFuncObject *ufunc, + int masked, PyArrayObject **op, - PyArray_Descr **dtype, + PyArray_Descr **dtypes, NPY_ORDER order, npy_intp buffersize, PyObject **arr_prep, ufunc_full_args full_args, - PyUFuncGenericFunction innerloop, - void *innerloopdata, npy_uint32 *op_flags) { int nin = ufunc->nin, nout = ufunc->nout; int nop = nin + nout; + if (masked) { + assert(PyArray_TYPE(op[nop]) == NPY_BOOL); + if (ufunc->_always_null_previously_masked_innerloop_selector != NULL) { + if (PyErr_WarnFormat(PyExc_UserWarning, 1, + "The ufunc %s has a custom masked-inner-loop-selector." + "NumPy assumes that this is NEVER used. If you do make " + "use of this please notify the NumPy developers to discuss " + "future solutions. 
(See NEP 41 and 43)\n" + "NumPy will continue, but ignore the custom loop selector. " + "This should only affect performance.", + ufunc_get_name_cstr(ufunc)) < 0) { + return -1; + } + } + + /* + * NOTE: In the masked version, we consider the output read-write, + * this gives a best-effort of preserving the input, but does + * not always work. It could allow the operand to be copied + * due to copy-if-overlap, but only if it was passed in. + * In that case `__array_prepare__` is called before it happens. + */ + for (int i = nin; i < nop; ++i) { + op_flags[i] |= (op[i] != NULL ? NPY_ITER_READWRITE : NPY_ITER_WRITEONLY); + } + op_flags[nop] = NPY_ITER_READONLY | NPY_ITER_ARRAYMASK; /* mask */ + } + + NPY_UF_DBG_PRINT("Making iterator\n"); + npy_uint32 iter_flags = ufunc->iter_flags | NPY_ITER_EXTERNAL_LOOP | NPY_ITER_REFS_OK | @@ -1295,10 +1330,10 @@ iterator_loop(PyUFuncObject *ufunc, * were already checked, we use the casting rule 'unsafe' which * is faster to calculate. */ - NpyIter *iter = NpyIter_AdvancedNew(nop, op, + NpyIter *iter = NpyIter_AdvancedNew(nop + masked, op, iter_flags, order, NPY_UNSAFE_CASTING, - op_flags, dtype, + op_flags, dtypes, -1, NULL, NULL, buffersize); if (iter == NULL) { return -1; @@ -1354,21 +1389,49 @@ iterator_loop(PyUFuncObject *ufunc, for (int i = 0; i < nin; i++) { baseptrs[i] = PyArray_BYTES(op_it[i]); } + if (masked) { + baseptrs[nop] = PyArray_BYTES(op_it[nop]); + } if (NpyIter_ResetBasePointers(iter, baseptrs, NULL) != NPY_SUCCEED) { NpyIter_Deallocate(iter); return -1; } + /* + * Get the inner loop. 
+ */ + int needs_api = 0; + PyUFuncGenericFunction innerloop; + void *innerloopdata = NULL; + if (masked) { + if (PyUFunc_DefaultMaskedInnerLoopSelector(ufunc, + dtypes, &innerloop, (NpyAuxData **)&innerloopdata, + &needs_api) < 0) { + NpyIter_Deallocate(iter); + return -1; + } + } + else { + if (ufunc->legacy_inner_loop_selector(ufunc, dtypes, + &innerloop, &innerloopdata, &needs_api) < 0) { + NpyIter_Deallocate(iter); + return -1; + } + } + /* Get the variables needed for the loop */ NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL); if (iternext == NULL) { NpyIter_Deallocate(iter); + if (masked) { + NPY_AUXDATA_FREE((NpyAuxData *)innerloopdata); + } return -1; } char **dataptr = NpyIter_GetDataPtrArray(iter); npy_intp *strides = NpyIter_GetInnerStrideArray(iter); npy_intp *countptr = NpyIter_GetInnerLoopSizePtr(iter); - int needs_api = NpyIter_IterationNeedsAPI(iter); + needs_api |= NpyIter_IterationNeedsAPI(iter); NPY_BEGIN_THREADS_DEF; @@ -1384,6 +1447,9 @@ iterator_loop(PyUFuncObject *ufunc, } while (!(needs_api && PyErr_Occurred()) && iternext(iter)); NPY_END_THREADS; + if (masked) { + NPY_AUXDATA_FREE((NpyAuxData *)innerloopdata); + } /* * Currently `innerloop` may leave an error set, in this case @@ -1417,20 +1483,10 @@ execute_legacy_ufunc_loop(PyUFuncObject *ufunc, ufunc_full_args full_args, npy_uint32 *op_flags) { - PyUFuncGenericFunction innerloop; - void *innerloopdata; - int needs_api = 0; - - if (ufunc->legacy_inner_loop_selector(ufunc, dtypes, - &innerloop, &innerloopdata, &needs_api) < 0) { - return -1; - } - /* First check for the trivial cases that don't need an iterator */ if (trivial_loop_ok && ufunc->nout == 1) { int fast_path_result = try_trivial_single_output_loop(ufunc, - op, dtypes, order, arr_prep, full_args, - innerloop, innerloopdata); + op, dtypes, order, arr_prep, full_args); if (fast_path_result != -2) { return fast_path_result; } @@ -1441,186 +1497,14 @@ execute_legacy_ufunc_loop(PyUFuncObject *ufunc, * resolve 
broadcasting, etc */ NPY_UF_DBG_PRINT("iterator loop\n"); - if (iterator_loop(ufunc, op, dtypes, order, - buffersize, arr_prep, full_args, - innerloop, innerloopdata, op_flags) < 0) { + if (execute_ufunc_loop(ufunc, 0, op, dtypes, order, + buffersize, arr_prep, full_args, op_flags) < 0) { return -1; } return 0; } -/* - * nin - number of inputs - * nout - number of outputs - * wheremask - if not NULL, the 'where=' parameter to the ufunc. - * op - the operands (nin + nout of them) - * order - the loop execution order/output memory order - * buffersize - how big of a buffer to use - * arr_prep - the __array_prepare__ functions for the outputs - * innerloop - the inner loop function - * innerloopdata - data to pass to the inner loop - */ -static int -execute_fancy_ufunc_loop(PyUFuncObject *ufunc, - PyArrayObject *wheremask, - PyArrayObject **op, - PyArray_Descr **dtypes, - NPY_ORDER order, - npy_intp buffersize, - PyObject **arr_prep, - ufunc_full_args full_args, - npy_uint32 *op_flags) -{ - int i, nin = ufunc->nin, nout = ufunc->nout; - int nop = nin + nout; - NpyIter *iter; - int needs_api; - - NpyIter_IterNextFunc *iternext; - char **dataptr; - npy_intp *strides; - npy_intp *countptr; - - PyArrayObject **op_it; - npy_uint32 iter_flags; - - for (i = nin; i < nop; ++i) { - op_flags[i] |= (op[i] != NULL ? NPY_ITER_READWRITE : NPY_ITER_WRITEONLY); - } - - if (wheremask != NULL) { - op_flags[nop] = NPY_ITER_READONLY | NPY_ITER_ARRAYMASK; - } - - NPY_UF_DBG_PRINT("Making iterator\n"); - - iter_flags = ufunc->iter_flags | - NPY_ITER_EXTERNAL_LOOP | - NPY_ITER_REFS_OK | - NPY_ITER_ZEROSIZE_OK | - NPY_ITER_BUFFERED | - NPY_ITER_GROWINNER | - NPY_ITER_COPY_IF_OVERLAP; - - /* - * Allocate the iterator. Because the types of the inputs - * were already checked, we use the casting rule 'unsafe' which - * is faster to calculate. - */ - iter = NpyIter_AdvancedNew(nop + ((wheremask != NULL) ? 
1 : 0), op, - iter_flags, - order, NPY_UNSAFE_CASTING, - op_flags, dtypes, - -1, NULL, NULL, buffersize); - if (iter == NULL) { - return -1; - } - - NPY_UF_DBG_PRINT("Made iterator\n"); - - needs_api = NpyIter_IterationNeedsAPI(iter); - - /* Call the __array_prepare__ functions where necessary */ - op_it = NpyIter_GetOperandArray(iter); - for (i = 0; i < nout; ++i) { - PyArrayObject *op_tmp; - - /* - * The array can be allocated by the iterator -- it is placed in op[i] - * and returned to the caller, and this needs an extra incref. - */ - if (op[i+nin] == NULL) { - op_tmp = op_it[i+nin]; - Py_INCREF(op_tmp); - } - else { - op_tmp = op[i+nin]; - op[i+nin] = NULL; - } - - /* prepare_ufunc_output may decref & replace the pointer */ - char *original_data = PyArray_BYTES(op_tmp); - - if (prepare_ufunc_output(ufunc, &op_tmp, - arr_prep[i], full_args, i) < 0) { - NpyIter_Deallocate(iter); - return -1; - } - - /* Validate that the prepare_ufunc_output didn't mess with pointers */ - if (PyArray_BYTES(op_tmp) != original_data) { - PyErr_SetString(PyExc_ValueError, - "The __array_prepare__ functions modified the data " - "pointer addresses in an invalid fashion"); - Py_DECREF(op_tmp); - NpyIter_Deallocate(iter); - return -1; - } - - /* - * Put the updated operand back. If COPY_IF_OVERLAP made a temporary - * copy, the output will be copied by WRITEBACKIFCOPY even if op[i] - * was changed by prepare_ufunc_output. - */ - op[i+nin] = op_tmp; - } - - /* Only do the loop if the iteration size is non-zero */ - if (NpyIter_GetIterSize(iter) != 0) { - PyUFunc_MaskedStridedInnerLoopFunc *innerloop; - NpyAuxData *innerloopdata; - npy_intp fixed_strides[2*NPY_MAXARGS]; - PyArray_Descr **iter_dtypes; - NPY_BEGIN_THREADS_DEF; - - /* - * Get the inner loop, with the possibility of specialization - * based on the fixed strides. 
- */ - NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); - iter_dtypes = NpyIter_GetDescrArray(iter); - if (ufunc->masked_inner_loop_selector(ufunc, dtypes, - wheremask != NULL ? iter_dtypes[nop] - : iter_dtypes[nop + nin], - fixed_strides, - wheremask != NULL ? fixed_strides[nop] - : fixed_strides[nop + nin], - &innerloop, &innerloopdata, &needs_api) < 0) { - NpyIter_Deallocate(iter); - return -1; - } - - /* Get the variables needed for the loop */ - iternext = NpyIter_GetIterNext(iter, NULL); - if (iternext == NULL) { - NpyIter_Deallocate(iter); - return -1; - } - dataptr = NpyIter_GetDataPtrArray(iter); - strides = NpyIter_GetInnerStrideArray(iter); - countptr = NpyIter_GetInnerLoopSizePtr(iter); - needs_api = NpyIter_IterationNeedsAPI(iter); - - NPY_BEGIN_THREADS_NDITER(iter); - - NPY_UF_DBG_PRINT("Actual inner loop:\n"); - /* Execute the loop */ - do { - NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)*countptr); - innerloop(dataptr, strides, - dataptr[nop], strides[nop], - *countptr, innerloopdata); - } while (!(needs_api && PyErr_Occurred()) && iternext(iter)); - - NPY_END_THREADS; - - NPY_AUXDATA_FREE(innerloopdata); - } - - return NpyIter_Deallocate(iter); -} - /* * Validate that operands have enough dimensions, accounting for @@ -2634,7 +2518,7 @@ PyUFunc_GenericFunctionInternal(PyUFuncObject *ufunc, /* Set up the flags */ npy_clear_floatstatus_barrier((char*)&ufunc); - retval = execute_fancy_ufunc_loop(ufunc, wheremask, + retval = execute_ufunc_loop(ufunc, 1, op, operation_descrs, order, buffersize, output_array_prepare, full_args, op_flags); @@ -5192,7 +5076,7 @@ PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction *func, voi /* Type resolution and inner loop selection functions */ ufunc->type_resolver = &PyUFunc_DefaultTypeResolver; ufunc->legacy_inner_loop_selector = &PyUFunc_DefaultLegacyInnerLoopSelector; - ufunc->masked_inner_loop_selector = &PyUFunc_DefaultMaskedInnerLoopSelector; + 
ufunc->_always_null_previously_masked_innerloop_selector = NULL; if (name == NULL) { ufunc->name = "?"; diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c index 2834235e409f..72b3a6535b98 100644 --- a/numpy/core/src/umath/ufunc_type_resolution.c +++ b/numpy/core/src/umath/ufunc_type_resolution.c @@ -1508,30 +1508,30 @@ PyUFunc_DefaultLegacyInnerLoopSelector(PyUFuncObject *ufunc, return raise_no_loop_found_error(ufunc, dtypes); } + +/* + * Support for masked inner-strided loops. These are currently ONLY used + * for normal ufuncs, and only a generic loop getter exists. + * It may make sense to generalize this in the future or allow specialization. + * Until then, the inner-loop signature is flexible. + */ typedef struct { NpyAuxData base; - PyUFuncGenericFunction unmasked_innerloop; - void *unmasked_innerloopdata; + PyUFuncGenericFunction unmasked_stridedloop; + void *innerloopdata; int nargs; -} _ufunc_masker_data; - -static NpyAuxData * -ufunc_masker_data_clone(NpyAuxData *data) -{ - _ufunc_masker_data *n; + char *dataptrs[]; +} _masked_stridedloop_data; - /* Allocate a new one */ - n = (_ufunc_masker_data *)PyArray_malloc(sizeof(_ufunc_masker_data)); - if (n == NULL) { - return NULL; - } - - /* Copy the data (unmasked data doesn't have object semantics) */ - memcpy(n, data, sizeof(_ufunc_masker_data)); - return (NpyAuxData *)n; +static void +_masked_stridedloop_data_free(NpyAuxData *auxdata) +{ + _masked_stridedloop_data *data = (_masked_stridedloop_data *)auxdata; + PyMem_Free(data); } + /* * This function wraps a regular unmasked ufunc inner loop as a * masked ufunc inner loop, only calling the function for @@ -1539,43 +1539,39 @@ ufunc_masker_data_clone(NpyAuxData *data) */ static void unmasked_ufunc_loop_as_masked( - char **dataptrs, npy_intp *strides, - char *mask, npy_intp mask_stride, - npy_intp loopsize, - NpyAuxData *innerloopdata) + char **data, const npy_intp *dimensions, + const npy_intp *strides, 
void *_auxdata) { - _ufunc_masker_data *data; - int iargs, nargs; - PyUFuncGenericFunction unmasked_innerloop; - void *unmasked_innerloopdata; - npy_intp subloopsize; - - /* Put the aux data into local variables */ - data = (_ufunc_masker_data *)innerloopdata; - unmasked_innerloop = data->unmasked_innerloop; - unmasked_innerloopdata = data->unmasked_innerloopdata; - nargs = data->nargs; + _masked_stridedloop_data *auxdata = (_masked_stridedloop_data *)_auxdata; + int nargs = auxdata->nargs; + PyUFuncGenericFunction strided_loop = auxdata->unmasked_stridedloop; + void *innerloopdata = auxdata->innerloopdata; + + char **dataptrs = auxdata->dataptrs; + memcpy(dataptrs, data, nargs * sizeof(char *)); + char *mask = data[nargs]; + npy_intp mask_stride = strides[nargs]; + npy_intp N = dimensions[0]; /* Process the data as runs of unmasked values */ do { + ssize_t subloopsize; + /* Skip masked values */ - mask = npy_memchr(mask, 0, mask_stride, loopsize, &subloopsize, 1); - for (iargs = 0; iargs < nargs; ++iargs) { - dataptrs[iargs] += subloopsize * strides[iargs]; + mask = npy_memchr(mask, 0, mask_stride, N, &subloopsize, 1); + for (int i = 0; i < nargs; i++) { + dataptrs[i] += subloopsize * strides[i]; } - loopsize -= subloopsize; - /* - * Process unmasked values (assumes unmasked loop doesn't - * mess with the 'args' pointer values) - */ - mask = npy_memchr(mask, 0, mask_stride, loopsize, &subloopsize, 0); - unmasked_innerloop(dataptrs, &subloopsize, strides, - unmasked_innerloopdata); - for (iargs = 0; iargs < nargs; ++iargs) { - dataptrs[iargs] += subloopsize * strides[iargs]; - } - loopsize -= subloopsize; - } while (loopsize > 0); + N -= subloopsize; + + /* Process unmasked values */ + mask = npy_memchr(mask, 0, mask_stride, N, &subloopsize, 0); + strided_loop(dataptrs, &subloopsize, strides, innerloopdata); + for (int i = 0; i < nargs; i++) { + dataptrs[i] += subloopsize * strides[i]; + } + N -= subloopsize; + } while (N > 0); } @@ -1587,15 +1583,13 @@ 
unmasked_ufunc_loop_as_masked( NPY_NO_EXPORT int PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc, PyArray_Descr **dtypes, - PyArray_Descr *mask_dtype, - npy_intp *NPY_UNUSED(fixed_strides), - npy_intp NPY_UNUSED(fixed_mask_stride), - PyUFunc_MaskedStridedInnerLoopFunc **out_innerloop, + PyUFuncGenericFunction *out_innerloop, NpyAuxData **out_innerloopdata, int *out_needs_api) { int retcode; - _ufunc_masker_data *data; + _masked_stridedloop_data *data; + int nargs = ufunc->nin + ufunc->nout; if (ufunc->legacy_inner_loop_selector == NULL) { PyErr_SetString(PyExc_RuntimeError, @@ -1605,27 +1599,21 @@ PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc, return -1; } - if (mask_dtype->type_num != NPY_BOOL) { - PyErr_SetString(PyExc_ValueError, - "only boolean masks are supported in ufunc inner loops " - "presently"); - return -1; - } - - /* Create a new NpyAuxData object for the masker data */ - data = (_ufunc_masker_data *)PyArray_malloc(sizeof(_ufunc_masker_data)); + /* Add working memory for the data pointers, to modify them in-place */ + data = PyMem_Malloc(sizeof(_masked_stridedloop_data) + + sizeof(char *) * nargs); if (data == NULL) { PyErr_NoMemory(); return -1; } - memset(data, 0, sizeof(_ufunc_masker_data)); - data->base.free = (NpyAuxData_FreeFunc *)&PyArray_free; - data->base.clone = &ufunc_masker_data_clone; - data->nargs = ufunc->nin + ufunc->nout; + data->base.free = _masked_stridedloop_data_free; + data->base.clone = NULL; /* not currently used */ + data->unmasked_stridedloop = NULL; + data->nargs = nargs; /* Get the unmasked ufunc inner loop */ retcode = ufunc->legacy_inner_loop_selector(ufunc, dtypes, - &data->unmasked_innerloop, &data->unmasked_innerloopdata, + &data->unmasked_stridedloop, &data->innerloopdata, out_needs_api); if (retcode < 0) { PyArray_free(data); diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h index b11c69852889..fdad19b3da56 100644 --- 
a/numpy/core/src/umath/ufunc_type_resolution.h +++ b/numpy/core/src/umath/ufunc_type_resolution.h @@ -138,11 +138,7 @@ PyUFunc_DefaultLegacyInnerLoopSelector(PyUFuncObject *ufunc, NPY_NO_EXPORT int PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc, PyArray_Descr **dtypes, - PyArray_Descr *mask_dtypes, - npy_intp *NPY_UNUSED(fixed_strides), - npy_intp NPY_UNUSED(fixed_mask_stride), - PyUFunc_MaskedStridedInnerLoopFunc - **out_innerloop, + PyUFuncGenericFunction *out_innerloop, NpyAuxData **out_innerloopdata, int *out_needs_api); diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index a2d6b3989cb7..a065ca3630e9 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -2267,10 +2267,7 @@ def __array_prepare__(self, arr, context): a = np.array(1).view(type=with_prepare) if use_where: - # Currently raises, due to the array being replaced during prepare - with pytest.raises(ValueError): - x = np.add(a, a, where=np.array(True)) - return + x = np.add(a, a, where=np.array(True)) else: x = np.add(a, a) assert_equal(x, np.array(2)) @@ -2287,10 +2284,7 @@ def __array_prepare__(self, arr, context): a = np.array([1]).view(type=with_prepare) if use_where: - # Currently raises, due to the array being replaced during prepare - with pytest.raises(ValueError): - x = np.add(a, a, a, where=[True]) - return + x = np.add(a, a, a, where=[True]) else: x = np.add(a, a, a) # Returned array is new, because of the strange From 4a77b710741cc31c0c0858d5fb78ff0d68d0882c Mon Sep 17 00:00:00 2001 From: Matti Picus Date: Sun, 27 Jun 2021 19:20:49 +0300 Subject: [PATCH 2/3] typo --- doc/release/upcoming_changes/19259.c_api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release/upcoming_changes/19259.c_api.rst b/doc/release/upcoming_changes/19259.c_api.rst index 0fbc3d0bf07d..4f29e18a5875 100644 --- a/doc/release/upcoming_changes/19259.c_api.rst +++ b/doc/release/upcoming_changes/19259.c_api.rst @@ -4,7 +4,7 @@ 
The masked inner-loop selector is now never used. A warning will be given in the unlikely event that it was customized. We do not expect that any code uses this. If you do use it, -you must unset unset the selector on newer NumPy version. +you must unset the selector on newer NumPy version. Please also contact the NumPy developers, we do anticipate providing a new, more specific, mechanism. From 061ac77a06edbc503bb552e4d4ca6729d524c861 Mon Sep 17 00:00:00 2001 From: Matti Picus Date: Sun, 27 Jun 2021 19:23:23 +0300 Subject: [PATCH 3/3] shorten release note --- doc/release/upcoming_changes/19259.c_api.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/doc/release/upcoming_changes/19259.c_api.rst b/doc/release/upcoming_changes/19259.c_api.rst index 4f29e18a5875..dac9f520a76f 100644 --- a/doc/release/upcoming_changes/19259.c_api.rst +++ b/doc/release/upcoming_changes/19259.c_api.rst @@ -8,7 +8,5 @@ you must unset the selector on newer NumPy version. Please also contact the NumPy developers, we do anticipate providing a new, more specific, mechanism. -This change will not affect the results of operations, since -the fallback (which is always used internally) will handle -the operation equivalently, the customization was a planned -feature to allow for faster masked operation. +The customization was part of a never-implemented feature to allow +for faster masked operations.