From fa4a2cc5a8359f5edb38effa70ccf430def2ad7f Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Tue, 15 Jun 2021 20:08:39 -0500 Subject: [PATCH] MAINT: Align masked with normal ufunc loops This removes the ability to specialize masked inner loops (for now) as was already noted in NEP 41 and NEP 43. The masked array is now passed in as the last argument to use the identical signature and avoid duplicating the code unnecessarily. This is part of the longer process to refactor ufuncs to NEP 43 and split out, to keep the diffs shorter (or at least easier to read). --- doc/release/upcoming_changes/19259.c_api.rst | 14 + numpy/core/include/numpy/ufuncobject.h | 35 +-- numpy/core/src/umath/ufunc_object.c | 277 ++++++------------- numpy/core/src/umath/ufunc_type_resolution.c | 122 ++++---- numpy/core/src/umath/ufunc_type_resolution.h | 6 +- 5 files changed, 156 insertions(+), 298 deletions(-) create mode 100644 doc/release/upcoming_changes/19259.c_api.rst diff --git a/doc/release/upcoming_changes/19259.c_api.rst b/doc/release/upcoming_changes/19259.c_api.rst new file mode 100644 index 000000000000..0fbc3d0bf07d --- /dev/null +++ b/doc/release/upcoming_changes/19259.c_api.rst @@ -0,0 +1,14 @@ +Masked inner-loops cannot be customized anymore +----------------------------------------------- +The masked inner-loop selector is now never used. A warning +will be given in the unlikely event that it was customized. + +We do not expect that any code uses this. If you do use it, +you must unset the selector on newer NumPy versions. +Please also contact the NumPy developers, we do anticipate +providing a new, more specific, mechanism. + +This change will not affect the results of operations, since +the fallback (which is always used internally) will handle +the operation equivalently, the customization was a planned +feature to allow for faster masked operation. 
diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h index 333a326ee60e..0f3b8529aaa3 100644 --- a/numpy/core/include/numpy/ufuncobject.h +++ b/numpy/core/include/numpy/ufuncobject.h @@ -66,27 +66,14 @@ typedef int (PyUFunc_TypeResolutionFunc)( PyArray_Descr **out_dtypes); /* - * Given an array of DTypes as returned by the PyUFunc_TypeResolutionFunc, - * and an array of fixed strides (the array will contain NPY_MAX_INTP for - * strides which are not necessarily fixed), returns an inner loop - * with associated auxiliary data. - * - * For backwards compatibility, there is a variant of the inner loop - * selection which returns an inner loop irrespective of the strides, - * and with a void* static auxiliary data instead of an NpyAuxData * - * dynamically allocatable auxiliary data. + * Legacy loop selector. (This should NOT normally be used and we can expect + * that only the `PyUFunc_DefaultLegacyInnerLoopSelector` is ever set). + * However, unlike the masked version, it probably still works. * * ufunc: The ufunc object. * dtypes: An array which has been populated with dtypes, * in most cases by the type resolution function * for the same ufunc. - * fixed_strides: For each input/output, either the stride that - * will be used every time the function is called - * or NPY_MAX_INTP if the stride might change or - * is not known ahead of time. The loop selection - * function may use this stride to pick inner loops - * which are optimized for contiguous or 0-stride - * cases. * out_innerloop: Should be populated with the correct ufunc inner * loop for the given type. 
* out_innerloopdata: Should be populated with the void* data to @@ -101,15 +88,7 @@ typedef int (PyUFunc_LegacyInnerLoopSelectionFunc)( PyUFuncGenericFunction *out_innerloop, void **out_innerloopdata, int *out_needs_api); -typedef int (PyUFunc_MaskedInnerLoopSelectionFunc)( - struct _tagPyUFuncObject *ufunc, - PyArray_Descr **dtypes, - PyArray_Descr *mask_dtype, - npy_intp *fixed_strides, - npy_intp fixed_mask_stride, - PyUFunc_MaskedStridedInnerLoopFunc **out_innerloop, - NpyAuxData **out_innerloopdata, - int *out_needs_api); + typedef struct _tagPyUFuncObject { PyObject_HEAD @@ -199,10 +178,8 @@ typedef struct _tagPyUFuncObject { #else void *reserved2; #endif - /* - * A function which returns a masked inner loop for the ufunc. - */ - PyUFunc_MaskedInnerLoopSelectionFunc *masked_inner_loop_selector; + /* Was previously the `PyUFunc_MaskedInnerLoopSelectionFunc` */ + void *_always_null_previously_masked_innerloop_selector; /* * List of flags for each operand when ufunc is called by nditer object. 
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 44973cf09720..f0eeea84b7d0 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -1123,8 +1123,7 @@ prepare_ufunc_output(PyUFuncObject *ufunc, static NPY_INLINE int try_trivial_single_output_loop(PyUFuncObject *ufunc, PyArrayObject *op[], PyArray_Descr *dtypes[], - NPY_ORDER order, PyObject *arr_prep[], ufunc_full_args full_args, - PyUFuncGenericFunction innerloop, void *innerloopdata) + NPY_ORDER order, PyObject *arr_prep[], ufunc_full_args full_args) { int nin = ufunc->nin; int nop = nin + 1; @@ -1235,6 +1234,13 @@ try_trivial_single_output_loop(PyUFuncObject *ufunc, int needs_api = 0; NPY_BEGIN_THREADS_DEF; + PyUFuncGenericFunction innerloop; + void *innerloopdata = NULL; + if (ufunc->legacy_inner_loop_selector(ufunc, dtypes, + &innerloop, &innerloopdata, &needs_api) < 0) { + return -1; + } + for (int iop = 0; iop < nop; iop++) { data[iop] = PyArray_BYTES(op[iop]); needs_api |= PyDataType_REFCHK(dtypes[iop]); @@ -1252,20 +1258,49 @@ try_trivial_single_output_loop(PyUFuncObject *ufunc, static int -iterator_loop(PyUFuncObject *ufunc, +execute_ufunc_loop(PyUFuncObject *ufunc, + int masked, PyArrayObject **op, - PyArray_Descr **dtype, + PyArray_Descr **dtypes, NPY_ORDER order, npy_intp buffersize, PyObject **arr_prep, ufunc_full_args full_args, - PyUFuncGenericFunction innerloop, - void *innerloopdata, npy_uint32 *op_flags) { int nin = ufunc->nin, nout = ufunc->nout; int nop = nin + nout; + if (masked) { + assert(PyArray_TYPE(op[nop]) == NPY_BOOL); + if (ufunc->_always_null_previously_masked_innerloop_selector != NULL) { + if (PyErr_WarnFormat(PyExc_UserWarning, 1, + "The ufunc %s has a custom masked-inner-loop-selector." + "NumPy assumes that this is NEVER used. If you do make " + "use of this please notify the NumPy developers to discuss " + "future solutions. 
(See NEP 41 and 43)\n" + "NumPy will continue, but ignore the custom loop selector. " + "This should only affect performance.", + ufunc_get_name_cstr(ufunc)) < 0) { + return -1; + } + } + + /* + * NOTE: In the masked version, we consider the output read-write, + * this gives a best-effort of preserving the input, but does + * not always work. It could allow the operand to be copied + * due to copy-if-overlap, but only if it was passed in. + * In that case `__array_prepare__` is called before it happens. + */ + for (int i = nin; i < nop; ++i) { + op_flags[i] |= (op[i] != NULL ? NPY_ITER_READWRITE : NPY_ITER_WRITEONLY); + } + op_flags[nop] = NPY_ITER_READONLY | NPY_ITER_ARRAYMASK; /* mask */ + } + + NPY_UF_DBG_PRINT("Making iterator\n"); + npy_uint32 iter_flags = ufunc->iter_flags | NPY_ITER_EXTERNAL_LOOP | NPY_ITER_REFS_OK | @@ -1295,10 +1330,10 @@ iterator_loop(PyUFuncObject *ufunc, * were already checked, we use the casting rule 'unsafe' which * is faster to calculate. */ - NpyIter *iter = NpyIter_AdvancedNew(nop, op, + NpyIter *iter = NpyIter_AdvancedNew(nop + masked, op, iter_flags, order, NPY_UNSAFE_CASTING, - op_flags, dtype, + op_flags, dtypes, -1, NULL, NULL, buffersize); if (iter == NULL) { return -1; @@ -1354,21 +1389,49 @@ iterator_loop(PyUFuncObject *ufunc, for (int i = 0; i < nin; i++) { baseptrs[i] = PyArray_BYTES(op_it[i]); } + if (masked) { + baseptrs[nop] = PyArray_BYTES(op_it[nop]); + } if (NpyIter_ResetBasePointers(iter, baseptrs, NULL) != NPY_SUCCEED) { NpyIter_Deallocate(iter); return -1; } + /* + * Get the inner loop. 
+ */ + int needs_api = 0; + PyUFuncGenericFunction innerloop; + void *innerloopdata = NULL; + if (masked) { + if (PyUFunc_DefaultMaskedInnerLoopSelector(ufunc, + dtypes, &innerloop, (NpyAuxData **)&innerloopdata, + &needs_api) < 0) { + NpyIter_Deallocate(iter); + return -1; + } + } + else { + if (ufunc->legacy_inner_loop_selector(ufunc, dtypes, + &innerloop, &innerloopdata, &needs_api) < 0) { + NpyIter_Deallocate(iter); + return -1; + } + } + /* Get the variables needed for the loop */ NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL); if (iternext == NULL) { NpyIter_Deallocate(iter); + if (masked) { + NPY_AUXDATA_FREE((NpyAuxData *)innerloopdata); + } return -1; } char **dataptr = NpyIter_GetDataPtrArray(iter); npy_intp *strides = NpyIter_GetInnerStrideArray(iter); npy_intp *countptr = NpyIter_GetInnerLoopSizePtr(iter); - int needs_api = NpyIter_IterationNeedsAPI(iter); + needs_api |= NpyIter_IterationNeedsAPI(iter); NPY_BEGIN_THREADS_DEF; @@ -1384,6 +1447,9 @@ iterator_loop(PyUFuncObject *ufunc, } while (!(needs_api && PyErr_Occurred()) && iternext(iter)); NPY_END_THREADS; + if (masked) { + NPY_AUXDATA_FREE((NpyAuxData *)innerloopdata); + } /* * Currently `innerloop` may leave an error set, in this case @@ -1417,20 +1483,10 @@ execute_legacy_ufunc_loop(PyUFuncObject *ufunc, ufunc_full_args full_args, npy_uint32 *op_flags) { - PyUFuncGenericFunction innerloop; - void *innerloopdata; - int needs_api = 0; - - if (ufunc->legacy_inner_loop_selector(ufunc, dtypes, - &innerloop, &innerloopdata, &needs_api) < 0) { - return -1; - } - /* First check for the trivial cases that don't need an iterator */ if (trivial_loop_ok && ufunc->nout == 1) { int fast_path_result = try_trivial_single_output_loop(ufunc, - op, dtypes, order, arr_prep, full_args, - innerloop, innerloopdata); + op, dtypes, order, arr_prep, full_args); if (fast_path_result != -2) { return fast_path_result; } @@ -1441,187 +1497,14 @@ execute_legacy_ufunc_loop(PyUFuncObject *ufunc, * resolve 
broadcasting, etc */ NPY_UF_DBG_PRINT("iterator loop\n"); - if (iterator_loop(ufunc, op, dtypes, order, - buffersize, arr_prep, full_args, - innerloop, innerloopdata, op_flags) < 0) { + if (execute_ufunc_loop(ufunc, 0, op, dtypes, order, + buffersize, arr_prep, full_args, op_flags) < 0) { return -1; } return 0; } -/* - * nin - number of inputs - * nout - number of outputs - * wheremask - if not NULL, the 'where=' parameter to the ufunc. - * op - the operands (nin + nout of them) - * order - the loop execution order/output memory order - * buffersize - how big of a buffer to use - * arr_prep - the __array_prepare__ functions for the outputs - * innerloop - the inner loop function - * innerloopdata - data to pass to the inner loop - */ -static int -execute_fancy_ufunc_loop(PyUFuncObject *ufunc, - PyArrayObject *wheremask, - PyArrayObject **op, - PyArray_Descr **dtypes, - NPY_ORDER order, - npy_intp buffersize, - PyObject **arr_prep, - ufunc_full_args full_args, - npy_uint32 *op_flags) -{ - int i, nin = ufunc->nin, nout = ufunc->nout; - int nop = nin + nout; - NpyIter *iter; - int needs_api; - - NpyIter_IterNextFunc *iternext; - char **dataptr; - npy_intp *strides; - npy_intp *countptr; - - PyArrayObject **op_it; - npy_uint32 iter_flags; - - for (i = nin; i < nop; ++i) { - op_flags[i] |= (op[i] != NULL ? NPY_ITER_READWRITE : NPY_ITER_WRITEONLY); - } - - if (wheremask != NULL) { - op_flags[nop] = NPY_ITER_READONLY | NPY_ITER_ARRAYMASK; - } - - NPY_UF_DBG_PRINT("Making iterator\n"); - - iter_flags = ufunc->iter_flags | - NPY_ITER_EXTERNAL_LOOP | - NPY_ITER_REFS_OK | - NPY_ITER_ZEROSIZE_OK | - NPY_ITER_BUFFERED | - NPY_ITER_GROWINNER | - NPY_ITER_COPY_IF_OVERLAP; - - /* - * Allocate the iterator. Because the types of the inputs - * were already checked, we use the casting rule 'unsafe' which - * is faster to calculate. - */ - iter = NpyIter_AdvancedNew(nop + ((wheremask != NULL) ? 
1 : 0), op, - iter_flags, - order, NPY_UNSAFE_CASTING, - op_flags, dtypes, - -1, NULL, NULL, buffersize); - if (iter == NULL) { - return -1; - } - - NPY_UF_DBG_PRINT("Made iterator\n"); - - needs_api = NpyIter_IterationNeedsAPI(iter); - - /* Call the __array_prepare__ functions where necessary */ - op_it = NpyIter_GetOperandArray(iter); - for (i = nin; i < nop; ++i) { - PyArrayObject *op_tmp, *orig_op_tmp; - - /* - * The array can be allocated by the iterator -- it is placed in op[i] - * and returned to the caller, and this needs an extra incref. - */ - if (op[i] == NULL) { - op_tmp = op_it[i]; - Py_INCREF(op_tmp); - } - else { - op_tmp = op[i]; - } - - /* prepare_ufunc_output may decref & replace the pointer */ - orig_op_tmp = op_tmp; - Py_INCREF(op_tmp); - - if (prepare_ufunc_output(ufunc, &op_tmp, - arr_prep[i], full_args, i) < 0) { - NpyIter_Deallocate(iter); - return -1; - } - - /* Validate that the prepare_ufunc_output didn't mess with pointers */ - if (PyArray_BYTES(op_tmp) != PyArray_BYTES(orig_op_tmp)) { - PyErr_SetString(PyExc_ValueError, - "The __array_prepare__ functions modified the data " - "pointer addresses in an invalid fashion"); - Py_DECREF(op_tmp); - NpyIter_Deallocate(iter); - return -1; - } - - /* - * Put the updated operand back and undo the DECREF above. If - * COPY_IF_OVERLAP made a temporary copy, the output will be copied - * by UPDATEIFCOPY even if op[i] was changed by prepare_ufunc_output. - */ - op[i] = op_tmp; - Py_DECREF(op_tmp); - } - - /* Only do the loop if the iteration size is non-zero */ - if (NpyIter_GetIterSize(iter) != 0) { - PyUFunc_MaskedStridedInnerLoopFunc *innerloop; - NpyAuxData *innerloopdata; - npy_intp fixed_strides[2*NPY_MAXARGS]; - PyArray_Descr **iter_dtypes; - NPY_BEGIN_THREADS_DEF; - - /* - * Get the inner loop, with the possibility of specialization - * based on the fixed strides. 
- */ - NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); - iter_dtypes = NpyIter_GetDescrArray(iter); - if (ufunc->masked_inner_loop_selector(ufunc, dtypes, - wheremask != NULL ? iter_dtypes[nop] - : iter_dtypes[nop + nin], - fixed_strides, - wheremask != NULL ? fixed_strides[nop] - : fixed_strides[nop + nin], - &innerloop, &innerloopdata, &needs_api) < 0) { - NpyIter_Deallocate(iter); - return -1; - } - - /* Get the variables needed for the loop */ - iternext = NpyIter_GetIterNext(iter, NULL); - if (iternext == NULL) { - NpyIter_Deallocate(iter); - return -1; - } - dataptr = NpyIter_GetDataPtrArray(iter); - strides = NpyIter_GetInnerStrideArray(iter); - countptr = NpyIter_GetInnerLoopSizePtr(iter); - needs_api = NpyIter_IterationNeedsAPI(iter); - - NPY_BEGIN_THREADS_NDITER(iter); - - NPY_UF_DBG_PRINT("Actual inner loop:\n"); - /* Execute the loop */ - do { - NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)*countptr); - innerloop(dataptr, strides, - dataptr[nop], strides[nop], - *countptr, innerloopdata); - } while (!(needs_api && PyErr_Occurred()) && iternext(iter)); - - NPY_END_THREADS; - - NPY_AUXDATA_FREE(innerloopdata); - } - - return NpyIter_Deallocate(iter); -} - /* * Validate that operands have enough dimensions, accounting for @@ -2725,7 +2608,7 @@ PyUFunc_GenericFunctionInternal(PyUFuncObject *ufunc, PyArrayObject **op, /* Set up the flags */ npy_clear_floatstatus_barrier((char*)&ufunc); - retval = execute_fancy_ufunc_loop(ufunc, wheremask, + retval = execute_ufunc_loop(ufunc, 1, op, dtypes, order, buffersize, arr_prep, full_args, op_flags); } @@ -5261,7 +5144,7 @@ PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction *func, voi /* Type resolution and inner loop selection functions */ ufunc->type_resolver = &PyUFunc_DefaultTypeResolver; ufunc->legacy_inner_loop_selector = &PyUFunc_DefaultLegacyInnerLoopSelector; - ufunc->masked_inner_loop_selector = &PyUFunc_DefaultMaskedInnerLoopSelector; + 
ufunc->_always_null_previously_masked_innerloop_selector = NULL; if (name == NULL) { ufunc->name = "?"; diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c index 2834235e409f..72b3a6535b98 100644 --- a/numpy/core/src/umath/ufunc_type_resolution.c +++ b/numpy/core/src/umath/ufunc_type_resolution.c @@ -1508,30 +1508,30 @@ PyUFunc_DefaultLegacyInnerLoopSelector(PyUFuncObject *ufunc, return raise_no_loop_found_error(ufunc, dtypes); } + +/* + * Support for masked inner-strided loops. These are currently ONLY used + * for normal ufuncs, and only a generic loop getter exists. + * It may make sense to generalize this in the future or allow specialization. + * Until then, the inner-loop signature is flexible. + */ typedef struct { NpyAuxData base; - PyUFuncGenericFunction unmasked_innerloop; - void *unmasked_innerloopdata; + PyUFuncGenericFunction unmasked_stridedloop; + void *innerloopdata; int nargs; -} _ufunc_masker_data; - -static NpyAuxData * -ufunc_masker_data_clone(NpyAuxData *data) -{ - _ufunc_masker_data *n; + char *dataptrs[]; +} _masked_stridedloop_data; - /* Allocate a new one */ - n = (_ufunc_masker_data *)PyArray_malloc(sizeof(_ufunc_masker_data)); - if (n == NULL) { - return NULL; - } - - /* Copy the data (unmasked data doesn't have object semantics) */ - memcpy(n, data, sizeof(_ufunc_masker_data)); - return (NpyAuxData *)n; +static void +_masked_stridedloop_data_free(NpyAuxData *auxdata) +{ + _masked_stridedloop_data *data = (_masked_stridedloop_data *)auxdata; + PyMem_Free(data); } + /* * This function wraps a regular unmasked ufunc inner loop as a * masked ufunc inner loop, only calling the function for @@ -1539,43 +1539,39 @@ ufunc_masker_data_clone(NpyAuxData *data) */ static void unmasked_ufunc_loop_as_masked( - char **dataptrs, npy_intp *strides, - char *mask, npy_intp mask_stride, - npy_intp loopsize, - NpyAuxData *innerloopdata) + char **data, const npy_intp *dimensions, + const npy_intp *strides, 
void *_auxdata) { - _ufunc_masker_data *data; - int iargs, nargs; - PyUFuncGenericFunction unmasked_innerloop; - void *unmasked_innerloopdata; - npy_intp subloopsize; - - /* Put the aux data into local variables */ - data = (_ufunc_masker_data *)innerloopdata; - unmasked_innerloop = data->unmasked_innerloop; - unmasked_innerloopdata = data->unmasked_innerloopdata; - nargs = data->nargs; + _masked_stridedloop_data *auxdata = (_masked_stridedloop_data *)_auxdata; + int nargs = auxdata->nargs; + PyUFuncGenericFunction strided_loop = auxdata->unmasked_stridedloop; + void *innerloopdata = auxdata->innerloopdata; + + char **dataptrs = auxdata->dataptrs; + memcpy(dataptrs, data, nargs * sizeof(char *)); + char *mask = data[nargs]; + npy_intp mask_stride = strides[nargs]; + npy_intp N = dimensions[0]; /* Process the data as runs of unmasked values */ do { + ssize_t subloopsize; + /* Skip masked values */ - mask = npy_memchr(mask, 0, mask_stride, loopsize, &subloopsize, 1); - for (iargs = 0; iargs < nargs; ++iargs) { - dataptrs[iargs] += subloopsize * strides[iargs]; + mask = npy_memchr(mask, 0, mask_stride, N, &subloopsize, 1); + for (int i = 0; i < nargs; i++) { + dataptrs[i] += subloopsize * strides[i]; } - loopsize -= subloopsize; - /* - * Process unmasked values (assumes unmasked loop doesn't - * mess with the 'args' pointer values) - */ - mask = npy_memchr(mask, 0, mask_stride, loopsize, &subloopsize, 0); - unmasked_innerloop(dataptrs, &subloopsize, strides, - unmasked_innerloopdata); - for (iargs = 0; iargs < nargs; ++iargs) { - dataptrs[iargs] += subloopsize * strides[iargs]; - } - loopsize -= subloopsize; - } while (loopsize > 0); + N -= subloopsize; + + /* Process unmasked values */ + mask = npy_memchr(mask, 0, mask_stride, N, &subloopsize, 0); + strided_loop(dataptrs, &subloopsize, strides, innerloopdata); + for (int i = 0; i < nargs; i++) { + dataptrs[i] += subloopsize * strides[i]; + } + N -= subloopsize; + } while (N > 0); } @@ -1587,15 +1583,13 @@ 
unmasked_ufunc_loop_as_masked( NPY_NO_EXPORT int PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc, PyArray_Descr **dtypes, - PyArray_Descr *mask_dtype, - npy_intp *NPY_UNUSED(fixed_strides), - npy_intp NPY_UNUSED(fixed_mask_stride), - PyUFunc_MaskedStridedInnerLoopFunc **out_innerloop, + PyUFuncGenericFunction *out_innerloop, NpyAuxData **out_innerloopdata, int *out_needs_api) { int retcode; - _ufunc_masker_data *data; + _masked_stridedloop_data *data; + int nargs = ufunc->nin + ufunc->nout; if (ufunc->legacy_inner_loop_selector == NULL) { PyErr_SetString(PyExc_RuntimeError, @@ -1605,27 +1599,21 @@ PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc, return -1; } - if (mask_dtype->type_num != NPY_BOOL) { - PyErr_SetString(PyExc_ValueError, - "only boolean masks are supported in ufunc inner loops " - "presently"); - return -1; - } - - /* Create a new NpyAuxData object for the masker data */ - data = (_ufunc_masker_data *)PyArray_malloc(sizeof(_ufunc_masker_data)); + /* Add working memory for the data pointers, to modify them in-place */ + data = PyMem_Malloc(sizeof(_masked_stridedloop_data) + + sizeof(char *) * nargs); if (data == NULL) { PyErr_NoMemory(); return -1; } - memset(data, 0, sizeof(_ufunc_masker_data)); - data->base.free = (NpyAuxData_FreeFunc *)&PyArray_free; - data->base.clone = &ufunc_masker_data_clone; - data->nargs = ufunc->nin + ufunc->nout; + data->base.free = _masked_stridedloop_data_free; + data->base.clone = NULL; /* not currently used */ + data->unmasked_stridedloop = NULL; + data->nargs = nargs; /* Get the unmasked ufunc inner loop */ retcode = ufunc->legacy_inner_loop_selector(ufunc, dtypes, - &data->unmasked_innerloop, &data->unmasked_innerloopdata, + &data->unmasked_stridedloop, &data->innerloopdata, out_needs_api); if (retcode < 0) { PyArray_free(data); diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h index b11c69852889..fdad19b3da56 100644 --- 
a/numpy/core/src/umath/ufunc_type_resolution.h +++ b/numpy/core/src/umath/ufunc_type_resolution.h @@ -138,11 +138,7 @@ PyUFunc_DefaultLegacyInnerLoopSelector(PyUFuncObject *ufunc, NPY_NO_EXPORT int PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc, PyArray_Descr **dtypes, - PyArray_Descr *mask_dtypes, - npy_intp *NPY_UNUSED(fixed_strides), - npy_intp NPY_UNUSED(fixed_mask_stride), - PyUFunc_MaskedStridedInnerLoopFunc - **out_innerloop, + PyUFuncGenericFunction *out_innerloop, NpyAuxData **out_innerloopdata, int *out_needs_api);