Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG, SIMD: Fix memory overlap in ufunc comparison loops #22851

Merged
merged 2 commits into from
Dec 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 0 additions & 13 deletions numpy/core/src/umath/fast_loop_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -378,19 +378,6 @@ abs_ptrdiff(char *a, char *b)

#undef abs_ptrdiff

#define IS_BLOCKABLE_BINARY_BOOL(esize, vsize) \
(steps[0] == (esize) && steps[0] == steps[1] && steps[2] == (1) && \
npy_is_aligned(args[1], (esize)) && \
npy_is_aligned(args[0], (esize)))

#define IS_BLOCKABLE_BINARY_SCALAR1_BOOL(esize, vsize) \
(steps[0] == 0 && steps[1] == (esize) && steps[2] == (1) && \
npy_is_aligned(args[1], (esize)))

#define IS_BLOCKABLE_BINARY_SCALAR2_BOOL(esize, vsize) \
(steps[0] == (esize) && steps[1] == 0 && steps[2] == (1) && \
npy_is_aligned(args[0], (esize)))

/* align var to alignment */
#define LOOP_BLOCK_ALIGN_VAR(var, type, alignment)\
npy_intp i, peel = npy_aligned_block_offset(var, sizeof(type),\
Expand Down
30 changes: 17 additions & 13 deletions numpy/core/src/umath/loops_comparison.dispatch.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -312,19 +312,23 @@ static inline void
run_binary_simd_@kind@_@sfx@(char **args, npy_intp const *dimensions, npy_intp const *steps)
{
#if @VECTOR@
/* argument one scalar */
if (IS_BLOCKABLE_BINARY_SCALAR1_BOOL(sizeof(@type@), NPY_SIMD_WIDTH)) {
simd_binary_scalar1_@kind@_@sfx@(args, dimensions[0]);
return;
}
/* argument two scalar */
else if (IS_BLOCKABLE_BINARY_SCALAR2_BOOL(sizeof(@type@), NPY_SIMD_WIDTH)) {
simd_binary_scalar2_@kind@_@sfx@(args, dimensions[0]);
return;
}
else if (IS_BLOCKABLE_BINARY_BOOL(sizeof(@type@), NPY_SIMD_WIDTH)) {
simd_binary_@kind@_@sfx@(args, dimensions[0]);
return;
if (!is_mem_overlap(args[0], steps[0], args[2], steps[2], dimensions[0]) &&
!is_mem_overlap(args[1], steps[1], args[2], steps[2], dimensions[0])
) {
/* argument one scalar */
if (IS_BINARY_CONT_S1(@type@, npy_bool)) {
simd_binary_scalar1_@kind@_@sfx@(args, dimensions[0]);
return;
}
/* argument two scalar */
else if (IS_BINARY_CONT_S2(@type@, npy_bool)) {
simd_binary_scalar2_@kind@_@sfx@(args, dimensions[0]);
return;
}
else if (IS_BINARY_CONT(@type@, npy_bool)) {
simd_binary_@kind@_@sfx@(args, dimensions[0]);
return;
}
}
#endif

Expand Down
40 changes: 40 additions & 0 deletions numpy/core/tests/test_umath.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@
uf for uf in UFUNCS_UNARY if 'f->f' in uf.types
]

UFUNCS_BINARY = [
uf for uf in UFUNCS if uf.nin == 2
]
UFUNCS_BINARY_ACC = [
uf for uf in UFUNCS_BINARY if hasattr(uf, "accumulate") and uf.nout == 1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Btw. the hasattr shouldn't do anything right now. Checking here that ufunc.signature is None would make more sense to me.
But its mainly a nitpick and the rest is important for backporting.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for clarification, in that case, the following skip can be avoided:

if ufunc.signature:
pytest.skip('For generic signatures only')

]

def interesting_binop_operands(val1, val2, dtype):
"""
Helper to create "interesting" operands to cover common code paths:
Expand Down Expand Up @@ -4329,6 +4336,7 @@ def test_rint_big_int():
# Rint should not change the value
assert_equal(val, np.rint(val))


@pytest.mark.parametrize('ftype', [np.float32, np.float64])
def test_memoverlap_accumulate(ftype):
# Reproduces bug https://github.com/numpy/numpy/issues/15597
Expand All @@ -4338,6 +4346,38 @@ def test_memoverlap_accumulate(ftype):
assert_equal(np.maximum.accumulate(arr), out_max)
assert_equal(np.minimum.accumulate(arr), out_min)

@pytest.mark.parametrize("ufunc, dtype", [
(ufunc, t[0])
for ufunc in UFUNCS_BINARY_ACC
for t in ufunc.types
if t[-1] == '?' and t[0] not in 'DFGMmO'
])
def test_memoverlap_accumulate_cmp(ufunc, dtype):
if ufunc.signature:
pytest.skip('For generic signatures only')
for size in (2, 8, 32, 64, 128, 256):
arr = np.array([0, 1, 1]*size, dtype=dtype)
acc = ufunc.accumulate(arr, dtype='?')
acc_u8 = acc.view(np.uint8)
exp = np.array(list(itertools.accumulate(arr, ufunc)), dtype=np.uint8)
assert_equal(exp, acc_u8)

@pytest.mark.parametrize("ufunc, dtype", [
(ufunc, t[0])
for ufunc in UFUNCS_BINARY_ACC
for t in ufunc.types
if t[0] == t[1] and t[0] == t[-1] and t[0] not in 'DFGMmO?'
])
def test_memoverlap_accumulate_symmetric(ufunc, dtype):
if ufunc.signature:
pytest.skip('For generic signatures only')
with np.errstate(all='ignore'):
for size in (2, 8, 32, 64, 128, 256):
arr = np.array([0, 1, 2]*size).astype(dtype)
acc = ufunc.accumulate(arr, dtype=dtype)
exp = np.array(list(itertools.accumulate(arr, ufunc)), dtype=dtype)
assert_equal(exp, acc)

def test_signaling_nan_exceptions():
with assert_no_warnings():
a = np.ndarray(shape=(), dtype='float32', buffer=b'\x00\xe0\xbf\xff')
Expand Down