Skip to content

Commit

Permalink
ENH, SIMD: update dispatch-able sources to support IBM ZArch SIMD
Browse files Browse the repository at this point in the history
  • Loading branch information
seiko2plus committed Jan 27, 2022
1 parent 3025bb3 commit 73c476d
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 11 deletions.
2 changes: 1 addition & 1 deletion numpy/core/src/multiarray/einsum_sumprod.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
* 0*3#
* #NPYV_CHK = 0*5,
* 0*5,
* 0, NPY_SIMD, NPY_SIMD_F64, 0,
* 0, NPY_SIMD_F32, NPY_SIMD_F64, 0,
* 0*3#
*/

Expand Down
5 changes: 3 additions & 2 deletions numpy/core/src/umath/loops_arithm_fp.dispatch.c.src
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/*@targets
** $maxopt baseline
** sse2 avx2 avx512f
** vx vxe
**/
#define _UMATHMODULE
#define _MULTIARRAYMODULE
Expand Down Expand Up @@ -364,7 +365,7 @@ sse2_binary_scalar2_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_i
* #type = npy_float, npy_double#
* #TYPE = FLOAT, DOUBLE#
* #sfx = f32, f64#
* #CHK = , _F64#
* #CHK = _F32, _F64#
*/
#if NPY_SIMD@CHK@
/**begin repeat1
Expand Down Expand Up @@ -444,7 +445,7 @@ simd_binary_scalar2_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_i
* #type = npy_float, npy_double, npy_longdouble#
* #TYPE = FLOAT, DOUBLE, LONGDOUBLE#
* #vector = 1, 1, 0#
* #VECTOR = NPY_SIMD, NPY_SIMD_F64, 0 #
* #VECTOR = NPY_SIMD_F32, NPY_SIMD_F64, 0 #
*/
/**begin repeat1
* Arithmetic
Expand Down
1 change: 1 addition & 0 deletions numpy/core/src/umath/loops_arithmetic.dispatch.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
** sse2 sse41 avx2 avx512f avx512_skx
** vsx2
** neon
** vx
**/
#define _UMATHMODULE
#define _MULTIARRAYMODULE
Expand Down
8 changes: 6 additions & 2 deletions numpy/core/src/umath/loops_minmax.dispatch.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
** neon asimd
** sse2 avx2 avx512_skx
** vsx2
** vx vxe
**/
#define _UMATHMODULE
#define _MULTIARRAYMODULE
Expand Down Expand Up @@ -142,7 +143,7 @@ NPY_FINLINE @type@ scalar_@op@_@c_sfx@(@type@ a, @type@ b) {
/**begin repeat
* #sfx = f32, f64#
* #bsfx = b32, b64#
* #simd_chk = NPY_SIMD, NPY_SIMD_F64#
* #simd_chk = NPY_SIMD_F32, NPY_SIMD_F64#
* #scalar_sfx = f, d#
*/
#if @simd_chk@
Expand Down Expand Up @@ -194,7 +195,7 @@ NPY_FINLINE @type@ scalar_@op@_@c_sfx@(@type@ a, @type@ b) {
******************************************************************************/
/**begin repeat
* #sfx = s8, u8, s16, u16, s32, u32, s64, u64, f32, f64#
* #simd_chk = NPY_SIMD*9, NPY_SIMD_F64#
* #simd_chk = NPY_SIMD*8, NPY_SIMD_F32, NPY_SIMD_F64#
* #is_fp = 0*8, 1, 1#
* #scalar_sfx = i*8, f, d#
*/
Expand Down Expand Up @@ -393,6 +394,9 @@ simd_binary_@intrin@_@sfx@(const npyv_lanetype_@sfx@ *ip1, npy_intp sip1,
#elif NPY_SIMD && NPY_BITSOF_@BTYPE@ == @len@
#if @is_fp@
#define TO_SIMD_SFX(X) X##_f@len@
#if NPY_BITSOF_@BTYPE@ == 32 && !NPY_SIMD_F32
#undef TO_SIMD_SFX
#endif
#if NPY_BITSOF_@BTYPE@ == 64 && !NPY_SIMD_F64
#undef TO_SIMD_SFX
#endif
Expand Down
5 changes: 3 additions & 2 deletions numpy/core/src/umath/loops_trigonometric.dispatch.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
** (avx2 fma3) avx512f
** vsx2
** neon_vfpv4
** vxe vxe2
**/
#include "numpy/npy_math.h"
#include "simd/simd.h"
Expand All @@ -13,7 +14,7 @@
* - use a vectorized version of Payne-Hanek style reduction for large elements or
* when there's no native FUSED support, instead of falling back to libc
*/
#if NPY_SIMD_FMA3 // native support
#if NPY_SIMD_F32 && NPY_SIMD_FMA3 // native support
/*
* Vectorized Cody-Waite range reduction technique
* Performs the reduction step x* = x - y*C in three steps:
Expand Down Expand Up @@ -210,7 +211,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_@func@)
const npy_intp sdst = steps[1] / lsize;
npy_intp len = dimensions[0];
assert(len <= 1 || (steps[0] % lsize == 0 && steps[1] % lsize == 0));
#if NPY_SIMD_FMA3
#if NPY_SIMD_F32 && NPY_SIMD_FMA3
if (is_mem_overlap(src, steps[0], dst, steps[1], len) ||
!npyv_loadable_stride_f32(ssrc) || !npyv_storable_stride_f32(sdst)
) {
Expand Down
9 changes: 5 additions & 4 deletions numpy/core/src/umath/loops_unary_fp.dispatch.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
** sse2 sse41
** vsx2
** neon asimd
** vx vxe
**/
/**
* Force the use of SSE only on x86, even if AVX2 or AVX512F are enabled
Expand All @@ -18,7 +19,7 @@
/**********************************************************
** Scalars
**********************************************************/
#if !NPY_SIMD
#if !NPY_SIMD_F32
/* Scalar single-precision reciprocal: returns 1.0f divided by `a`. */
NPY_FINLINE float c_recip_f32(float a)
{
    const float one = 1.0f;
    return one / a;
}
NPY_FINLINE float c_abs_f32(float a)
Expand All @@ -29,7 +30,7 @@ NPY_FINLINE float c_abs_f32(float a)
}
/* Scalar single-precision square: returns `a` multiplied by itself. */
NPY_FINLINE float c_square_f32(float a)
{
    const float squared = a * a;
    return squared;
}
#endif // !NPY_SIMD
#endif // !NPY_SIMD_F32

#if !NPY_SIMD_F64
NPY_FINLINE double c_recip_f64(double a)
Expand Down Expand Up @@ -144,7 +145,7 @@ NPY_FINLINE double c_square_f64(double a)
/**begin repeat
* #TYPE = FLOAT, DOUBLE#
* #sfx = f32, f64#
* #VCHK = NPY_SIMD, NPY_SIMD_F64#
* #VCHK = NPY_SIMD_F32, NPY_SIMD_F64#
*/
#if @VCHK@
/**begin repeat1
Expand Down Expand Up @@ -256,7 +257,7 @@ static void simd_@TYPE@_@kind@_@STYPE@_@DTYPE@
/**begin repeat
* #TYPE = FLOAT, DOUBLE#
* #sfx = f32, f64#
* #VCHK = NPY_SIMD, NPY_SIMD_F64#
* #VCHK = NPY_SIMD_F32, NPY_SIMD_F64#
*/
/**begin repeat1
* #kind = rint, floor, ceil, trunc, sqrt, absolute, square, reciprocal#
Expand Down

0 comments on commit 73c476d

Please sign in to comment.