Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Need to implement SIMD functions #3

Open
luyahan opened this issue Apr 3, 2024 · 1 comment
Open

Need to implement SIMD functions #3

luyahan opened this issue Apr 3, 2024 · 1 comment

Comments

@luyahan
Copy link
Collaborator

luyahan commented Apr 3, 2024

Proposed new feature or change:

CMD:

gcc -DNPY_SIMD=128 -D_NPY_SIMD_H_ -D__aarch64__  -E ./neon.h -o neon.hpp

neon.patch

AVX2

gcc  -D_NPY_SIMD_H_   -E ./avx2.h -o avx2.hpp
luyahan@plct-c7:~/source/numpy/numpy/_core/src/common/simd/avx2$ cat ./avx2.hpp | grep NPY_FINLINE | wc -l
363

AVX512

luyahan@plct-c7:~/source/numpy/numpy/_core/src/common/simd/avx512$ gcc  -D_NPY_SIMD_H_   -E ./avx512.h -o avx512.hpp
luyahan@plct-c7:~/source/numpy/numpy/_core/src/common/simd/avx512$ cat ./avx512.hpp | grep NPY_FINLINE | wc -l
413
luyahan@plct-c7:~/source/numpy/numpy/_core/src/common/simd/avx512$ 

vec

luyahan@plct-c7:~/source/numpy/numpy/_core/src/common/simd/vec$ gcc  -D_NPY_SIMD_H_ -DNPY_HAVE_VX   -E ./vec.h -o vec.hpp
luyahan@plct-c7:~/source/numpy/numpy/_core/src/common/simd/vec$ cat ./vec.hpp | grep NPY_FINLINE | wc -l
343
luyahan@plct-c7:~/source/numpy/numpy/_core/src/common/simd/vec$ 
@luyahan
Copy link
Collaborator Author

luyahan commented Apr 3, 2024

luyahan@plct-c7:~/source/numpy/numpy/_core/src/common/simd/neon$ cat ./neon.hpp | grep NPY_FINLINE
NPY_FINLINE npyv_u8 npyv_load_u8(const npyv_lanetype_u8* ptr) {
NPY_FINLINE npyv_u8 npyv_loada_u8(const npyv_lanetype_u8* ptr) {
NPY_FINLINE npyv_u8 npyv_loads_u8(const npyv_lanetype_u8* ptr) {
NPY_FINLINE npyv_u8 npyv_loadl_u8(const npyv_lanetype_u8* ptr) {
NPY_FINLINE void npyv_store_u8(npyv_lanetype_u8* ptr, npyv_u8 vec) {
NPY_FINLINE void npyv_storea_u8(npyv_lanetype_u8* ptr, npyv_u8 vec) {
NPY_FINLINE void npyv_stores_u8(npyv_lanetype_u8* ptr, npyv_u8 vec) {
NPY_FINLINE void npyv_storel_u8(npyv_lanetype_u8* ptr, npyv_u8 vec) {
NPY_FINLINE void npyv_storeh_u8(npyv_lanetype_u8* ptr, npyv_u8 vec) {
NPY_FINLINE npyv_s8 npyv_load_s8(const npyv_lanetype_s8* ptr) {
NPY_FINLINE npyv_s8 npyv_loada_s8(const npyv_lanetype_s8* ptr) {
NPY_FINLINE npyv_s8 npyv_loads_s8(const npyv_lanetype_s8* ptr) {
NPY_FINLINE npyv_s8 npyv_loadl_s8(const npyv_lanetype_s8* ptr) {
NPY_FINLINE void npyv_store_s8(npyv_lanetype_s8* ptr, npyv_s8 vec) {
NPY_FINLINE void npyv_storea_s8(npyv_lanetype_s8* ptr, npyv_s8 vec) {
NPY_FINLINE void npyv_stores_s8(npyv_lanetype_s8* ptr, npyv_s8 vec) {
NPY_FINLINE void npyv_storel_s8(npyv_lanetype_s8* ptr, npyv_s8 vec) {
NPY_FINLINE void npyv_storeh_s8(npyv_lanetype_s8* ptr, npyv_s8 vec) {
NPY_FINLINE npyv_u16 npyv_load_u16(const npyv_lanetype_u16* ptr) {
NPY_FINLINE npyv_u16 npyv_loada_u16(const npyv_lanetype_u16* ptr) {
NPY_FINLINE npyv_u16 npyv_loads_u16(const npyv_lanetype_u16* ptr) {
NPY_FINLINE npyv_u16 npyv_loadl_u16(const npyv_lanetype_u16* ptr) {
NPY_FINLINE void npyv_store_u16(npyv_lanetype_u16* ptr, npyv_u16 vec) {
NPY_FINLINE void npyv_storea_u16(npyv_lanetype_u16* ptr, npyv_u16 vec) {
NPY_FINLINE void npyv_stores_u16(npyv_lanetype_u16* ptr, npyv_u16 vec) {
NPY_FINLINE void npyv_storel_u16(npyv_lanetype_u16* ptr, npyv_u16 vec) {
NPY_FINLINE void npyv_storeh_u16(npyv_lanetype_u16* ptr, npyv_u16 vec) {
NPY_FINLINE npyv_s16 npyv_load_s16(const npyv_lanetype_s16* ptr) {
NPY_FINLINE npyv_s16 npyv_loada_s16(const npyv_lanetype_s16* ptr) {
NPY_FINLINE npyv_s16 npyv_loads_s16(const npyv_lanetype_s16* ptr) {
NPY_FINLINE npyv_s16 npyv_loadl_s16(const npyv_lanetype_s16* ptr) {
NPY_FINLINE void npyv_store_s16(npyv_lanetype_s16* ptr, npyv_s16 vec) {
NPY_FINLINE void npyv_storea_s16(npyv_lanetype_s16* ptr, npyv_s16 vec) {
NPY_FINLINE void npyv_stores_s16(npyv_lanetype_s16* ptr, npyv_s16 vec) {
NPY_FINLINE void npyv_storel_s16(npyv_lanetype_s16* ptr, npyv_s16 vec) {
NPY_FINLINE void npyv_storeh_s16(npyv_lanetype_s16* ptr, npyv_s16 vec) {
NPY_FINLINE npyv_u32 npyv_load_u32(const npyv_lanetype_u32* ptr) {
NPY_FINLINE npyv_u32 npyv_loada_u32(const npyv_lanetype_u32* ptr) {
NPY_FINLINE npyv_u32 npyv_loads_u32(const npyv_lanetype_u32* ptr) {
NPY_FINLINE npyv_u32 npyv_loadl_u32(const npyv_lanetype_u32* ptr) {
NPY_FINLINE void npyv_store_u32(npyv_lanetype_u32* ptr, npyv_u32 vec) {
NPY_FINLINE void npyv_storea_u32(npyv_lanetype_u32* ptr, npyv_u32 vec) {
NPY_FINLINE void npyv_stores_u32(npyv_lanetype_u32* ptr, npyv_u32 vec) {
NPY_FINLINE void npyv_storel_u32(npyv_lanetype_u32* ptr, npyv_u32 vec) {
NPY_FINLINE void npyv_storeh_u32(npyv_lanetype_u32* ptr, npyv_u32 vec) {
NPY_FINLINE npyv_s32 npyv_load_s32(const npyv_lanetype_s32* ptr) {
NPY_FINLINE npyv_s32 npyv_loada_s32(const npyv_lanetype_s32* ptr) {
NPY_FINLINE npyv_s32 npyv_loads_s32(const npyv_lanetype_s32* ptr) {
NPY_FINLINE npyv_s32 npyv_loadl_s32(const npyv_lanetype_s32* ptr) {
NPY_FINLINE void npyv_store_s32(npyv_lanetype_s32* ptr, npyv_s32 vec) {
NPY_FINLINE void npyv_storea_s32(npyv_lanetype_s32* ptr, npyv_s32 vec) {
NPY_FINLINE void npyv_stores_s32(npyv_lanetype_s32* ptr, npyv_s32 vec) {
NPY_FINLINE void npyv_storel_s32(npyv_lanetype_s32* ptr, npyv_s32 vec) {
NPY_FINLINE void npyv_storeh_s32(npyv_lanetype_s32* ptr, npyv_s32 vec) {
NPY_FINLINE npyv_u64 npyv_load_u64(const npyv_lanetype_u64* ptr) {
NPY_FINLINE npyv_u64 npyv_loada_u64(const npyv_lanetype_u64* ptr) {
NPY_FINLINE npyv_u64 npyv_loads_u64(const npyv_lanetype_u64* ptr) {
NPY_FINLINE npyv_u64 npyv_loadl_u64(const npyv_lanetype_u64* ptr) {
NPY_FINLINE void npyv_store_u64(npyv_lanetype_u64* ptr, npyv_u64 vec) {
NPY_FINLINE void npyv_storea_u64(npyv_lanetype_u64* ptr, npyv_u64 vec) {
NPY_FINLINE void npyv_stores_u64(npyv_lanetype_u64* ptr, npyv_u64 vec) {
NPY_FINLINE void npyv_storel_u64(npyv_lanetype_u64* ptr, npyv_u64 vec) {
NPY_FINLINE void npyv_storeh_u64(npyv_lanetype_u64* ptr, npyv_u64 vec) {
NPY_FINLINE npyv_s64 npyv_load_s64(const npyv_lanetype_s64* ptr) {
NPY_FINLINE npyv_s64 npyv_loada_s64(const npyv_lanetype_s64* ptr) {
NPY_FINLINE npyv_s64 npyv_loads_s64(const npyv_lanetype_s64* ptr) {
NPY_FINLINE npyv_s64 npyv_loadl_s64(const npyv_lanetype_s64* ptr) {
NPY_FINLINE void npyv_store_s64(npyv_lanetype_s64* ptr, npyv_s64 vec) {
NPY_FINLINE void npyv_storea_s64(npyv_lanetype_s64* ptr, npyv_s64 vec) {
NPY_FINLINE void npyv_stores_s64(npyv_lanetype_s64* ptr, npyv_s64 vec) {
NPY_FINLINE void npyv_storel_s64(npyv_lanetype_s64* ptr, npyv_s64 vec) {
NPY_FINLINE void npyv_storeh_s64(npyv_lanetype_s64* ptr, npyv_s64 vec) {
NPY_FINLINE npyv_f32 npyv_load_f32(const npyv_lanetype_f32* ptr) {
NPY_FINLINE npyv_f32 npyv_loada_f32(const npyv_lanetype_f32* ptr) {
NPY_FINLINE npyv_f32 npyv_loads_f32(const npyv_lanetype_f32* ptr) {
NPY_FINLINE npyv_f32 npyv_loadl_f32(const npyv_lanetype_f32* ptr) {
NPY_FINLINE void npyv_store_f32(npyv_lanetype_f32* ptr, npyv_f32 vec) {
NPY_FINLINE void npyv_storea_f32(npyv_lanetype_f32* ptr, npyv_f32 vec) {
NPY_FINLINE void npyv_stores_f32(npyv_lanetype_f32* ptr, npyv_f32 vec) {
NPY_FINLINE void npyv_storel_f32(npyv_lanetype_f32* ptr, npyv_f32 vec) {
NPY_FINLINE void npyv_storeh_f32(npyv_lanetype_f32* ptr, npyv_f32 vec) {
NPY_FINLINE npyv_f64 npyv_load_f64(const npyv_lanetype_f64* ptr) {
NPY_FINLINE npyv_f64 npyv_loada_f64(const npyv_lanetype_f64* ptr) {
NPY_FINLINE npyv_f64 npyv_loads_f64(const npyv_lanetype_f64* ptr) {
NPY_FINLINE npyv_f64 npyv_loadl_f64(const npyv_lanetype_f64* ptr) {
NPY_FINLINE void npyv_store_f64(npyv_lanetype_f64* ptr, npyv_f64 vec) {
NPY_FINLINE void npyv_storea_f64(npyv_lanetype_f64* ptr, npyv_f64 vec) {
NPY_FINLINE void npyv_stores_f64(npyv_lanetype_f64* ptr, npyv_f64 vec) {
NPY_FINLINE void npyv_storel_f64(npyv_lanetype_f64* ptr, npyv_f64 vec) {
NPY_FINLINE void npyv_storeh_f64(npyv_lanetype_f64* ptr, npyv_f64 vec) {
NPY_FINLINE npyv_s32 npyv_loadn_s32(const npy_int32* ptr, npy_intp stride) {
NPY_FINLINE npyv_u32 npyv_loadn_u32(const npy_uint32* ptr, npy_intp stride) {
NPY_FINLINE npyv_f32 npyv_loadn_f32(const float* ptr, npy_intp stride) {
NPY_FINLINE npyv_s64 npyv_loadn_s64(const npy_int64* ptr, npy_intp stride) {
NPY_FINLINE npyv_u64 npyv_loadn_u64(const npy_uint64* ptr, npy_intp stride) {
NPY_FINLINE npyv_f64 npyv_loadn_f64(const double* ptr, npy_intp stride) {
NPY_FINLINE npyv_u32 npyv_loadn2_u32(const npy_uint32* ptr, npy_intp stride) {
NPY_FINLINE npyv_s32 npyv_loadn2_s32(const npy_int32* ptr, npy_intp stride) {
NPY_FINLINE npyv_f32 npyv_loadn2_f32(const float* ptr, npy_intp stride) {
NPY_FINLINE npyv_u64 npyv_loadn2_u64(const npy_uint64* ptr, npy_intp stride) {
NPY_FINLINE npyv_s64 npyv_loadn2_s64(const npy_int64* ptr, npy_intp stride) {
NPY_FINLINE npyv_f64 npyv_loadn2_f64(const double* ptr, npy_intp stride) {
NPY_FINLINE void npyv_storen_s32(npy_int32* ptr, npy_intp stride, npyv_s32 a) {
NPY_FINLINE void npyv_storen_u32(npy_uint32* ptr, npy_intp stride, npyv_u32 a) {
NPY_FINLINE void npyv_storen_f32(float* ptr, npy_intp stride, npyv_f32 a) {
NPY_FINLINE void npyv_storen_s64(npy_int64* ptr, npy_intp stride, npyv_s64 a) {
NPY_FINLINE void npyv_storen_u64(npy_uint64* ptr, npy_intp stride, npyv_u64 a) {
NPY_FINLINE void npyv_storen_f64(double* ptr, npy_intp stride, npyv_f64 a) {
NPY_FINLINE void npyv_storen2_u32(npy_uint32* ptr,
NPY_FINLINE void npyv_storen2_s32(npy_int32* ptr, npy_intp stride, npyv_s32 a) {
NPY_FINLINE void npyv_storen2_f32(float* ptr, npy_intp stride, npyv_f32 a) {
NPY_FINLINE void npyv_storen2_u64(npy_uint64* ptr,
NPY_FINLINE void npyv_storen2_s64(npy_int64* ptr, npy_intp stride, npyv_s64 a) {
NPY_FINLINE void npyv_storen2_f64(double* ptr, npy_intp stride, npyv_f64 a) {
NPY_FINLINE npyv_s32 npyv_load_till_s32(const npy_int32* ptr,
NPY_FINLINE npyv_s32 npyv_load_tillz_s32(const npy_int32* ptr,
NPY_FINLINE npyv_s64 npyv_load_till_s64(const npy_int64* ptr,
NPY_FINLINE npyv_s64 npyv_load_tillz_s64(const npy_int64* ptr,
NPY_FINLINE npyv_s32 npyv_load2_till_s32(const npy_int32* ptr,
NPY_FINLINE npyv_s32 npyv_load2_tillz_s32(const npy_int32* ptr,
NPY_FINLINE npyv_s64 npyv_load2_till_s64(const npy_int64* ptr,
NPY_FINLINE npyv_s64 npyv_load2_tillz_s64(const npy_int64* ptr,
NPY_FINLINE npyv_s32 npyv_loadn_till_s32(const npy_int32* ptr,
NPY_FINLINE npyv_s32 npyv_loadn_tillz_s32(const npy_int32* ptr,
NPY_FINLINE npyv_s64 npyv_loadn_till_s64(const npy_int64* ptr,
NPY_FINLINE npyv_s64 npyv_loadn_tillz_s64(const npy_int64* ptr,
NPY_FINLINE npyv_s32 npyv_loadn2_till_s32(const npy_int32* ptr,
NPY_FINLINE npyv_s32 npyv_loadn2_tillz_s32(const npy_int32* ptr,
NPY_FINLINE npyv_s64 npyv_loadn2_till_s64(const npy_int64* ptr,
NPY_FINLINE npyv_s64 npyv_loadn2_tillz_s64(const npy_int64* ptr,
NPY_FINLINE void npyv_store_till_s32(npy_int32* ptr,
NPY_FINLINE void npyv_store_till_s64(npy_int64* ptr,
NPY_FINLINE void npyv_store2_till_s32(npy_int32* ptr,
NPY_FINLINE void npyv_store2_till_s64(npy_int64* ptr,
NPY_FINLINE void npyv_storen_till_s32(npy_int32* ptr,
NPY_FINLINE void npyv_storen_till_s64(npy_int64* ptr,
NPY_FINLINE void npyv_storen2_till_s32(npy_int32* ptr,
NPY_FINLINE void npyv_storen2_till_s64(npy_int64* ptr,
NPY_FINLINE npyv_u32 npyv_load_till_u32(const npyv_lanetype_u32* ptr,
NPY_FINLINE npyv_u32 npyv_loadn_till_u32(const npyv_lanetype_u32* ptr,
NPY_FINLINE npyv_u32 npyv_load_tillz_u32(const npyv_lanetype_u32* ptr,
NPY_FINLINE npyv_u32 npyv_loadn_tillz_u32(const npyv_lanetype_u32* ptr,
NPY_FINLINE void npyv_store_till_u32(npyv_lanetype_u32* ptr,
NPY_FINLINE void npyv_storen_till_u32(npyv_lanetype_u32* ptr,
NPY_FINLINE npyv_f32 npyv_load_till_f32(const npyv_lanetype_f32* ptr,
NPY_FINLINE npyv_f32 npyv_loadn_till_f32(const npyv_lanetype_f32* ptr,
NPY_FINLINE npyv_f32 npyv_load_tillz_f32(const npyv_lanetype_f32* ptr,
NPY_FINLINE npyv_f32 npyv_loadn_tillz_f32(const npyv_lanetype_f32* ptr,
NPY_FINLINE void npyv_store_till_f32(npyv_lanetype_f32* ptr,
NPY_FINLINE void npyv_storen_till_f32(npyv_lanetype_f32* ptr,
NPY_FINLINE npyv_u64 npyv_load_till_u64(const npyv_lanetype_u64* ptr,
NPY_FINLINE npyv_u64 npyv_loadn_till_u64(const npyv_lanetype_u64* ptr,
NPY_FINLINE npyv_u64 npyv_load_tillz_u64(const npyv_lanetype_u64* ptr,
NPY_FINLINE npyv_u64 npyv_loadn_tillz_u64(const npyv_lanetype_u64* ptr,
NPY_FINLINE void npyv_store_till_u64(npyv_lanetype_u64* ptr,
NPY_FINLINE void npyv_storen_till_u64(npyv_lanetype_u64* ptr,
NPY_FINLINE npyv_f64 npyv_load_till_f64(const npyv_lanetype_f64* ptr,
NPY_FINLINE npyv_f64 npyv_loadn_till_f64(const npyv_lanetype_f64* ptr,
NPY_FINLINE npyv_f64 npyv_load_tillz_f64(const npyv_lanetype_f64* ptr,
NPY_FINLINE npyv_f64 npyv_loadn_tillz_f64(const npyv_lanetype_f64* ptr,
NPY_FINLINE void npyv_store_till_f64(npyv_lanetype_f64* ptr,
NPY_FINLINE void npyv_storen_till_f64(npyv_lanetype_f64* ptr,
NPY_FINLINE npyv_u32 npyv_load2_till_u32(const npyv_lanetype_u32* ptr,
NPY_FINLINE npyv_u32 npyv_loadn2_till_u32(const npyv_lanetype_u32* ptr,
NPY_FINLINE npyv_u32 npyv_load2_tillz_u32(const npyv_lanetype_u32* ptr,
NPY_FINLINE npyv_u32 npyv_loadn2_tillz_u32(const npyv_lanetype_u32* ptr,
NPY_FINLINE void npyv_store2_till_u32(npyv_lanetype_u32* ptr,
NPY_FINLINE void npyv_storen2_till_u32(npyv_lanetype_u32* ptr,
NPY_FINLINE npyv_f32 npyv_load2_till_f32(const npyv_lanetype_f32* ptr,
NPY_FINLINE npyv_f32 npyv_loadn2_till_f32(const npyv_lanetype_f32* ptr,
NPY_FINLINE npyv_f32 npyv_load2_tillz_f32(const npyv_lanetype_f32* ptr,
NPY_FINLINE npyv_f32 npyv_loadn2_tillz_f32(const npyv_lanetype_f32* ptr,
NPY_FINLINE void npyv_store2_till_f32(npyv_lanetype_f32* ptr,
NPY_FINLINE void npyv_storen2_till_f32(npyv_lanetype_f32* ptr,
NPY_FINLINE npyv_u64 npyv_load2_till_u64(const npyv_lanetype_u64* ptr,
NPY_FINLINE npyv_u64 npyv_loadn2_till_u64(const npyv_lanetype_u64* ptr,
NPY_FINLINE npyv_u64 npyv_load2_tillz_u64(const npyv_lanetype_u64* ptr,
NPY_FINLINE npyv_u64 npyv_loadn2_tillz_u64(const npyv_lanetype_u64* ptr,
NPY_FINLINE void npyv_store2_till_u64(npyv_lanetype_u64* ptr,
NPY_FINLINE void npyv_storen2_till_u64(npyv_lanetype_u64* ptr,
NPY_FINLINE npyv_f64 npyv_load2_till_f64(const npyv_lanetype_f64* ptr,
NPY_FINLINE npyv_f64 npyv_loadn2_till_f64(const npyv_lanetype_f64* ptr,
NPY_FINLINE npyv_f64 npyv_load2_tillz_f64(const npyv_lanetype_f64* ptr,
NPY_FINLINE npyv_f64 npyv_loadn2_tillz_f64(const npyv_lanetype_f64* ptr,
NPY_FINLINE void npyv_store2_till_f64(npyv_lanetype_f64* ptr,
NPY_FINLINE void npyv_storen2_till_f64(npyv_lanetype_f64* ptr,
NPY_FINLINE npyv_u8x2 npyv_load_u8x2(const npyv_lanetype_u8* ptr) {
NPY_FINLINE void npyv_store_u8x2(npyv_lanetype_u8* ptr, npyv_u8x2 v) {
NPY_FINLINE npyv_s8x2 npyv_load_s8x2(const npyv_lanetype_s8* ptr) {
NPY_FINLINE void npyv_store_s8x2(npyv_lanetype_s8* ptr, npyv_s8x2 v) {
NPY_FINLINE npyv_u16x2 npyv_load_u16x2(const npyv_lanetype_u16* ptr) {
NPY_FINLINE void npyv_store_u16x2(npyv_lanetype_u16* ptr, npyv_u16x2 v) {
NPY_FINLINE npyv_s16x2 npyv_load_s16x2(const npyv_lanetype_s16* ptr) {
NPY_FINLINE void npyv_store_s16x2(npyv_lanetype_s16* ptr, npyv_s16x2 v) {
NPY_FINLINE npyv_u32x2 npyv_load_u32x2(const npyv_lanetype_u32* ptr) {
NPY_FINLINE void npyv_store_u32x2(npyv_lanetype_u32* ptr, npyv_u32x2 v) {
NPY_FINLINE npyv_s32x2 npyv_load_s32x2(const npyv_lanetype_s32* ptr) {
NPY_FINLINE void npyv_store_s32x2(npyv_lanetype_s32* ptr, npyv_s32x2 v) {
NPY_FINLINE npyv_f32x2 npyv_load_f32x2(const npyv_lanetype_f32* ptr) {
NPY_FINLINE void npyv_store_f32x2(npyv_lanetype_f32* ptr, npyv_f32x2 v) {
NPY_FINLINE npyv_f64x2 npyv_load_f64x2(const npyv_lanetype_f64* ptr) {
NPY_FINLINE void npyv_store_f64x2(npyv_lanetype_f64* ptr, npyv_f64x2 v) {
NPY_FINLINE npyv_u64x2 npyv_load_u64x2(const npyv_lanetype_u64* ptr) {
NPY_FINLINE void npyv_store_u64x2(npyv_lanetype_u64* ptr, npyv_u64x2 v) {
NPY_FINLINE npyv_s64x2 npyv_load_s64x2(const npyv_lanetype_s64* ptr) {
NPY_FINLINE void npyv_store_s64x2(npyv_lanetype_s64* ptr, npyv_s64x2 v) {
NPY_FINLINE npyv_u32 npyv_lut32_u32(const npy_uint32* table, npyv_u32 idx) {
NPY_FINLINE npyv_s32 npyv_lut32_s32(const npy_int32* table, npyv_u32 idx) {
NPY_FINLINE npyv_f32 npyv_lut32_f32(const float* table, npyv_u32 idx) {
NPY_FINLINE npyv_u64 npyv_lut16_u64(const npy_uint64* table, npyv_u64 idx) {
NPY_FINLINE npyv_s64 npyv_lut16_s64(const npy_int64* table, npyv_u64 idx) {
NPY_FINLINE npyv_f64 npyv_lut16_f64(const double* table, npyv_u64 idx) {
NPY_FINLINE npyv_u8x2 npyv_combine_u8(npyv_u8 a, npyv_u8 b) {
NPY_FINLINE npyv_s8x2 npyv_combine_s8(npyv_s8 a, npyv_s8 b) {
NPY_FINLINE npyv_u16x2 npyv_combine_u16(npyv_u16 a, npyv_u16 b) {
NPY_FINLINE npyv_s16x2 npyv_combine_s16(npyv_s16 a, npyv_s16 b) {
NPY_FINLINE npyv_u32x2 npyv_combine_u32(npyv_u32 a, npyv_u32 b) {
NPY_FINLINE npyv_s32x2 npyv_combine_s32(npyv_s32 a, npyv_s32 b) {
NPY_FINLINE npyv_u64x2 npyv_combine_u64(npyv_u64 a, npyv_u64 b) {
NPY_FINLINE npyv_s64x2 npyv_combine_s64(npyv_s64 a, npyv_s64 b) {
NPY_FINLINE npyv_f32x2 npyv_combine_f32(npyv_f32 a, npyv_f32 b) {
NPY_FINLINE npyv_f64x2 npyv_combine_f64(npyv_f64 a, npyv_f64 b) {
NPY_FINLINE npyv_u8x2 npyv_zip_u8(npyv_u8 a, npyv_u8 b) {
NPY_FINLINE npyv_u8x2 npyv_unzip_u8(npyv_u8 a, npyv_u8 b) {
NPY_FINLINE npyv_s8x2 npyv_zip_s8(npyv_s8 a, npyv_s8 b) {
NPY_FINLINE npyv_s8x2 npyv_unzip_s8(npyv_s8 a, npyv_s8 b) {
NPY_FINLINE npyv_u16x2 npyv_zip_u16(npyv_u16 a, npyv_u16 b) {
NPY_FINLINE npyv_u16x2 npyv_unzip_u16(npyv_u16 a, npyv_u16 b) {
NPY_FINLINE npyv_s16x2 npyv_zip_s16(npyv_s16 a, npyv_s16 b) {
NPY_FINLINE npyv_s16x2 npyv_unzip_s16(npyv_s16 a, npyv_s16 b) {
NPY_FINLINE npyv_u32x2 npyv_zip_u32(npyv_u32 a, npyv_u32 b) {
NPY_FINLINE npyv_u32x2 npyv_unzip_u32(npyv_u32 a, npyv_u32 b) {
NPY_FINLINE npyv_s32x2 npyv_zip_s32(npyv_s32 a, npyv_s32 b) {
NPY_FINLINE npyv_s32x2 npyv_unzip_s32(npyv_s32 a, npyv_s32 b) {
NPY_FINLINE npyv_f32x2 npyv_zip_f32(npyv_f32 a, npyv_f32 b) {
NPY_FINLINE npyv_f32x2 npyv_unzip_f32(npyv_f32 a, npyv_f32 b) {
NPY_FINLINE npyv_b32 npyv_notnan_f32(npyv_f32 a) {
NPY_FINLINE npyv_b64 npyv_notnan_f64(npyv_f64 a) {
NPY_FINLINE bool npyv_any_b8(npyv_b8 a) {
NPY_FINLINE bool npyv_all_b8(npyv_b8 a) {
NPY_FINLINE bool npyv_any_b16(npyv_b16 a) {
NPY_FINLINE bool npyv_all_b16(npyv_b16 a) {
NPY_FINLINE bool npyv_any_b32(npyv_b32 a) {
NPY_FINLINE bool npyv_all_b32(npyv_b32 a) {
NPY_FINLINE bool npyv_any_u8(npyv_u8 a) {
NPY_FINLINE bool npyv_all_u8(npyv_u8 a) {
NPY_FINLINE bool npyv_any_s8(npyv_s8 a) {
NPY_FINLINE bool npyv_all_s8(npyv_s8 a) {
NPY_FINLINE bool npyv_any_u16(npyv_u16 a) {
NPY_FINLINE bool npyv_all_u16(npyv_u16 a) {
NPY_FINLINE bool npyv_any_s16(npyv_s16 a) {
NPY_FINLINE bool npyv_all_s16(npyv_s16 a) {
NPY_FINLINE bool npyv_any_u32(npyv_u32 a) {
NPY_FINLINE bool npyv_all_u32(npyv_u32 a) {
NPY_FINLINE bool npyv_any_s32(npyv_s32 a) {
NPY_FINLINE bool npyv_all_s32(npyv_s32 a) {
NPY_FINLINE bool npyv_any_b64(npyv_b64 a) {
NPY_FINLINE bool npyv_all_b64(npyv_b64 a) {
NPY_FINLINE bool npyv_all_u64(npyv_u64 a) {
NPY_FINLINE bool npyv_any_s64(npyv_s64 a) {
NPY_FINLINE bool npyv_all_s64(npyv_s64 a) {
NPY_FINLINE bool npyv_any_f32(npyv_f32 a) {
NPY_FINLINE bool npyv_all_f32(npyv_f32 a) {
NPY_FINLINE bool npyv_any_f64(npyv_f64 a) {
NPY_FINLINE bool npyv_all_f64(npyv_f64 a) {
NPY_FINLINE npy_uint64 npyv_tobits_b8(npyv_b8 a) {
NPY_FINLINE npy_uint64 npyv_tobits_b16(npyv_b16 a) {
NPY_FINLINE npy_uint64 npyv_tobits_b32(npyv_b32 a) {
NPY_FINLINE npy_uint64 npyv_tobits_b64(npyv_b64 a) {
NPY_FINLINE npyv_u16x2 npyv_expand_u16_u8(npyv_u8 data) {
NPY_FINLINE npyv_u32x2 npyv_expand_u32_u16(npyv_u16 data) {
NPY_FINLINE npyv_b8 npyv_pack_b8_b16(npyv_b16 a, npyv_b16 b) {
NPY_FINLINE npyv_b8 npyv_pack_b8_b32(npyv_b32 a,
NPY_FINLINE npyv_b8 npyv_pack_b8_b64(npyv_b64 a,
NPY_FINLINE npyv_s32 npyv_round_s32_f64(npyv_f64 a, npyv_f64 b) {
NPY_FINLINE npyv_u8 npyv_divc_u8(npyv_u8 a, const npyv_u8x3 divisor) {
NPY_FINLINE npyv_s8 npyv_divc_s8(npyv_s8 a, const npyv_s8x3 divisor) {
NPY_FINLINE npyv_u16 npyv_divc_u16(npyv_u16 a, const npyv_u16x3 divisor) {
NPY_FINLINE npyv_s16 npyv_divc_s16(npyv_s16 a, const npyv_s16x3 divisor) {
NPY_FINLINE npyv_u32 npyv_divc_u32(npyv_u32 a, const npyv_u32x3 divisor) {
NPY_FINLINE npyv_s32 npyv_divc_s32(npyv_s32 a, const npyv_s32x3 divisor) {
NPY_FINLINE npyv_u64 npyv_divc_u64(npyv_u64 a, const npyv_u64x3 divisor) {
NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor) {
NPY_FINLINE npyv_f32 npyv_muladd_f32(npyv_f32 a, npyv_f32 b, npyv_f32 c) {
NPY_FINLINE npyv_f32 npyv_mulsub_f32(npyv_f32 a, npyv_f32 b, npyv_f32 c) {
NPY_FINLINE npyv_f32 npyv_nmuladd_f32(npyv_f32 a, npyv_f32 b, npyv_f32 c) {
NPY_FINLINE npyv_f32 npyv_nmulsub_f32(npyv_f32 a, npyv_f32 b, npyv_f32 c) {
NPY_FINLINE npyv_f32 npyv_muladdsub_f32(npyv_f32 a, npyv_f32 b, npyv_f32 c) {
NPY_FINLINE npyv_f64 npyv_muladd_f64(npyv_f64 a, npyv_f64 b, npyv_f64 c) {
NPY_FINLINE npyv_f64 npyv_mulsub_f64(npyv_f64 a, npyv_f64 b, npyv_f64 c) {
NPY_FINLINE npyv_f64 npyv_nmuladd_f64(npyv_f64 a, npyv_f64 b, npyv_f64 c) {
NPY_FINLINE npyv_f64 npyv_nmulsub_f64(npyv_f64 a, npyv_f64 b, npyv_f64 c) {
NPY_FINLINE npyv_f64 npyv_muladdsub_f64(npyv_f64 a, npyv_f64 b, npyv_f64 c) {
NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a) {
NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a) {
NPY_FINLINE npyv_f32 npyv_recip_f32(npyv_f32 a) {
NPY_FINLINE npyv_f64 npyv_recip_f64(npyv_f64 a) {
NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b) {
NPY_FINLINE npyv_u64 npyv_max_u64(npyv_u64 a, npyv_u64 b) {
NPY_FINLINE npyv_s64 npyv_max_s64(npyv_s64 a, npyv_s64 b) {
NPY_FINLINE npyv_f32 npyv_minp_f32(npyv_f32 a, npyv_f32 b) {
NPY_FINLINE npyv_u64 npyv_min_u64(npyv_u64 a, npyv_u64 b) {
NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b) {
NPY_FINLINE npy_uint64 npyv_reduce_max_u64(npyv_u64 a) {
NPY_FINLINE npy_int64 npyv_reduce_max_s64(npyv_s64 a) {
NPY_FINLINE npy_uint64 npyv_reduce_min_u64(npyv_u64 a) {
NPY_FINLINE npy_int64 npyv_reduce_min_s64(npyv_s64 a) {
NPY_FINLINE npyv_f32 npyv_rint_f32(npyv_f32 a) {
NPY_FINLINE npyv_f32 npyv_ceil_f32(npyv_f32 a) {
NPY_FINLINE npyv_f32 npyv_trunc_f32(npyv_f32 a) {
NPY_FINLINE npyv_f32 npyv_floor_f32(npyv_f32 a) {
luyahan@plct-c7:~/source/numpy/numpy/_core/src/common/simd/neon$ cat ./neon.hpp | grep NPY_FINLINE | wc -l
311
luyahan@plct-c7:~/source/numpy/numpy/_core/src/common/simd/neon$ 

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant