From 2ad82de616ad1cb45fb82574d93ccc617b4038f6 Mon Sep 17 00:00:00 2001
From: Sayed Adel <seiko@imavr.com>
Date: Thu, 18 Aug 2022 20:48:56 +0200
Subject: [PATCH 1/3] CI: Test NumPy build against old versions of GCC(6, 7, 8)

---
 .github/workflows/build_test.yml | 44 ++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml
index 44cb27b7267b..67986ef63a8e 100644
--- a/.github/workflows/build_test.yml
+++ b/.github/workflows/build_test.yml
@@ -75,6 +75,50 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - uses: ./.github/actions
 
+  old_gcc:
+    needs: [smoke_test]
+    # pinned to this image because it provides GCC 6, 7 and 8 through apt
+    runs-on: ubuntu-18.04
+    steps:
+    - uses: actions/checkout@v3
+      with:
+        submodules: recursive
+        fetch-depth: 0
+    # image ships python3.6; install python3.8 from the deadsnakes PPA instead
+    - name: Install Python3.8  # also symlinked as `pythonx` for the later steps
+      run: |
+        sudo apt update
+        # for add-apt-repository
+        sudo apt install software-properties-common -y
+        sudo add-apt-repository ppa:deadsnakes/ppa -y
+        sudo apt install python3.8-dev -y
+        sudo ln -s /usr/bin/python3.8 /usr/bin/pythonx
+        pythonx -m pip install --upgrade pip setuptools wheel
+        pythonx -m pip install -r test_requirements.txt
+    - name: Install Compilers  # all three GCC versions are installed side by side
+      run: sudo apt install g++-6 g++-7 g++-8 -y
+    - name: Build gcc-6  # CC/CXX select the compiler for this build only
+      run: |
+        export CC=/usr/bin/gcc-6
+        export CXX=/usr/bin/g++-6
+        pythonx setup.py install --user
+    - name: Runtests gcc-6  # NOTE(review): -n presumably tests the install above without rebuilding; confirm
+      run: pythonx runtests.py -n
+    - name: Build gcc-7  # build/ is removed first, presumably so gcc-6 artifacts are not reused
+      run: |
+        export CC=/usr/bin/gcc-7
+        export CXX=/usr/bin/g++-7
+        rm -rf build && pythonx setup.py install --user
+    - name: Runtests gcc-7
+      run: pythonx runtests.py -n
+    - name: Build gcc-8  # same clean-rebuild sequence as the gcc-7 step
+      run: |
+        export CC=/usr/bin/gcc-8
+        export CXX=/usr/bin/g++-8
+        rm -rf build && pythonx setup.py install --user
+    - name: Runtests gcc-8
+      run: pythonx runtests.py -n
+
   without_optimizations:
     needs: [smoke_test]
     runs-on: ubuntu-latest

From ab5bd90f7d232df5d5d2caf803c9d25de653056d Mon Sep 17 00:00:00 2001
From: Sayed Adel <seiko@imavr.com>
Date: Thu, 18 Aug 2022 22:32:24 +0200
Subject: [PATCH 2/3] BUG, SIMD: Fix C++ AVX512/qsort on old gcc compilers

---
 .../core/src/common/simd/avx512/arithmetic.h  | 24 +++++++++----------
 numpy/core/src/npysort/x86-qsort.dispatch.cpp |  6 +----
 2 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/numpy/core/src/common/simd/avx512/arithmetic.h b/numpy/core/src/common/simd/avx512/arithmetic.h
index 93e9d9d45197..850a0c05adf5 100644
--- a/numpy/core/src/common/simd/avx512/arithmetic.h
+++ b/numpy/core/src/common/simd/avx512/arithmetic.h
@@ -384,8 +384,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
         const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
         npyv_u32 a1 = _mm512_max_epu32(a, _mm512_permutex2var_epi32(a, idx1, a));
         npyv_u32 a2 = _mm512_max_epu32(a1, _mm512_permutex2var_epi32(a1, idx2, a1));
-        npyv_u32 a3 = _mm512_max_epu32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2)));
-        npyv_u32 a4 = _mm512_max_epu32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1)));
+        npyv_u32 a3 = _mm512_max_epu32(a2, _mm512_shuffle_epi32(a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
+        npyv_u32 a4 = _mm512_max_epu32(a3, _mm512_shuffle_epi32(a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
         return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00));
     }
 
@@ -395,8 +395,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
         const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
         npyv_s32 a1 = _mm512_max_epi32(a, _mm512_permutex2var_epi32(a, idx1, a));
         npyv_s32 a2 = _mm512_max_epi32(a1, _mm512_permutex2var_epi32(a1, idx2, a1));
-        npyv_s32 a3 = _mm512_max_epi32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2)));
-        npyv_s32 a4 = _mm512_max_epi32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1)));
+        npyv_s32 a3 = _mm512_max_epi32(a2, _mm512_shuffle_epi32(a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
+        npyv_s32 a4 = _mm512_max_epi32(a3, _mm512_shuffle_epi32(a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
         return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00));
     }
 
@@ -406,8 +406,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
         const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
         npyv_f32 a1 = _mm512_max_ps(a, _mm512_permutex2var_ps(a, idx1, a));
         npyv_f32 a2 = _mm512_max_ps(a1, _mm512_permutex2var_ps(a1, idx2, a1));
-        npyv_f32 a3 = _mm512_max_ps(a2, _mm512_shuffle_ps(a2, a2, (1<<6 | 0<<4 | 3<<2 | 2)));
-        npyv_f32 a4 = _mm512_max_ps(a3, _mm512_shuffle_sp(a3, a3, (2<<6 | 3<<4 | 0<<2 | 1)));
+        npyv_f32 a3 = _mm512_max_ps(a2, _mm512_shuffle_ps(a2, a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
+        npyv_f32 a4 = _mm512_max_ps(a3, _mm512_shuffle_ps(a3, a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
         return _mm_cvtss_f32(_mm512_extractf32x4_ps(a4, 0x00));
     }
 
@@ -417,8 +417,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
         const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
         npyv_u32 a1 = _mm512_min_epu32(a, _mm512_permutex2var_epi32(a, idx1, a));
         npyv_u32 a2 = _mm512_min_epu32(a1, _mm512_permutex2var_epi32(a1, idx2, a1));
-        npyv_u32 a3 = _mm512_min_epu32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2)));
-        npyv_u32 a4 = _mm512_min_epu32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1)));
+        npyv_u32 a3 = _mm512_min_epu32(a2, _mm512_shuffle_epi32(a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
+        npyv_u32 a4 = _mm512_min_epu32(a3, _mm512_shuffle_epi32(a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
         return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00));
     }
 
@@ -428,8 +428,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
         const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
         npyv_s32 a1 = _mm512_min_epi32(a, _mm512_permutex2var_epi32(a, idx1, a));
         npyv_s32 a2 = _mm512_min_epi32(a1, _mm512_permutex2var_epi32(a1, idx2, a1));
-        npyv_s32 a3 = _mm512_min_epi32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2)));
-        npyv_s32 a4 = _mm512_min_epi32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1)));
+        npyv_s32 a3 = _mm512_min_epi32(a2, _mm512_shuffle_epi32(a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
+        npyv_s32 a4 = _mm512_min_epi32(a3, _mm512_shuffle_epi32(a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
         return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00));
     }
 
@@ -439,8 +439,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
         const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
         npyv_f32 a1 = _mm512_min_ps(a, _mm512_permutex2var_ps(a, idx1, a));
         npyv_f32 a2 = _mm512_min_ps(a1, _mm512_permutex2var_ps(a1, idx2, a1));
-        npyv_f32 a3 = _mm512_min_ps(a2, _mm512_shuffle_ps(a2, a2, (1<<6 | 0<<4 | 3<<2 | 2)));
-        npyv_f32 a4 = _mm512_min_ps(a3, _mm512_shuffle_sp(a3, a3, (2<<6 | 3<<4 | 0<<2 | 1)));
+        npyv_f32 a3 = _mm512_min_ps(a2, _mm512_shuffle_ps(a2, a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
+        npyv_f32 a4 = _mm512_min_ps(a3, _mm512_shuffle_ps(a3, a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
         return _mm_cvtss_f32(_mm512_extractf32x4_ps(a4, 0x00));
     }
 
diff --git a/numpy/core/src/npysort/x86-qsort.dispatch.cpp b/numpy/core/src/npysort/x86-qsort.dispatch.cpp
index 39067229beaf..01fa16e3e350 100644
--- a/numpy/core/src/npysort/x86-qsort.dispatch.cpp
+++ b/numpy/core/src/npysort/x86-qsort.dispatch.cpp
@@ -648,11 +648,7 @@ partition_vec(type_t *arr, npy_intp left, npy_intp right, const zmm_t curr_vec,
     /* which elements are larger than the pivot */
     __mmask16 gt_mask = vtype::ge(curr_vec, pivot_vec);
     npy_int amount_gt_pivot = _mm_popcnt_u32((npy_int)gt_mask);
-#if defined(_MSC_VER) && _MSC_VER < 1922
-    vtype::mask_compressstoreu(arr + left, ~gt_mask, curr_vec);
-#else
-    vtype::mask_compressstoreu(arr + left, _knot_mask16(gt_mask), curr_vec);
-#endif
+    vtype::mask_compressstoreu(arr + left, _mm512_knot(gt_mask), curr_vec);
     vtype::mask_compressstoreu(arr + right - amount_gt_pivot, gt_mask,
                                curr_vec);
     *smallest_vec = vtype::min(curr_vec, *smallest_vec);

From c56afe796999ee7b3e2c1d4d7cf2e52192794d8b Mon Sep 17 00:00:00 2001
From: Ralf Gommers <ralf.gommers@gmail.com>
Date: Fri, 19 Aug 2022 10:50:35 +0300
Subject: [PATCH 3/3] DOC: add notes on GCC 6.5 being the minimum supported GCC
 version.

GCC 6 is mainly kept around for Ubuntu LTS, where installing gcc-6 with
`apt` gives version 6.5, so let's document 6.5 as the minimum supported
version (that is also the version tested in CI).

[ci skip]
---
 INSTALL.rst                  | 3 ++-
 doc/source/user/building.rst | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/INSTALL.rst b/INSTALL.rst
index 130306d06c07..f136b7cfcd32 100644
--- a/INSTALL.rst
+++ b/INSTALL.rst
@@ -75,7 +75,8 @@ skipped when running the test suite if no Fortran compiler is available.  For
 building Scipy a Fortran compiler is needed though, so we include some details
 on Fortran compilers in the rest of this section.
 
-On OS X and Linux, all common compilers will work.
+On OS X and Linux, all common compilers will work. The minimum supported GCC
+version is 6.5.
 
 For Fortran, ``gfortran`` works, ``g77`` does not.  In case ``g77`` is
 installed then ``g77`` will be detected and used first.  To explicitly select
diff --git a/doc/source/user/building.rst b/doc/source/user/building.rst
index 4bd0b7183ea0..81f6d33a797b 100644
--- a/doc/source/user/building.rst
+++ b/doc/source/user/building.rst
@@ -59,7 +59,7 @@ Building NumPy requires the following software installed:
    MSVC and Clang compilers. Compilers from other vendors such as Intel,
    Absoft, Sun, NAG, Compaq, Vast, Portland, Lahey, HP, IBM are only
    supported in the form of community feedback, and may not work out of the
-   box.  GCC 4.x (and later) compilers are recommended. On ARM64 (aarch64)
+   box.  GCC 6.5 (and later) compilers are recommended. On ARM64 (aarch64)
    GCC 8.x (and later) are recommended.
 
 3) Linear Algebra libraries