Add heir-simd-vectorizer pipeline

This is intended to be a consistent bundling of the ported HECO optimizations, so that all tests can use them identically. Fixes #556. Depends on #531 for changes to the hamming-distance test. PiperOrigin-RevId: 619348713
google · Mar 27, 2024 · 6085ea8 · 6085ea8
1 parent c67f0a0
commit 6085ea8
Show file tree

Hide file tree

Showing 9 changed files with 90 additions and 28 deletions.
diff --git a/docs/content/en/docs/pipelines.md b/docs/content/en/docs/pipelines.md
@@ -7,6 +7,27 @@ weight: 9
 
 ## `heir-opt`
 
+### `--heir-simd-vectorizer`
+
+Run scheme-agnostic passes to convert FHE programs that operate on scalar types
+to equivalent programs that operate on vectors.
+
+This pipeline is intended to process FHE programs that are known to be good
+for SIMD, but for which a specific FHE scheme has not yet been chosen. It
+expects to handle `arith` ops operating on `tensor` types (with or without
+`secret.generic`).
+
+The pipeline unrolls all loops, then applies a series of passes that convert
+scalar operations on tensor elements to SIMD operations on full tensors. This
+uses the FHE computational model common to BGV, BFV, and CKKS, in which data is
+packed in polynomial ciphertexts, interpreted as vectors of individual data
+elements, and arithmetic can be applied across entire ciphertexts, with some
+limited support for rotations via automorphisms of the underlying ring.
+
+Along the way, this pipeline applies heuristic optimizations to minimize the
+number of rotations needed, relying on the implicit cost model that rotations
+are generally expensive. The specific set of passes can be found in
+`tools/heir-opt.cpp` where the pipeline is defined.
+
 ### `--heir-tosa-to-arith`
 
 Lowers a TOSA MLIR model to `func`, `arith`, and `memref`.
@@ -26,6 +47,7 @@ tool can lower a TFLite FlatBuffer to textual MLIR with
 [hello_world.tosa.mlir](https://github.com/google/heir/blob/main/tests/hello_world.tosa.mlir)
 for an example.
 
+
 ### `--yosys-optimizer`
 
 Uses Yosys to booleanize and optimize MLIR functions.

diff --git a/scripts/lit_to_bazel.py b/scripts/lit_to_bazel.py
@@ -1,6 +1,6 @@
+from collections import deque
 import os
 import pathlib
-from collections import deque
 
 import fire
 
@@ -11,6 +11,7 @@
 IN_REDIRECT = "<"
 RUN_PREFIX = "// RUN:"
 
+
 def strip_run_prefix(line):
     if RUN_PREFIX in line:
         return line.split(RUN_PREFIX)[1]
@@ -28,8 +29,8 @@ def convert_to_run_commands(run_lines):
 
         line = strip_run_prefix(line)
 
-        if '|' in line:
-            first, second = line.split('|', maxsplit=1)
+        if "|" in line:
+            first, second = line.split("|", maxsplit=1)
             current_command += " " + first.strip()
             cmds.append(current_command.strip())
             current_command = ""
@@ -43,8 +44,8 @@ def convert_to_run_commands(run_lines):
             current_command = ""
             continue
 
-        if line.strip().endswith('\\'):
-            current_command += " " + line.replace('\\', '').strip()
+        if line.strip().endswith("\\"):
+            current_command += " " + line.replace("\\", "").strip()
             continue
 
         current_command += line
@@ -56,6 +57,7 @@ def convert_to_run_commands(run_lines):
 
 def lit_to_bazel(
     lit_test_file: str,
+    git_root: str = "",
 ):
     """A helper CLI that converts MLIR test files to bazel run commands.
 
@@ -64,9 +66,11 @@ def lit_to_bazel(
         command.
     """
 
-    git_root = pathlib.Path(__file__).parent.parent
-    if not os.path.isdir(git_root / ".git"):
-        raise RuntimeError(f"Could not find git root, looked at {git_root}")
+    if not git_root:
+        git_root = pathlib.Path(__file__).parent.parent
+        if not os.path.isdir(git_root / ".git"):
+            raise RuntimeError(f"Could not find git root, looked at {git_root}")
+    # if git root is manually specified, just trust it
 
     if not lit_test_file:
         raise ValueError("lit_test_file must be provided")
@@ -81,7 +85,7 @@ def lit_to_bazel(
                 run_lines.append(line)
 
     commands = convert_to_run_commands(run_lines)
-    commands = [x for x in commands if 'FileCheck' not in x]
+    commands = [x for x in commands if "FileCheck" not in x]
     # remove consecutive and trailing pipes
     if commands[-1] == PIPE:
         commands.pop()
@@ -95,8 +99,13 @@ def lit_to_bazel(
     # I would consider using bazel-bin/tools/heir-opt, but the yosys
     # requirement requires additional env vars to be set for the yosys and ABC
     # paths, which is not yet worth doing for this script.
-    joined = joined.replace("heir-opt", "bazel run --noallow_analysis_cache_discard //tools:heir-opt --")
-    joined = joined.replace("heir-translate", f"{git_root}/bazel-bin/tools/heir-translate")
+    joined = joined.replace(
+        "heir-opt",
+        "bazel run --noallow_analysis_cache_discard //tools:heir-opt --",
+    )
+    joined = joined.replace(
+        "heir-translate", f"{git_root}/bazel-bin/tools/heir-translate"
+    )
     joined = joined.replace("%s", str(pathlib.Path(lit_test_file).absolute()))
     print(joined)
 

diff --git a/tests/simd/BUILD → tests/heir_simd_vectorizer/BUILD b/tests/simd/BUILD → tests/heir_simd_vectorizer/BUILD
diff --git a/tests/simd/box_blur_4x4.mlir → tests/heir_simd_vectorizer/box_blur_4x4.mlir b/tests/simd/box_blur_4x4.mlir → tests/heir_simd_vectorizer/box_blur_4x4.mlir
@@ -1,13 +1,11 @@
 // RUN: heir-opt --secretize=entry-function=box_blur --wrap-generic --canonicalize --cse \
-// RUN:   --full-loop-unroll \
-// RUN:   --insert-rotate --cse --canonicalize --collapse-insertion-chains --canonicalize --cse \
-// RUN:   %s | FileCheck %s
+// RUN:   --heir-simd-vectorizer %s | FileCheck %s
 
 module  {
   // CHECK-LABEL: @box_blur
   // CHECK-NOT: tensor.extract
   // CHECK-COUNT-7: tensor_ext.rotate
-  func.func @box_blur(%arg0: tensor<16xi16> {secret.secret}) -> tensor<16xi16> {
+  func.func @box_blur(%arg0: tensor<16xi16>) -> tensor<16xi16> {
     %c16 = arith.constant 16 : index
     %c4 = arith.constant 4 : index
     %0 = affine.for %x = 0 to 4 iter_args(%arg0_x = %arg0) -> (tensor<16xi16>) {

diff --git a/tests/simd/box_blur_64x64.mlir → .../heir_simd_vectorizer/box_blur_64x64.mlir b/tests/simd/box_blur_64x64.mlir → .../heir_simd_vectorizer/box_blur_64x64.mlir
@@ -1,6 +1,5 @@
-// RUN: heir-opt --secretize=entry-function=box_blur --wrap-generic --canonicalize --cse --full-loop-unroll \
-// RUN:   --insert-rotate --cse --canonicalize --collapse-insertion-chains --canonicalize --cse \
-// RUN:   %s | FileCheck %s
+// RUN: heir-opt --secretize=entry-function=box_blur --wrap-generic --canonicalize --cse \
+// RUN:   --heir-simd-vectorizer %s | FileCheck %s
 
 module  {
   // CHECK-LABEL: @box_blur
@@ -30,7 +29,7 @@ module  {
   // CHECK-NEXT:     secret.yield %[[v15]]
   // CHECK-NEXT:   } -> !secret.secret<tensor<4096xi16>>
   // CHECK-NEXT:   return %[[v0]]
-  func.func @box_blur(%arg0: tensor<4096xi16> {secret.secret}) -> tensor<4096xi16> {
+  func.func @box_blur(%arg0: tensor<4096xi16>) -> tensor<4096xi16> {
     %c4096 = arith.constant 4096 : index
     %c64 = arith.constant 64 : index
     %0 = affine.for %x = 0 to 64 iter_args(%arg0_x = %arg0) -> (tensor<4096xi16>) {

diff --git a/tests/simd/hamming_distance.mlir → ...eir_simd_vectorizer/hamming_distance.mlir b/tests/simd/hamming_distance.mlir → ...eir_simd_vectorizer/hamming_distance.mlir
@@ -1,6 +1,5 @@
 // RUN: heir-opt --secretize=entry-function=hamming --wrap-generic --canonicalize --cse \
-// RUN:   --full-loop-unroll --cse --canonicalize --insert-rotate --cse --canonicalize \
-// RUN:   %s | FileCheck %s
+// RUN:   --heir-simd-vectorizer %s | FileCheck %s
 
 // CHECK-LABEL: @hamming
 // CHECK: secret.generic
@@ -15,10 +14,11 @@
 // CHECK-NEXT: tensor.extract
 // CHECK-NEXT: secret.yield
 
-// TODO(#521): support rotate-and-reduce when the input is already a series of incremental rotations,
-// as this IR is currently lowered to 4-1 rotate operations to sum after doing (x-y)**2 in SIMD.
+// TODO(#521): Fix rotate-and-reduce to work on this IR.
+// The problem is that the lattice identifies the rotate-version of this IR as
+// being overdetermined.
 
-func.func @hamming(%arg0: tensor<4xi16> {secret.secret}, %arg1: tensor<4xi16> {secret.secret}) -> i16 {
+func.func @hamming(%arg0: tensor<4xi16>, %arg1: tensor<4xi16>) -> i16 {
   %c0 = arith.constant 0 : index
   %c0_si16 = arith.constant 0 : i16
   %0 = affine.for %arg2 = 0 to 4 iter_args(%arg6 = %c0_si16) -> i16 {

diff --git a/tests/simd/simple_sum.mlir → tests/heir_simd_vectorizer/simple_sum.mlir b/tests/simd/simple_sum.mlir → tests/heir_simd_vectorizer/simple_sum.mlir
@@ -1,14 +1,12 @@
 // RUN: heir-opt --secretize=entry-function=simple_sum --wrap-generic --canonicalize --cse \
-// RUN:   --full-loop-unroll --insert-rotate --cse --canonicalize \
-// RUN:   --rotate-and-reduce --canonicalize \
-// RUN:   %s | FileCheck %s
+// RUN:   --heir-simd-vectorizer %s | FileCheck %s
 
 // Sum all entries of a tensor into a single scalar
 // CHECK-LABEL: @simple_sum
 // CHECK: secret.generic
 // CHECK-COUNT-5: tensor_ext.rotate
 // CHECK-NOT: tensor_ext.rotate
-func.func @simple_sum(%arg0: tensor<32xi16> {secret.secret}) -> i16 {
+func.func @simple_sum(%arg0: tensor<32xi16>) -> i16 {
   %c0 = arith.constant 0 : index
   %c0_si16 = arith.constant 0 : i16
   %0 = affine.for %i = 0 to 32 iter_args(%sum_iter = %c0_si16) -> i16 {

diff --git a/tools/BUILD b/tools/BUILD
@@ -56,6 +56,9 @@ cc_binary(
         "@heir//lib/Dialect/Secret/Transforms:DistributeGeneric",
         "@heir//lib/Dialect/TensorExt/IR:Dialect",
         "@heir//lib/Dialect/TensorExt/Transforms",
+        "@heir//lib/Dialect/TensorExt/Transforms:CollapseInsertionChains",
+        "@heir//lib/Dialect/TensorExt/Transforms:InsertRotate",
+        "@heir//lib/Dialect/TensorExt/Transforms:RotateAndReduce",
         "@heir//lib/Dialect/TfheRust/IR:Dialect",
         "@heir//lib/Dialect/TfheRustBool/IR:Dialect",
         "@heir//lib/Transforms/ElementwiseToAffine",

diff --git a/tools/heir-opt.cpp b/tools/heir-opt.cpp
@@ -22,7 +22,10 @@
 #include "include/Dialect/Secret/Transforms/DistributeGeneric.h"
 #include "include/Dialect/Secret/Transforms/Passes.h"
 #include "include/Dialect/TensorExt/IR/TensorExtDialect.h"
+#include "include/Dialect/TensorExt/Transforms/CollapseInsertionChains.h"
+#include "include/Dialect/TensorExt/Transforms/InsertRotate.h"
 #include "include/Dialect/TensorExt/Transforms/Passes.h"
+#include "include/Dialect/TensorExt/Transforms/RotateAndReduce.h"
 #include "include/Dialect/TfheRust/IR/TfheRustDialect.h"
 #include "include/Dialect/TfheRustBool/IR/TfheRustBoolDialect.h"
 #include "include/Transforms/ElementwiseToAffine/ElementwiseToAffine.h"
@@ -174,6 +177,29 @@ void polynomialToLLVMPipelineBuilder(OpPassManager &manager) {
   manager.addPass(createSymbolDCEPass());
 }
 
+void heirSIMDVectorizerPipelineBuilder(OpPassManager &manager) {
+  // For now we unroll loops to enable insert-rotate, but we would like to be
+  // smarter about this and do an affine loop analysis.
+  manager.addPass(createFullLoopUnroll());
+
+  // Insert rotations aligned to slot targets. Future work should provide
+  // alternative methods to optimally align rotations, and allow the user to
+  // configure this via pipeline options.
+  manager.addPass(tensor_ext::createInsertRotate());
+  manager.addPass(createCSEPass());
+  manager.addPass(createCanonicalizerPass());
+  // NOTE(review): presumably collapses extract/insert chains into rotations.
+  manager.addPass(tensor_ext::createCollapseInsertionChains());
+  manager.addPass(createCSEPass());
+  manager.addPass(createSCCPPass());
+  manager.addPass(createCanonicalizerPass());
+  // NOTE(review): presumably rewrites reductions to use fewer rotations.
+  manager.addPass(tensor_ext::createRotateAndReduce());
+  manager.addPass(createCSEPass());
+  manager.addPass(createSCCPPass());
+  manager.addPass(createCanonicalizerPass());
+}
+
 #ifndef HEIR_NO_YOSYS
 struct TosaToBooleanTfheOptions
     : public PassPipelineOptions<TosaToBooleanTfheOptions> {
@@ -333,6 +359,13 @@ int main(int argc, char **argv) {
       "Run passes to lower the polynomial dialect to LLVM",
       polynomialToLLVMPipelineBuilder);
 
+  PassPipelineRegistration<>(
+      "heir-simd-vectorizer",
+      "Run scheme-agnostic passes to convert FHE programs that operate on "
+      "scalar types to equivalent programs that operate on vectors and use "
+      "tensor_ext.rotate",
+      heirSIMDVectorizerPipelineBuilder);
+
   return asMainReturnCode(
       MlirOptMain(argc, argv, "HEIR Pass Driver", registry));
 }