Skip to content

Commit

Permalink
Merge pull request #9937 from xavierleroy/arm64-codegen
Browse files Browse the repository at this point in the history
Improved ARM64 code generation
  • Loading branch information
xavierleroy committed Nov 26, 2020
2 parents 082bdf5 + 5e15e3c commit f1ce133
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 90 deletions.
4 changes: 4 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ Working version

### Code generation and optimizations:

- #9937: improvements in ARM64 code generation (constants, sign extensions)
(Xavier Leroy, review by Stephen Dolan)

### Standard library:

### Other libraries:
Expand Down Expand Up @@ -38,6 +41,7 @@ Working version
- #10005: Try expanding aliases in Ctype.nondep_type_rec
(Stephen Dolan, review by Gabriel Scherer, Leo White and Xavier Leroy)


OCaml 4.12.0
------------

Expand Down
72 changes: 72 additions & 0 deletions asmcomp/arm64/arch.ml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ type specific_operation =
| Isqrtf (* floating-point square root *)
| Ibswap of int (* endianness conversion *)
| Imove32 (* 32-bit integer move *)
| Isignext of int (* sign extension *)

and arith_operation =
Ishiftadd
Expand Down Expand Up @@ -169,3 +170,74 @@ let print_specific_operation printreg op ppf arg =
| Imove32 ->
fprintf ppf "move32 %a"
printreg arg.(0)
| Isignext n ->
fprintf ppf "signext%d %a"
n printreg arg.(0)

(* Recognition of logical immediate arguments *)

(* An automaton to recognize ( 0+1+0* | 1+0+1* )
0 1 0
/ \ / \ / \
\ / \ / \ /
-0--> [1] --1--> [2] --0--> [3]
/
[0]
\
-1--> [4] --0--> [5] --1--> [6]
/ \ / \ / \
\ / \ / \ /
1 0 1
The accepting states are 2, 3, 5 and 6. *)

(* Transition table of the automaton, indexed by state number.
   Each entry is a triple: whether the state is accepting, the state
   reached on a 0 bit, and the state reached on a 1 bit.
   State 7 is not accepting and both of its transitions lead back to 7,
   so an input that reaches it can never be accepted afterwards. *)
let auto_table = [| (* accepting?, next on 0, next on 1 *)
(* state 0 *) (false, 1, 4);
(* state 1 *) (false, 1, 2);
(* state 2 *) (true, 3, 2);
(* state 3 *) (true, 3, 7);
(* state 4 *) (false, 5, 4);
(* state 5 *) (true, 5, 6);
(* state 6 *) (true, 7, 6);
(* state 7 *) (false, 7, 7) (* error state *)
|]

(* [run_automata nbits state input] feeds the [nbits] low-order bits of
   [input] to the automaton, least significant bit first, starting from
   [state].  The result tells whether the state reached once all [nbits]
   bits have been consumed is an accepting one. *)
let rec run_automata nbits state input =
  let (accepting, on_zero, on_one) = auto_table.(state) in
  if nbits > 0 then begin
    (* Consume the lowest bit and move on to the remaining ones. *)
    let low_bit_is_set = Nativeint.logand input 1n <> 0n in
    let successor = if low_bit_is_set then on_one else on_zero in
    run_automata (nbits - 1) successor (Nativeint.shift_right_logical input 1)
  end else
    accepting

(* The following function determines a length [e]
such that [x] is a repetition [BB...B] of a bit pattern [B] of length [e].
[e] ranges over 64, 32, 16, 8, 4, 2. The smaller [e] the better. *)

let logical_imm_length x =
  (* [halves_agree n] holds when the low [n] bits of [x] are equal to
     the next [n] bits, i.e. the low [2n] bits have the form [BB]. *)
  let halves_agree n =
    let low_mask = Nativeint.(sub (shift_left 1n n) 1n) in
    let lower = Nativeint.logand x low_mask in
    let upper = Nativeint.(logand (shift_right_logical x n) low_mask) in
    lower = upper in
  (* Start from the full width, 64, and keep halving the candidate
     length for as long as the two halves of the current low [e] bits
     coincide.  The first mismatch shows that the length cannot go
     below the current [e]; the search bottoms out at 2. *)
  let rec shrink e =
    if e > 2 && halves_agree (e / 2) then shrink (e / 2) else e in
  shrink 64

(* A valid logical immediate is
- neither [0] nor [-1];
- composed of a repetition [BBBBB] of a bit-pattern [B] of length [e]
- the low [e] bits of the number, that is, [B], match [0+1+0*] or [1+0+1*].
*)

let is_logical_immediate x =
  (* All-zeros and all-ones are rejected up front; any other value is
     accepted when its low [logical_imm_length x] bits are accepted by
     the automaton started in state 0. *)
  if x = 0n || x = -1n then false
  else run_automata (logical_imm_length x) 0 x
95 changes: 45 additions & 50 deletions asmcomp/arm64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -226,63 +226,55 @@ let name_for_int_operation = function
| Iasr -> "asr"
| _ -> assert false

(* Decompose an integer constant into four 16-bit shifted fragments.
Omit the fragments that are equal to "default" (16 zeros or 16 ones). *)

let decompose_int default n =
  (* Walk the four 16-bit fragments from bit position 0 up to 48,
     shifting the remaining value right by 16 at each step.  Fragments
     equal to [default] are dropped; the others are collected as
     [(fragment, position)] pairs, lowest position first. *)
  let rec collect acc remaining pos =
    if pos >= 64 then List.rev acc
    else begin
      let chunk = Nativeint.logand remaining 0xFFFFn in
      let acc = if chunk = default then acc else (chunk, pos) :: acc in
      collect acc (Nativeint.shift_right_logical remaining 16) (pos + 16)
    end
  in collect [] n 0

(* Load an integer constant into a register *)

(* Emit one [movk] instruction with destination register [dst],
   immediate [f] and left shift amount [p].  The argument pair has the
   same (fragment, position) shape as the elements of the lists built
   by [decompose_int]. *)
let emit_movk dst (f, p) =
` movk {emit_reg dst}, #{emit_nativeint f}, lsl #{emit_int p}\n`

let emit_intconst dst n =
let rec emit_pos first shift =
if shift < 0 then begin
if first then ` mov {emit_reg dst}, xzr\n`
end else begin
let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in
if s = 0n then emit_pos first (shift - 16) else begin
if first then
` movz {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`
else
` movk {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`;
emit_pos false (shift - 16)
end
end
and emit_neg first shift =
if shift < 0 then begin
if first then ` movn {emit_reg dst}, #0\n`
if is_logical_immediate n then
` orr {emit_reg dst}, xzr, #{emit_nativeint n}\n`
else begin
let dz = decompose_int 0x0000n n
and dn = decompose_int 0xFFFFn n in
if List.length dz <= List.length dn then begin
match dz with
| [] ->
` mov {emit_reg dst}, xzr\n`
| (f, p) :: l ->
` movz {emit_reg dst}, #{emit_nativeint f}, lsl #{emit_int p}\n`;
List.iter (emit_movk dst) l
end else begin
let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in
if s = 0xFFFFn then emit_neg first (shift - 16) else begin
if first then
` movn {emit_reg dst}, #{emit_nativeint (Nativeint.logxor s 0xFFFFn)}, lsl #{emit_int shift}\n`
else
` movk {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`;
emit_neg false (shift - 16)
end
match dn with
| [] ->
` movn {emit_reg dst}, #0\n`
| (f, p) :: l ->
let nf = Nativeint.logxor f 0xFFFFn in
` movn {emit_reg dst}, #{emit_nativeint nf}, lsl #{emit_int p}\n`;
List.iter (emit_movk dst) l
end
in
if n < 0n then emit_neg true 48 else emit_pos true 48
end

let num_instructions_for_intconst n =
let num_instructions = ref 0 in
let rec count_pos first shift =
if shift < 0 then begin
if first then incr num_instructions
end else begin
let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in
if s = 0n then count_pos first (shift - 16) else begin
incr num_instructions;
count_pos false (shift - 16)
end
end
and count_neg first shift =
if shift < 0 then begin
if first then incr num_instructions
end else begin
let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in
if s = 0xFFFFn then count_neg first (shift - 16) else begin
incr num_instructions;
count_neg false (shift - 16)
end
end
in
if n < 0n then count_neg true 48 else count_pos true 48;
!num_instructions
if is_logical_immediate n then 1 else begin
let dz = decompose_int 0x0000n n
and dn = decompose_int 0xFFFFn n in
max 1 (min (List.length dz) (List.length dn))
end

(* Recognize float constants appropriate for FMOV dst, #fpimm instruction:
"a normalized binary floating point encoding with 1 sign bit, 4
Expand Down Expand Up @@ -534,6 +526,7 @@ module BR = Branch_relaxation.Make (struct
| Lop (Ispecific (Ibswap 16)) -> 2
| Lop (Ispecific (Ibswap _)) -> 1
| Lop (Ispecific Imove32) -> 1
| Lop (Ispecific (Isignext _)) -> 1
| Lop (Iname_for_debugger _) -> 0
| Lreloadretaddr -> 0
| Lreturn -> epilogue_size ()
Expand Down Expand Up @@ -880,6 +873,8 @@ let emit_instr i =
| _ ->
assert false
end
| Lop(Ispecific(Isignext size)) ->
` sbfm {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #0, #{emit_int (size - 1)}\n`
| Lop (Iname_for_debugger _) -> ()
| Lreloadretaddr ->
()
Expand Down
49 changes: 9 additions & 40 deletions asmcomp/arm64/selection.ml
Original file line number Diff line number Diff line change
Expand Up @@ -34,47 +34,8 @@ let is_offset chunk n =
| Word_int | Word_val | Double | Double_u ->
n land 7 = 0 && n lsr 3 < 0x1000)

(* An automaton to recognize ( 0+1+0* | 1+0+1* )
0 1 0
/ \ / \ / \
\ / \ / \ /
-0--> [1] --1--> [2] --0--> [3]
/
[0]
\
-1--> [4] --0--> [5] --1--> [6]
/ \ / \ / \
\ / \ / \ /
1 0 1
The accepting states are 2, 3, 5 and 6. *)

(* Transition table of the automaton, indexed by state number.
   Each entry is a triple: whether the state is accepting, the state
   reached on a 0 bit, and the state reached on a 1 bit.
   State 7 is not accepting and both of its transitions lead back to 7,
   so an input that reaches it can never be accepted afterwards. *)
let auto_table = [| (* accepting?, next on 0, next on 1 *)
(* state 0 *) (false, 1, 4);
(* state 1 *) (false, 1, 2);
(* state 2 *) (true, 3, 2);
(* state 3 *) (true, 3, 7);
(* state 4 *) (false, 5, 4);
(* state 5 *) (true, 5, 6);
(* state 6 *) (true, 7, 6);
(* state 7 *) (false, 7, 7) (* error state *)
|]

(* [run_automata nbits state input] feeds [nbits] bits of the integer
   [input] to the automaton, least significant bit first, starting from
   [state].  After each step the input is shifted right arithmetically
   by one.  The result tells whether the final state is accepting. *)
let rec run_automata nbits state input =
  let (accepting, on_zero, on_one) = auto_table.(state) in
  if nbits > 0 then begin
    let successor = if input land 1 <> 0 then on_one else on_zero in
    run_automata (nbits - 1) successor (input asr 1)
  end else
    accepting

(* We are very conservative wrt what ARM64 supports: we don't support
repetitions of a 000111000 or 1110000111 pattern, just a single
pattern of this kind. *)

let is_logical_immediate n =
n <> 0 && n <> -1 && run_automata 64 0 n
Arch.is_logical_immediate (Nativeint.of_int n)

(* Signed immediates are simpler *)

Expand Down Expand Up @@ -199,6 +160,14 @@ method! select_operation op args dbg =
| _ ->
super#select_operation op args dbg
end
(* Recognize sign extension *)
| Casr ->
begin match args with
[Cop(Clsl, [k; Cconst_int (n, _)], _); Cconst_int (n', _)]
when n' = n && 0 < n && n < 64 ->
(Ispecific (Isignext (64 - n)), [k])
| _ -> super#select_operation op args dbg
end
(* Recognize floating-point negate and multiply *)
| Cnegf ->
begin match args with
Expand Down

0 comments on commit f1ce133

Please sign in to comment.