Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved ARM64 code generation #9937

Merged
merged 4 commits into from
Nov 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ Working version

### Code generation and optimizations:

- #9937: improvements in ARM64 code generation (constants, sign extensions)
(Xavier Leroy, review by Stephen Dolan)

### Standard library:

### Other libraries:
Expand Down Expand Up @@ -38,6 +41,7 @@ Working version
- #10005: Try expanding aliases in Ctype.nondep_type_rec
(Stephen Dolan, review by Gabriel Scherer, Leo White and Xavier Leroy)


OCaml 4.12.0
------------

Expand Down
72 changes: 72 additions & 0 deletions asmcomp/arm64/arch.ml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ type specific_operation =
| Isqrtf (* floating-point square root *)
| Ibswap of int (* endianness conversion *)
| Imove32 (* 32-bit integer move *)
| Isignext of int (* sign extension *)

and arith_operation =
Ishiftadd
Expand Down Expand Up @@ -169,3 +170,74 @@ let print_specific_operation printreg op ppf arg =
| Imove32 ->
fprintf ppf "move32 %a"
printreg arg.(0)
| Isignext n ->
fprintf ppf "signext%d %a"
n printreg arg.(0)

(* Recognition of logical immediate arguments *)

(* An automaton to recognize ( 0+1+0* | 1+0+1* )

               0          1          0
              / \        / \        / \
              \ /        \ /        \ /
        -0--> [1] --1--> [2] --0--> [3]
       /
     [0]
       \
        -1--> [4] --0--> [5] --1--> [6]
              / \        / \        / \
              \ /        \ /        \ /
               1          0          1

The accepting states are 2, 3, 5 and 6. *)

(* Transition table of the automaton, indexed by state number.
   Each entry tells whether the state is accepting and gives the
   successor state on reading a 0 bit and on reading a 1 bit.
   State 7 is not accepting and loops to itself on both inputs. *)
let auto_table = [| (* accepting?, next on 0, next on 1 *)
(* state 0 *) (false, 1, 4);
(* state 1 *) (false, 1, 2);
(* state 2 *) (true, 3, 2);
(* state 3 *) (true, 3, 7);
(* state 4 *) (false, 5, 4);
(* state 5 *) (true, 5, 6);
(* state 6 *) (true, 7, 6);
(* state 7 *) (false, 7, 7) (* error state *)
|]

(* [run_automata nbits state input] feeds the [nbits] low-order bits of
   [input], least significant bit first, to the automaton described by
   [auto_table], starting from [state].  The result tells whether the
   state reached once those bits are consumed is an accepting one. *)
let rec run_automata nbits state input =
  match auto_table.(state) with
  | (accepting, _, _) when nbits <= 0 -> accepting
  | (_, on_zero, on_one) ->
      let lowest_bit = Nativeint.logand input 1n in
      let successor = if lowest_bit = 0n then on_zero else on_one in
      let remaining = Nativeint.shift_right_logical input 1 in
      run_automata (nbits - 1) successor remaining

(* [logical_imm_length x] returns a length [e], one of 64, 32, 16, 8, 4
   or 2, such that [x] is a repetition [BB...B] of a bit pattern [B] of
   length [e].  The smallest such [e] that can be reached by repeated
   halving is returned. *)

let logical_imm_length x =
  (* [halves_agree n] holds when bits [0 .. n-1] of [x] are equal to
     bits [n .. 2n-1], i.e. the low [2n] bits have the form [BB]. *)
  let halves_agree n =
    let mask = Nativeint.sub (Nativeint.shift_left 1n n) 1n in
    let lower = Nativeint.logand x mask in
    let upper = Nativeint.logand (Nativeint.shift_right_logical x n) mask in
    lower = upper in
  (* Try candidate half-lengths 32, 16, 8, 4, 2 in that order.  As soon
     as the two halves differ at half-length [n], the pattern length is
     [2n]; if they agree all the way down, the pattern length is 2. *)
  let rec shrink n =
    if n >= 2 && halves_agree n then shrink (n / 2) else 2 * n in
  shrink 32

(* [is_logical_immediate x] accepts [x] when all of the following hold:
   - [x] is neither [0] nor [-1];
   - [x] is a repetition [BB...B] of a bit pattern [B] of length [e],
     as computed by [logical_imm_length];
   - the low [e] bits of [x], that is, [B], match [0+1+0*] or [1+0+1*],
     as checked by [run_automata] from state 0.
*)

let is_logical_immediate x =
  if x = 0n || x = -1n then false
  else run_automata (logical_imm_length x) 0 x
95 changes: 45 additions & 50 deletions asmcomp/arm64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -226,63 +226,55 @@ let name_for_int_operation = function
| Iasr -> "asr"
| _ -> assert false

(* [decompose_int default n] splits [n] into its four 16-bit fragments
   and returns them as [(fragment, bit position)] pairs, lowest position
   first.  Fragments equal to [default] (in practice 16 zeros or
   16 ones) are left out of the result. *)

let decompose_int default n =
  let keep_fragment pos acc =
    let frag = Nativeint.(logand (shift_right_logical n pos) 0xFFFFn) in
    if frag = default then acc else (frag, pos) :: acc in
  (* Folding from the right keeps the pairs in increasing position order. *)
  List.fold_right keep_fragment [0; 16; 32; 48] []

(* Load an integer constant into a register *)

(* Emit one [movk] instruction whose destination is [dst], whose
   immediate is the fragment [f] and whose left-shift amount is [p].
   The pair [(f, p)] has the shape of the elements returned by
   [decompose_int]. *)
let emit_movk dst (f, p) =
` movk {emit_reg dst}, #{emit_nativeint f}, lsl #{emit_int p}\n`

let emit_intconst dst n =
let rec emit_pos first shift =
if shift < 0 then begin
if first then ` mov {emit_reg dst}, xzr\n`
end else begin
let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in
if s = 0n then emit_pos first (shift - 16) else begin
if first then
` movz {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`
else
` movk {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`;
emit_pos false (shift - 16)
end
end
and emit_neg first shift =
if shift < 0 then begin
if first then ` movn {emit_reg dst}, #0\n`
if is_logical_immediate n then
` orr {emit_reg dst}, xzr, #{emit_nativeint n}\n`
else begin
let dz = decompose_int 0x0000n n
and dn = decompose_int 0xFFFFn n in
if List.length dz <= List.length dn then begin
match dz with
| [] ->
` mov {emit_reg dst}, xzr\n`
stedolan marked this conversation as resolved.
Show resolved Hide resolved
| (f, p) :: l ->
` movz {emit_reg dst}, #{emit_nativeint f}, lsl #{emit_int p}\n`;
List.iter (emit_movk dst) l
end else begin
let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in
if s = 0xFFFFn then emit_neg first (shift - 16) else begin
if first then
` movn {emit_reg dst}, #{emit_nativeint (Nativeint.logxor s 0xFFFFn)}, lsl #{emit_int shift}\n`
else
` movk {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`;
emit_neg false (shift - 16)
end
match dn with
| [] ->
` movn {emit_reg dst}, #0\n`
stedolan marked this conversation as resolved.
Show resolved Hide resolved
| (f, p) :: l ->
let nf = Nativeint.logxor f 0xFFFFn in
` movn {emit_reg dst}, #{emit_nativeint nf}, lsl #{emit_int p}\n`;
List.iter (emit_movk dst) l
end
in
if n < 0n then emit_neg true 48 else emit_pos true 48
end

let num_instructions_for_intconst n =
let num_instructions = ref 0 in
let rec count_pos first shift =
if shift < 0 then begin
if first then incr num_instructions
end else begin
let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in
if s = 0n then count_pos first (shift - 16) else begin
incr num_instructions;
count_pos false (shift - 16)
end
end
and count_neg first shift =
if shift < 0 then begin
if first then incr num_instructions
end else begin
let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in
if s = 0xFFFFn then count_neg first (shift - 16) else begin
incr num_instructions;
count_neg false (shift - 16)
end
end
in
if n < 0n then count_neg true 48 else count_pos true 48;
!num_instructions
if is_logical_immediate n then 1 else begin
let dz = decompose_int 0x0000n n
and dn = decompose_int 0xFFFFn n in
max 1 (min (List.length dz) (List.length dn))
end

(* Recognize float constants appropriate for FMOV dst, #fpimm instruction:
"a normalized binary floating point encoding with 1 sign bit, 4
Expand Down Expand Up @@ -534,6 +526,7 @@ module BR = Branch_relaxation.Make (struct
| Lop (Ispecific (Ibswap 16)) -> 2
| Lop (Ispecific (Ibswap _)) -> 1
| Lop (Ispecific Imove32) -> 1
| Lop (Ispecific (Isignext _)) -> 1
| Lop (Iname_for_debugger _) -> 0
| Lreloadretaddr -> 0
| Lreturn -> epilogue_size ()
Expand Down Expand Up @@ -880,6 +873,8 @@ let emit_instr i =
| _ ->
assert false
end
| Lop(Ispecific(Isignext size)) ->
` sbfm {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #0, #{emit_int (size - 1)}\n`
| Lop (Iname_for_debugger _) -> ()
| Lreloadretaddr ->
()
Expand Down
49 changes: 9 additions & 40 deletions asmcomp/arm64/selection.ml
Original file line number Diff line number Diff line change
Expand Up @@ -34,47 +34,8 @@ let is_offset chunk n =
| Word_int | Word_val | Double | Double_u ->
n land 7 = 0 && n lsr 3 < 0x1000)

(* An automaton to recognize ( 0+1+0* | 1+0+1* )

               0          1          0
              / \        / \        / \
              \ /        \ /        \ /
        -0--> [1] --1--> [2] --0--> [3]
       /
     [0]
       \
        -1--> [4] --0--> [5] --1--> [6]
              / \        / \        / \
              \ /        \ /        \ /
               1          0          1

The accepting states are 2, 3, 5 and 6. *)

(* Transition table of the automaton, indexed by state number.
   Each entry tells whether the state is accepting and gives the
   successor state on reading a 0 bit and on reading a 1 bit.
   State 7 is not accepting and loops to itself on both inputs. *)
let auto_table = [| (* accepting?, next on 0, next on 1 *)
(* state 0 *) (false, 1, 4);
(* state 1 *) (false, 1, 2);
(* state 2 *) (true, 3, 2);
(* state 3 *) (true, 3, 7);
(* state 4 *) (false, 5, 4);
(* state 5 *) (true, 5, 6);
(* state 6 *) (true, 7, 6);
(* state 7 *) (false, 7, 7) (* error state *)
|]

(* [run_automata nbits state input] feeds the [nbits] low-order bits of
   the integer [input], least significant bit first, to the automaton
   described by [auto_table], starting from [state].  The result tells
   whether the state reached once those bits are consumed is accepting. *)
let rec run_automata nbits state input =
  match auto_table.(state) with
  | (accepting, _, _) when nbits <= 0 -> accepting
  | (_, on_zero, on_one) ->
      let successor = if input land 1 = 0 then on_zero else on_one in
      run_automata (nbits - 1) successor (input asr 1)

(* We are very conservative wrt what ARM64 supports: we don't support
repetitions of a 000111000 or 1110000111 pattern, just a single
pattern of this kind. *)

let is_logical_immediate n =
n <> 0 && n <> -1 && run_automata 64 0 n
Arch.is_logical_immediate (Nativeint.of_int n)

(* Signed immediates are simpler *)

Expand Down Expand Up @@ -199,6 +160,14 @@ method! select_operation op args dbg =
| _ ->
super#select_operation op args dbg
end
(* Recognize sign extension *)
| Casr ->
begin match args with
[Cop(Clsl, [k; Cconst_int (n, _)], _); Cconst_int (n', _)]
when n' = n && 0 < n && n < 64 ->
(Ispecific (Isignext (64 - n)), [k])
| _ -> super#select_operation op args dbg
end
(* Recognize floating-point negate and multiply *)
| Cnegf ->
begin match args with
Expand Down