Skip to content

Commit

Permalink
flambda-backend: Backport ocaml/ocaml#10595 from upstream/trunk (#471)
Browse files Browse the repository at this point in the history
  • Loading branch information
Gbury committed Feb 8, 2022
1 parent 1010539 commit 58c72d5
Show file tree
Hide file tree
Showing 24 changed files with 336 additions and 137 deletions.
12 changes: 12 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@

OCaml 4.14, maintenance version
-------------------------------

### Code generation and optimizations:

- #10595: Tail calls with up to 64 arguments are guaranteed to be compiled
as tail calls. To this end, memory locations in the domain state
are used for passing arguments that do not fit in registers.
(Xavier Leroy, review by Vincent Laviron)


OCaml 4.12, maintenance version
-------------------------------

Expand Down
15 changes: 10 additions & 5 deletions asmcomp/amd64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ let slot_offset loc cl =
then !stack_offset + n * 8
else !stack_offset + (num_stack_slots.(0) + n) * 8
| Outgoing n -> n
| Domainstate _ -> assert false (* not a stack slot *)

let emit_stack_offset n =
if n < 0
Expand Down Expand Up @@ -200,14 +201,18 @@ let emit_Llabel fallthrough lbl =

(* Output a pseudo-register *)

let x86_data_type_for_stack_slot = function
| Float -> REAL8
| _ -> QWORD

let reg = function
| { loc = Reg.Reg r } -> register_name r
| { loc = Stack s; typ = Float } as r ->
let ofs = slot_offset s (register_class r) in
mem64 REAL8 ofs RSP
| { loc = Stack s } as r ->
| { loc = Stack (Domainstate n); typ = ty } ->
let ofs = n + Domainstate.(idx_of_field Domain_extra_params) * 8 in
mem64 (x86_data_type_for_stack_slot ty) ofs R14
| { loc = Stack s; typ = ty } as r ->
let ofs = slot_offset s (register_class r) in
mem64 QWORD ofs RSP
mem64 (x86_data_type_for_stack_slot ty) ofs RSP
| { loc = Unknown } ->
assert false

Expand Down
38 changes: 25 additions & 13 deletions asmcomp/amd64/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,6 @@ let win64 = Arch.win64
the GC regs block).
*)

let max_arguments_for_tailcalls = 10

let int_reg_name =
match Config.ccomp_type with
| "msvc" ->
Expand Down Expand Up @@ -157,12 +155,15 @@ let word_addressed = false

(* Calling conventions *)

let calling_conventions first_int last_int first_float last_float make_stack
let size_domainstate_args = 64 * size_int

let calling_conventions first_int last_int first_float last_float
make_stack first_stack
arg =
let loc = Array.make (Array.length arg) Reg.dummy in
let int = ref first_int in
let float = ref first_float in
let ofs = ref 0 in
let ofs = ref first_stack in
for i = 0 to Array.length arg - 1 do
match arg.(i) with
| Val | Int | Addr as ty ->
Expand All @@ -183,21 +184,32 @@ let calling_conventions first_int last_int first_float last_float make_stack
ofs := !ofs + size_float
end
done;
(loc, Misc.align !ofs 16) (* keep stack 16-aligned *)

let incoming ofs = Incoming ofs
let outgoing ofs = Outgoing ofs
(loc, Misc.align (max 0 !ofs) 16) (* keep stack 16-aligned *)

let incoming ofs =
if ofs >= 0
then Incoming ofs
else Domainstate (ofs + size_domainstate_args)
let outgoing ofs =
if ofs >= 0
then Outgoing ofs
else Domainstate (ofs + size_domainstate_args)
let not_supported _ofs = fatal_error "Proc.loc_results: cannot call"

let loc_arguments arg =
calling_conventions 0 9 100 109 outgoing arg
calling_conventions 0 9 100 109 outgoing (- size_domainstate_args) arg
let loc_parameters arg =
let (loc, _ofs) =
calling_conventions 0 9 100 109 incoming arg
calling_conventions 0 9 100 109 incoming (- size_domainstate_args) arg
in
loc

let loc_results res =
let (loc, _ofs) = calling_conventions 0 0 100 100 not_supported res in loc
let (loc, _ofs) =
calling_conventions 0 0 100 100 not_supported 0 res
in loc

let max_arguments_for_tailcalls = 10 (* in regs *) + 64 (* in domain state *)

(* C calling conventions under Unix:
first integer args in rdi, rsi, rdx, rcx, r8, r9
Expand All @@ -213,10 +225,10 @@ let loc_results res =
Return value in rax or xmm0. *)

let loc_external_results res =
let (loc, _ofs) = calling_conventions 0 0 100 100 not_supported res in loc
let (loc, _ofs) = calling_conventions 0 0 100 100 not_supported 0 res in loc

let unix_loc_external_arguments arg =
calling_conventions 2 7 100 107 outgoing arg
calling_conventions 2 7 100 107 outgoing 0 arg

let win64_int_external_arguments =
[| 5 (*rcx*); 4 (*rdx*); 6 (*r8*); 7 (*r9*) |]
Expand Down
7 changes: 6 additions & 1 deletion asmcomp/arm/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,18 @@ let slot_offset loc cl =
| Outgoing n ->
assert (n >= 0);
n
| Domainstate _ -> assert false (* not a stack slot *)

(* Output a stack reference *)

let emit_stack r =
match r.loc with
| Stack (Domainstate n) ->
let ofs =n + Domainstate.(idx_of_field Domain_extra_params) * 8 in
`[domain_state_ptr, #{emit_int ofs}]`
| Stack s ->
let ofs = slot_offset s (register_class r) in `[sp, #{emit_int ofs}]`
let ofs = slot_offset s (register_class r) in
`[sp, #{emit_int ofs}]`
| _ -> fatal_error "Emit_arm.emit_stack"

(* Output an addressing mode *)
Expand Down
36 changes: 23 additions & 13 deletions asmcomp/arm/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ let stack_slot slot ty =

(* Calling conventions *)

let size_domainstate_args = 64 * size_int

let loc_int last_int make_stack int ofs =
if !int <= last_int then begin
let l = phys_reg !int in
Expand Down Expand Up @@ -149,41 +151,49 @@ let loc_int_pair last_int make_stack int ofs =
[| stack_lower; stack_upper |]
end

let calling_conventions first_int last_int first_float last_float make_stack
arg =
let calling_conventions first_int last_int first_float last_float
make_stack first_stack arg =
let loc = Array.make (Array.length arg) Reg.dummy in
let int = ref first_int in
let float = ref first_float in
let ofs = ref 0 in
let ofs = ref first_stack in
for i = 0 to Array.length arg - 1 do
match arg.(i) with
| Val | Int | Addr ->
loc.(i) <- loc_int last_int make_stack int ofs
| Float ->
loc.(i) <- loc_float last_float make_stack float ofs
done;
(loc, Misc.align !ofs 8) (* keep stack 8-aligned *)

let incoming ofs = Incoming ofs
let outgoing ofs = Outgoing ofs
(loc, Misc.align (max 0 !ofs) 8) (* keep stack 8-aligned *)

let incoming ofs =
if ofs >= 0
then Incoming ofs
else Domainstate (ofs + size_domainstate_args)
let outgoing ofs =
if ofs >= 0
then Outgoing ofs
else Domainstate (ofs + size_domainstate_args)
let not_supported _ofs = fatal_error "Proc.loc_results: cannot call"

(* OCaml calling convention:
first integer args in r0...r7
first float args in d0...d15 (EABI+VFP)
remaining args on stack.
remaining args in domain state area, then on stack.
Return values in r0...r7 or d0...d15. *)

let max_arguments_for_tailcalls = 8
let max_arguments_for_tailcalls = 8 (* in regs *) + 64 (* in domain state *)

let loc_arguments arg =
calling_conventions 0 7 100 115 outgoing arg
calling_conventions 0 7 100 115 outgoing (- size_domainstate_args) arg

let loc_parameters arg =
let (loc, _) = calling_conventions 0 7 100 115 incoming arg in loc
let (loc, _) =
calling_conventions 0 7 100 115 incoming (- size_domainstate_args) arg
in loc

let loc_results res =
let (loc, _) = calling_conventions 0 7 100 115 not_supported res in loc
let (loc, _) = calling_conventions 0 7 100 115 not_supported 0 res in loc

(* C calling convention:
first integer args in r0...r3
Expand Down Expand Up @@ -218,7 +228,7 @@ let loc_external_arguments ty_args =
external_calling_conventions 0 3 100 107 outgoing ty_args

let loc_external_results res =
let (loc, _) = calling_conventions 0 1 100 100 not_supported res
let (loc, _) = calling_conventions 0 1 100 100 not_supported 0 res
in loc

let loc_exn_bucket = phys_reg 0
Expand Down
7 changes: 6 additions & 1 deletion asmcomp/arm64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,18 @@ let slot_offset loc cl =
| Outgoing n ->
assert (n >= 0);
n
| Domainstate _ -> assert false (* not a satck slot *)

(* Output a stack reference *)

let emit_stack r =
match r.loc with
| Stack (Domainstate n) ->
let ofs = n + Domainstate.(idx_of_field Domain_extra_params) * 8 in
`[{emit_reg reg_domain_state_ptr}, #{emit_int ofs}]`
| Stack s ->
let ofs = slot_offset s (register_class r) in `[sp, #{emit_int ofs}]`
let ofs = slot_offset s (register_class r) in
`[sp, #{emit_int ofs}]`
| _ -> fatal_error "Emit.emit_stack"

(* Output an addressing mode *)
Expand Down
35 changes: 23 additions & 12 deletions asmcomp/arm64/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ let stack_slot slot ty =

(* Calling conventions *)

let size_domainstate_args = 64 * size_int

let loc_int last_int make_stack int ofs =
if !int <= last_int then begin
let l = phys_reg !int in
Expand Down Expand Up @@ -138,43 +140,52 @@ let loc_int32 last_int make_stack int ofs =
end

let calling_conventions
first_int last_int first_float last_float make_stack arg =
first_int last_int first_float last_float make_stack first_stack arg =
let loc = Array.make (Array.length arg) Reg.dummy in
let int = ref first_int in
let float = ref first_float in
let ofs = ref 0 in
let ofs = ref first_stack in
for i = 0 to Array.length arg - 1 do
match arg.(i) with
| Val | Int | Addr ->
loc.(i) <- loc_int last_int make_stack int ofs
| Float ->
loc.(i) <- loc_float last_float make_stack float ofs
done;
(loc, Misc.align !ofs 16) (* keep stack 16-aligned *)

let incoming ofs = Incoming ofs
let outgoing ofs = Outgoing ofs
(loc, Misc.align (max 0 !ofs) 16) (* keep stack 16-aligned *)

let incoming ofs =
if ofs >= 0
then Incoming ofs
else Domainstate (ofs + size_domainstate_args)
let outgoing ofs =
if ofs >= 0
then Outgoing ofs
else Domainstate (ofs + size_domainstate_args)
let not_supported _ofs = fatal_error "Proc.loc_results: cannot call"

(* OCaml calling convention:
first integer args in r0...r15
first float args in d0...d15
remaining args on stack.
remaining args in domain area, then on stack.
Return values in r0...r15 or d0...d15. *)

let max_arguments_for_tailcalls = 16
let max_arguments_for_tailcalls = 16 (* in regs *) + 64 (* in domain state *)

let last_int_register = if macosx then 7 else 15

let loc_arguments arg =
calling_conventions 0 last_int_register 100 115 outgoing arg
calling_conventions 0 last_int_register 100 115
outgoing (- size_domainstate_args) arg
let loc_parameters arg =
let (loc, _) =
calling_conventions 0 last_int_register 100 115 incoming arg
calling_conventions 0 last_int_register 100 115
incoming (- size_domainstate_args) arg
in
loc
let loc_results res =
let (loc, _) =
calling_conventions 0 last_int_register 100 115 not_supported res
calling_conventions 0 last_int_register 100 115 not_supported 0 res
in
loc

Expand Down Expand Up @@ -208,7 +219,7 @@ let loc_external_arguments ty_args =
external_calling_conventions 0 7 100 107 outgoing ty_args

let loc_external_results res =
let (loc, _) = calling_conventions 0 1 100 100 not_supported res in loc
let (loc, _) = calling_conventions 0 1 100 100 not_supported 0 res in loc

let loc_exn_bucket = phys_reg 0

Expand Down
23 changes: 16 additions & 7 deletions asmcomp/i386/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ let slot_offset loc cl =
| Outgoing n ->
assert (n >= 0);
n
| Domainstate _ -> assert false (* not a stack slot *)

(* Record symbols used and defined - at the end generate extern for those
used but not defined *)
Expand Down Expand Up @@ -146,16 +147,24 @@ let domain_field f r =
let load_domain_state r =
I.mov (sym32 "Caml_state") r

let x86_data_type_for_stack_slot = function
| Float -> REAL8
| _ -> DWORD

(* The Domainstate locations are mapped to a global array "caml_extra_params"
defined in runtime/i386*. We cannot access the domain state here
because in the i386 port there is no register that always point to the
domain state. A global array works because i386 does not
support multiple domains. *)

let reg = function
| { loc = Reg r } -> register_name r
| { loc = Stack(Incoming n | Outgoing n) } when n < 0 ->
sym32 "caml_extra_params" ~ofs:(n + 64)
| { loc = Stack s; typ = Float } as r ->
let ofs = slot_offset s (register_class r) in
mem32 REAL8 ofs RSP
| { loc = Stack s } as r ->
| { loc = Stack(Domainstate n); typ = ty } ->
mem_sym (x86_data_type_for_stack_slot ty)
(emit_symbol "caml_extra_params") ~ofs:n
| { loc = Stack s; typ = ty } as r ->
let ofs = slot_offset s (register_class r) in
mem32 DWORD ofs RSP
mem32 (x86_data_type_for_stack_slot ty) ofs RSP
| { loc = Unknown } ->
fatal_error "Emit_i386.reg"

Expand Down

0 comments on commit 58c72d5

Please sign in to comment.