Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support more arguments to tail calls by passing them through the domain state #10595

Merged
merged 8 commits into from
Sep 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ Working version
and long register allocation times.
(Xavier Leroy, report by Edwin Török, review by Nicolás Ojeda Bär)

- #10595: Tail calls with up to 64 arguments are guaranteed to be compiled
as tail calls. To this end, memory locations in the domain state
are used for passing arguments that do not fit in registers.
(Xavier Leroy, review by Vincent Laviron)

### Standard library:

* #7812, #10475: `Filename.chop_suffix name suff` now checks that `suff`
Expand Down
18 changes: 12 additions & 6 deletions asmcomp/amd64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,13 @@ let frame_size env = (* includes return address *)

let slot_offset env loc cl =
match loc with
| Incoming n -> (frame_size env) + n
| Incoming n -> frame_size env + n
| Local n ->
if cl = 0
then env.stack_offset + n * 8
else env.stack_offset + (env.f.fun_num_stack_slots.(0) + n) * 8
| Outgoing n -> n
| Domainstate _ -> assert false (* not a stack slot *)

(* Symbols *)

Expand Down Expand Up @@ -171,14 +172,18 @@ let emit_Llabel env fallthrough lbl =

(* Output a pseudo-register *)

let x86_data_type_for_stack_slot = function
| Float -> REAL8
| _ -> QWORD

let reg env = function
| { loc = Reg.Reg r } -> register_name r
| { loc = Stack s; typ = Float } as r ->
let ofs = slot_offset env s (register_class r) in
mem64 REAL8 ofs RSP
| { loc = Stack s } as r ->
| { loc = Stack(Domainstate n); typ = ty } ->
let ofs = n + Domainstate.(idx_of_field Domain_extra_params) * 8 in
mem64 (x86_data_type_for_stack_slot ty) ofs R14
| { loc = Stack s; typ = ty } as r ->
let ofs = slot_offset env s (register_class r) in
mem64 QWORD ofs RSP
mem64 (x86_data_type_for_stack_slot ty) ofs RSP
| { loc = Unknown } ->
assert false

Expand All @@ -188,6 +193,7 @@ let reg64 = function

let arg env i n = reg env i.arg.(n)
let res env i n = reg env i.res.(n)

(* Output a reference to the lower 8, 16 or 32 bits of a register *)

let reg_low_8_name = Array.map (fun r -> Reg8L r) int_reg_name
Expand Down
39 changes: 24 additions & 15 deletions asmcomp/amd64/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,6 @@ let win64 = Arch.win64
stub saves them into the GC regs block).
*)

let max_arguments_for_tailcalls = 10

let int_reg_name =
match Config.ccomp_type with
| "msvc" ->
Expand Down Expand Up @@ -157,12 +155,15 @@ let word_addressed = false

(* Calling conventions *)

let calling_conventions first_int last_int first_float last_float make_stack
let size_domainstate_args = 64 * size_int

let calling_conventions first_int last_int first_float last_float
make_stack first_stack
arg =
let loc = Array.make (Array.length arg) Reg.dummy in
let int = ref first_int in
let float = ref first_float in
let ofs = ref 0 in
let ofs = ref first_stack in
for i = 0 to Array.length arg - 1 do
match arg.(i) with
| Val | Int | Addr as ty ->
Expand All @@ -183,21 +184,29 @@ let calling_conventions first_int last_int first_float last_float make_stack
ofs := !ofs + size_float
end
done;
(loc, Misc.align !ofs 16) (* keep stack 16-aligned *)

let incoming ofs = Incoming ofs
let outgoing ofs = Outgoing ofs
(loc, Misc.align (max 0 !ofs) 16) (* keep stack 16-aligned *)

let incoming ofs =
if ofs >= 0
then Incoming ofs
else Domainstate (ofs + size_domainstate_args)
let outgoing ofs =
if ofs >= 0
then Outgoing ofs
else Domainstate (ofs + size_domainstate_args)
let not_supported _ofs = fatal_error "Proc.loc_results: cannot call"

let loc_arguments arg =
calling_conventions 0 9 100 109 outgoing arg
calling_conventions 0 9 100 109 outgoing (- size_domainstate_args) arg
let loc_parameters arg =
let (loc, _ofs) =
calling_conventions 0 9 100 109 incoming arg
in
loc
calling_conventions 0 9 100 109 incoming (- size_domainstate_args) arg
in loc
let loc_results res =
let (loc, _ofs) = calling_conventions 0 0 100 100 not_supported res in loc
let (loc, _ofs) = calling_conventions 0 0 100 100 not_supported 0 res
in loc

let max_arguments_for_tailcalls = 10 (* in regs *) + 64 (* in domain state *)

(* C calling conventions under Unix:
first integer args in rdi, rsi, rdx, rcx, r8, r9
Expand All @@ -213,10 +222,10 @@ let loc_results res =
Return value in rax or xmm0. *)

let loc_external_results res =
let (loc, _ofs) = calling_conventions 0 0 100 100 not_supported res in loc
let (loc, _ofs) = calling_conventions 0 0 100 100 not_supported 0 res in loc

let unix_loc_external_arguments arg =
calling_conventions 2 7 100 107 outgoing arg
calling_conventions 2 7 100 107 outgoing 0 arg

let win64_int_external_arguments =
[| 5 (*rcx*); 4 (*rdx*); 6 (*r8*); 7 (*r9*) |]
Expand Down
7 changes: 6 additions & 1 deletion asmcomp/arm/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,18 @@ let slot_offset env loc cl =
| Outgoing n ->
assert (n >= 0);
n
| Domainstate _ -> assert false (* not a stack slot *)

(* Output a stack reference *)

let emit_stack env r =
match r.loc with
| Stack (Domainstate n) ->
let ofs = n + Domainstate.(idx_of_field Domain_extra_params) * 8 in
lthls marked this conversation as resolved.
Show resolved Hide resolved
`[domain_state_ptr, #{emit_int ofs}]`
| Stack s ->
let ofs = slot_offset env s (register_class r) in `[sp, #{emit_int ofs}]`
let ofs = slot_offset env s (register_class r) in
`[sp, #{emit_int ofs}]`
| _ -> fatal_error "Emit_arm.emit_stack"

(* Output an addressing mode *)
Expand Down
36 changes: 23 additions & 13 deletions asmcomp/arm/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ let stack_slot slot ty =

(* Calling conventions *)

let size_domainstate_args = 64 * size_int

let loc_int last_int make_stack int ofs =
if !int <= last_int then begin
let l = phys_reg !int in
Expand Down Expand Up @@ -149,41 +151,49 @@ let loc_int_pair last_int make_stack int ofs =
[| stack_lower; stack_upper |]
end

let calling_conventions first_int last_int first_float last_float make_stack
arg =
let calling_conventions first_int last_int first_float last_float
make_stack first_stack arg =
let loc = Array.make (Array.length arg) Reg.dummy in
let int = ref first_int in
let float = ref first_float in
let ofs = ref 0 in
let ofs = ref first_stack in
for i = 0 to Array.length arg - 1 do
match arg.(i) with
| Val | Int | Addr ->
loc.(i) <- loc_int last_int make_stack int ofs
| Float ->
loc.(i) <- loc_float last_float make_stack float ofs
done;
(loc, Misc.align !ofs 8) (* keep stack 8-aligned *)

let incoming ofs = Incoming ofs
let outgoing ofs = Outgoing ofs
(loc, Misc.align (max 0 !ofs) 8) (* keep stack 8-aligned *)

let incoming ofs =
if ofs >= 0
then Incoming ofs
else Domainstate (ofs + size_domainstate_args)
let outgoing ofs =
if ofs >= 0
then Outgoing ofs
else Domainstate (ofs + size_domainstate_args)
let not_supported _ofs = fatal_error "Proc.loc_results: cannot call"

(* OCaml calling convention:
first integer args in r0...r7
first float args in d0...d15 (EABI+VFP)
remaining args on stack.
remaining args in domain state area, then on stack.
Return values in r0...r7 or d0...d15. *)

let max_arguments_for_tailcalls = 8
let max_arguments_for_tailcalls = 8 (* in regs *) + 64 (* in domain state *)

let loc_arguments arg =
calling_conventions 0 7 100 115 outgoing arg
calling_conventions 0 7 100 115 outgoing (- size_domainstate_args) arg

let loc_parameters arg =
let (loc, _) = calling_conventions 0 7 100 115 incoming arg in loc
let (loc, _) =
calling_conventions 0 7 100 115 incoming (- size_domainstate_args) arg
in loc

let loc_results res =
let (loc, _) = calling_conventions 0 7 100 115 not_supported res in loc
let (loc, _) = calling_conventions 0 7 100 115 not_supported 0 res in loc

(* C calling convention:
first integer args in r0...r3
Expand Down Expand Up @@ -218,7 +228,7 @@ let loc_external_arguments ty_args =
external_calling_conventions 0 3 100 107 outgoing ty_args

let loc_external_results res =
let (loc, _) = calling_conventions 0 1 100 100 not_supported res
let (loc, _) = calling_conventions 0 1 100 100 not_supported 0 res
in loc

let loc_exn_bucket = phys_reg 0
Expand Down
7 changes: 6 additions & 1 deletion asmcomp/arm64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,18 @@ let slot_offset env loc cl =
| Outgoing n ->
assert (n >= 0);
n
| Domainstate _ -> assert false (* not a stack slot *)

(* Output a stack reference *)

let emit_stack env r =
match r.loc with
| Stack (Domainstate n) ->
let ofs = n + Domainstate.(idx_of_field Domain_extra_params) * 8 in
`[{emit_reg reg_domain_state_ptr}, #{emit_int ofs}]`
| Stack s ->
let ofs = slot_offset env s (register_class r) in `[sp, #{emit_int ofs}]`
let ofs = slot_offset env s (register_class r) in
`[sp, #{emit_int ofs}]`
| _ -> fatal_error "Emit.emit_stack"

(* Output an addressing mode *)
Expand Down
35 changes: 23 additions & 12 deletions asmcomp/arm64/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ let stack_slot slot ty =

(* Calling conventions *)

let size_domainstate_args = 64 * size_int

let loc_int last_int make_stack int ofs =
if !int <= last_int then begin
let l = phys_reg !int in
Expand Down Expand Up @@ -138,43 +140,52 @@ let loc_int32 last_int make_stack int ofs =
end

let calling_conventions
first_int last_int first_float last_float make_stack arg =
first_int last_int first_float last_float make_stack first_stack arg =
let loc = Array.make (Array.length arg) Reg.dummy in
let int = ref first_int in
let float = ref first_float in
let ofs = ref 0 in
let ofs = ref first_stack in
for i = 0 to Array.length arg - 1 do
match arg.(i) with
| Val | Int | Addr ->
loc.(i) <- loc_int last_int make_stack int ofs
| Float ->
loc.(i) <- loc_float last_float make_stack float ofs
done;
(loc, Misc.align !ofs 16) (* keep stack 16-aligned *)

let incoming ofs = Incoming ofs
let outgoing ofs = Outgoing ofs
(loc, Misc.align (max 0 !ofs) 16) (* keep stack 16-aligned *)

let incoming ofs =
if ofs >= 0
then Incoming ofs
else Domainstate (ofs + size_domainstate_args)
let outgoing ofs =
if ofs >= 0
then Outgoing ofs
else Domainstate (ofs + size_domainstate_args)
let not_supported _ofs = fatal_error "Proc.loc_results: cannot call"

(* OCaml calling convention:
first integer args in r0...r15
first float args in d0...d15
remaining args on stack.
remaining args in domain state area, then on stack.
Return values in r0...r15 or d0...d15. *)

let max_arguments_for_tailcalls = 16
let max_arguments_for_tailcalls = 16 (* in regs *) + 64 (* in domain state *)

let last_int_register = if macosx then 7 else 15

let loc_arguments arg =
calling_conventions 0 last_int_register 100 115 outgoing arg
calling_conventions 0 last_int_register 100 115
outgoing (- size_domainstate_args) arg
let loc_parameters arg =
let (loc, _) =
calling_conventions 0 last_int_register 100 115 incoming arg
calling_conventions 0 last_int_register 100 115
incoming (- size_domainstate_args) arg
in
loc
let loc_results res =
let (loc, _) =
calling_conventions 0 last_int_register 100 115 not_supported res
calling_conventions 0 last_int_register 100 115 not_supported 0 res
in
loc

Expand Down Expand Up @@ -208,7 +219,7 @@ let loc_external_arguments ty_args =
external_calling_conventions 0 7 100 107 outgoing ty_args

let loc_external_results res =
let (loc, _) = calling_conventions 0 1 100 100 not_supported res in loc
let (loc, _) = calling_conventions 0 1 100 100 not_supported 0 res in loc

let loc_exn_bucket = phys_reg 0

Expand Down
23 changes: 16 additions & 7 deletions asmcomp/i386/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ let slot_offset env loc cl =
| Outgoing n ->
assert (n >= 0);
n
| Domainstate _ -> assert false (* not a stack slot *)

(* Record symbols used and defined - at the end generate extern for those
used but not defined *)
Expand Down Expand Up @@ -138,16 +139,24 @@ let domain_field f r =
let load_domain_state r =
I.mov (sym32 "Caml_state") r

let x86_data_type_for_stack_slot = function
| Float -> REAL8
| _ -> DWORD

(* The Domainstate locations are mapped to a global array "caml_extra_params"
defined in runtime/i386*. We cannot access the domain state here
because in the i386 port there is no register that always point to the
domain state. A global array works because i386 does not
support multiple domains. *)

let reg env = function
| { loc = Reg r } -> register_name r
| { loc = Stack(Incoming n | Outgoing n) } when n < 0 ->
sym32 "caml_extra_params" ~ofs:(n + 64)
| { loc = Stack s; typ = Float } as r ->
let ofs = slot_offset env s (register_class r) in
mem32 REAL8 ofs RSP
| { loc = Stack s } as r ->
| { loc = Stack(Domainstate n); typ = ty } ->
mem_sym (x86_data_type_for_stack_slot ty)
(emit_symbol "caml_extra_params") ~ofs:n
| { loc = Stack s; typ = ty } as r ->
let ofs = slot_offset env s (register_class r) in
mem32 DWORD ofs RSP
mem32 (x86_data_type_for_stack_slot ty) ofs RSP
| { loc = Unknown } ->
fatal_error "Emit_i386.reg"

Expand Down