Skip to content

Commit

Permalink
Prefetching optimisations for sweeping (#9934)
Browse files Browse the repository at this point in the history
  • Loading branch information
stedolan committed Feb 3, 2021
1 parent 9f51fac commit 8a90546
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 12 deletions.
5 changes: 5 additions & 0 deletions Changes
Expand Up @@ -20,6 +20,11 @@ Working version
including the search path for shared stub libraries.
(David Allsopp, review by Xavier Leroy)

- #9934: Optimise sweeping using prefetching.
(Stephen Dolan and Will Hasenplaugh, review by David Allsopp, Xavier
Leroy and Damien Doligez, benchmarking by Shubham Kumar and KC
Sivaramakrishnan)

- #10025: Track custom blocks (e.g. Bigarray) with Statmemprof
(Stephen Dolan, review by Leo White, Gabriel Scherer and Jacques-Henri
Jourdan)
Expand Down
11 changes: 11 additions & 0 deletions runtime/caml/misc.h
Expand Up @@ -113,6 +113,17 @@ CAMLdeprecated_typedef(addr, char *);
#error "How do I align values on this platform?"
#endif

/* Prefetching */

#ifdef CAML_INTERNALS
/* caml_prefetch(p): hint that the memory at address [p] is about to be
   accessed.  Only defined when CAML_INTERNALS is set.

   With a GCC-compatible compiler (__GNUC__) targeting x86 or x86-64 it
   expands to __builtin_prefetch.  On every other configuration it expands
   to nothing, so the argument is not evaluated at all there: do not pass
   an expression with side effects. */
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
#define caml_prefetch(p) __builtin_prefetch((p), 1, 3)
/* Second argument 1 = intent to write; third argument 3 = highest
   temporal locality, i.e. keep the data in all cache levels. */
#else
#define caml_prefetch(p)
#endif
#endif

/* CAMLunused is preserved for compatibility reasons.
Instead of the legacy GCC/Clang-only
CAMLunused foo;
Expand Down
1 change: 1 addition & 0 deletions runtime/freelist.c
Expand Up @@ -1662,6 +1662,7 @@ static header_t *bf_merge_block (value bp, char *limit)
}
caml_fl_cur_wsz += Whsize_val (cur);
next:
caml_prefetch(Hp_val(cur + 4096));
cur = Next_in_mem (cur);
if (Hp_val (cur) >= (header_t *) limit){
CAMLassert (Hp_val (cur) == (header_t *) limit);
Expand Down
27 changes: 15 additions & 12 deletions runtime/major_gc.c
Expand Up @@ -72,7 +72,7 @@ extern value caml_fl_merge; /* Defined in freelist.c. */
redarkening required */
static char *redarken_first_chunk = NULL;

static char *sweep_chunk, *sweep_limit;
static char *sweep_chunk;
static double p_backlog = 0.0; /* backlog for the gc speedup parameter */

int caml_gc_subphase; /* Subphase_{mark_roots,mark_main,mark_final} */
Expand Down Expand Up @@ -397,7 +397,6 @@ static void init_sweep_phase(void)
caml_gc_phase = Phase_sweep;
sweep_chunk = caml_heap_start;
caml_gc_sweep_hp = sweep_chunk;
sweep_limit = sweep_chunk + Chunk_size (sweep_chunk);
caml_fl_wsz_at_phase_change = caml_fl_cur_wsz;
if (caml_major_gc_hook) (*caml_major_gc_hook)();
}
Expand Down Expand Up @@ -698,21 +697,24 @@ static void clean_slice (intnat work)

static void sweep_slice (intnat work)
{
char *hp;
char *hp, *sweep_hp, *limit;
header_t hd;

caml_gc_message (0x40, "Sweeping %"
ARCH_INTNAT_PRINTF_FORMAT "d words\n", work);
sweep_hp = caml_gc_sweep_hp;
limit = sweep_chunk + Chunk_size(sweep_chunk);
while (work > 0){
if (caml_gc_sweep_hp < sweep_limit){
hp = caml_gc_sweep_hp;
if (sweep_hp < limit){
caml_prefetch(sweep_hp + 4000);
hp = sweep_hp;
hd = Hd_hp (hp);
work -= Whsize_hd (hd);
caml_gc_sweep_hp += Bhsize_hd (hd);
sweep_hp += Bhsize_hd (hd);
switch (Color_hd (hd)){
case Caml_white:
caml_gc_sweep_hp =
(char *)caml_fl_merge_block(Val_hp (hp), sweep_limit);
caml_gc_sweep_hp = sweep_hp;
sweep_hp = (char *) caml_fl_merge_block (Val_hp (hp), limit);
break;
case Caml_blue:
/* Only the blocks of the free-list are blue. See [freelist.c]. */
Expand All @@ -723,21 +725,23 @@ static void sweep_slice (intnat work)
Hd_hp (hp) = Whitehd_hd (hd);
break;
}
CAMLassert (caml_gc_sweep_hp <= sweep_limit);
CAMLassert (sweep_hp <= limit);
}else{
sweep_chunk = Chunk_next (sweep_chunk);
if (sweep_chunk == NULL){
/* Sweeping is done. */
caml_gc_sweep_hp = sweep_hp;
++ Caml_state->stat_major_collections;
work = 0;
caml_gc_phase = Phase_idle;
caml_request_minor_gc ();
}else{
caml_gc_sweep_hp = sweep_chunk;
sweep_limit = sweep_chunk + Chunk_size (sweep_chunk);
sweep_hp = sweep_chunk;
limit = sweep_chunk + Chunk_size (sweep_chunk);
}
}
}
caml_gc_sweep_hp = sweep_hp;
}

/* The main entry point for the major GC. Called about once for each
Expand Down Expand Up @@ -1085,7 +1089,6 @@ void caml_finalise_heap (void)
caml_gc_phase = Phase_sweep;
sweep_chunk = caml_heap_start;
caml_gc_sweep_hp = sweep_chunk;
sweep_limit = sweep_chunk + Chunk_size (sweep_chunk);
while (caml_gc_phase == Phase_sweep)
sweep_slice (LONG_MAX);
}
Expand Down

0 comments on commit 8a90546

Please sign in to comment.