Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reimplementation of Random using an LXM pseudo-random number generator #10742

Merged
merged 6 commits into from Jan 17, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions runtime/Makefile
Expand Up @@ -27,7 +27,7 @@ BYTECODE_C_SOURCES := $(addsuffix .c, \
lexing callback debugger weak finalise custom dynlink \
platform fiber shared_heap addrmap \
afl $(UNIX_OR_WIN32) bigarray main memprof domain sync \
skiplist lf_skiplist codefrag)
skiplist lf_skiplist codefrag prng)

NATIVE_C_SOURCES := $(addsuffix .c, \
startup_aux startup_nat main fail_nat roots signals \
Expand All @@ -37,7 +37,7 @@ NATIVE_C_SOURCES := $(addsuffix .c, \
globroots backtrace_nat backtrace dynlink_nat debugger meta \
platform fiber shared_heap addrmap frame_descriptors \
dynlink clambda_checks afl bigarray \
memprof domain sync skiplist lf_skiplist codefrag)
memprof domain sync skiplist lf_skiplist codefrag prng)

# Header files generated by configure
CONFIGURED_HEADERS := caml/m.h caml/s.h caml/version.h
Expand Down
2 changes: 1 addition & 1 deletion runtime/gen_primitives.sh
Expand Up @@ -26,7 +26,7 @@ export LC_ALL=C
lexing md5 meta memprof obj parsing signals str sys callback weak \
finalise domain platform fiber memory startup_aux sync \
dynlink backtrace_byt backtrace afl \
bigarray eventlog
bigarray eventlog prng
do
sed -n -e 's/^CAMLprim value \([a-z0-9_][a-z0-9_]*\).*/\1/p' "$prim.c"
done
Expand Down
69 changes: 69 additions & 0 deletions runtime/prng.c
@@ -0,0 +1,69 @@
/**************************************************************************/
/* */
/* OCaml */
/* */
/* Xavier Leroy, projet Cambium, College de France and Inria */
/* */
/* Copyright 2021 Institut National de Recherche en Informatique et */
/* en Automatique. */
/* */
/* All rights reserved. This file is distributed under the terms of */
/* the GNU Lesser General Public License version 2.1, with the */
/* special exception on linking described in the file LICENSE. */
/* */
/**************************************************************************/

#include <string.h>
#include "caml/alloc.h"
#include "caml/bigarray.h"
#include "caml/mlvalues.h"

/* The L64X128 member of the LXM family. Taken from figure 1 in
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that the mixing function (lea64) should be mentioned as well. (The LXM authors describe "better mixing functions" as interesting future work, so it may be that in ten years what people understand as "The L64X128 member" is not that one anymore.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reference to figure 1 in the paper makes it clear which variant is being implemented.

"LXM: Better Splittable Pseudorandom Number Generators
(and Almost as Fast)" by Guy L. Steele Jr. and Sebastiano Vigna,
OOPSLA 2021. */

/* Multiplier of the LCG subgenerator: the LCG state is updated as
   s <- s * M + a (see caml_lxm_next_unboxed). */
static const uint64_t M = 0xd1342543de82ef95;

/* Complete state of one generator instance: four 64-bit words.
   LXM_val casts a raw data pointer to this struct, so the order and
   types of the fields determine how that data is interpreted. */
struct LXM_state {
uint64_t a; /* per-instance additive parameter (odd); added at each LCG step */
uint64_t s; /* state of the LCG subgenerator */
uint64_t x[2]; /* state of the XBG subgenerator (not 0) */
};

/* In OCaml, states are represented as a 1D big array of 64-bit integers.
   LXM_val(v) takes the data pointer of the big array [v] and reinterprets
   it as a pointer to struct LXM_state.  The macro itself performs no check
   on the kind or size of the big array. */

#define LXM_val(v) ((struct LXM_state *) Caml_ba_data_val(v))

/* Rotate the 64-bit value [x] left by [k] bit positions.
   Both shift counts are reduced modulo 64: a plain "x >> (64 - k)" would
   shift by the full width of the type when k is 0, which is undefined
   behaviour in C.  For k in 1..63 the result is unchanged. */
Caml_inline uint64_t rotl(const uint64_t x, int k) {
  const unsigned int n = (unsigned int) k & 63u;
  return (x << n) | (x >> ((64u - n) & 63u));
}

/* Produce the next 64-bit output of the generator whose state is reached
   through LXM_val(v), and advance that state in place.
   The output is computed from the state as it is on entry; the LCG and XBG
   parts are updated afterwards.  The result is returned as a raw uint64_t,
   not as an OCaml value. */
CAMLprim uint64_t caml_lxm_next_unboxed(value v)
{
uint64_t z, q0, q1;
struct LXM_state * st = LXM_val(v);

/* Combining operation: add the LCG state to the first word of the XBG state */
z = st->s + st->x[0];
/* Mixing function: two rounds of (xor with self shifted right by 32, then
   multiply by a constant), followed by one last xor-shift */
z = (z ^ (z >> 32)) * 0xdaba0b6eb09322e3;
z = (z ^ (z >> 32)) * 0xdaba0b6eb09322e3;
z = (z ^ (z >> 32));
/* LCG update.  Arithmetic on uint64_t wraps modulo 2^64 and multiplication
   is commutative, so s * M + a gives the same value as M * s + a. */
st->s = st->s * M + st->a;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the paper uses M * s + a, not sure why there is a slight difference here.

/* XBG update: work on local copies of the two state words, then store
   both back */
q0 = st->x[0]; q1 = st->x[1];
q1 ^= q0;
q0 = rotl(q0, 24);
q0 = q0 ^ q1 ^ (q1 << 16);
q1 = rotl(q1, 37);
st->x[0] = q0; st->x[1] = q1;
/* Return result (the mixed value computed before the state updates) */
return z;
}

/* Boxed variant of caml_lxm_next_unboxed: draw the next 64-bit output for
   the state [v], then wrap it with caml_copy_int64 and return the result. */
CAMLprim value caml_lxm_next(value v)
{
  const uint64_t drawn = caml_lxm_next_unboxed(v);
  return caml_copy_int64(drawn);
}
12 changes: 4 additions & 8 deletions stdlib/.depend
Expand Up @@ -623,27 +623,23 @@ stdlib__Queue.cmx : queue.ml \
stdlib__Queue.cmi : queue.mli \
stdlib__Seq.cmi
stdlib__Random.cmo : random.ml \
stdlib__String.cmi \
stdlib.cmi \
stdlib__Nativeint.cmi \
stdlib__Int64.cmi \
stdlib__Int32.cmi \
stdlib__Int.cmi \
stdlib__Domain.cmi \
stdlib__Digest.cmi \
stdlib__Char.cmi \
stdlib__Bytes.cmi \
stdlib__Bigarray.cmi \
stdlib__Array.cmi \
stdlib__Random.cmi
stdlib__Random.cmx : random.ml \
stdlib__String.cmx \
stdlib.cmx \
stdlib__Nativeint.cmx \
stdlib__Int64.cmx \
stdlib__Int32.cmx \
stdlib__Int.cmx \
stdlib__Domain.cmx \
stdlib__Digest.cmx \
stdlib__Char.cmx \
stdlib__Bytes.cmx \
stdlib__Bigarray.cmx \
stdlib__Array.cmx \
stdlib__Random.cmi
stdlib__Random.cmi : random.mli \
Expand Down
4 changes: 2 additions & 2 deletions stdlib/StdlibModules
Expand Up @@ -42,10 +42,10 @@ STDLIB_MODULE_BASENAMES = \
lexing parsing set map stack queue stream buffer \
atomic mutex condition semaphore domain \
camlinternalFormat printf arg \
printexc fun gc digest random hashtbl weak \
printexc fun gc digest bigarray random hashtbl weak \
format scanf callback camlinternalOO oo camlinternalMod genlex ephemeron \
filename complex arrayLabels listLabels bytesLabels stringLabels moreLabels \
stdLabels bigarray in_channel out_channel effect
stdLabels in_channel out_channel effect

STDLIB_PREFIXED_MODULES = \
$(filter-out stdlib camlinternal%, $(STDLIB_MODULE_BASENAMES))
Expand Down