Skip to content

Commit

Permalink
Reimplementation of Random using an LXM pseudo-random number generator (
Browse files Browse the repository at this point in the history
#10742)

As described in the paper [LXM: Better Splittable Pseudorandom Number
Generators (and Almost as Fast)](https://doi.org/10.1145/3485525) by
Guy L. Steele Jr. and Sebastiano Vigna, proceedings of OOPSLA 2021.

This provides a fast, state-of-the-art PRNG with full support for splitting.

Initialization from a seed (an array of integers) proceeds by
serializing the array to a byte sequence (each seed element occupies 8
bytes in little-endian representation), then hashing that sequence twice
with the MD5 hash function, each time with a different suffix. The two
128-bit digests together provide 256 bits of initialization data.

More tests were added in testsuite/tests/lib-random, and the lone test
testsuite/tests/basic-more/testrandom.ml was moved there.
  • Loading branch information
xavierleroy committed Jan 17, 2022
1 parent 8a4f2d2 commit 1d6ff7b
Show file tree
Hide file tree
Showing 21 changed files with 306 additions and 291 deletions.
5 changes: 5 additions & 0 deletions Changes
Expand Up @@ -18,6 +18,11 @@ Working version

### Standard library:

- #10742: Use LXM as the pseudo-random number generator for module Random.
Add `Random.State.split` and `Random.split` to "split" a PRNG off
another PRNG.
(Xavier Leroy, review by Gabriel Scherer and Hugo Heuzard)

* #10867: Remove deprecated values: Array.create, Array.make_float,
Array.create_matrix, Bytes.uppercase, Bytes.lowercase, Bytes.capitalize,
Bytes.uncapitalize, Char.lowercase, Char.uppercase, Filename.temp_dir_name,
Expand Down
4 changes: 2 additions & 2 deletions runtime/Makefile
Expand Up @@ -27,7 +27,7 @@ BYTECODE_C_SOURCES := $(addsuffix .c, \
lexing callback debugger weak finalise custom dynlink \
platform fiber shared_heap addrmap \
afl $(UNIX_OR_WIN32) bigarray main memprof domain sync \
skiplist lf_skiplist codefrag)
skiplist lf_skiplist codefrag prng)

NATIVE_C_SOURCES := $(addsuffix .c, \
startup_aux startup_nat main fail_nat roots signals \
Expand All @@ -37,7 +37,7 @@ NATIVE_C_SOURCES := $(addsuffix .c, \
globroots backtrace_nat backtrace dynlink_nat debugger meta \
platform fiber shared_heap addrmap frame_descriptors \
dynlink clambda_checks afl bigarray \
memprof domain sync skiplist lf_skiplist codefrag)
memprof domain sync skiplist lf_skiplist codefrag prng)

# Header files generated by configure
CONFIGURED_HEADERS := caml/m.h caml/s.h caml/version.h
Expand Down
2 changes: 1 addition & 1 deletion runtime/gen_primitives.sh
Expand Up @@ -26,7 +26,7 @@ export LC_ALL=C
lexing md5 meta memprof obj parsing signals str sys callback weak \
finalise domain platform fiber memory startup_aux sync \
dynlink backtrace_byt backtrace afl \
bigarray eventlog
bigarray eventlog prng
do
sed -n -e 's/^CAMLprim value \([a-z0-9_][a-z0-9_]*\).*/\1/p' "$prim.c"
done
Expand Down
69 changes: 69 additions & 0 deletions runtime/prng.c
@@ -0,0 +1,69 @@
/**************************************************************************/
/* */
/* OCaml */
/* */
/* Xavier Leroy, projet Cambium, College de France and Inria */
/* */
/* Copyright 2021 Institut National de Recherche en Informatique et */
/* en Automatique. */
/* */
/* All rights reserved. This file is distributed under the terms of */
/* the GNU Lesser General Public License version 2.1, with the */
/* special exception on linking described in the file LICENSE. */
/* */
/**************************************************************************/

#include <string.h>
#include "caml/alloc.h"
#include "caml/bigarray.h"
#include "caml/mlvalues.h"

/* The L64X128 member of the LXM family. Taken from figure 1 in
"LXM: Better Splittable Pseudorandom Number Generators
(and Almost as Fast)" by Guy L. Steele Jr. and Sebastiano Vigna,
OOPSLA 2021. */

/* Multiplier of the 64-bit LCG subgenerator (see the LCG update step in
   caml_lxm_next_unboxed). */
static const uint64_t M = UINT64_C(0xd1342543de82ef95);

/* Complete state of one generator instance: four 64-bit words, laid out
   in the order a, s, x[0], x[1]. */
struct LXM_state {
  /* Per-instance additive parameter (odd). */
  uint64_t a;
  /* State of the LCG subgenerator. */
  uint64_t s;
  /* State of the XBG subgenerator (not 0). */
  uint64_t x[2];
};

/* In OCaml, states are represented as a 1D big array of 64-bit integers.
   LXM_val(v) takes the data pointer of the big array [v] and views it as a
   [struct LXM_state], i.e. the words a, s, x[0], x[1] in that order.
   NOTE(review): no check is made here that [v] holds at least four
   elements; presumably the OCaml side guarantees it -- confirm. */

#define LXM_val(v) ((struct LXM_state *) Caml_ba_data_val(v))

/* Rotate the 64-bit word [x] left by [k] bit positions and return the
   result.

   Both shift counts are reduced modulo 64.  The naive formulation
   (x << k) | (x >> (64 - k)) shifts by 64 when k == 0, and by an
   out-of-range amount when k is outside 0..63; shifting a 64-bit value by
   64 or more is undefined behavior in C.  With the masks, every [k] is
   well defined (the rotation amount is k modulo 64) and the result for
   k in 1..63 is unchanged. */
Caml_inline uint64_t rotl(const uint64_t x, int k) {
  const unsigned int left = (unsigned int) k & 63u;
  const unsigned int right = (0u - (unsigned int) k) & 63u;
  return (x << left) | (x >> right);
}

/* Produce the next 64-bit output of the generator whose state is stored in
   the big array [v], and advance that state in place.

   The output is computed from the state as it was on entry; the LCG and
   XBG subgenerators are then stepped independently. */
CAMLprim uint64_t caml_lxm_next_unboxed(value v)
{
  struct LXM_state * state = LXM_val(v);
  /* Snapshot of the state on entry */
  const uint64_t lcg = state->s;
  const uint64_t xbg0 = state->x[0];
  const uint64_t xbg1 = state->x[1];
  /* Multiplier used by both rounds of the mixing function */
  const uint64_t mix = 0xdaba0b6eb09322e3;
  uint64_t out, t;

  /* Combine the two subgenerator states, then mix the result:
     two xorshift-multiply rounds followed by a final xorshift */
  out = lcg + xbg0;
  out ^= out >> 32;
  out *= mix;
  out ^= out >> 32;
  out *= mix;
  out ^= out >> 32;

  /* Step the LCG subgenerator */
  state->s = lcg * M + state->a;

  /* Step the XBG subgenerator */
  t = xbg1 ^ xbg0;
  state->x[0] = rotl(xbg0, 24) ^ t ^ (t << 16);
  state->x[1] = rotl(t, 37);

  return out;
}

/* Boxed counterpart of caml_lxm_next_unboxed: advance the generator stored
   in [v] and return its 64-bit output wrapped by caml_copy_int64. */
CAMLprim value caml_lxm_next(value v)
{
  const uint64_t bits = caml_lxm_next_unboxed(v);

  return caml_copy_int64(bits);
}
10 changes: 4 additions & 6 deletions stdlib/.depend
Expand Up @@ -624,26 +624,24 @@ stdlib__Queue.cmi : queue.mli \
stdlib__Seq.cmi
stdlib__Random.cmo : random.ml \
stdlib__String.cmi \
stdlib.cmi \
stdlib__Nativeint.cmi \
stdlib__Int64.cmi \
stdlib__Int32.cmi \
stdlib__Int.cmi \
stdlib__Domain.cmi \
stdlib__Digest.cmi \
stdlib__Char.cmi \
stdlib__Bytes.cmi \
stdlib__Bigarray.cmi \
stdlib__Array.cmi \
stdlib__Random.cmi
stdlib__Random.cmx : random.ml \
stdlib__String.cmx \
stdlib.cmx \
stdlib__Nativeint.cmx \
stdlib__Int64.cmx \
stdlib__Int32.cmx \
stdlib__Int.cmx \
stdlib__Domain.cmx \
stdlib__Digest.cmx \
stdlib__Char.cmx \
stdlib__Bytes.cmx \
stdlib__Bigarray.cmx \
stdlib__Array.cmx \
stdlib__Random.cmi
stdlib__Random.cmi : random.mli \
Expand Down
4 changes: 2 additions & 2 deletions stdlib/StdlibModules
Expand Up @@ -42,10 +42,10 @@ STDLIB_MODULE_BASENAMES = \
lexing parsing set map stack queue stream buffer \
atomic mutex condition semaphore domain \
camlinternalFormat printf arg \
printexc fun gc digest random hashtbl weak \
printexc fun gc digest bigarray random hashtbl weak \
format scanf callback camlinternalOO oo camlinternalMod genlex ephemeron \
filename complex arrayLabels listLabels bytesLabels stringLabels moreLabels \
stdLabels bigarray in_channel out_channel effect
stdLabels in_channel out_channel effect

STDLIB_PREFIXED_MODULES = \
$(filter-out stdlib camlinternal%, $(STDLIB_MODULE_BASENAMES))
Expand Down

0 comments on commit 1d6ff7b

Please sign in to comment.