From dd79030184e7aaa072c12d9182d79b3d9eb74757 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Wed, 13 Jul 2022 09:19:43 +0100 Subject: [PATCH 1/5] ENH: Add the capability to swap the singleton bit generator Add a new version of seed that supports seeding any bit gen Add set/get_bit_generator as explicit methods to support swapping closes #21808 --- .../upcoming_changes/21976.new_feature.rst | 25 ++++ numpy/random/mtrand.pyx | 119 +++++++++++++++--- numpy/random/tests/test_randomstate.py | 67 ++++++++++ 3 files changed, 196 insertions(+), 15 deletions(-) create mode 100644 doc/release/upcoming_changes/21976.new_feature.rst diff --git a/doc/release/upcoming_changes/21976.new_feature.rst b/doc/release/upcoming_changes/21976.new_feature.rst new file mode 100644 index 000000000000..fd2417bb9b6e --- /dev/null +++ b/doc/release/upcoming_changes/21976.new_feature.rst @@ -0,0 +1,25 @@ +The bit generator underlying the singleton RandomState can be changed +--------------------------------------------------------------------- +The singleton ``RandomState`` instance exposed in the ``numpy.random`` module +id initialized using system-provided entropy with the ``MT19937` bit generator. +The function ``set_bit_generator`` allows the default bit generator to be +replaced with a user-provided bit generator. This function has been introduced to +provide a method allowing seemless integration of a high-quality, modern bit generator +in new code with existing code that makes use of the singleton-provided random +variate generating functions. + +The preferred method to generate reproducible random numbers is to use a modern +bit generator in an instance of ``Generator``. The function ``default_rng`` +simplifies instantization. + + >>> rg = np.random.default_rng(3728973198) + >>> rg.random() + +The same bit generator can then shared with the singleton instance so that +calling functions in the ``random`` module will use the same bit generator.
+ + >>> np.random.set_bit_generator(rg.bit_generator) + >>> np.random.normal() + +The swap is permanent (until reversed) and so any call to functions +in the ``random`` module will use the new bit generator. diff --git a/numpy/random/mtrand.pyx b/numpy/random/mtrand.pyx index c9cdb5839a04..cbf37e51d087 100644 --- a/numpy/random/mtrand.pyx +++ b/numpy/random/mtrand.pyx @@ -186,16 +186,7 @@ cdef class RandomState: else: bit_generator = seed - self._bit_generator = bit_generator - capsule = bit_generator.capsule - cdef const char *name = "BitGenerator" - if not PyCapsule_IsValid(capsule, name): - raise ValueError("Invalid bit generator. The bit generator must " - "be instantized.") - self._bitgen = ( PyCapsule_GetPointer(capsule, name))[0] - self._aug_state.bit_generator = &self._bitgen - self._reset_gauss() - self.lock = bit_generator.lock + self._initialize_bit_generator(bit_generator) def __repr__(self): return self.__str__() + ' at 0x{:X}'.format(id(self)) @@ -218,13 +209,25 @@ cdef class RandomState: from ._pickle import __randomstate_ctor return __randomstate_ctor, (name_tpl[0], ctor), self.get_state(legacy=False) + cdef _initialize_bit_generator(self, bit_generator): + self._bit_generator = bit_generator + capsule = bit_generator.capsule + cdef const char *name = "BitGenerator" + if not PyCapsule_IsValid(capsule, name): + raise ValueError("Invalid bit generator. The bit generator must " + "be instantized.") + self._bitgen = ( PyCapsule_GetPointer(capsule, name))[0] + self._aug_state.bit_generator = &self._bitgen + self._reset_gauss() + self.lock = bit_generator.lock + cdef _reset_gauss(self): self._aug_state.has_gauss = 0 self._aug_state.gauss = 0.0 def seed(self, seed=None): """ - seed(self, seed=None) + seed(seed=None) Reseed a legacy MT19937 BitGenerator @@ -249,7 +252,7 @@ cdef class RandomState: def get_state(self, legacy=True): """ - get_state() + get_state(legacy=True) Return a tuple representing the internal state of the generator. 
@@ -259,12 +262,13 @@ cdef class RandomState: ---------- legacy : bool, optional Flag indicating to return a legacy tuple state when the BitGenerator - is MT19937, instead of a dict. + is MT19937, instead of a dict. Raises ValueError if the underlying + bit generator is not an instance of MT19937. Returns ------- out : {tuple(str, ndarray of 624 uints, int, int, float), dict} - The returned tuple has the following items: + If legacy is True, the returned tuple has the following items: 1. the string 'MT19937'. 2. a 1-D array of 624 unsigned integer keys. @@ -294,6 +298,11 @@ cdef class RandomState: legacy = False st['has_gauss'] = self._aug_state.has_gauss st['gauss'] = self._aug_state.gauss + if legacy and not isinstance(self._bit_generator, _MT19937): + raise ValueError( + "legacy can only be True when the underlyign bitgenerator is " + "an instance of MT19937." + ) if legacy: return (st['bit_generator'], st['state']['key'], st['state']['pos'], st['has_gauss'], st['gauss']) @@ -4690,7 +4699,6 @@ random = _rand.random random_integers = _rand.random_integers random_sample = _rand.random_sample rayleigh = _rand.rayleigh -seed = _rand.seed set_state = _rand.set_state shuffle = _rand.shuffle standard_cauchy = _rand.standard_cauchy @@ -4705,6 +4713,85 @@ wald = _rand.wald weibull = _rand.weibull zipf = _rand.zipf +def seed(seed=None): + """ + seed(seed=None) + + Reseed the singleton RandomState instance. + + Notes + ----- + This is a convenience, legacy function that exists to support + older code that uses the singleton RandomState. Best practice + is to use a dedicated ``Generator`` instance rather than + the random variate generation methods exposed directly in + the random module. 
+ + See Also + -------- + numpy.random.Generator + """ + if isinstance(_rand._bit_generator, _MT19937): + return _rand.seed(seed) + else: + bg_type = type(_rand._bit_generator) + _rand._bit_generator.state = bg_type(seed).state + +def get_bit_generator(): + """ + Returns the singleton RandomState's bit generator + + Returns + ------- + BitGenerator + The bit generator that underlies the singleton RandomState instance + + Notes + ----- + The singleton RandomState provides the random variate generators in the + NumPy random namespace. This function, and its counterpart set method, + provides a path to hot-swap the default MT19937 bit generator with a + user provided alternative. These function are intended to provide + a continuous path where a single underlying bit generator can be + used both with an instance of ``Generator`` and with the singleton + instance of RandomState. + + See Also + -------- + set_bit_generator + numpy.random.Generator + """ + return _rand._bit_generator + +def set_bit_generator(bitgen): + """ + Sets the singleton RandomState's bit generator + + Parameters + ---------- + bitgen + A bit generator instance + + Notes + ----- + The singleton RandomState provides the random variate generators in the + NumPy random namespace. This function, and its counterpart get method, + provides a path to hot-swap the default MT19937 bit generator with a + user provided alternative. These function are intended to provide + a continuous path where a single underlying bit generator can be + used both with an instance of ``Generator`` and with the singleton + instance of RandomState. 
+ + See Also + -------- + get_bit_generator + numpy.random.Generator + """ + cdef RandomState singleton + singleton = _rand + singleton._initialize_bit_generator(bitgen) + + # Old aliases that should not be removed def sample(*args, **kwargs): """ @@ -4731,6 +4818,7 @@ __all__ = [ 'f', 'gamma', 'geometric', + 'get_bit_generator', 'get_state', 'gumbel', 'hypergeometric', @@ -4758,6 +4846,7 @@ __all__ = [ 'rayleigh', 'sample', 'seed', + 'set_bit_generator', 'set_state', 'shuffle', 'standard_cauchy', diff --git a/numpy/random/tests/test_randomstate.py b/numpy/random/tests/test_randomstate.py index 1e22880ead38..fdd194fb9df1 100644 --- a/numpy/random/tests/test_randomstate.py +++ b/numpy/random/tests/test_randomstate.py @@ -53,6 +53,14 @@ def int_func(request): INT_FUNC_HASHES[request.param]) +@pytest.fixture +def restore_singleton_bitgen(): + """Ensures that the singleton bitgen is restored after a test""" + orig_bitgen = np.random.get_bit_generator() + yield + np.random.set_bit_generator(orig_bitgen) + + def assert_mt19937_state_equal(a, b): assert_equal(a['bit_generator'], b['bit_generator']) assert_array_equal(a['state']['key'], b['state']['key']) @@ -2038,3 +2046,62 @@ def test_randomstate_ctor_old_style_pickle(): assert_array_equal(state_a['state']['pos'], state_b['state']['pos']) assert_equal(state_a['has_gauss'], state_b['has_gauss']) assert_equal(state_a['gauss'], state_b['gauss']) + +def test_hot_swap(restore_singleton_bitgen): + # GH 21808 + def_bg = np.random.default_rng(0) + bg = def_bg.bit_generator + np.random.set_bit_generator(bg) + assert isinstance(np.random.mtrand._rand._bit_generator, type(bg)) + + second_bg = np.random.get_bit_generator() + assert bg is second_bg + + +def test_seed_alt_bit_gen(restore_singleton_bitgen): + # GH 21808 + bg = PCG64(0) + np.random.set_bit_generator(bg) + state = np.random.get_state(legacy=False) + np.random.seed(1) + new_state = np.random.get_state(legacy=False) + print(state) + print(new_state) + assert 
state["bit_generator"] == "PCG64" + assert state["state"]["state"] != new_state["state"]["state"] + assert state["state"]["inc"] != new_state["state"]["inc"] + + +def test_state_error_alt_bit_gen(restore_singleton_bitgen): + # GH 21808 + state = np.random.get_state() + bg = PCG64(0) + np.random.set_bit_generator(bg) + with pytest.raises(ValueError, match="state must be for a PCG64"): + np.random.set_state(state) + + +def test_swap_worked(restore_singleton_bitgen): + # GH 21808 + np.random.seed(98765) + vals = np.random.randint(0, 2 ** 30, 10) + bg = PCG64(0) + state = bg.state + np.random.set_bit_generator(bg) + state_direct = np.random.get_state(legacy=False) + for field in state: + assert state[field] == state_direct[field] + np.random.seed(98765) + pcg_vals = np.random.randint(0, 2 ** 30, 10) + assert not np.all(vals == pcg_vals) + new_state = bg.state + assert new_state["state"]["state"] != state["state"]["state"] + assert new_state["state"]["inc"] == new_state["state"]["inc"] + + +def test_swapped_singleton_against_direct(restore_singleton_bitgen): + np.random.set_bit_generator(PCG64(98765)) + singleton_vals = np.random.randint(0, 2 ** 30, 10) + rg = np.random.RandomState(PCG64(98765)) + non_singleton_vals = rg.randint(0, 2 ** 30, 10) + assert_equal(non_singleton_vals, singleton_vals) From 4b8cd57bedd1b10fd4cebb2eaaff2acead71b70c Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Fri, 15 Jul 2022 10:04:31 +0100 Subject: [PATCH 2/5] MAINT: Update typing information --- doc/release/upcoming_changes/21976.new_feature.rst | 10 ++++++++-- numpy/random/mtrand.pyi | 6 ++++++ numpy/typing/tests/data/pass/random.py | 3 +++ numpy/typing/tests/data/reveal/random.pyi | 3 +++ 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/doc/release/upcoming_changes/21976.new_feature.rst b/doc/release/upcoming_changes/21976.new_feature.rst index fd2417bb9b6e..7d1cb0405c0a 100644 --- a/doc/release/upcoming_changes/21976.new_feature.rst +++ 
b/doc/release/upcoming_changes/21976.new_feature.rst @@ -6,7 +6,9 @@ The function ``set_bit_generator`` allows the default bit generator to be replaced with a user-provided bit generator. This function has been introduced to provide a method allowing seemless integration of a high-quality, modern bit generator in new code with existing code that makes use of the singleton-provided random -variate generating functions. +variate generating functions. The companion function ``get_bit_generator`` returns the current bit generator +being used by the singleton ``RandomState``. This is provided to simplify restoring +the original source of randomness if required. The preferred method to generate reproducible random numbers is to use a modern bit generator in an instance of ``Generator``. The function ``default_rng`` @@ -18,8 +20,12 @@ simplifies instantization. The same bit generator can then shared with the singleton instance so that calling functions in the ``random`` module will use the same bit generator. + >>> orig_bit_gen = np.random.get_bit_generator() >>> np.random.set_bit_generator(rg.bit_generator) >>> np.random.normal() The swap is permanent (until reversed) and so any call to functions -in the ``random`` module will use the new bit generator. +in the ``random`` module will use the new bit generator. The orginal +can be restored if required for code to run correctly. + + >>> np.random.set_bit_generator(orig_bit_gen) diff --git a/numpy/random/mtrand.pyi b/numpy/random/mtrand.pyi index b6eb77f00df2..271cb9787470 100644 --- a/numpy/random/mtrand.pyi +++ b/numpy/random/mtrand.pyi @@ -562,3 +562,9 @@ zipf = _rand.zipf # Two legacy that are trivial wrappers around random_sample sample = _rand.random_sample ranf = _rand.random_sample + +def set_bit_generator(bitgen: BitGenerator) -> None: + ... + +def get_bit_generator() -> BitGenerator: + ... 
diff --git a/numpy/typing/tests/data/pass/random.py b/numpy/typing/tests/data/pass/random.py index 9816cd2c3f95..6a4d99f12b13 100644 --- a/numpy/typing/tests/data/pass/random.py +++ b/numpy/typing/tests/data/pass/random.py @@ -1494,3 +1494,6 @@ random_st.tomaxint() random_st.tomaxint(1) random_st.tomaxint((1,)) + +np.random.set_bit_generator(SEED_PCG64) +np.random.get_bit_generator() diff --git a/numpy/typing/tests/data/reveal/random.pyi b/numpy/typing/tests/data/reveal/random.pyi index edea6a2911e4..b65e47006408 100644 --- a/numpy/typing/tests/data/reveal/random.pyi +++ b/numpy/typing/tests/data/reveal/random.pyi @@ -1537,3 +1537,6 @@ reveal_type(random_st.random_sample(size=(1, 2))) # E: ndarray[Any, dtype[float reveal_type(random_st.tomaxint()) # E: int reveal_type(random_st.tomaxint(1)) # E: ndarray[Any, dtype[{int_}]] reveal_type(random_st.tomaxint((1,))) # E: ndarray[Any, dtype[{int_}]] + +reveal_type(np.random.set_bit_generator(pcg64)) # E: BitGenerator +reveal_type(np.random.get_bit_generator()) # E: None From cf3ca1587bcadfb0747b7ec31c3e0fdc988ef1e8 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Fri, 15 Jul 2022 18:44:30 +0100 Subject: [PATCH 3/5] TYP: Import into random module --- numpy/random/__init__.pyi | 2 ++ numpy/typing/tests/data/reveal/random.pyi | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/numpy/random/__init__.pyi b/numpy/random/__init__.pyi index 32bd64a0b01d..99ef6f3e2f2a 100644 --- a/numpy/random/__init__.pyi +++ b/numpy/random/__init__.pyi @@ -23,6 +23,7 @@ from numpy.random.mtrand import ( f as f, gamma as gamma, geometric as geometric, + get_bit_generator as get_bit_generator, get_state as get_state, gumbel as gumbel, hypergeometric as hypergeometric, @@ -50,6 +51,7 @@ from numpy.random.mtrand import ( rayleigh as rayleigh, sample as sample, seed as seed, + set_bit_generator as set_bit_generator, set_state as set_state, shuffle as shuffle, standard_cauchy as standard_cauchy, diff --git 
a/numpy/typing/tests/data/reveal/random.pyi b/numpy/typing/tests/data/reveal/random.pyi index b65e47006408..67a5d3e7aad8 100644 --- a/numpy/typing/tests/data/reveal/random.pyi +++ b/numpy/typing/tests/data/reveal/random.pyi @@ -1538,5 +1538,5 @@ reveal_type(random_st.tomaxint()) # E: int reveal_type(random_st.tomaxint(1)) # E: ndarray[Any, dtype[{int_}]] reveal_type(random_st.tomaxint((1,))) # E: ndarray[Any, dtype[{int_}]] -reveal_type(np.random.set_bit_generator(pcg64)) # E: BitGenerator -reveal_type(np.random.get_bit_generator()) # E: None +reveal_type(np.random.set_bit_generator(pcg64)) # E: None +reveal_type(np.random.get_bit_generator()) # E: BitGenerator From 962535f7cbf3265e4f11a339b682dd3c7d312424 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Sun, 17 Jul 2022 14:16:24 +0100 Subject: [PATCH 4/5] DOC: Correct doc errors Fix typos and wrap length --- .../upcoming_changes/21976.new_feature.rst | 33 ++++++++++--------- numpy/random/mtrand.pyx | 4 +-- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/doc/release/upcoming_changes/21976.new_feature.rst b/doc/release/upcoming_changes/21976.new_feature.rst index 7d1cb0405c0a..d0960fd94553 100644 --- a/doc/release/upcoming_changes/21976.new_feature.rst +++ b/doc/release/upcoming_changes/21976.new_feature.rst @@ -1,31 +1,32 @@ The bit generator underlying the singleton RandomState can be changed --------------------------------------------------------------------- The singleton ``RandomState`` instance exposed in the ``numpy.random`` module -id initialized using system-provided entropy with the ``MT19937` bit generator. -The function ``set_bit_generator`` allows the default bit generator to be -replaced with a user-provided bit generator. This function has been introduced to -provide a method allowing seemless integration of a high-quality, modern bit generator -in new code with existing code that makes use of the singleton-provided random -variate generating functions. 
The companion function ``get_bit_generator`` returns the current bit generator -being used by the singleton ``RandomState``. This is provided to simplify restoring +is initialized at startup with the ``MT19937` bit generator. The function new ``set_bit_generator`` allows the default bit generator to be +replaced with a user-provided bit generator. This function has been introduced +to provide a method allowing seamless integration of a high-quality, modern bit +generator in new code with existing code that makes use of the +singleton-provided random variate generating functions. The companion function +``get_bit_generator`` returns the current bit generator being used by the +singleton ``RandomState``. This is provided to simplify restoring the original source of randomness if required. The preferred method to generate reproducible random numbers is to use a modern bit generator in an instance of ``Generator``. The function ``default_rng`` simplifies instantization. - >>> rg = np.random.default_rng(3728973198) - >>> rg.random() + >>> rg = np.random.default_rng(3728973198) + >>> rg.random() -The same bit generator can then shared with the singleton instance so that -calling functions in the ``random`` module will use the same bit generator. +The same bit generator can then be shared with the singleton instance so that +calling functions in the ``random`` module will use the same bit +generator. - >>> orig_bit_gen = np.random.get_bit_generator() - >>> np.random.set_bit_generator(rg.bit_generator) - >>> np.random.normal() + >>> orig_bit_gen = np.random.get_bit_generator() + >>> np.random.set_bit_generator(rg.bit_generator) + >>> np.random.normal() The swap is permanent (until reversed) and so any call to functions -in the ``random`` module will use the new bit generator. The orginal +in the ``random`` module will use the new bit generator. The original can be restored if required for code to run correctly. 
- >>> np.random.set_bit_generator(orig_bit_gen) + >>> np.random.set_bit_generator(orig_bit_gen) diff --git a/numpy/random/mtrand.pyx b/numpy/random/mtrand.pyx index cbf37e51d087..5672ed60bd19 100644 --- a/numpy/random/mtrand.pyx +++ b/numpy/random/mtrand.pyx @@ -4749,7 +4749,7 @@ def get_bit_generator(): Notes ----- The singleton RandomState provides the random variate generators in the - NumPy random namespace. This function, and its counterpart set method, + ``numpy.random`` namespace. This function, and its counterpart set method, provides a path to hot-swap the default MT19937 bit generator with a user provided alternative. These function are intended to provide a continuous path where a single underlying bit generator can be @@ -4775,7 +4775,7 @@ def set_bit_generator(bitgen): Notes ----- The singleton RandomState provides the random variate generators in the - NumPy random namespace. This function, and its counterpart get method, + ``numpy.random`` namespace. This function, and its counterpart get method, provides a path to hot-swap the default MT19937 bit generator with a user provided alternative.
These function are intended to provide a continuous path where a single underlying bit generator can be From 69530cf09c571023ff523034ef435386a907ca01 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Mon, 18 Jul 2022 00:07:10 +0100 Subject: [PATCH 5/5] Update doc/release/upcoming_changes/21976.new_feature.rst Co-authored-by: Matti Picus --- doc/release/upcoming_changes/21976.new_feature.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/release/upcoming_changes/21976.new_feature.rst b/doc/release/upcoming_changes/21976.new_feature.rst index d0960fd94553..387f093dd2e2 100644 --- a/doc/release/upcoming_changes/21976.new_feature.rst +++ b/doc/release/upcoming_changes/21976.new_feature.rst @@ -1,7 +1,8 @@ The bit generator underlying the singleton RandomState can be changed --------------------------------------------------------------------- The singleton ``RandomState`` instance exposed in the ``numpy.random`` module -is initialized at startup with the ``MT19937` bit generator. The function new ``set_bit_generator`` allows the default bit generator to be +is initialized at startup with the ``MT19937`` bit generator. The new +function ``set_bit_generator`` allows the default bit generator to be replaced with a user-provided bit generator. This function has been introduced to provide a method allowing seamless integration of a high-quality, modern bit generator in new code with existing code that makes use of the