BUG: Get full precision for 32 bit floating point random values.

The formula to convert a 32 bit random integer to a random float32, (next_uint32(bitgen_state) >> 9) * (1.0f / 8388608.0f) shifts by one bit too many, resulting in uniform float32 samples always having a 0 in the least significant bit. The formula is corrected to (next_uint32(bitgen_state) >> 8) * (1.0f / 16777216.0f) Occurrences of the incorrect formula in numpy/random/tests/test_direct.py were also corrected. Closes gh-17478.
numpy · Nov 6, 2021 · 4b9e569 · 4b9e569
1 parent 7c21101
commit 4b9e569
Show file tree

Hide file tree

Showing 3 changed files with 21 additions and 6 deletions.
diff --git a/numpy/random/src/distributions/distributions.c b/numpy/random/src/distributions/distributions.c
@@ -17,7 +17,7 @@ static NPY_INLINE uint64_t next_uint64(bitgen_t *bitgen_state) {
 }
 
 static NPY_INLINE float next_float(bitgen_t *bitgen_state) {
-  return (next_uint32(bitgen_state) >> 9) * (1.0f / 8388608.0f);
+  return (next_uint32(bitgen_state) >> 8) * (1.0f / 16777216.0f);
 }
 
 /* Random generators for external use */

diff --git a/numpy/random/tests/test_direct.py b/numpy/random/tests/test_direct.py
@@ -46,25 +46,27 @@ def assert_state_equal(actual, target):
             assert actual[key] == target[key]
 
 
+def uint32_to_float32(u):
+    return ((u >> np.uint32(8)) * (1.0 / 2**24)).astype(np.float32)
+
+
 def uniform32_from_uint64(x):
     x = np.uint64(x)
     upper = np.array(x >> np.uint64(32), dtype=np.uint32)
     lower = np.uint64(0xffffffff)
     lower = np.array(x & lower, dtype=np.uint32)
     joined = np.column_stack([lower, upper]).ravel()
-    out = (joined >> np.uint32(9)) * (1.0 / 2 ** 23)
-    return out.astype(np.float32)
+    return uint32_to_float32(joined)
 
 
 def uniform32_from_uint53(x):
     x = np.uint64(x) >> np.uint64(16)
     x = np.uint32(x & np.uint64(0xffffffff))
-    out = (x >> np.uint32(9)) * (1.0 / 2 ** 23)
-    return out.astype(np.float32)
+    return uint32_to_float32(x)
 
 
 def uniform32_from_uint32(x):
-    return (x >> np.uint32(9)) * (1.0 / 2 ** 23)
+    return uint32_to_float32(x)
 
 
 def uniform32_from_uint(x, bits):
@@ -126,6 +128,7 @@ def gauss_from_uint(x, n, bits):
 
     return gauss[:n]
 
+
 def test_seedsequence():
     from numpy.random.bit_generator import (ISeedSequence,
                                             ISpawnableSeedSequence,

diff --git a/numpy/random/tests/test_generator_mt19937.py b/numpy/random/tests/test_generator_mt19937.py
@@ -774,6 +774,18 @@ def test_random_float_scalar(self):
         desired = 0.0969992
         assert_array_almost_equal(actual, desired, decimal=7)
 
+    @pytest.mark.parametrize('dtype, uint_view_type',
+                             [(np.float32, np.uint32),
+                              (np.float64, np.uint64)])
+    def test_random_distribution_of_lsb(self, dtype, uint_view_type):
+        random = Generator(MT19937(self.seed))
+        sample = random.random(100000, dtype=dtype)
+        num_ones_in_lsb = np.count_nonzero(sample.view(uint_view_type) & 1)
+        # The probability of a 1 in the least significant bit is 0.25.
+        # With a sample size of 100000, the probability that num_ones_in_lsb
+        # is outside the following range is less than 5e-11.
+        assert 24100 < num_ones_in_lsb < 25900
+
     def test_random_unsupported_type(self):
         assert_raises(TypeError, random.random, dtype='int32')