Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Sort __globals__ dict to make pickle string more deterministic #418

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions cloudpickle/cloudpickle_fast.py
Expand Up @@ -155,8 +155,8 @@ def _function_getstate(func):
}

f_globals_ref = _extract_code_globals(func.__code__)
f_globals = {k: func.__globals__[k] for k in f_globals_ref if k in
func.__globals__}
f_globals = {k: func.__globals__[k] for k in sorted(f_globals_ref)
if k in func.__globals__}

closure_values = (
list(map(_get_cell_contents, func.__closure__))
Expand Down
18 changes: 18 additions & 0 deletions tests/cloudpickle_test.py
Expand Up @@ -49,6 +49,7 @@
from cloudpickle.cloudpickle import _extract_class_dict, _whichmodule
from cloudpickle.cloudpickle import _lookup_module_and_qualname

from .testutils import subprocess_pickle_string
from .testutils import subprocess_pickle_echo
from .testutils import assert_run_python_script
from .testutils import subprocess_worker
Expand All @@ -57,6 +58,7 @@


_TEST_GLOBAL_VARIABLE = "default_value"
_TEST_GLOBAL_VARIABLE2 = "another_value"


class RaiserOnPickle(object):
Expand Down Expand Up @@ -2321,6 +2323,22 @@ def __type__(self):
o = MyClass()
pickle_depickle(o, protocol=self.protocol)

@pytest.mark.skipif(
sys.version_info < (3, 6, 0),
reason="Dict determinism is a lost cause in Python < 3.6")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With 3.5 EOL, it does not seem worth putting any effort into this. Downstream tools that care about determinism have likely been 3.6+ anyway.

def test_sorted_globals(self):
vals = set()

def func_with_globals():
return _TEST_GLOBAL_VARIABLE + _TEST_GLOBAL_VARIABLE2

for i in range(5):
vals.add(
subprocess_pickle_string(func_with_globals,
protocol=self.protocol,
add_env={"PYTHONHASHSEED": str(i)}))
assert len(vals) == 1


class Protocol2CloudPickleTest(CloudPickleTest):

Expand Down
36 changes: 28 additions & 8 deletions tests/testutils.py
Expand Up @@ -38,24 +38,25 @@ def _make_cwd_env():
return cloudpickle_repo_folder, env


def subprocess_pickle_echo(input_data, protocol=None, timeout=TIMEOUT):
"""Echo function with a child Python process
def subprocess_pickle_string(input_data, protocol=None, timeout=TIMEOUT,
add_env=None):
"""Retrieve pickle string of an object generated by a child Python process

Pickle the input data into a buffer, send it to a subprocess via
stdin, expect the subprocess to unpickle, re-pickle that data back
and send it back to the parent process via stdout for final unpickling.
and send it back to the parent process via stdout.

>>> subprocess_pickle_echo([1, 'a', None])
[1, 'a', None]
>>> testutils.subprocess_pickle_string([1, 'a', None], protocol=2)
b'\x80\x02]q\x00(K\x01X\x01\x00\x00\x00aq\x01Ne.'

"""
# run then pickle_echo(protocol=protocol) in __main__:

# Protect stderr from any warning, as we will assume an error will happen
# if it is not empty. A concrete example is pytest using the imp module,
# which is deprecated in python 3.8
cmd = [sys.executable, '-W ignore', __file__, "--protocol", str(protocol)]
cwd, env = _make_cwd_env()
if add_env:
env.update(add_env)
proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=cwd, env=env,
bufsize=4096)
pickle_string = dumps(input_data, protocol=protocol)
Expand All @@ -67,14 +68,33 @@ def subprocess_pickle_echo(input_data, protocol=None, timeout=TIMEOUT):
message = "Subprocess returned %d: " % proc.returncode
message += err.decode('utf-8')
raise RuntimeError(message)
return loads(out)
return out
except TimeoutExpired as e:
proc.kill()
out, err = proc.communicate()
message = u"\n".join([out.decode('utf-8'), err.decode('utf-8')])
raise RuntimeError(message) from e


def subprocess_pickle_echo(input_data, protocol=None, timeout=TIMEOUT,
add_env=None):
"""Echo function with a child Python process

Pickle the input data into a buffer, send it to a subprocess via
stdin, expect the subprocess to unpickle, re-pickle that data back
and send it back to the parent process via stdout for final unpickling.

>>> subprocess_pickle_echo([1, 'a', None])
[1, 'a', None]

"""
out = subprocess_pickle_string(input_data,
protocol=protocol,
timeout=timeout,
add_env=add_env)
return loads(out)


def _read_all_bytes(stream_in, chunk_size=4096):
all_data = b""
while True:
Expand Down