Skip to content

Commit

Permalink
FIX do not swallow PicklingError in cache (#1359)
Browse files Browse the repository at this point in the history
Co-authored-by: jeremie du boisberranger <jeremiedbb@yahoo.fr>
Co-authored-by: Jérémie du Boisberranger <34657725+jeremiedbb@users.noreply.github.com>
  • Loading branch information
3 people committed Feb 15, 2023
1 parent 83ba40f commit 061d4ad
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 5 deletions.
5 changes: 5 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ In development
in May 2022) that it should be safe to remove this.
https://github.com/joblib/joblib/pull/1361

- A warning is raised when a pickling error occurs during caching operations.
In version 1.5, this warning will be turned into an error. For all other
errors, a new warning has been introduced: `joblib.memory.CacheWarning`.
https://github.com/joblib/joblib/pull/1359

Release 1.2.0
-------------

Expand Down
26 changes: 22 additions & 4 deletions joblib/_store_backends.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Storage providers backends for Memory caching."""

from pickle import PicklingError
import re
import os
import os.path
Expand All @@ -20,6 +21,11 @@
'path size last_access')


class CacheWarning(Warning):
"""Warning to capture dump failures except for PicklingError."""
pass


def concurrency_safe_write(object_to_write, filename, write_func):
"""Writes an object into a unique file in a concurrency-safe way."""
thread_id = id(threading.current_thread())
Expand Down Expand Up @@ -185,12 +191,24 @@ def dump_item(self, path, item, verbose=1):

def write_func(to_write, dest_filename):
with self._open_item(dest_filename, "wb") as f:
numpy_pickle.dump(to_write, f,
compress=self.compress)
try:
numpy_pickle.dump(to_write, f, compress=self.compress)
except PicklingError as e:
# TODO(1.5) turn into error
warnings.warn(
"Unable to cache to disk: failed to pickle "
"output. In version 1.5 this will raise an "
f"exception. Exception: {e}.",
FutureWarning
)

self._concurrency_safe_write(item, filename, write_func)
except: # noqa: E722
" Race condition in the creation of the directory "
except Exception as e: # noqa: E722
warnings.warn(
"Unable to cache to disk. Possibly a race condition in the "
f"creation of the directory. Exception: {e}.",
CacheWarning
)

def clear_item(self, path):
"""Clear the item at the path, given as a list of strings."""
Expand Down
1 change: 1 addition & 0 deletions joblib/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from .func_inspect import format_signature
from .logger import Logger, format_time, pformat
from ._store_backends import StoreBackendBase, FileSystemStoreBackend
from ._store_backends import CacheWarning # noqa


FIRST_LINE_TEXT = "# first line:"
Expand Down
40 changes: 39 additions & 1 deletion joblib/test/test_store_backends.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,25 @@

try:
# Python 2.7: use the C pickle to speed up
# test_concurrency_safe_write which pickles big python objects
import cPickle as cpickle
except ImportError:
import pickle as cpickle
import functools
from pickle import PicklingError
import time

import pytest

from joblib.testing import parametrize, timeout
from joblib.test.common import with_multiprocessing
from joblib.backports import concurrency_safe_rename
from joblib import Parallel, delayed
from joblib._store_backends import concurrency_safe_write
from joblib._store_backends import (
concurrency_safe_write,
FileSystemStoreBackend,
CacheWarning,
)


def write_func(output, filename):
Expand Down Expand Up @@ -54,3 +62,33 @@ def test_concurrency_safe_write(tmpdir, backend):
if i % 3 != 2 else load_func for i in range(12)]
Parallel(n_jobs=2, backend=backend)(
delayed(func)(obj, filename) for func in funcs)


def test_warning_on_dump_failure(tmpdir):
# Check that a warning is raised when the dump fails for any reason but
# a PicklingError.
class UnpicklableObject(object):
def __reduce__(self):
raise RuntimeError("some exception")

backend = FileSystemStoreBackend()
backend.location = tmpdir.join('test_warning_on_pickling_error').strpath
backend.compress = None

with pytest.warns(CacheWarning, match="some exception"):
backend.dump_item("testpath", UnpicklableObject())


def test_warning_on_pickling_error(tmpdir):
# This is separate from test_warning_on_dump_failure because in the
# future we will turn this into an exception.
class UnpicklableObject(object):
def __reduce__(self):
raise PicklingError("not picklable")

backend = FileSystemStoreBackend()
backend.location = tmpdir.join('test_warning_on_pickling_error').strpath
backend.compress = None

with pytest.warns(FutureWarning, match="not picklable"):
backend.dump_item("testpath", UnpicklableObject())

0 comments on commit 061d4ad

Please sign in to comment.