Skip to content

Commit

Permalink
feat: add skip_if_exists to download_many (#1161)
Browse files Browse the repository at this point in the history
* feat: add skip_if_exists to download_many

* docstring

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
  • Loading branch information
andrewsg and gcf-owl-bot[bot] committed Oct 11, 2023
1 parent fc92ad1 commit c5a983d
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 0 deletions.
19 changes: 19 additions & 0 deletions google/cloud/storage/transfer_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,8 @@ def download_many(
raise_exception=False,
worker_type=PROCESS,
max_workers=DEFAULT_MAX_WORKERS,
*,
skip_if_exists=False,
):
"""Download many blobs concurrently via a worker pool.
Expand Down Expand Up @@ -348,6 +350,11 @@ def download_many(
and the default is a conservative number that should work okay in most
cases without consuming excessive resources.
:type skip_if_exists: bool
:param skip_if_exists:
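Before downloading each blob, check if the file for the filename exists;
if it does, skip that blob. This only applies when the destination is given
as a filename (a string); file objects are never skipped.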
:raises: :exc:`concurrent.futures.TimeoutError` if deadline is exceeded.
:rtype: list
Expand All @@ -374,6 +381,10 @@ def download_many(
"Passing in a file object is only supported by the THREAD worker type. Please either select THREAD workers, or pass in filenames only."
)

if skip_if_exists and isinstance(path_or_file, str):
if os.path.isfile(path_or_file):
continue

futures.append(
executor.submit(
_call_method_on_maybe_pickled_blob,
Expand Down Expand Up @@ -589,6 +600,8 @@ def download_many_to_path(
raise_exception=False,
worker_type=PROCESS,
max_workers=DEFAULT_MAX_WORKERS,
*,
skip_if_exists=False,
):
"""Download many files concurrently by their blob names.
Expand Down Expand Up @@ -715,6 +728,11 @@ def download_many_to_path(
and the default is a conservative number that should work okay in most
cases without consuming excessive resources.
:type skip_if_exists: bool
:param skip_if_exists:
Before downloading each blob, check if the file for the filename exists;
if it does, skip that blob. This only works for filenames.
:raises: :exc:`concurrent.futures.TimeoutError` if deadline is exceeded.
:rtype: list
Expand All @@ -740,6 +758,7 @@ def download_many_to_path(
raise_exception=raise_exception,
worker_type=worker_type,
max_workers=max_workers,
skip_if_exists=skip_if_exists,
)


Expand Down
29 changes: 29 additions & 0 deletions tests/unit/test_transfer_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,32 @@ def test_download_many_with_filenames():
assert result == FAKE_RESULT


def test_download_many_with_skip_if_exists():
    """Check that ``download_many`` skips blobs whose target file exists.

    Two blob/filename pairs are submitted with ``skip_if_exists=True``:

    * ``"file_a.txt"`` -- expected to be downloaded (the test assumes no file
      with this name exists in the current working directory).
    * ``tf.name`` -- a temporary file that exists on disk for the duration of
      the ``with`` block, so this blob must be skipped.
    """
    with tempfile.NamedTemporaryFile() as tf:
        BLOB_FILE_PAIRS = [
            (mock.Mock(spec=Blob), "file_a.txt"),
            (mock.Mock(spec=Blob), tf.name),
        ]

        for blob_mock, _ in BLOB_FILE_PAIRS:
            blob_mock._handle_filename_and_download.return_value = FAKE_RESULT

        results = transfer_manager.download_many(
            BLOB_FILE_PAIRS,
            download_kwargs=DOWNLOAD_KWARGS,
            worker_type=transfer_manager.THREAD,
            skip_if_exists=True,
        )

        # The blob whose destination does not exist yet is downloaded.
        downloaded_blob, downloaded_path = BLOB_FILE_PAIRS[0]
        downloaded_blob._handle_filename_and_download.assert_any_call(
            downloaded_path, **EXPECTED_DOWNLOAD_KWARGS
        )

        # The blob whose destination already exists is never touched.
        skipped_blob, _ = BLOB_FILE_PAIRS[1]
        skipped_blob._handle_filename_and_download.assert_not_called()

        # Compare the whole list rather than looping over it: a loop passes
        # vacuously when ``results`` is empty, and this also pins that a
        # skipped blob contributes no entry to the returned list.
        assert results == [FAKE_RESULT]


def test_download_many_with_file_objs():
BLOB_FILE_PAIRS = [
(mock.Mock(spec=Blob), tempfile.TemporaryFile()),
Expand Down Expand Up @@ -485,6 +511,7 @@ def test_download_many_to_path():
raise_exception=True,
max_workers=MAX_WORKERS,
worker_type=WORKER_TYPE,
skip_if_exists=True,
)

mock_download_many.assert_called_once_with(
Expand All @@ -494,6 +521,7 @@ def test_download_many_to_path():
raise_exception=True,
max_workers=MAX_WORKERS,
worker_type=WORKER_TYPE,
skip_if_exists=True,
)
for blobname in BLOBNAMES:
bucket.blob.assert_any_call(BLOB_NAME_PREFIX + blobname)
Expand Down Expand Up @@ -532,6 +560,7 @@ def test_download_many_to_path_creates_directories():
raise_exception=True,
worker_type=transfer_manager.PROCESS,
max_workers=8,
skip_if_exists=False,
)
for blobname in BLOBNAMES:
bucket.blob.assert_any_call(blobname)
Expand Down

0 comments on commit c5a983d

Please sign in to comment.