Skip to content

Commit

Permalink
docs: remove threads in transfer manager samples (#1029)
Browse files Browse the repository at this point in the history
* docs: remove threads in transfer manager samples

* omit worker type in transfer manager sample processes comments

---------

Co-authored-by: Andrew Gorcester <gorcester@google.com>
  • Loading branch information
MiaCY and andrewsg committed May 4, 2023
1 parent a579212 commit 30c5146
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 32 deletions.
4 changes: 2 additions & 2 deletions samples/snippets/snippets_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -693,7 +693,7 @@ def test_transfer_manager_snippets(test_bucket, capsys):
test_bucket.name,
BLOB_NAMES,
source_directory="{}/".format(uploads),
threads=2,
processes=8,
)
out, _ = capsys.readouterr()

Expand All @@ -705,7 +705,7 @@ def test_transfer_manager_snippets(test_bucket, capsys):
storage_transfer_manager_download_all_blobs.download_all_blobs_with_transfer_manager(
test_bucket.name,
destination_directory=os.path.join(downloads, ""),
threads=2,
processes=8,
)
out, _ = capsys.readouterr()

Expand Down
15 changes: 7 additions & 8 deletions samples/snippets/storage_transfer_manager_download_all_blobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@


def download_all_blobs_with_transfer_manager(
bucket_name, destination_directory="", threads=4
bucket_name, destination_directory="", processes=8
):
"""Download all of the blobs in a bucket, concurrently in a process pool.
Expand All @@ -37,12 +37,11 @@ def download_all_blobs_with_transfer_manager(
# intended for unsanitized end user input.
# destination_directory = ""

# The number of threads to use for the operation. The performance impact of
# this value depends on the use case, but generally, smaller files benefit
# from more threads and larger files don't benefit from more threads. Too
# many threads can slow operations, especially with large files, due to
# contention over the Python GIL.
# threads=4
# The maximum number of processes to use for the operation. The performance
# impact of this value depends on the use case, but smaller files usually
# benefit from a higher number of processes. Each additional process occupies
# some CPU and memory resources until finished.
# processes=8

from google.cloud.storage import Client, transfer_manager

Expand All @@ -52,7 +51,7 @@ def download_all_blobs_with_transfer_manager(
blob_names = [blob.name for blob in bucket.list_blobs()]

results = transfer_manager.download_many_to_path(
bucket, blob_names, destination_directory=destination_directory, threads=threads
bucket, blob_names, destination_directory=destination_directory, max_workers=processes
)

for name, result in zip(blob_names, results):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,10 @@ def download_chunks_concurrently(bucket_name, blob_name, filename, processes=8):
# The destination filename or path
# filename = ""

# The maximum number of worker processes that should be used to handle the
# workload of downloading the blob concurrently. PROCESS worker type uses more
# system resources (both memory and CPU) and can result in faster operations
# when working with large files. The optimal number of workers depends heavily
# on the specific use case. Refer to the docstring of the underlying method
# for more details.
# The maximum number of processes to use for the operation. The performance
# impact of this value depends on the use case, but smaller files usually
# benefit from a higher number of processes. Each additional process occupies
# some CPU and memory resources until finished.
# processes=8

from google.cloud.storage import Client, transfer_manager
Expand Down
15 changes: 7 additions & 8 deletions samples/snippets/storage_transfer_manager_upload_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.


def upload_directory_with_transfer_manager(bucket_name, source_directory, threads=4):
def upload_directory_with_transfer_manager(bucket_name, source_directory, processes=8):
"""Upload every file in a directory, including all files in subdirectories.
Each blob name is derived from the filename, not including the `directory`
Expand All @@ -30,12 +30,11 @@ def upload_directory_with_transfer_manager(bucket_name, source_directory, thread
# working directory".
# source_directory=""

# The number of threads to use for the operation. The performance impact of
# this value depends on the use case, but generally, smaller files benefit
# from more threads and larger files don't benefit from more threads. Too
# many threads can slow operations, especially with large files, due to
# contention over the Python GIL.
# threads=4
# The maximum number of processes to use for the operation. The performance
# impact of this value depends on the use case, but smaller files usually
# benefit from a higher number of processes. Each additional process occupies
# some CPU and memory resources until finished.
# processes=8

from pathlib import Path

Expand Down Expand Up @@ -66,7 +65,7 @@ def upload_directory_with_transfer_manager(bucket_name, source_directory, thread

# Start the upload.
results = transfer_manager.upload_many_from_filenames(
bucket, string_paths, source_directory=source_directory, threads=threads
bucket, string_paths, source_directory=source_directory, max_workers=processes
)

for name, result in zip(string_paths, results):
Expand Down
15 changes: 7 additions & 8 deletions samples/snippets/storage_transfer_manager_upload_many_blobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@


def upload_many_blobs_with_transfer_manager(
bucket_name, filenames, source_directory="", threads=4
bucket_name, filenames, source_directory="", processes=8
):
"""Upload every file in a list to a bucket, concurrently in a process pool.
Expand All @@ -40,20 +40,19 @@ def upload_many_blobs_with_transfer_manager(
# end user input.
# source_directory=""

# The number of threads to use for the operation. The performance impact of
# this value depends on the use case, but generally, smaller files benefit
# from more threads and larger files don't benefit from more threads. Too
# many threads can slow operations, especially with large files, due to
# contention over the Python GIL.
# threads=4
# The maximum number of processes to use for the operation. The performance
# impact of this value depends on the use case, but smaller files usually
# benefit from a higher number of processes. Each additional process occupies
# some CPU and memory resources until finished.
# processes=8

from google.cloud.storage import Client, transfer_manager

storage_client = Client()
bucket = storage_client.bucket(bucket_name)

results = transfer_manager.upload_many_from_filenames(
bucket, filenames, source_directory=source_directory, threads=threads
bucket, filenames, source_directory=source_directory, max_workers=processes
)

for name, result in zip(filenames, results):
Expand Down

0 comments on commit 30c5146

Please sign in to comment.