Comparing changes

base repository: googleapis/python-storage
base: v1.36.2
head repository: googleapis/python-storage
compare: v1.37.0
  • 3 commits
  • 10 files changed
  • 3 contributors

Commits on Mar 23, 2021

  1. fix: update user_project usage and documentation in bucket/client class methods (#396)
    
    * fix: update documentation and usage of user_project in bucket class methods
    
    * revise Bucket.create() docstring
    
    * revise Bucket @property user_project docstring
    
    * add example to client.get_bucket() with user_project
    
    * revise deprecated Bucket.create() and unit test
    
    * fix lint
    
    * address review changes
    cojenco authored Mar 23, 2021
    SHA: 1a2734b

Commits on Mar 24, 2021

  1. feat: add blob.open() for file-like I/O (#385)

    SHA: 440a0a4

Commits on Mar 25, 2021

  1. chore: release 1.37.0 (#398)

    Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com>
    release-please[bot] authored Mar 25, 2021
    SHA: d10f842
12 changes: 12 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,18 @@

[1]: https://pypi.org/project/google-cloud-storage/#history

## [1.37.0](https://www.github.com/googleapis/python-storage/compare/v1.36.2...v1.37.0) (2021-03-24)


### Features

* add blob.open() for file-like I/O ([#385](https://www.github.com/googleapis/python-storage/issues/385)) ([440a0a4](https://www.github.com/googleapis/python-storage/commit/440a0a4ffe00b1f7c562b0e9c1e47dbadeca33e1)), closes [#29](https://www.github.com/googleapis/python-storage/issues/29)


### Bug Fixes

* update user_project usage and documentation in bucket/client class methods ([#396](https://www.github.com/googleapis/python-storage/issues/396)) ([1a2734b](https://www.github.com/googleapis/python-storage/commit/1a2734ba6d316ce51e4e141571331e86196462b9))

### [1.36.2](https://www.github.com/googleapis/python-storage/compare/v1.36.1...v1.36.2) (2021-03-09)


127 changes: 126 additions & 1 deletion google/cloud/storage/blob.py
@@ -30,6 +30,7 @@
import copy
import hashlib
from io import BytesIO
from io import TextIOWrapper
import logging
import mimetypes
import os
@@ -78,6 +79,8 @@
from google.cloud.storage.retry import DEFAULT_RETRY
from google.cloud.storage.retry import DEFAULT_RETRY_IF_ETAG_IN_JSON
from google.cloud.storage.retry import DEFAULT_RETRY_IF_GENERATION_SPECIFIED
from google.cloud.storage.fileio import BlobReader
from google.cloud.storage.fileio import BlobWriter


_API_ACCESS_ENDPOINT = "https://storage.googleapis.com"
@@ -144,7 +147,9 @@ class Blob(_PropertyMixin):
:type chunk_size: int
:param chunk_size:
(Optional) The size of a chunk of data whenever iterating (in bytes).
This must be a multiple of 256 KB per the API specification.
This must be a multiple of 256 KB per the API specification. If not
specified, the chunk_size of the blob itself is used. If that is not
specified, a default value of 40 MB is used.
:type encryption_key: bytes
:param encryption_key:
@@ -3407,6 +3412,126 @@ def update_storage_class(
retry=retry,
)

def open(
self,
mode="r",
chunk_size=None,
encoding=None,
errors=None,
newline=None,
**kwargs
):
r"""Create a file handler for file-like I/O to or from this blob.
This method can be used as a context manager, just like Python's
built-in 'open()' function.
While reading, as with other read methods, if blob.generation is not set
the most recent blob generation will be used. Because the file-like I/O
reader downloads progressively in chunks, this could result in data from
multiple versions being mixed together. If this is a concern, use
either bucket.get_blob() or blob.reload(), which will download the
latest generation number and set it; or, if the generation is known, set
it manually, for instance with bucket.blob(generation=123456).
:type mode: str
:param mode:
(Optional) A mode string, as per standard Python `open()` semantics. The first
character must be 'r', to open the blob for reading, or 'w' to open
it for writing. The second character, if present, must be 't' for
(unicode) text mode, or 'b' for bytes mode. If the second character
is omitted, text mode is the default.
:type chunk_size: int
:param chunk_size:
(Optional) For reads, the minimum number of bytes to read at a time.
If fewer bytes than the chunk_size are requested, the remainder is
buffered. For writes, the maximum number of bytes to buffer before
sending data to the server, and the size of each request when data
is sent. Writes are implemented as a "resumable upload", so
chunk_size for writes must be exactly a multiple of 256 KiB as with
other resumable uploads. The default is 40 MiB.
:type encoding: str
:param encoding:
(Optional) For text mode only, the name of the encoding that the stream will
be decoded or encoded with. If omitted, it defaults to
locale.getpreferredencoding(False).
:type errors: str
:param errors:
(Optional) For text mode only, an optional string that specifies how encoding
and decoding errors are to be handled. Pass 'strict' to raise a
ValueError exception if there is an encoding error (the default of
None has the same effect), or pass 'ignore' to ignore errors. (Note
that ignoring encoding errors can lead to data loss.) Other more
rarely-used options are also available; see the Python 'io' module
documentation for 'io.TextIOWrapper' for a complete list.
:type newline: str
:param newline:
(Optional) For text mode only, controls how line endings are handled. It can
be None, '', '\n', '\r', and '\r\n'. If None, reads use "universal
newline mode" and writes use the system default. See the Python
'io' module documentation for 'io.TextIOWrapper' for details.
:param kwargs: Keyword arguments to pass to the underlying API calls.
For both uploads and downloads, the following arguments are
supported: "if_generation_match", "if_generation_not_match",
"if_metageneration_match", "if_metageneration_not_match", "timeout".
For uploads only, the following additional arguments are supported:
"content_type", "num_retries", "predefined_acl", "checksum".
:returns: A 'BlobReader' or 'BlobWriter' from
'google.cloud.storage.fileio', or an 'io.TextIOWrapper' around one
of those classes, depending on the 'mode' argument.
Example:
Read from a text blob by using open() as a context manager.
Using bucket.get_blob() fetches metadata such as the generation,
which prevents race conditions in case the blob is modified.
>>> from google.cloud import storage
>>> client = storage.Client()
>>> bucket = client.bucket("bucket-name")
>>> blob = bucket.get_blob("blob-name.txt")
>>> with blob.open("rt") as f:
...     print(f.read())
"""
if mode == "rb":
if encoding or errors or newline:
raise ValueError(
"encoding, errors and newline arguments are for text mode only"
)
return BlobReader(self, chunk_size=chunk_size, **kwargs)
elif mode == "wb":
if encoding or errors or newline:
raise ValueError(
"encoding, errors and newline arguments are for text mode only"
)
return BlobWriter(self, chunk_size=chunk_size, **kwargs)
elif mode in ("r", "rt"):
return TextIOWrapper(
BlobReader(self, chunk_size=chunk_size, **kwargs),
encoding=encoding,
errors=errors,
newline=newline,
)
elif mode in ("w", "wt"):
return TextIOWrapper(
BlobWriter(self, chunk_size=chunk_size, text_mode=True, **kwargs),
encoding=encoding,
errors=errors,
newline=newline,
)
else:
raise NotImplementedError(
"Supported modes strings are 'r', 'rb', 'rt', 'w', 'wb', and 'wt' only."
)

cache_control = _scalar_property("cacheControl")
"""HTTP 'Cache-Control' header for this object.
13 changes: 9 additions & 4 deletions google/cloud/storage/bucket.py
@@ -611,6 +611,10 @@ def user_project(self):
If unset, API requests are billed to the bucket owner.
A user project is required for all operations on Requester Pays buckets.
See https://cloud.google.com/storage/docs/requester-pays#requirements for details.
:rtype: str
"""
return self._user_project
@@ -809,6 +813,9 @@ def create(
):
"""DEPRECATED. Creates current bucket.
.. note::
Direct use of this method is deprecated. Use ``Client.create_bucket()`` instead.
If the bucket already exists, will raise
:class:`google.cloud.exceptions.Conflict`.
@@ -825,7 +832,6 @@ def create(
:param project: (Optional) The project under which the bucket is to
be created. If not passed, uses the project set on
the client.
:raises ValueError: if :attr:`user_project` is set.
:raises ValueError: if ``project`` is None and client's
:attr:`project` is also None.
@@ -871,13 +877,12 @@ def create(
PendingDeprecationWarning,
stacklevel=1,
)
if self.user_project is not None:
raise ValueError("Cannot create bucket with 'user_project' set.")

client = self._require_client(client)
client.create_bucket(
bucket_or_name=self,
project=project,
user_project=self.user_project,
location=location,
predefined_acl=predefined_acl,
predefined_default_object_acl=predefined_default_object_acl,
@@ -1328,7 +1333,7 @@ def list_blobs(
>>> from google.cloud import storage
>>> client = storage.Client()
>>> bucket = storage.Bucket("my-bucket-name", user_project='my-project')
>>> bucket = storage.Bucket(client, "my-bucket-name", user_project="my-project")
>>> all_blobs = list(client.list_blobs(bucket))
"""
client = self._require_client(client)
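
With this fix, a user_project set on the bucket is forwarded to client.create_bucket() rather than raising ValueError. A minimal sketch, assuming hypothetical project and bucket names:

    from google.cloud import storage

    client = storage.Client()
    # Operations on this Requester Pays bucket are billed to "billing-project".
    bucket = storage.Bucket(client, "my-bucket", user_project="billing-project")
    # Deprecated, but user_project is now passed through to client.create_bucket()
    # instead of raising ValueError.
    bucket.create()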
2 changes: 1 addition & 1 deletion google/cloud/storage/client.py
@@ -839,7 +839,7 @@ def list_blobs(
>>> from google.cloud import storage
>>> client = storage.Client()
>>> bucket = storage.Bucket("my-bucket-name", user_project='my-project')
>>> bucket = storage.Bucket(client, "my-bucket-name", user_project="my-project")
>>> all_blobs = list(client.list_blobs(bucket))
"""
bucket = self._bucket_arg_to_bucket(bucket_or_name)
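
The corrected docstring example, expanded into a runnable sketch (bucket and project names are hypothetical); listing via client.list_blobs() bills the request to the bucket's user_project:

    from google.cloud import storage

    client = storage.Client()
    # Listing is billed to "my-project", as required for Requester Pays buckets.
    bucket = storage.Bucket(client, "my-bucket-name", user_project="my-project")
    for blob in client.list_blobs(bucket):
        print(blob.name)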