feat: convert StreamedResultSet to Pandas Dataframe #226

Open · wants to merge 20 commits into main
Changes from 5 commits
1 change: 1 addition & 0 deletions docs/conf.py
@@ -349,6 +349,7 @@
"google-auth": ("https://google-auth.readthedocs.io/en/stable", None),
"google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None,),
"grpc": ("https://grpc.io/grpc/python/", None),
"pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
}


69 changes: 69 additions & 0 deletions google/cloud/spanner_v1/_pandas_helpers.py
@@ -0,0 +1,69 @@
# Copyright 2021 Google LLC

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# https://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import warnings

SPANNER_TO_PANDAS_DTYPES = {
"INT64": "int64",
"STRING": "object",
"BOOL": "bool",
"BYTES": "object",
"ARRAY": "object",
"DATE": "datetime64[ns]",
"FLOAT64": "float64",
"NUMERIC": "object",
"TIMESTAMP": "datetime64[ns]",
}
Contributor: Let's remove this whole set of dtype conversion logic. It's error prone and the pandas defaults arguably get most things right.

Contributor: If we do want to keep this, then it should only include the "nullsafe" dtypes defined here: https://github.com/pydata/pandas-gbq/blob/22a6064ee616fbdd14ce2c8bf8bfe1ed7d3b6291/pandas_gbq/gbq.py#L644-L684

Contributor: Basically just float and datetimes.

Author: Done.
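For reference, a minimal sketch of that null-safe mapping, assuming (per the linked pandas-gbq code) that only floats and datetimes get explicit dtypes while everything else falls back to the pandas default of object:

# Illustrative only: dtypes that can represent NULL without raising.
# NULLs become NaN/NaT under these dtypes; "int64" and "bool" cannot
# hold missing values.
NULLSAFE_SPANNER_TO_PANDAS_DTYPES = {
    "FLOAT64": "float64",
    "DATE": "datetime64[ns]",
    "TIMESTAMP": "datetime64[ns]",
}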



def pd_dataframe(response_obj):
Contributor: response_obj is a very generic name that doesn't reflect the type. Please rename to something that indicates this is a StreamedResultSet, for example result_set.

Author: Changed to result_set.

"""This functions converts the query results into pandas dataframe

:rtype: pandas.Dataframe
Contributor: Suggested change:
- :rtype: pandas.Dataframe
+ :rtype: pandas.DataFrame
    :returns: DataFrame built with the help of a mapping dictionary that maps every Spanner datatype to a pandas-compatible datatype.
"""
try:
from pandas import DataFrame
except ImportError as err:
raise ImportError(
"Pandas module not found. It is needed for converting query result to Dataframe.\n Try running 'pip3 install pandas'"
Contributor: Suggested change:
- "Pandas module not found. It is needed for converting query result to Dataframe.\n Try running 'pip3 install pandas'"
+ "Pandas module not found. It is needed for converting query result to DataFrame.\n Try running 'pip3 install pandas'"

) from err

data = []
for row in response_obj:
data.append(row)

columns_dict = {}
try:
for item in response_obj.fields:
columns_dict[item.name] = item.type_.code
except:
Contributor: Don't catch all exceptions. What kinds of errors are you trying to handle? Let's be explicit.

Author: I think this try/except is not needed now.
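If the guard were kept, an explicit version might look like this sketch, which mirrors the snippet above; catching AttributeError is an assumption about what result_set.fields raises before any metadata has been received:

try:
    for item in result_set.fields:
        columns_dict[item.name] = item.type_.code
except AttributeError as err:
    # Assumption: .fields is backed by metadata that may not exist yet.
    raise ValueError("Result set metadata is not available yet.") from err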

logging.warning("Not able to create spanner to pandas fields mapping")
Contributor: Is a warning really the most appropriate thing for this code path? As a user, I wouldn't want to be surprised with lost data columns.

Author: Removed this, as we no longer have the dictionary for mapping, per the comments below.


# Creating list of columns to be mapped with the data
column_list = [k for k, v in columns_dict.items()]
Contributor: This can also be simplified:

Suggested change:
- column_list = [k for k, v in columns_dict.items()]
+ column_list = list(columns_dict.keys())

That said, I don't think this is what we want. If we want to preserve column order in all Python versions, we need to create a list while we are iterating through fields.

Author: Modified as per suggestion.
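A sketch of the order-preserving variant, assuming the parameter has been renamed to result_set as discussed above:

# Build the column list while iterating the fields, so column order is
# preserved even on Python versions where dict order is not guaranteed
# (before 3.7).
column_list = []
for field in result_set.fields:
    column_list.append(field.name)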


# Creating dataframe using column headers and list of data rows
df = DataFrame(data, columns=column_list)

for k, v in columns_dict.items():
try:
df[k] = df[k].astype(SPANNER_TO_PANDAS_DTYPES[v.name])
Contributor: This will fail for a great number of the data types that you have defined. Many of these data types do not support nullable values. Specifically, int64 is going to give you trouble with NULLs.

Author: Removed the mapping dictionary and this step too.
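To make the NULL problem concrete, a small standalone sketch: numpy's int64 cannot hold a missing value, while pandas' nullable Int64 extension dtype can:

import pandas as pd

s = pd.Series([1, 2, None])  # arrives as float64 with NaN
# s.astype("int64")          # raises: cannot convert non-finite values to integer
s = s.astype("Int64")        # nullable integer dtype; NULL becomes <NA>
print(s.tolist())            # [1, 2, <NA>]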

if v.name == "TIMESTAMP" or v.name == "DATE":
df[k] = df[k].dt.tz_localize("UTC")
except KeyError:
warnings.warn("Spanner Datatype not present in datatype mapping dictionary")
Contributor: Please write the data type in the error / warning message.

Contributor: On second thought, we don't really need this warning, as the default of object type is likely desired anyway.

Author: Removed the additional step where we were setting pandas datatypes; just kept the additional check for datetime-aware columns.
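The datetime handling that was kept is easy to demonstrate in isolation: dt.tz_localize("UTC") turns a naive datetime64[ns] column into a timezone-aware one, matching the fact that Spanner returns TIMESTAMP values in UTC. A minimal sketch:

import pandas as pd

df = pd.DataFrame({"ts": pd.to_datetime(["2021-01-01T00:00:00"])})
df["ts"] = df["ts"].dt.tz_localize("UTC")
print(df["ts"].dtype)  # datetime64[ns, UTC]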


return df
14 changes: 14 additions & 0 deletions google/cloud/spanner_v1/streamed.py
@@ -20,6 +20,7 @@

# pylint: disable=ungrouped-imports
from google.cloud.spanner_v1._helpers import _parse_value
from google.cloud.spanner_v1._pandas_helpers import pd_dataframe

# pylint: enable=ungrouped-imports

@@ -143,6 +144,19 @@ def __iter__(self):
while iter_rows:
yield iter_rows.pop(0)

def to_dataframe(self):
"""Returns the response in a pandas dataframe of the StreamedResultSet object
Contributor: Let's indicate "all rows" to be clear that this will download all data at once.

Suggested change:
- """Returns the response in a pandas dataframe of the StreamedResultSet object
+ """Creates a pandas DataFrame of all rows in the result set


:param: The response of type StreamedResultSet received from spanner api.
Contributor: Re: ":param: The response of type StreamedResultSet received from spanner api." In general, we don't document "self". When I mentioned the need to document this type, it was for the _pandas_helpers.pd_dataframe method.

Author: Got it. Removed it for self.


:rtype: pandas.DataFrame

Contributor: Suggested change: remove this blank line.

Author: Done.

:returns: Dataframe created from the StreamedResultSet response object returned by execute_sql() method
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The context of this docstring is in the StreamedResultSet class, so we don't have to repeat tha6t.

Suggested change
:returns: Dataframe created from the StreamedResultSet response object returned by execute_sql() method
:returns: DataFrame created from the result set

"""
response_obj = self
df = pd_dataframe(response_obj)
return df

def one(self):
"""Return exactly one result, or raise an exception.

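Taken together, a hypothetical end-to-end use of the new method might look like the sketch below; the database handle and table name are assumptions, and note that to_dataframe() pulls down every remaining row at once:

with database.snapshot() as snapshot:
    results = snapshot.execute_sql("SELECT * FROM Singers")
    df = results.to_dataframe()  # consumes the entire result stream
    print(df.head())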
3 changes: 2 additions & 1 deletion setup.py
@@ -41,7 +41,8 @@
"opentelemetry-api==0.11b0",
"opentelemetry-sdk==0.11b0",
"opentelemetry-instrumentation==0.11b0",
]
],
"pandas": ["pandas >= 0.25.3"],
}


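With this extras entry, the optional dependency can be installed alongside the client, e.g. pip install google-cloud-spanner[pandas] (assuming the published package name); installs without the extra are unaffected.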
49 changes: 49 additions & 0 deletions tests/system/test_system_pandasDf.py
@@ -0,0 +1,49 @@
# Copyright 2021 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from google.cloud import spanner_v1
from google.auth.credentials import AnonymousCredentials
import pytest

# for reference
TABLE_NAME = "testTable"
COLUMNS = ["id", "name"]
VALUES = [[1, "Alice"], [2, "Bob"]]
Contributor: We need a test with all available data types.

Also, we need tests that include null values in each of the data types:

  1. No nulls
  2. One or more nulls
  3. All nulls

I suspect we may need to cast to the appropriate dtype for TIMESTAMP and DATE columns in case (3), all nulls.
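A sketch of what that null coverage could look like, reusing this module's fixture; the row-insert step is assumed to happen during test setup and is omitted here:

import pytest

NULL_ROW_SETS = [
    [[1, "Alice"], [2, "Bob"]],  # (1) no nulls
    [[1, "Alice"], [2, None]],   # (2) one or more nulls
    [[1, None], [2, None]],      # (3) all nulls in a column
]

@pytest.mark.parametrize("rows", NULL_ROW_SETS)
def test_df_handles_nulls(rows, snapshot_obj):
    # Assumption: `rows` were inserted into testTable during setup.
    results = snapshot_obj.execute_sql("SELECT * FROM testTable")
    df = results.to_dataframe()
    assert len(df) == len(rows)  # NULL rows must not be silently dropped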



@pytest.fixture
def snapshot_obj():
try:
        spanner_client = spanner_v1.Client(
project="test-project",
client_options={"api_endpoint": "0.0.0.0:9010"},
credentials=AnonymousCredentials(),
)
instance_id = "test-instance"
instance = spanner_client.instance(instance_id)
database_id = "test-database"
database = instance.database(database_id)
with database.snapshot() as snapshot:
return snapshot

    except Exception:
pytest.skip("Cloud Spanner Emulator configuration is incorrect")

@pytest.mark.parametrize("limit", [0, 1, 2])
def test_df(limit, snapshot_obj):
results = snapshot_obj.execute_sql(
"Select * from testTable limit {limit}".format(limit=limit)
)
df = results.to_dataframe()
assert len(df) == limit
121 changes: 121 additions & 0 deletions tests/unit/test__pandas_helpers.py
@@ -0,0 +1,121 @@
# Copyright 2021 Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import unittest


class TestPandasDataFrame(unittest.TestCase):
def _getTargetClass(self):
from google.cloud.spanner_v1.streamed import StreamedResultSet

return StreamedResultSet

def _make_one(self, *args, **kwargs):
return self._getTargetClass()(*args, **kwargs)

@staticmethod
def _make_scalar_field(name, type_):
from google.cloud.spanner_v1 import StructType
from google.cloud.spanner_v1 import Type

return StructType.Field(name=name, type_=Type(code=type_))

@staticmethod
def _make_value(value):
from google.cloud.spanner_v1._helpers import _make_value_pb

return _make_value_pb(value)

@staticmethod
def _make_result_set_metadata(fields=(), transaction_id=None):
from google.cloud.spanner_v1 import ResultSetMetadata
from google.cloud.spanner_v1 import StructType

metadata = ResultSetMetadata(row_type=StructType(fields=[]))
for field in fields:
metadata.row_type.fields.append(field)
if transaction_id is not None:
metadata.transaction.id = transaction_id
return metadata

@staticmethod
def _make_result_set_stats(query_plan=None, **kw):
from google.cloud.spanner_v1 import ResultSetStats
from google.protobuf.struct_pb2 import Struct
from google.cloud.spanner_v1._helpers import _make_value_pb

query_stats = Struct(
fields={key: _make_value_pb(value) for key, value in kw.items()}
)
return ResultSetStats(query_plan=query_plan, query_stats=query_stats)

def test_multiple_rows(self):
from google.cloud.spanner_v1 import TypeCode

iterator = _MockCancellableIterator()
streamed = self._make_one(iterator)
FIELDS = [
self._make_scalar_field("Name", TypeCode.STRING),
self._make_scalar_field("Age", TypeCode.INT64),
]
        streamed._metadata = self._make_result_set_metadata(FIELDS)
        streamed._stats = self._make_result_set_stats()
streamed._rows[:] = [["Alice", 1], ["Bob", 2], ["Adam", 3]]
df_obj = streamed.to_dataframe()
assert len(df_obj) == 3

def test_single_rows(self):
from google.cloud.spanner_v1 import TypeCode

iterator = _MockCancellableIterator()
streamed = self._make_one(iterator)
FIELDS = [
self._make_scalar_field("Name", TypeCode.STRING),
self._make_scalar_field("Age", TypeCode.INT64),
]
        streamed._metadata = self._make_result_set_metadata(FIELDS)
        streamed._stats = self._make_result_set_stats()
streamed._rows[:] = [["Alice", 1]]
df_obj = streamed.to_dataframe()
assert len(df_obj) == 1

def test_no_rows(self):
from google.cloud.spanner_v1 import TypeCode

iterator = _MockCancellableIterator()
streamed = self._make_one(iterator)
FIELDS = [
self._make_scalar_field("Name", TypeCode.STRING),
self._make_scalar_field("Age", TypeCode.INT64),
]
        streamed._metadata = self._make_result_set_metadata(FIELDS)
        streamed._stats = self._make_result_set_stats()
streamed._rows[:] = []
df_obj = streamed.to_dataframe()
assert len(df_obj) == 0


class _MockCancellableIterator(object):

cancel_calls = 0

def __init__(self, *values):
self.iter_values = iter(values)

def next(self):
return next(self.iter_values)

def __next__(self): # pragma: NO COVER Py3k
return self.next()