class BiEngineReason(typing.NamedTuple):
    """Reason for BI Engine acceleration failure.

    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#bienginereason
    """

    # Failure code string; "CODE_UNSPECIFIED" when the API resource has none.
    code: str = "CODE_UNSPECIFIED"

    # Explanatory text; populated from the ``message`` key of the API resource.
    reason: str = ""

    @classmethod
    def from_api_repr(cls, reason: Dict[str, str]) -> "BiEngineReason":
        """Build a reason from its API resource mapping.

        Args:
            reason: Mapping that may carry ``code`` and ``message`` keys.

        Returns:
            A ``BiEngineReason``; absent keys fall back to the field defaults.
        """
        return cls(reason.get("code", "CODE_UNSPECIFIED"), reason.get("message", ""))


class _BiEngineStatsFields(typing.NamedTuple):
    """Field layout backing ``BiEngineStats``.

    Defaults are deliberately not declared here: a NamedTuple default is a
    single object shared by every instance, which is unsafe for a list.
    """

    # Which mode of BI Engine acceleration was performed (if any).
    mode: str

    # Explanatory messages in case of DISABLED / PARTIAL acceleration.
    reasons: List[BiEngineReason]


class BiEngineStats(_BiEngineStatsFields):
    """Statistics for a BI Engine query.

    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#bienginestatistics

    Behaves as a two-field named tuple ``(mode, reasons)``. Both fields are
    optional at construction time; ``reasons`` defaults to a new empty list
    for each instance.
    """

    # Keep instances as plain tuples (no per-instance ``__dict__``).
    __slots__ = ()

    def __new__(
        cls,
        mode: str = "ACCELERATION_MODE_UNSPECIFIED",
        reasons: Optional[List[BiEngineReason]] = None,
    ) -> "BiEngineStats":
        """Create the tuple, allocating a fresh ``reasons`` list when omitted.

        Args:
            mode: Acceleration mode string.
            reasons: Reasons for DISABLED / PARTIAL acceleration. ``None``
                (the default) is replaced by a new empty list so that
                instances never share one mutable default.
        """
        return super().__new__(cls, mode, [] if reasons is None else reasons)

    @classmethod
    def from_api_repr(cls, stats: Dict[str, Any]) -> "BiEngineStats":
        """Build statistics from the ``biEngineStatistics`` API resource.

        Args:
            stats: Mapping that may carry ``biEngineMode`` and
                ``biEngineReasons`` keys.

        Returns:
            A ``BiEngineStats`` whose ``reasons`` holds one
            ``BiEngineReason`` per entry of ``biEngineReasons``.
        """
        mode = stats.get("biEngineMode", "ACCELERATION_MODE_UNSPECIFIED")
        # ``or []`` covers both a missing key and an explicit null value.
        reasons = [
            BiEngineReason.from_api_repr(r) for r in stats.get("biEngineReasons") or []
        ]
        return cls(mode, reasons)
class TestBiEngineStats:
    """Unit tests for ``BiEngineStats`` construction and API parsing."""

    @staticmethod
    def _get_target_class():
        # Imported at call time rather than at module import time.
        from google.cloud.bigquery.job.query import BiEngineStats

        return BiEngineStats

    def _make_one(self, *args, **kw):
        target = self._get_target_class()
        return target(*args, **kw)

    def test_ctor_defaults(self):
        stats = self._make_one()

        assert stats.mode == "ACCELERATION_MODE_UNSPECIFIED"
        assert stats.reasons == []

    def test_from_api_repr_unspecified(self):
        target = self._get_target_class()
        resource = {"biEngineMode": "ACCELERATION_MODE_UNSPECIFIED"}

        parsed = target.from_api_repr(resource)

        assert isinstance(parsed, target)
        assert parsed.mode == "ACCELERATION_MODE_UNSPECIFIED"
        assert parsed.reasons == []

    def test_from_api_repr_full(self):
        target = self._get_target_class()
        resource = {"biEngineMode": "FULL"}

        parsed = target.from_api_repr(resource)

        assert isinstance(parsed, target)
        assert parsed.mode == "FULL"
        assert parsed.reasons == []

    def test_from_api_repr_disabled(self):
        target = self._get_target_class()
        failure = {
            "code": "OTHER_REASON",
            "message": "Unable to support input table xyz due to an internal error.",
        }
        resource = {"biEngineMode": "DISABLED", "biEngineReasons": [failure]}

        parsed = target.from_api_repr(resource)

        assert isinstance(parsed, target)
        assert parsed.mode == "DISABLED"

        # Only the first parsed reason is inspected.
        first_reason = parsed.reasons[0]
        assert first_reason.code == "OTHER_REASON"
        assert (
            first_reason.reason
            == "Unable to support input table xyz due to an internal error."
        )