From 70927c5e89c3d7da711a832601eb5a2f1333b384 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Fri, 21 Oct 2022 15:07:27 -0500 Subject: [PATCH 01/61] feat: add WriteDispositon to to_gbq --- pandas_gbq/gbq.py | 126 ++++++++++---------------------------- pandas_gbq/load.py | 21 +++++-- tests/unit/test_gbq.py | 4 +- tests/unit/test_to_gbq.py | 41 ++++--------- 4 files changed, 62 insertions(+), 130 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 56d6fd70..942fd809 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -577,6 +577,7 @@ def load_data( self, dataframe, destination_table_ref, + write_disposition, chunksize=None, schema=None, progress_bar=True, @@ -586,12 +587,12 @@ def load_data( from pandas_gbq import load total_rows = len(dataframe) - try: chunks = load.load_chunks( self.client, dataframe, destination_table_ref, + write_disposition, chunksize=chunksize, schema=schema, location=self.location, @@ -609,10 +610,10 @@ def load_data( except self.http_error as ex: self.process_http_error(ex) - def delete_and_recreate_table(self, project_id, dataset_id, table_id, table_schema): - table = _Table(project_id, dataset_id, credentials=self.credentials) - table.delete(table_id) - table.create(table_id, table_schema) + # def delete_and_recreate_table(self, project_id, dataset_id, table_id, table_schema): + # table = _Table(project_id, dataset_id, credentials=self.credentials) + # table.delete(table_id) + # table.create(table_id, table_schema) def _bqschema_to_nullsafe_dtypes(schema_fields): @@ -963,13 +964,13 @@ def to_gbq( project_id=None, chunksize=None, reauth=False, - if_exists="fail", auth_local_webserver=True, table_schema=None, location=None, progress_bar=True, credentials=None, api_method: str = "default", + write_disposition: str = "WRITE_EMPTY", verbose=None, private_key=None, ): @@ -1082,6 +1083,7 @@ def to_gbq( from google.api_core import exceptions as google_exceptions from google.cloud import bigquery + import pdb if verbose is not None and FEATURES.pandas_has_deprecated_verbose: warnings.warn( @@ -1114,9 +1116,6 @@ def to_gbq( stacklevel=2, ) - if if_exists not in ("fail", "replace", "append"): - raise ValueError("'{0}' is not valid for if_exists".format(if_exists)) - if "." not in destination_table: raise NotFoundException( "Invalid Table Name. Should be of the form 'datasetId.tableId' or " @@ -1142,8 +1141,10 @@ def to_gbq( table_id = destination_table_ref.table_id default_schema = _generate_bq_schema(dataframe) + # If table_schema isn't provided, we'll create one for you if not table_schema: table_schema = default_schema + # It table_schema is provided, we'll update the default schema to the provided table_schema else: table_schema = pandas_gbq.schema.update_schema( default_schema, dict(fields=table_schema) @@ -1151,8 +1152,11 @@ def to_gbq( # If table exists, check if_exists parameter try: + # try to get the table table = bqclient.get_table(destination_table_ref) + # and unless the table is not found (doesn't exist)... except google_exceptions.NotFound: + # if the table didn't exist, create it table_connector = _Table( project_id_table, dataset_id, @@ -1161,34 +1165,25 @@ def to_gbq( ) table_connector.create(table_id, table_schema) else: + # convert original schema (the schema that already exists) to pandas-gbq API format + # TODO: rename to "remote_schema" | add keyword arguments? 
`schema_is_subset(remote_schema=remote_schema, local_schema=table_schema)` original_schema = pandas_gbq.schema.to_pandas_gbq(table.schema) - - if if_exists == "fail": - raise TableCreationError( - "Could not create the table because it " - "already exists. " - "Change the if_exists parameter to " - "'append' or 'replace' data." - ) - elif if_exists == "replace": - connector.delete_and_recreate_table( - project_id_table, dataset_id, table_id, table_schema - ) - else: - if not pandas_gbq.schema.schema_is_subset(original_schema, table_schema): - raise InvalidSchema( - "Please verify that the structure and " - "data types in the DataFrame match the " - "schema of the destination table.", - table_schema, - original_schema, - ) - - # Update the local `table_schema` so mode (NULLABLE/REQUIRED) - # matches. See: https://github.com/pydata/pandas-gbq/issues/315 - table_schema = pandas_gbq.schema.update_schema( - table_schema, original_schema - ) + # check that the schema created here matches the schema of the destination table - does this have to happen here? + # we'd want to catch a mismatch early... + # "original_schema" is "remote_schema", "table_schema" is "local_schema" + # if not pandas_gbq.schema.schema_is_subset(original_schema, table_schema): + # raise InvalidSchema( + # "Please verify that the structure and " + # "data types in the DataFrame match the " + # "schema of the destination table.", + # table_schema, + # original_schema, + # ) + # Update the local `table_schema` so mode (NULLABLE/REQUIRED) + # matches. See: https://github.com/pydata/pandas-gbq/issues/315 + table_schema = pandas_gbq.schema.update_schema( + table_schema, original_schema + ) if dataframe.empty: # Create the table (if needed), but don't try to run a load job with an @@ -1198,6 +1193,7 @@ def to_gbq( connector.load_data( dataframe, destination_table_ref, + write_disposition=write_disposition, chunksize=chunksize, schema=table_schema, progress_bar=progress_bar, @@ -1292,64 +1288,6 @@ def exists(self, table_id): except self.http_error as ex: self.process_http_error(ex) - def create(self, table_id, schema): - """Create a table in Google BigQuery given a table and schema - - Parameters - ---------- - table : str - Name of table to be written - schema : str - Use the generate_bq_schema to generate your table schema from a - dataframe. 
- """ - from google.cloud.bigquery import DatasetReference - from google.cloud.bigquery import Table - from google.cloud.bigquery import TableReference - - if self.exists(table_id): - raise TableCreationError("Table {0} already exists".format(table_id)) - - if not _Dataset(self.project_id, credentials=self.credentials).exists( - self.dataset_id - ): - _Dataset( - self.project_id, - credentials=self.credentials, - location=self.location, - ).create(self.dataset_id) - - table_ref = TableReference( - DatasetReference(self.project_id, self.dataset_id), table_id - ) - table = Table(table_ref) - table.schema = pandas_gbq.schema.to_google_cloud_bigquery(schema) - - try: - self.client.create_table(table) - except self.http_error as ex: - self.process_http_error(ex) - - def delete(self, table_id): - """Delete a table in Google BigQuery - - Parameters - ---------- - table : str - Name of table to be deleted - """ - from google.api_core.exceptions import NotFound - - table_ref = self._table_ref(table_id) - try: - self.client.delete_table(table_ref) - except NotFound: - # Ignore 404 error which may occur if table already deleted - pass - except self.http_error as ex: - self.process_http_error(ex) - - class _Dataset(GbqConnector): def __init__( self, diff --git a/pandas_gbq/load.py b/pandas_gbq/load.py index 10328069..7bd78dfc 100644 --- a/pandas_gbq/load.py +++ b/pandas_gbq/load.py @@ -16,6 +16,7 @@ from pandas_gbq import exceptions from pandas_gbq.features import FEATURES import pandas_gbq.schema +import pdb def encode_chunk(dataframe): @@ -113,13 +114,13 @@ def load_parquet( client: bigquery.Client, dataframe: pandas.DataFrame, destination_table_ref: bigquery.TableReference, + write_disposition: str, location: Optional[str], schema: Optional[Dict[str, Any]], billing_project: Optional[str] = None, ): job_config = bigquery.LoadJobConfig() - job_config.write_disposition = "WRITE_APPEND" - job_config.create_disposition = "CREATE_NEVER" + job_config.write_disposition = write_disposition job_config.source_format = "PARQUET" if schema is not None: @@ -143,13 +144,14 @@ def load_parquet( def load_csv( dataframe: pandas.DataFrame, + write_disposition: str, chunksize: Optional[int], bq_schema: Optional[List[bigquery.SchemaField]], load_chunk: Callable, ): job_config = bigquery.LoadJobConfig() - job_config.write_disposition = "WRITE_APPEND" - job_config.create_disposition = "CREATE_NEVER" + job_config.write_disposition = write_disposition + # job_config.create_disposition = "CREATE_NEVER" job_config.source_format = "CSV" job_config.allow_quoted_newlines = True @@ -167,6 +169,7 @@ def load_csv_from_dataframe( client: bigquery.Client, dataframe: pandas.DataFrame, destination_table_ref: bigquery.TableReference, + write_disposition: str, location: Optional[str], chunksize: Optional[int], schema: Optional[Dict[str, Any]], @@ -187,13 +190,14 @@ def load_chunk(chunk, job_config): project=billing_project, ).result() - return load_csv(dataframe, chunksize, bq_schema, load_chunk) + return load_csv(dataframe, chunksize, bq_schema, write_disposition, load_chunk) def load_csv_from_file( client: bigquery.Client, dataframe: pandas.DataFrame, destination_table_ref: bigquery.TableReference, + write_disposition: str, location: Optional[str], chunksize: Optional[int], schema: Optional[Dict[str, Any]], @@ -223,24 +227,27 @@ def load_chunk(chunk, job_config): finally: chunk_buffer.close() - return load_csv(dataframe, chunksize, bq_schema, load_chunk) + return load_csv(dataframe, chunksize, bq_schema, write_disposition, load_chunk) 
def load_chunks( client, dataframe, destination_table_ref, + write_disposition, chunksize=None, schema=None, location=None, api_method="load_parquet", billing_project: Optional[str] = None, ): + # pdb.set_trace() if api_method == "load_parquet": load_parquet( client, dataframe, destination_table_ref, + write_disposition, location, schema, billing_project=billing_project, @@ -256,6 +263,7 @@ def load_chunks( location, chunksize, schema, + write_disposition=write_disposition, billing_project=billing_project, ) else: @@ -266,6 +274,7 @@ def load_chunks( location, chunksize, schema, + write_disposition=write_disposition, billing_project=billing_project, ) else: diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 5184562a..a22b60bf 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -628,7 +628,7 @@ def test_load_does_not_modify_schema_arg(mock_bigquery_client): "dataset.schematest", project_id="my-project", table_schema=original_schema, - if_exists="fail", + write_disposition="WRITE_EMPTY", ) assert original_schema == original_schema_cp @@ -645,7 +645,7 @@ def test_load_does_not_modify_schema_arg(mock_bigquery_client): "dataset.schematest", project_id="my-project", table_schema=original_schema, - if_exists="append", + write_disposition="WRITE_APPEND", ) assert original_schema == original_schema_cp diff --git a/tests/unit/test_to_gbq.py b/tests/unit/test_to_gbq.py index c8b419ed..273fb6bc 100644 --- a/tests/unit/test_to_gbq.py +++ b/tests/unit/test_to_gbq.py @@ -68,7 +68,7 @@ def test_to_gbq_load_method_translates_exception( expected_load_method.assert_called_once() -def test_to_gbq_with_if_exists_append(mock_bigquery_client, expected_load_method): +def test_to_gbq_with_write_disposition_append(mock_bigquery_client, expected_load_method): from google.cloud.bigquery import SchemaField mock_bigquery_client.get_table.return_value = google.cloud.bigquery.Table( @@ -82,12 +82,12 @@ def test_to_gbq_with_if_exists_append(mock_bigquery_client, expected_load_method DataFrame({"col_a": [0.25, 1.5, -1.0], "col_b": ["a", "b", "c"]}), "my_dataset.my_table", project_id="myproj", - if_exists="append", + write_disposition="WRITE_APPEND", ) expected_load_method.assert_called_once() -def test_to_gbq_with_if_exists_append_mismatch(mock_bigquery_client): +def test_to_gbq_with_write_disposition_append_mismatch(mock_bigquery_client): from google.cloud.bigquery import SchemaField mock_bigquery_client.get_table.return_value = google.cloud.bigquery.Table( @@ -99,7 +99,7 @@ def test_to_gbq_with_if_exists_append_mismatch(mock_bigquery_client): DataFrame({"col_a": [0.25, 1.5, -1.0]}), "my_dataset.my_table", project_id="myproj", - if_exists="append", + write_disposition="WRITE_APPEND", ) exc = exception_block.value @@ -112,7 +112,7 @@ def test_to_gbq_with_if_exists_append_mismatch(mock_bigquery_client): assert exc.local_schema == {"fields": [{"name": "col_a", "type": "FLOAT"}]} -def test_to_gbq_with_if_exists_replace(mock_bigquery_client): +def test_to_gbq_with_write_disposition_truncate(mock_bigquery_client, expected_load_method): mock_bigquery_client.get_table.side_effect = ( # Initial check google.cloud.bigquery.Table("myproj.my_dataset.my_table"), @@ -123,15 +123,12 @@ def test_to_gbq_with_if_exists_replace(mock_bigquery_client): DataFrame([[1]]), "my_dataset.my_table", project_id="myproj", - if_exists="replace", + write_disposition="WRITE_TRUNCATE", ) - # TODO: We can avoid these API calls by using write disposition in the load - # job. 
See: https://github.com/googleapis/python-bigquery-pandas/issues/118 - assert mock_bigquery_client.delete_table.called - assert mock_bigquery_client.create_table.called + expected_load_method.assert_called_once() -def test_to_gbq_with_if_exists_replace_cross_project( +def test_to_gbq_with_write_disposition_truncate_cross_project( mock_bigquery_client, expected_load_method ): mock_bigquery_client.get_table.side_effect = ( @@ -144,22 +141,10 @@ def test_to_gbq_with_if_exists_replace_cross_project( DataFrame([[1]]), "data-project.my_dataset.my_table", project_id="billing-project", - if_exists="replace", + write_disposition="WRITE_TRUNCATE", ) - # TODO: We can avoid these API calls by using write disposition in the load - # job. See: https://github.com/googleapis/python-bigquery-pandas/issues/118 - assert mock_bigquery_client.delete_table.called - args, _ = mock_bigquery_client.delete_table.call_args - table_delete: google.cloud.bigquery.TableReference = args[0] - assert table_delete.project == "data-project" - assert table_delete.dataset_id == "my_dataset" - assert table_delete.table_id == "my_table" - assert mock_bigquery_client.create_table.called - args, _ = mock_bigquery_client.create_table.call_args - table_create: google.cloud.bigquery.TableReference = args[0] - assert table_create.project == "data-project" - assert table_create.dataset_id == "my_dataset" - assert table_create.table_id == "my_table" + expected_load_method.assert_called_once() + # Check that billing project and destination table is set correctly. expected_load_method.assert_called_once() @@ -171,11 +156,11 @@ def test_to_gbq_with_if_exists_replace_cross_project( assert load_kwargs["project"] == "billing-project" -def test_to_gbq_with_if_exists_unknown(): +def test_to_gbq_with_write_disposition_unspecified(): with pytest.raises(ValueError): gbq.to_gbq( DataFrame([[1]]), "my_dataset.my_table", project_id="myproj", - if_exists="unknown", + write_disposition="WRITE_DISPOSITION_UNSPECIFIED", ) From 15de9f3b4b90ebabc134129611cea6fff79e9af0 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Mon, 24 Oct 2022 23:04:13 -0500 Subject: [PATCH 02/61] pass test_load unit tests --- pandas_gbq/gbq.py | 103 +++++++++++++++++++++++++++++--------- pandas_gbq/load.py | 24 +++++---- tests/system/test_gbq.py | 24 ++++----- tests/unit/test_load.py | 12 +++-- tests/unit/test_to_gbq.py | 18 +++---- 5 files changed, 119 insertions(+), 62 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 942fd809..12891c24 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -8,6 +8,7 @@ import logging import re import time +from tkinter import W import typing from typing import Any, Dict, Optional, Sequence, Union import warnings @@ -109,7 +110,7 @@ class InvalidPageToken(ValueError): pass -class InvalidSchema(ValueError): +class InvalidSchema(GenericGBQException): """ Raised when the provided DataFrame does not match the schema of the destination @@ -117,19 +118,13 @@ class InvalidSchema(ValueError): """ def __init__( - self, message: str, local_schema: Dict[str, Any], remote_schema: Dict[str, Any] - ): + self, message: str): super().__init__(message) - self._local_schema = local_schema - self._remote_schema = remote_schema - - @property - def local_schema(self) -> Dict[str, Any]: - return self._local_schema + self._message = message @property - def remote_schema(self) -> Dict[str, Any]: - return self._remote_schema + def message(self) -> str: + return self._message class 
NotFoundException(ValueError): @@ -290,9 +285,11 @@ def __init__( global context from google.api_core.exceptions import GoogleAPIError from google.api_core.exceptions import ClientError + from google.api_core.exceptions import BadRequest + from pandas_gbq import auth - self.http_error = (ClientError, GoogleAPIError) + self.http_error = (ClientError, GoogleAPIError, BadRequest) self.project_id = project_id self.location = location self.reauth = reauth @@ -377,13 +374,17 @@ def get_client(self): @staticmethod def process_http_error(ex): + import pdb # See `BigQuery Troubleshooting Errors # `__ - if "cancelled" in ex.message: raise QueryTimeout("Reason: {0}".format(ex)) - - raise GenericGBQException("Reason: {0}".format(ex)) + elif "Provided Schema does not match" in ex.message: + # pdb.set_trace() + error_message = ex.errors[0]["message"] + raise InvalidSchema(f"Reason: {error_message}") + else: + raise GenericGBQException("Reason: {0}".format(ex)) def download_table( self, @@ -577,7 +578,7 @@ def load_data( self, dataframe, destination_table_ref, - write_disposition, + write_disposition: str = "WRITE_EMPTY", chunksize=None, schema=None, progress_bar=True, @@ -592,11 +593,11 @@ def load_data( self.client, dataframe, destination_table_ref, - write_disposition, chunksize=chunksize, schema=schema, location=self.location, api_method=api_method, + write_disposition=write_disposition, billing_project=billing_project, ) if progress_bar and tqdm: @@ -610,12 +611,6 @@ def load_data( except self.http_error as ex: self.process_http_error(ex) - # def delete_and_recreate_table(self, project_id, dataset_id, table_id, table_schema): - # table = _Table(project_id, dataset_id, credentials=self.credentials) - # table.delete(table_id) - # table.create(table_id, table_schema) - - def _bqschema_to_nullsafe_dtypes(schema_fields): """Specify explicit dtypes based on BigQuery schema. @@ -970,10 +965,13 @@ def to_gbq( progress_bar=True, credentials=None, api_method: str = "default", - write_disposition: str = "WRITE_EMPTY", verbose=None, private_key=None, + write_disposition: str = "WRITE_EMPTY" ): + + # write_disposition: str = "WRITE_APPEND", + """Write a DataFrame to a Google BigQuery table. The main method a user calls to export pandas DataFrame contents to @@ -1002,6 +1000,8 @@ def to_gbq( reauth : bool, default False Force Google BigQuery to re-authenticate the user. This is useful if multiple accounts are used. + +TODO: write_disposition if_exists : str, default 'fail' Behavior when the destination table exists. Value can be one of: @@ -1288,6 +1288,61 @@ def exists(self, table_id): except self.http_error as ex: self.process_http_error(ex) + def create(self, table_id, schema): + """Create a table in Google BigQuery given a table and schema + Parameters + ---------- + table : str + Name of table to be written + schema : str + Use the generate_bq_schema to generate your table schema from a + dataframe. 
+ """ + from google.cloud.bigquery import DatasetReference + from google.cloud.bigquery import Table + from google.cloud.bigquery import TableReference + + if self.exists(table_id): + raise TableCreationError("Table {0} already exists".format(table_id)) + + if not _Dataset(self.project_id, credentials=self.credentials).exists( + self.dataset_id + ): + _Dataset( + self.project_id, + credentials=self.credentials, + location=self.location, + ).create(self.dataset_id) + + table_ref = TableReference( + DatasetReference(self.project_id, self.dataset_id), table_id + ) + table = Table(table_ref) + table.schema = pandas_gbq.schema.to_google_cloud_bigquery(schema) + + try: + self.client.create_table(table) + except self.http_error as ex: + self.process_http_error(ex) + + def delete(self, table_id): + """Delete a table in Google BigQuery + Parameters + ---------- + table : str + Name of table to be deleted + """ + from google.api_core.exceptions import NotFound + + table_ref = self._table_ref(table_id) + try: + self.client.delete_table(table_ref) + except NotFound: + # Ignore 404 error which may occur if table already deleted + pass + except self.http_error as ex: + self.process_http_error(ex) + class _Dataset(GbqConnector): def __init__( self, diff --git a/pandas_gbq/load.py b/pandas_gbq/load.py index 7bd78dfc..21e9f3ba 100644 --- a/pandas_gbq/load.py +++ b/pandas_gbq/load.py @@ -16,7 +16,6 @@ from pandas_gbq import exceptions from pandas_gbq.features import FEATURES import pandas_gbq.schema -import pdb def encode_chunk(dataframe): @@ -120,14 +119,18 @@ def load_parquet( billing_project: Optional[str] = None, ): job_config = bigquery.LoadJobConfig() + + # if write_disposition is not None: job_config.write_disposition = write_disposition + # else: + # job_config.write_disposition = "WRITE_EMPTY" + job_config.source_format = "PARQUET" if schema is not None: schema = pandas_gbq.schema.remove_policy_tags(schema) job_config.schema = pandas_gbq.schema.to_google_cloud_bigquery(schema) dataframe = cast_dataframe_for_parquet(dataframe, schema) - try: client.load_table_from_dataframe( dataframe, @@ -150,8 +153,12 @@ def load_csv( load_chunk: Callable, ): job_config = bigquery.LoadJobConfig() + + # if write_disposition is not None: job_config.write_disposition = write_disposition - # job_config.create_disposition = "CREATE_NEVER" + # else: + # job_config.write_disposition = "WRITE_EMPTY" + job_config.source_format = "CSV" job_config.allow_quoted_newlines = True @@ -190,7 +197,7 @@ def load_chunk(chunk, job_config): project=billing_project, ).result() - return load_csv(dataframe, chunksize, bq_schema, write_disposition, load_chunk) + return load_csv(dataframe, write_disposition, chunksize, bq_schema, load_chunk) def load_csv_from_file( @@ -227,21 +234,20 @@ def load_chunk(chunk, job_config): finally: chunk_buffer.close() - return load_csv(dataframe, chunksize, bq_schema, write_disposition, load_chunk) + return load_csv(dataframe, write_disposition, chunksize, bq_schema, load_chunk) def load_chunks( client, dataframe, destination_table_ref, - write_disposition, chunksize=None, schema=None, location=None, api_method="load_parquet", + write_disposition="WRITE_EMPTY", billing_project: Optional[str] = None, ): - # pdb.set_trace() if api_method == "load_parquet": load_parquet( client, @@ -260,10 +266,10 @@ def load_chunks( client, dataframe, destination_table_ref, + write_disposition, location, chunksize, schema, - write_disposition=write_disposition, billing_project=billing_project, ) else: @@ -271,10 +277,10 @@ def 
load_chunks( client, dataframe, destination_table_ref, + write_disposition, location, chunksize, schema, - write_disposition=write_disposition, billing_project=billing_project, ) else: diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py index ee8190b5..4eb6cbf0 100644 --- a/tests/system/test_gbq.py +++ b/tests/system/test_gbq.py @@ -675,7 +675,7 @@ def test_upload_data_if_table_exists_fail(self, project_id): df = make_mixed_dataframe_v2(test_size) self.table.create(TABLE_ID + test_id, gbq._generate_bq_schema(df)) - # Test the default value of if_exists is 'fail' + # Test the default value of write_disposition == 'WRITE_EMPTY' with pytest.raises(gbq.TableCreationError): gbq.to_gbq( df, @@ -684,13 +684,13 @@ def test_upload_data_if_table_exists_fail(self, project_id): credentials=self.credentials, ) - # Test the if_exists parameter with value 'fail' + # Test the write_disposition parameter with value 'WRITE_EMPTY' with pytest.raises(gbq.TableCreationError): gbq.to_gbq( df, self.destination_table + test_id, project_id, - if_exists="fail", + write_disposition="WRITE_EMPTY", credentials=self.credentials, ) @@ -709,12 +709,12 @@ def test_upload_data_if_table_exists_append(self, project_id): credentials=self.credentials, ) - # Test the if_exists parameter with value 'append' + # Test the write_disposition parameter with value 'WRITE_APPEND gbq.to_gbq( df, self.destination_table + test_id, project_id, - if_exists="append", + write_disposition="WRITE_APPEND", credentials=self.credentials, ) @@ -734,7 +734,7 @@ def test_upload_data_if_table_exists_append(self, project_id): df_different_schema, self.destination_table + test_id, project_id, - if_exists="append", + write_disposition="WRITE_APPEND", credentials=self.credentials, ) @@ -755,12 +755,12 @@ def test_upload_subset_columns_if_table_exists_append(self, project_id): credentials=self.credentials, ) - # Test the if_exists parameter with value 'append' + # Test the write_disposition parameter with value 'WRITE_APPEND' gbq.to_gbq( df_subset_cols, self.destination_table + test_id, project_id, - if_exists="append", + write_disposition="WRITE_APPEND", credentials=self.credentials, ) @@ -789,12 +789,12 @@ def test_upload_data_if_table_exists_replace(self, project_id): credentials=self.credentials, ) - # Test the if_exists parameter with the value 'replace'. + # Test the write_disposition parameter with the value 'WRITE_TRUNCATE'. 
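        # (With WRITE_TRUNCATE the load job itself overwrites the table's
        # contents; no separate delete/recreate of the table is needed. See
        # https://github.com/googleapis/python-bigquery-pandas/issues/118.)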
gbq.to_gbq( df_different_schema, self.destination_table + test_id, project_id, - if_exists="replace", + write_disposition="WRITE_TRUNCATE", credentials=self.credentials, ) @@ -924,7 +924,7 @@ def test_upload_data_flexible_column_order(self, project_id): df_columns_reversed, self.destination_table + test_id, project_id, - if_exists="append", + write_disposition="WRITE_APPEND", credentials=self.credentials, ) @@ -1324,7 +1324,7 @@ def test_to_gbq_does_not_override_mode(gbq_table, gbq_connector): pandas.DataFrame({"A": [1.0], "B": [2.0], "C": ["a"]}), "{0}.{1}".format(gbq_table.dataset_id, table_id), project_id=gbq_connector.project_id, - if_exists="append", + write_disposition="WRITE_APPEND", ) assert verify_schema(gbq_connector, gbq_table.dataset_id, table_id, table_schema) diff --git a/tests/unit/test_load.py b/tests/unit/test_load.py index f2209bda..7cdd3684 100644 --- a/tests/unit/test_load.py +++ b/tests/unit/test_load.py @@ -108,7 +108,7 @@ def test_load_csv_from_dataframe_allows_client_to_generate_schema(mock_bigquery_ _ = list( load.load_csv_from_dataframe( - mock_bigquery_client, df, destination, None, None, None + mock_bigquery_client, df, destination, None, None, None, None ) ) @@ -151,7 +151,7 @@ def test_load_csv_from_file_generates_schema(mock_bigquery_client): ) _ = list( - load.load_csv_from_file(mock_bigquery_client, df, destination, None, None, None) + load.load_csv_from_file(mock_bigquery_client, df, destination, None, None, None, None) ) mock_load = mock_bigquery_client.load_table_from_file @@ -211,10 +211,12 @@ def test_load_chunks_omits_policy_tags( mock_bigquery_client, df, destination, schema=schema, api_method=api_method ) ) + import pdb mock_load = load_method(mock_bigquery_client, api_method=api_method) assert mock_load.called _, kwargs = mock_load.call_args + # pdb.set_trace() assert "job_config" in kwargs sent_field = kwargs["job_config"].schema[0].to_api_repr() assert "policyTags" not in sent_field @@ -222,7 +224,7 @@ def test_load_chunks_omits_policy_tags( def test_load_chunks_with_invalid_api_method(): with pytest.raises(ValueError, match="Got unexpected api_method:"): - load.load_chunks(None, None, None, api_method="not_a_thing") + load.load_chunks(None, None, None, None, api_method="not_a_thing") def test_load_parquet_allows_client_to_generate_schema(mock_bigquery_client): @@ -233,7 +235,7 @@ def test_load_parquet_allows_client_to_generate_schema(mock_bigquery_client): "my-project.my_dataset.my_table" ) - load.load_parquet(mock_bigquery_client, df, destination, None, None) + load.load_parquet(mock_bigquery_client, df, destination, None, None, None,) mock_load = mock_bigquery_client.load_table_from_dataframe assert mock_load.called @@ -255,7 +257,7 @@ def test_load_parquet_with_bad_conversion(mock_bigquery_client): ) with pytest.raises(exceptions.ConversionError): - load.load_parquet(mock_bigquery_client, df, destination, None, None) + load.load_parquet(mock_bigquery_client, df, destination, None, None, None) @pytest.mark.parametrize( diff --git a/tests/unit/test_to_gbq.py b/tests/unit/test_to_gbq.py index 273fb6bc..ca69443c 100644 --- a/tests/unit/test_to_gbq.py +++ b/tests/unit/test_to_gbq.py @@ -87,14 +87,18 @@ def test_to_gbq_with_write_disposition_append(mock_bigquery_client, expected_loa expected_load_method.assert_called_once() -def test_to_gbq_with_write_disposition_append_mismatch(mock_bigquery_client): +def test_to_gbq_with_write_disposition_append_mismatch(mock_bigquery_client, expected_load_method): from google.cloud.bigquery import SchemaField + 
import pdb mock_bigquery_client.get_table.return_value = google.cloud.bigquery.Table( "myproj.my_dataset.my_table", schema=(SchemaField("col_a", "INTEGER"), SchemaField("col_b", "STRING")), ) + + # expected_load_method.side_effect = google.api_core.exceptions.NotFound("schema mismatch") with pytest.raises(gbq.InvalidSchema) as exception_block: + # pdb.set_trace() gbq.to_gbq( DataFrame({"col_a": [0.25, 1.5, -1.0]}), "my_dataset.my_table", @@ -153,14 +157,4 @@ def test_to_gbq_with_write_disposition_truncate_cross_project( assert table_destination.project == "data-project" assert table_destination.dataset_id == "my_dataset" assert table_destination.table_id == "my_table" - assert load_kwargs["project"] == "billing-project" - - -def test_to_gbq_with_write_disposition_unspecified(): - with pytest.raises(ValueError): - gbq.to_gbq( - DataFrame([[1]]), - "my_dataset.my_table", - project_id="myproj", - write_disposition="WRITE_DISPOSITION_UNSPECIFIED", - ) + assert load_kwargs["project"] == "billing-project" \ No newline at end of file From c44f1a4634ab4ee86f590e41dd5cae405339d923 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Mon, 24 Oct 2022 23:10:18 -0500 Subject: [PATCH 03/61] nox -s blacken --- pandas_gbq/gbq.py | 244 +++++++++++++++++++------------------- tests/unit/test_load.py | 13 +- tests/unit/test_to_gbq.py | 15 ++- 3 files changed, 143 insertions(+), 129 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 12891c24..e1b617dd 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -117,8 +117,7 @@ class InvalidSchema(GenericGBQException): table in BigQuery. """ - def __init__( - self, message: str): + def __init__(self, message: str): super().__init__(message) self._message = message @@ -375,6 +374,7 @@ def get_client(self): @staticmethod def process_http_error(ex): import pdb + # See `BigQuery Troubleshooting Errors # `__ if "cancelled" in ex.message: @@ -611,6 +611,7 @@ def load_data( except self.http_error as ex: self.process_http_error(ex) + def _bqschema_to_nullsafe_dtypes(schema_fields): """Specify explicit dtypes based on BigQuery schema. @@ -967,116 +968,116 @@ def to_gbq( api_method: str = "default", verbose=None, private_key=None, - write_disposition: str = "WRITE_EMPTY" + write_disposition: str = "WRITE_EMPTY", ): # write_disposition: str = "WRITE_APPEND", """Write a DataFrame to a Google BigQuery table. - The main method a user calls to export pandas DataFrame contents to - Google BigQuery table. - - This method uses the Google Cloud client library to make requests to - Google BigQuery, documented `here - `__. - - See the :ref:`How to authenticate with Google BigQuery ` - guide for authentication instructions. - - Parameters - ---------- - dataframe : pandas.DataFrame - DataFrame to be written to a Google BigQuery table. - destination_table : str - Name of table to be written, in the form ``dataset.tablename`` or - ``project.dataset.tablename``. - project_id : str, optional - Google Cloud Platform project ID. Optional when available from - the environment. - chunksize : int, optional - Number of rows to be inserted in each chunk from the dataframe. - Set to ``None`` to load the whole dataframe at once. - reauth : bool, default False - Force Google BigQuery to re-authenticate the user. This is useful - if multiple accounts are used. - -TODO: write_disposition - if_exists : str, default 'fail' - Behavior when the destination table exists. Value can be one of: - - ``'fail'`` - If table exists, do nothing. 
- ``'replace'`` - If table exists, drop it, recreate it, and insert data. - ``'append'`` - If table exists, insert data. Create if does not exist. - auth_local_webserver : bool, default True - Use the `local webserver flow - `_ - instead of the `console flow - `_ - when getting user credentials. Your code must run on the same machine - as your web browser and your web browser can access your application - via ``localhost:808X``. - - .. versionadded:: 0.2.0 - table_schema : list of dicts, optional - List of BigQuery table fields to which according DataFrame - columns conform to, e.g. ``[{'name': 'col1', 'type': - 'STRING'},...]``. The ``type`` values must be BigQuery type names. - - - If ``table_schema`` is provided, it may contain all or a subset of - DataFrame columns. If a subset is provided, the rest will be - inferred from the DataFrame dtypes. If ``table_schema`` contains - columns not in the DataFrame, they'll be ignored. - - If ``table_schema`` is **not** provided, it will be - generated according to dtypes of DataFrame columns. See - `Inferring the Table Schema - `__. - for a description of the schema inference. - - See `BigQuery API documentation on valid column names - __. - - .. versionadded:: 0.3.1 - location : str, optional - Location where the load job should run. See the `BigQuery locations - documentation - `__ for a - list of available locations. The location must match that of the - target dataset. - - .. versionadded:: 0.5.0 - progress_bar : bool, default True - Use the library `tqdm` to show the progress bar for the upload, - chunk by chunk. + The main method a user calls to export pandas DataFrame contents to + Google BigQuery table. - .. versionadded:: 0.5.0 - credentials : google.auth.credentials.Credentials, optional - Credentials for accessing Google APIs. Use this parameter to override - default credentials, such as to use Compute Engine - :class:`google.auth.compute_engine.Credentials` or Service Account - :class:`google.oauth2.service_account.Credentials` directly. + This method uses the Google Cloud client library to make requests to + Google BigQuery, documented `here + `__. - .. versionadded:: 0.8.0 - api_method : str, optional - API method used to upload DataFrame to BigQuery. One of "load_parquet", - "load_csv". Default "load_parquet" if pandas is version 1.1.0+, - otherwise "load_csv". + See the :ref:`How to authenticate with Google BigQuery ` + guide for authentication instructions. - .. versionadded:: 0.16.0 - verbose : bool, deprecated - Deprecated in Pandas-GBQ 0.4.0. Use the `logging module - to adjust verbosity instead - `__. - private_key : str, deprecated - Deprecated in pandas-gbq version 0.8.0. Use the ``credentials`` - parameter and - :func:`google.oauth2.service_account.Credentials.from_service_account_info` - or - :func:`google.oauth2.service_account.Credentials.from_service_account_file` - instead. + Parameters + ---------- + dataframe : pandas.DataFrame + DataFrame to be written to a Google BigQuery table. + destination_table : str + Name of table to be written, in the form ``dataset.tablename`` or + ``project.dataset.tablename``. + project_id : str, optional + Google Cloud Platform project ID. Optional when available from + the environment. + chunksize : int, optional + Number of rows to be inserted in each chunk from the dataframe. + Set to ``None`` to load the whole dataframe at once. + reauth : bool, default False + Force Google BigQuery to re-authenticate the user. This is useful + if multiple accounts are used. 
+ + TODO: write_disposition + if_exists : str, default 'fail' + Behavior when the destination table exists. Value can be one of: + + ``'fail'`` + If table exists, do nothing. + ``'replace'`` + If table exists, drop it, recreate it, and insert data. + ``'append'`` + If table exists, insert data. Create if does not exist. + auth_local_webserver : bool, default True + Use the `local webserver flow + `_ + instead of the `console flow + `_ + when getting user credentials. Your code must run on the same machine + as your web browser and your web browser can access your application + via ``localhost:808X``. + + .. versionadded:: 0.2.0 + table_schema : list of dicts, optional + List of BigQuery table fields to which according DataFrame + columns conform to, e.g. ``[{'name': 'col1', 'type': + 'STRING'},...]``. The ``type`` values must be BigQuery type names. + + - If ``table_schema`` is provided, it may contain all or a subset of + DataFrame columns. If a subset is provided, the rest will be + inferred from the DataFrame dtypes. If ``table_schema`` contains + columns not in the DataFrame, they'll be ignored. + - If ``table_schema`` is **not** provided, it will be + generated according to dtypes of DataFrame columns. See + `Inferring the Table Schema + `__. + for a description of the schema inference. + + See `BigQuery API documentation on valid column names + __. + + .. versionadded:: 0.3.1 + location : str, optional + Location where the load job should run. See the `BigQuery locations + documentation + `__ for a + list of available locations. The location must match that of the + target dataset. + + .. versionadded:: 0.5.0 + progress_bar : bool, default True + Use the library `tqdm` to show the progress bar for the upload, + chunk by chunk. + + .. versionadded:: 0.5.0 + credentials : google.auth.credentials.Credentials, optional + Credentials for accessing Google APIs. Use this parameter to override + default credentials, such as to use Compute Engine + :class:`google.auth.compute_engine.Credentials` or Service Account + :class:`google.oauth2.service_account.Credentials` directly. + + .. versionadded:: 0.8.0 + api_method : str, optional + API method used to upload DataFrame to BigQuery. One of "load_parquet", + "load_csv". Default "load_parquet" if pandas is version 1.1.0+, + otherwise "load_csv". + + .. versionadded:: 0.16.0 + verbose : bool, deprecated + Deprecated in Pandas-GBQ 0.4.0. Use the `logging module + to adjust verbosity instead + `__. + private_key : str, deprecated + Deprecated in pandas-gbq version 0.8.0. Use the ``credentials`` + parameter and + :func:`google.oauth2.service_account.Credentials.from_service_account_info` + or + :func:`google.oauth2.service_account.Credentials.from_service_account_file` + instead. """ _test_google_api_imports() @@ -1181,9 +1182,7 @@ def to_gbq( # ) # Update the local `table_schema` so mode (NULLABLE/REQUIRED) # matches. 
See: https://github.com/pydata/pandas-gbq/issues/315 - table_schema = pandas_gbq.schema.update_schema( - table_schema, original_schema - ) + table_schema = pandas_gbq.schema.update_schema(table_schema, original_schema) if dataframe.empty: # Create the table (if needed), but don't try to run a load job with an @@ -1326,22 +1325,23 @@ def create(self, table_id, schema): self.process_http_error(ex) def delete(self, table_id): - """Delete a table in Google BigQuery - Parameters - ---------- - table : str - Name of table to be deleted - """ - from google.api_core.exceptions import NotFound - - table_ref = self._table_ref(table_id) - try: - self.client.delete_table(table_ref) - except NotFound: - # Ignore 404 error which may occur if table already deleted - pass - except self.http_error as ex: - self.process_http_error(ex) + """Delete a table in Google BigQuery + Parameters + ---------- + table : str + Name of table to be deleted + """ + from google.api_core.exceptions import NotFound + + table_ref = self._table_ref(table_id) + try: + self.client.delete_table(table_ref) + except NotFound: + # Ignore 404 error which may occur if table already deleted + pass + except self.http_error as ex: + self.process_http_error(ex) + class _Dataset(GbqConnector): def __init__( diff --git a/tests/unit/test_load.py b/tests/unit/test_load.py index 7cdd3684..4bd07642 100644 --- a/tests/unit/test_load.py +++ b/tests/unit/test_load.py @@ -151,7 +151,9 @@ def test_load_csv_from_file_generates_schema(mock_bigquery_client): ) _ = list( - load.load_csv_from_file(mock_bigquery_client, df, destination, None, None, None, None) + load.load_csv_from_file( + mock_bigquery_client, df, destination, None, None, None, None + ) ) mock_load = mock_bigquery_client.load_table_from_file @@ -235,7 +237,14 @@ def test_load_parquet_allows_client_to_generate_schema(mock_bigquery_client): "my-project.my_dataset.my_table" ) - load.load_parquet(mock_bigquery_client, df, destination, None, None, None,) + load.load_parquet( + mock_bigquery_client, + df, + destination, + None, + None, + None, + ) mock_load = mock_bigquery_client.load_table_from_dataframe assert mock_load.called diff --git a/tests/unit/test_to_gbq.py b/tests/unit/test_to_gbq.py index ca69443c..8b2d6919 100644 --- a/tests/unit/test_to_gbq.py +++ b/tests/unit/test_to_gbq.py @@ -68,7 +68,9 @@ def test_to_gbq_load_method_translates_exception( expected_load_method.assert_called_once() -def test_to_gbq_with_write_disposition_append(mock_bigquery_client, expected_load_method): +def test_to_gbq_with_write_disposition_append( + mock_bigquery_client, expected_load_method +): from google.cloud.bigquery import SchemaField mock_bigquery_client.get_table.return_value = google.cloud.bigquery.Table( @@ -87,7 +89,9 @@ def test_to_gbq_with_write_disposition_append(mock_bigquery_client, expected_loa expected_load_method.assert_called_once() -def test_to_gbq_with_write_disposition_append_mismatch(mock_bigquery_client, expected_load_method): +def test_to_gbq_with_write_disposition_append_mismatch( + mock_bigquery_client, expected_load_method +): from google.cloud.bigquery import SchemaField import pdb @@ -116,7 +120,9 @@ def test_to_gbq_with_write_disposition_append_mismatch(mock_bigquery_client, exp assert exc.local_schema == {"fields": [{"name": "col_a", "type": "FLOAT"}]} -def test_to_gbq_with_write_disposition_truncate(mock_bigquery_client, expected_load_method): +def test_to_gbq_with_write_disposition_truncate( + mock_bigquery_client, expected_load_method +): 
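    # WRITE_TRUNCATE is expected to be handled by the load job itself, so this
    # test asserts a single load call rather than delete_table/create_table.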
mock_bigquery_client.get_table.side_effect = ( # Initial check google.cloud.bigquery.Table("myproj.my_dataset.my_table"), @@ -149,7 +155,6 @@ def test_to_gbq_with_write_disposition_truncate_cross_project( ) expected_load_method.assert_called_once() - # Check that billing project and destination table is set correctly. expected_load_method.assert_called_once() load_args, load_kwargs = expected_load_method.call_args @@ -157,4 +162,4 @@ def test_to_gbq_with_write_disposition_truncate_cross_project( assert table_destination.project == "data-project" assert table_destination.dataset_id == "my_dataset" assert table_destination.table_id == "my_table" - assert load_kwargs["project"] == "billing-project" \ No newline at end of file + assert load_kwargs["project"] == "billing-project" From bc8faf5a44e6696be3d2d9a75dce661459c3740f Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Tue, 25 Oct 2022 00:02:19 -0500 Subject: [PATCH 04/61] test write_disposition parameter --- pandas_gbq/exceptions.py | 6 ++++ pandas_gbq/gbq.py | 68 ++++++++++----------------------------- tests/unit/test_load.py | 2 -- tests/unit/test_to_gbq.py | 5 +-- 4 files changed, 24 insertions(+), 57 deletions(-) diff --git a/pandas_gbq/exceptions.py b/pandas_gbq/exceptions.py index 1b4f6925..464bf8a8 100644 --- a/pandas_gbq/exceptions.py +++ b/pandas_gbq/exceptions.py @@ -27,6 +27,12 @@ class InvalidPrivateKeyFormat(ValueError): Raised when provided private key has invalid format. """ +class InvalidSchema(GenericGBQException): + """ + Raised when the provided DataFrame does + not match the schema of the destination + table in BigQuery. + """ class PerformanceWarning(RuntimeWarning): """ diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index e1b617dd..823f561a 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -24,6 +24,7 @@ from pandas_gbq.exceptions import ( AccessDenied, GenericGBQException, + InvalidSchema ) from pandas_gbq.features import FEATURES import pandas_gbq.schema @@ -110,22 +111,6 @@ class InvalidPageToken(ValueError): pass -class InvalidSchema(GenericGBQException): - """ - Raised when the provided DataFrame does - not match the schema of the destination - table in BigQuery. - """ - - def __init__(self, message: str): - super().__init__(message) - self._message = message - - @property - def message(self) -> str: - return self._message - - class NotFoundException(ValueError): """ Raised when the project_id, table or dataset provided in the query could @@ -373,14 +358,12 @@ def get_client(self): @staticmethod def process_http_error(ex): - import pdb # See `BigQuery Troubleshooting Errors # `__ if "cancelled" in ex.message: raise QueryTimeout("Reason: {0}".format(ex)) elif "Provided Schema does not match" in ex.message: - # pdb.set_trace() error_message = ex.errors[0]["message"] raise InvalidSchema(f"Reason: {error_message}") else: @@ -578,7 +561,7 @@ def load_data( self, dataframe, destination_table_ref, - write_disposition: str = "WRITE_EMPTY", + write_disposition, chunksize=None, schema=None, progress_bar=True, @@ -970,9 +953,6 @@ def to_gbq( private_key=None, write_disposition: str = "WRITE_EMPTY", ): - - # write_disposition: str = "WRITE_APPEND", - """Write a DataFrame to a Google BigQuery table. The main method a user calls to export pandas DataFrame contents to @@ -1002,16 +982,6 @@ def to_gbq( Force Google BigQuery to re-authenticate the user. This is useful if multiple accounts are used. 
- TODO: write_disposition - if_exists : str, default 'fail' - Behavior when the destination table exists. Value can be one of: - - ``'fail'`` - If table exists, do nothing. - ``'replace'`` - If table exists, drop it, recreate it, and insert data. - ``'append'`` - If table exists, insert data. Create if does not exist. auth_local_webserver : bool, default True Use the `local webserver flow `_ @@ -1078,13 +1048,22 @@ def to_gbq( or :func:`google.oauth2.service_account.Credentials.from_service_account_file` instead. + + write_disposition: str, default "WRITE_EMPTY" + Behavior when the destination table exists. Value can be one of: + + ``'WRITE_EMPTY'`` + If table exists, do nothing. + ``'WRITE_TRUNCATE'`` + If table exists, drop it, recreate it, and insert data. + ``'WRITE_APPEND'`` + If table exists, insert data. Create if does not exist. """ _test_google_api_imports() from google.api_core import exceptions as google_exceptions from google.cloud import bigquery - import pdb if verbose is not None and FEATURES.pandas_has_deprecated_verbose: warnings.warn( @@ -1145,19 +1124,17 @@ def to_gbq( # If table_schema isn't provided, we'll create one for you if not table_schema: table_schema = default_schema - # It table_schema is provided, we'll update the default schema to the provided table_schema + # It table_schema is provided, we'll update the default_schema to the provided table_schema else: table_schema = pandas_gbq.schema.update_schema( default_schema, dict(fields=table_schema) ) - # If table exists, check if_exists parameter try: - # try to get the table + # Try to get the table table = bqclient.get_table(destination_table_ref) - # and unless the table is not found (doesn't exist)... except google_exceptions.NotFound: - # if the table didn't exist, create it + # If the table doesn't already exist, create it table_connector = _Table( project_id_table, dataset_id, @@ -1166,20 +1143,9 @@ def to_gbq( ) table_connector.create(table_id, table_schema) else: - # convert original schema (the schema that already exists) to pandas-gbq API format - # TODO: rename to "remote_schema" | add keyword arguments? `schema_is_subset(remote_schema=remote_schema, local_schema=table_schema)` + # Convert original schema (the schema that already exists) to pandas-gbq API format original_schema = pandas_gbq.schema.to_pandas_gbq(table.schema) - # check that the schema created here matches the schema of the destination table - does this have to happen here? - # we'd want to catch a mismatch early... - # "original_schema" is "remote_schema", "table_schema" is "local_schema" - # if not pandas_gbq.schema.schema_is_subset(original_schema, table_schema): - # raise InvalidSchema( - # "Please verify that the structure and " - # "data types in the DataFrame match the " - # "schema of the destination table.", - # table_schema, - # original_schema, - # ) + # Update the local `table_schema` so mode (NULLABLE/REQUIRED) # matches. 
See: https://github.com/pydata/pandas-gbq/issues/315 table_schema = pandas_gbq.schema.update_schema(table_schema, original_schema) diff --git a/tests/unit/test_load.py b/tests/unit/test_load.py index 4bd07642..1d99d9b4 100644 --- a/tests/unit/test_load.py +++ b/tests/unit/test_load.py @@ -213,12 +213,10 @@ def test_load_chunks_omits_policy_tags( mock_bigquery_client, df, destination, schema=schema, api_method=api_method ) ) - import pdb mock_load = load_method(mock_bigquery_client, api_method=api_method) assert mock_load.called _, kwargs = mock_load.call_args - # pdb.set_trace() assert "job_config" in kwargs sent_field = kwargs["job_config"].schema[0].to_api_repr() assert "policyTags" not in sent_field diff --git a/tests/unit/test_to_gbq.py b/tests/unit/test_to_gbq.py index 8b2d6919..30323ed2 100644 --- a/tests/unit/test_to_gbq.py +++ b/tests/unit/test_to_gbq.py @@ -90,19 +90,16 @@ def test_to_gbq_with_write_disposition_append( def test_to_gbq_with_write_disposition_append_mismatch( - mock_bigquery_client, expected_load_method + mock_bigquery_client ): from google.cloud.bigquery import SchemaField - import pdb mock_bigquery_client.get_table.return_value = google.cloud.bigquery.Table( "myproj.my_dataset.my_table", schema=(SchemaField("col_a", "INTEGER"), SchemaField("col_b", "STRING")), ) - # expected_load_method.side_effect = google.api_core.exceptions.NotFound("schema mismatch") with pytest.raises(gbq.InvalidSchema) as exception_block: - # pdb.set_trace() gbq.to_gbq( DataFrame({"col_a": [0.25, 1.5, -1.0]}), "my_dataset.my_table", From c3ed7a7b09203f3e7b21de2c6235ca05b48b9864 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Tue, 25 Oct 2022 22:08:45 -0500 Subject: [PATCH 05/61] refactor tests for WriteDisposition --- pandas_gbq/exceptions.py | 6 - pandas_gbq/gbq.py | 232 ++++++++++++++++++++------------------ pandas_gbq/load.py | 9 -- tests/system/test_gbq.py | 14 ++- tests/unit/test_to_gbq.py | 18 +-- 5 files changed, 142 insertions(+), 137 deletions(-) diff --git a/pandas_gbq/exceptions.py b/pandas_gbq/exceptions.py index 464bf8a8..1b4f6925 100644 --- a/pandas_gbq/exceptions.py +++ b/pandas_gbq/exceptions.py @@ -27,12 +27,6 @@ class InvalidPrivateKeyFormat(ValueError): Raised when provided private key has invalid format. """ -class InvalidSchema(GenericGBQException): - """ - Raised when the provided DataFrame does - not match the schema of the destination - table in BigQuery. - """ class PerformanceWarning(RuntimeWarning): """ diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 823f561a..88c7cc4a 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -21,11 +21,7 @@ if typing.TYPE_CHECKING: # pragma: NO COVER import pandas -from pandas_gbq.exceptions import ( - AccessDenied, - GenericGBQException, - InvalidSchema -) +from pandas_gbq.exceptions import AccessDenied, GenericGBQException from pandas_gbq.features import FEATURES import pandas_gbq.schema import pandas_gbq.timestamp @@ -111,6 +107,21 @@ class InvalidPageToken(ValueError): pass +class InvalidSchema(ValueError): + """ + Raised when the provided DataFrame does + not match the schema of the destination + table in BigQuery. 
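
    Raised by ``GbqConnector.process_http_error`` when the BigQuery error
    message contains "Provided Schema does not match".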
+ """ + + def __init__(self, message: str): + self._message = message + + @property + def message(self) -> str: + return self._message + + class NotFoundException(ValueError): """ Raised when the project_id, table or dataset provided in the query could @@ -134,7 +145,12 @@ class TableCreationError(ValueError): Raised when the create table method fails """ - pass + def __init__(self, message: str): + self._message = message + + @property + def message(self) -> str: + return self._message class Context(object): @@ -358,7 +374,6 @@ def get_client(self): @staticmethod def process_http_error(ex): - # See `BigQuery Troubleshooting Errors # `__ if "cancelled" in ex.message: @@ -366,6 +381,9 @@ def process_http_error(ex): elif "Provided Schema does not match" in ex.message: error_message = ex.errors[0]["message"] raise InvalidSchema(f"Reason: {error_message}") + elif "Already Exists: Table" in ex.message: + error_message = ex.errors[0]["message"] + raise TableCreationError(f"Reason: {error_message}") else: raise GenericGBQException("Reason: {0}".format(ex)) @@ -955,109 +973,109 @@ def to_gbq( ): """Write a DataFrame to a Google BigQuery table. - The main method a user calls to export pandas DataFrame contents to - Google BigQuery table. + The main method a user calls to export pandas DataFrame contents to + Google BigQuery table. + + This method uses the Google Cloud client library to make requests to + Google BigQuery, documented `here + `__. - This method uses the Google Cloud client library to make requests to - Google BigQuery, documented `here - `__. + See the :ref:`How to authenticate with Google BigQuery ` + guide for authentication instructions. - See the :ref:`How to authenticate with Google BigQuery ` - guide for authentication instructions. + Parameters + ---------- + dataframe : pandas.DataFrame + DataFrame to be written to a Google BigQuery table. + destination_table : str + Name of table to be written, in the form ``dataset.tablename`` or + ``project.dataset.tablename``. + project_id : str, optional + Google Cloud Platform project ID. Optional when available from + the environment. + chunksize : int, optional + Number of rows to be inserted in each chunk from the dataframe. + Set to ``None`` to load the whole dataframe at once. + reauth : bool, default False + Force Google BigQuery to re-authenticate the user. This is useful + if multiple accounts are used. - Parameters - ---------- - dataframe : pandas.DataFrame - DataFrame to be written to a Google BigQuery table. - destination_table : str - Name of table to be written, in the form ``dataset.tablename`` or - ``project.dataset.tablename``. - project_id : str, optional - Google Cloud Platform project ID. Optional when available from - the environment. - chunksize : int, optional - Number of rows to be inserted in each chunk from the dataframe. - Set to ``None`` to load the whole dataframe at once. - reauth : bool, default False - Force Google BigQuery to re-authenticate the user. This is useful - if multiple accounts are used. - - auth_local_webserver : bool, default True - Use the `local webserver flow - `_ - instead of the `console flow - `_ - when getting user credentials. Your code must run on the same machine - as your web browser and your web browser can access your application - via ``localhost:808X``. - - .. versionadded:: 0.2.0 - table_schema : list of dicts, optional - List of BigQuery table fields to which according DataFrame - columns conform to, e.g. ``[{'name': 'col1', 'type': - 'STRING'},...]``. 
The ``type`` values must be BigQuery type names. - - - If ``table_schema`` is provided, it may contain all or a subset of - DataFrame columns. If a subset is provided, the rest will be - inferred from the DataFrame dtypes. If ``table_schema`` contains - columns not in the DataFrame, they'll be ignored. - - If ``table_schema`` is **not** provided, it will be - generated according to dtypes of DataFrame columns. See - `Inferring the Table Schema - `__. - for a description of the schema inference. - - See `BigQuery API documentation on valid column names - __. - - .. versionadded:: 0.3.1 - location : str, optional - Location where the load job should run. See the `BigQuery locations - documentation - `__ for a - list of available locations. The location must match that of the - target dataset. - - .. versionadded:: 0.5.0 - progress_bar : bool, default True - Use the library `tqdm` to show the progress bar for the upload, - chunk by chunk. - - .. versionadded:: 0.5.0 - credentials : google.auth.credentials.Credentials, optional - Credentials for accessing Google APIs. Use this parameter to override - default credentials, such as to use Compute Engine - :class:`google.auth.compute_engine.Credentials` or Service Account - :class:`google.oauth2.service_account.Credentials` directly. - - .. versionadded:: 0.8.0 - api_method : str, optional - API method used to upload DataFrame to BigQuery. One of "load_parquet", - "load_csv". Default "load_parquet" if pandas is version 1.1.0+, - otherwise "load_csv". - - .. versionadded:: 0.16.0 - verbose : bool, deprecated - Deprecated in Pandas-GBQ 0.4.0. Use the `logging module - to adjust verbosity instead - `__. - private_key : str, deprecated - Deprecated in pandas-gbq version 0.8.0. Use the ``credentials`` - parameter and - :func:`google.oauth2.service_account.Credentials.from_service_account_info` - or - :func:`google.oauth2.service_account.Credentials.from_service_account_file` - instead. - - write_disposition: str, default "WRITE_EMPTY" - Behavior when the destination table exists. Value can be one of: - - ``'WRITE_EMPTY'`` - If table exists, do nothing. - ``'WRITE_TRUNCATE'`` - If table exists, drop it, recreate it, and insert data. - ``'WRITE_APPEND'`` - If table exists, insert data. Create if does not exist. + auth_local_webserver : bool, default True + Use the `local webserver flow + `_ + instead of the `console flow + `_ + when getting user credentials. Your code must run on the same machine + as your web browser and your web browser can access your application + via ``localhost:808X``. + + .. versionadded:: 0.2.0 + table_schema : list of dicts, optional + List of BigQuery table fields to which according DataFrame + columns conform to, e.g. ``[{'name': 'col1', 'type': + 'STRING'},...]``. The ``type`` values must be BigQuery type names. + + - If ``table_schema`` is provided, it may contain all or a subset of + DataFrame columns. If a subset is provided, the rest will be + inferred from the DataFrame dtypes. If ``table_schema`` contains + columns not in the DataFrame, they'll be ignored. + - If ``table_schema`` is **not** provided, it will be + generated according to dtypes of DataFrame columns. See + `Inferring the Table Schema + `__. + for a description of the schema inference. + + See `BigQuery API documentation on valid column names + __. + + .. versionadded:: 0.3.1 + location : str, optional + Location where the load job should run. See the `BigQuery locations + documentation + `__ for a + list of available locations. 
The location must match that of the + target dataset. + + .. versionadded:: 0.5.0 + progress_bar : bool, default True + Use the library `tqdm` to show the progress bar for the upload, + chunk by chunk. + + .. versionadded:: 0.5.0 + credentials : google.auth.credentials.Credentials, optional + Credentials for accessing Google APIs. Use this parameter to override + default credentials, such as to use Compute Engine + :class:`google.auth.compute_engine.Credentials` or Service Account + :class:`google.oauth2.service_account.Credentials` directly. + + .. versionadded:: 0.8.0 + api_method : str, optional + API method used to upload DataFrame to BigQuery. One of "load_parquet", + "load_csv". Default "load_parquet" if pandas is version 1.1.0+, + otherwise "load_csv". + + .. versionadded:: 0.16.0 + verbose : bool, deprecated + Deprecated in Pandas-GBQ 0.4.0. Use the `logging module + to adjust verbosity instead + `__. + private_key : str, deprecated + Deprecated in pandas-gbq version 0.8.0. Use the ``credentials`` + parameter and + :func:`google.oauth2.service_account.Credentials.from_service_account_info` + or + :func:`google.oauth2.service_account.Credentials.from_service_account_file` + instead. + + write_disposition: str, default "WRITE_EMPTY" + Behavior when the destination table exists. Value can be one of: + + ``'WRITE_EMPTY'`` + If table exists, do nothing. + ``'WRITE_TRUNCATE'`` + If table exists, drop it, recreate it, and insert data. + ``'WRITE_APPEND'`` + If table exists, insert data. Create if does not exist. """ _test_google_api_imports() diff --git a/pandas_gbq/load.py b/pandas_gbq/load.py index 21e9f3ba..5efac314 100644 --- a/pandas_gbq/load.py +++ b/pandas_gbq/load.py @@ -119,12 +119,7 @@ def load_parquet( billing_project: Optional[str] = None, ): job_config = bigquery.LoadJobConfig() - - # if write_disposition is not None: job_config.write_disposition = write_disposition - # else: - # job_config.write_disposition = "WRITE_EMPTY" - job_config.source_format = "PARQUET" if schema is not None: @@ -154,11 +149,7 @@ def load_csv( ): job_config = bigquery.LoadJobConfig() - # if write_disposition is not None: job_config.write_disposition = write_disposition - # else: - # job_config.write_disposition = "WRITE_EMPTY" - job_config.source_format = "CSV" job_config.allow_quoted_newlines = True diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py index 4eb6cbf0..a0f59b3d 100644 --- a/tests/system/test_gbq.py +++ b/tests/system/test_gbq.py @@ -673,7 +673,15 @@ def test_upload_data_if_table_exists_fail(self, project_id): test_id = "2" test_size = 10 df = make_mixed_dataframe_v2(test_size) - self.table.create(TABLE_ID + test_id, gbq._generate_bq_schema(df)) + + # Initialize table with sample data + gbq.to_gbq( + df, + self.destination_table + test_id, + project_id, + chunksize=10000, + credentials=self.credentials, + ) # Test the default value of write_disposition == 'WRITE_EMPTY' with pytest.raises(gbq.TableCreationError): @@ -813,13 +821,13 @@ def test_upload_data_if_table_exists_raises_value_error(self, project_id): test_size = 10 df = make_mixed_dataframe_v2(test_size) - # Test invalid value for if_exists parameter raises value error + # Test invalid value for write_disposition parameter raises value error with pytest.raises(ValueError): gbq.to_gbq( df, self.destination_table + test_id, project_id, - if_exists="xxxxx", + write_disposition="WRITE_DISPOSITION_UNSPECIFIED", credentials=self.credentials, ) diff --git a/tests/unit/test_to_gbq.py b/tests/unit/test_to_gbq.py index 
30323ed2..42367266 100644 --- a/tests/unit/test_to_gbq.py +++ b/tests/unit/test_to_gbq.py @@ -89,32 +89,26 @@ def test_to_gbq_with_write_disposition_append( expected_load_method.assert_called_once() -def test_to_gbq_with_write_disposition_append_mismatch( - mock_bigquery_client -): +def test_to_gbq_with_write_disposition_append_mismatch(mock_bigquery_client): from google.cloud.bigquery import SchemaField mock_bigquery_client.get_table.return_value = google.cloud.bigquery.Table( "myproj.my_dataset.my_table", schema=(SchemaField("col_a", "INTEGER"), SchemaField("col_b", "STRING")), ) + mock_bigquery_client.side_effect = gbq.InvalidSchema( + message=r"Provided Schema does not match Table *" + ) - with pytest.raises(gbq.InvalidSchema) as exception_block: + with pytest.raises((gbq.InvalidSchema)) as exception_block: gbq.to_gbq( DataFrame({"col_a": [0.25, 1.5, -1.0]}), "my_dataset.my_table", project_id="myproj", write_disposition="WRITE_APPEND", ) - exc = exception_block.value - assert exc.remote_schema == { - "fields": [ - {"name": "col_a", "type": "INTEGER", "mode": "NULLABLE"}, - {"name": "col_b", "type": "STRING", "mode": "NULLABLE"}, - ] - } - assert exc.local_schema == {"fields": [{"name": "col_a", "type": "FLOAT"}]} + assert exc.message == r"Provided Schema does not match Table *" def test_to_gbq_with_write_disposition_truncate( From 936113e812711add6a67dbff5d3a4f6d592853b9 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Tue, 25 Oct 2022 22:10:29 -0500 Subject: [PATCH 06/61] remove unused import --- pandas_gbq/gbq.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 88c7cc4a..daf38398 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -8,7 +8,6 @@ import logging import re import time -from tkinter import W import typing from typing import Any, Dict, Optional, Sequence, Union import warnings From 86134588c2702e84ffd5a6e917ff5f6680586abd Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Wed, 26 Oct 2022 14:20:58 -0500 Subject: [PATCH 07/61] update minimum google-auth version to 2.13.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0bf0c7b2..635af8ae 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ # dependency on google-cloud-bigquery, but this library also uses them # directly. "google-api-core >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", - "google-auth >=1.25.0", + "google-auth >=2.13.0", "google-auth-oauthlib >=0.0.1", # Require 1.27.* because it has a fix for out-of-bounds timestamps. 
See: # https://github.com/googleapis/python-bigquery/pull/209 and From bc099296fd4663276600951a886387210a633154 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Wed, 26 Oct 2022 14:31:28 -0500 Subject: [PATCH 08/61] update constraints-3.7.txt --- testing/constraints-3.7.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 1d9efed7..53f0c680 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -7,7 +7,7 @@ # Then this file should have foo==1.14.0 db-dtypes==0.3.1 google-api-core==1.31.5 -google-auth==1.25.0 +google-auth==2.13.0 google-auth-oauthlib==0.0.1 google-cloud-bigquery==1.27.2 google-cloud-bigquery-storage==1.1.0 From f7394bb050a256b6fbff631cff2def21e2b11f1b Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Wed, 26 Oct 2022 15:01:14 -0500 Subject: [PATCH 09/61] bump google-api-core version to 2.10.2 and google-auth-oauthlib to 0.7.0 --- setup.py | 4 ++-- testing/constraints-3.7.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 635af8ae..4e90429d 100644 --- a/setup.py +++ b/setup.py @@ -31,9 +31,9 @@ # Note: google-api-core and google-auth are also included via transitive # dependency on google-cloud-bigquery, but this library also uses them # directly. - "google-api-core >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", + "google-api-core >= 2.10.2, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", "google-auth >=2.13.0", - "google-auth-oauthlib >=0.0.1", + "google-auth-oauthlib >=0.7.0", # Require 1.27.* because it has a fix for out-of-bounds timestamps. See: # https://github.com/googleapis/python-bigquery/pull/209 and # https://github.com/googleapis/python-bigquery-pandas/issues/365 diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 53f0c680..9ba3b1eb 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -6,9 +6,9 @@ # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 db-dtypes==0.3.1 -google-api-core==1.31.5 +google-api-core==2.10.2 google-auth==2.13.0 -google-auth-oauthlib==0.0.1 +google-auth-oauthlib==0.7.0 google-cloud-bigquery==1.27.2 google-cloud-bigquery-storage==1.1.0 numpy==1.16.6 From 0d456655d4ff73fdf670af7de087cc9c9ec59e47 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Wed, 26 Oct 2022 15:09:16 -0500 Subject: [PATCH 10/61] remove version constraints from google-api-core --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4e90429d..d1e46071 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ # Note: google-api-core and google-auth are also included via transitive # dependency on google-cloud-bigquery, but this library also uses them # directly. - "google-api-core >= 2.10.2, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", + "google-api-core >= 2.10.2, <3.0.0dev", "google-auth >=2.13.0", "google-auth-oauthlib >=0.7.0", # Require 1.27.* because it has a fix for out-of-bounds timestamps. 
See:

From 0e4be0915484e772fbe81b45c44364ab7f4d60a9 Mon Sep 17 00:00:00 2001
From: aribray <45905583+aribray@users.noreply.github.com>
Date: Wed, 26 Oct 2022 15:21:52 -0500
Subject: [PATCH 11/61] bump google-cloud-bigquery version to 3.3.5, google-cloud-bigquery-storage version to 2.16.2, and pydata-google-auth version to 1.4.0

---
 ci/requirements-3.7-0.24.2.conda  | 4 ++--
 samples/snippets/requirements.txt | 4 ++--
 setup.py                          | 4 ++--
 testing/constraints-3.7.txt       | 6 +++---
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda
index 2facfb2c..bdc162be 100644
--- a/ci/requirements-3.7-0.24.2.conda
+++ b/ci/requirements-3.7-0.24.2.conda
@@ -5,8 +5,8 @@ fastavro
 flake8
 freezegun
 numpy==1.16.6
-google-cloud-bigquery==1.27.2
-google-cloud-bigquery-storage==1.1.0
+google-cloud-bigquery==3.3.5
+google-cloud-bigquery-storage==2.16.2
 pyarrow==3.0.0
 pydata-google-auth
 pytest
diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt
index 6a681437..3329e69a 100644
--- a/samples/snippets/requirements.txt
+++ b/samples/snippets/requirements.txt
@@ -1,5 +1,5 @@
-google-cloud-bigquery-storage==2.15.0
-google-cloud-bigquery==3.3.2
+google-cloud-bigquery-storage==2.16.2
+google-cloud-bigquery==3.3.5
 pandas-gbq==0.17.8
 pandas===1.3.5; python_version == '3.7'
 pandas==1.4.4; python_version >= '3.8'
diff --git a/setup.py b/setup.py
index d1e46071..c1597385 100644
--- a/setup.py
+++ b/setup.py
@@ -39,8 +39,8 @@
     # https://github.com/googleapis/python-bigquery-pandas/issues/365
     # Exclude 2.4.* because it has a bug where waiting for the query can hang
     # indefinitely. https://github.com/pydata/pandas-gbq/issues/343
-    "google-cloud-bigquery >=1.27.2,<4.0.0dev,!=2.4.*",
-    "google-cloud-bigquery-storage >=1.1.0,<3.0.0dev",
+    "google-cloud-bigquery >=3.3.5,<4.0.0dev,!=2.4.*",
+    "google-cloud-bigquery-storage >=2.16.2,<3.0.0dev",
 ]
 extras = {
     "tqdm": "tqdm>=4.23.0",
diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt
index 9ba3b1eb..89328208 100644
--- a/testing/constraints-3.7.txt
+++ b/testing/constraints-3.7.txt
@@ -9,11 +9,11 @@ db-dtypes==0.3.1
 google-api-core==2.10.2
 google-auth==2.13.0
 google-auth-oauthlib==0.7.0
-google-cloud-bigquery==1.27.2
-google-cloud-bigquery-storage==1.1.0
+google-cloud-bigquery==3.3.5
+google-cloud-bigquery-storage==2.16.2
 numpy==1.16.6
 pandas==0.24.2
 pyarrow==3.0.0
-pydata-google-auth==0.1.2
+pydata-google-auth==1.4.0
 tqdm==4.23.0
 protobuf==3.19.5

From 57c0bf9a4a67dfbc34348571ca359b50d8c525ea Mon Sep 17 00:00:00 2001
From: aribray <45905583+aribray@users.noreply.github.com>
Date: Wed, 26 Oct 2022 15:37:46 -0500
Subject: [PATCH 12/61] bump pandas version, bump db-dtypes version

---
 setup.py                    | 4 ++--
 testing/constraints-3.7.txt | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/setup.py b/setup.py
index c1597385..33bbfe85 100644
--- a/setup.py
+++ b/setup.py
@@ -23,9 +23,9 @@
 release_status = "Development Status :: 4 - Beta"
 dependencies = [
     "setuptools",
-    "db-dtypes >=0.3.1,<2.0.0",
+    "db-dtypes >=1.0.4,<2.0.0",
     "numpy >=1.16.6",
-    "pandas >=0.24.2",
+    "pandas >=1.5.1",
     "pyarrow >=3.0.0, <10.0dev",
     "pydata-google-auth",
     # Note: google-api-core and google-auth are also included via transitive
diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt
index 89328208..2ee180f0 100644
--- a/testing/constraints-3.7.txt
+++ b/testing/constraints-3.7.txt
@@ -5,14 +5,14 @@
 #
 # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
 # Then this file
should have foo==1.14.0 -db-dtypes==0.3.1 +db-dtypes==1.0.4 google-api-core==2.10.2 google-auth==2.13.0 google-auth-oauthlib==0.7.0 google-cloud-bigquery==3.3.5 google-cloud-bigquery-storage==2.16.2 numpy==1.16.6 -pandas==0.24.2 +pandas==1.5.2 pyarrow==3.0.0 pydata-google-auth==1.4.0 tqdm==4.23.0 From 16411c9ac3d8238bfeaa42a4f9f748ee2eb8129f Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Wed, 26 Oct 2022 15:38:41 -0500 Subject: [PATCH 13/61] fix pandas version --- testing/constraints-3.7.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 2ee180f0..7137a7f2 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -12,7 +12,7 @@ google-auth-oauthlib==0.7.0 google-cloud-bigquery==3.3.5 google-cloud-bigquery-storage==2.16.2 numpy==1.16.6 -pandas==1.5.2 +pandas==1.5.1 pyarrow==3.0.0 pydata-google-auth==1.4.0 tqdm==4.23.0 From cb651d254b8674d6f8cb3787c2817d62410d6a7c Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Wed, 26 Oct 2022 15:45:47 -0500 Subject: [PATCH 14/61] resolve dependency conflicts --- setup.py | 2 +- testing/constraints-3.7.txt | 2 +- testing/constraints-3.8.txt | 2 +- testing/constraints-3.9.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 33bbfe85..6b71fa34 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ "setuptools", "db-dtypes >=1.0.4,<2.0.0", "numpy >=1.16.6", - "pandas >=1.5.1", + "pandas >=1.1.4", "pyarrow >=3.0.0, <10.0dev", "pydata-google-auth", # Note: google-api-core and google-auth are also included via transitive diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 7137a7f2..569287ad 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -12,7 +12,7 @@ google-auth-oauthlib==0.7.0 google-cloud-bigquery==3.3.5 google-cloud-bigquery-storage==2.16.2 numpy==1.16.6 -pandas==1.5.1 +pandas==1.1.4 pyarrow==3.0.0 pydata-google-auth==1.4.0 tqdm==4.23.0 diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt index 9c67e95e..58ed21d4 100644 --- a/testing/constraints-3.8.txt +++ b/testing/constraints-3.8.txt @@ -1 +1 @@ -numpy==1.17.5 +numpy==1.16.6 diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index 76864a66..f8a353b0 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -1,2 +1,2 @@ -numpy==1.19.4 +numpy==1.16.6 pandas==1.1.4 From 81736e25ec67104008716143dcb29d555dce1244 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Wed, 26 Oct 2022 15:50:47 -0500 Subject: [PATCH 15/61] bump dbtypes version --- ci/requirements-3.7-0.24.2.conda | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda index bdc162be..b756f1e7 100644 --- a/ci/requirements-3.7-0.24.2.conda +++ b/ci/requirements-3.7-0.24.2.conda @@ -1,6 +1,6 @@ codecov coverage -db-dtypes==0.3.1 +db-dtypes==1.0.4 fastavro flake8 freezegun From a006ac0724d11fccaeaab28da4650eb8668906bd Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Wed, 26 Oct 2022 15:57:14 -0500 Subject: [PATCH 16/61] bump circleci pandas version --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index e008054c..f953c79e 100644 --- a/.circleci/config.yml +++ 
b/.circleci/config.yml
@@ -10,7 +10,7 @@ jobs:
       - image: mambaorg/micromamba
         environment:
           PYTHON: "3.7"
-          PANDAS: "0.24.2"
+          PANDAS: "1.1.4"
     steps:
       - checkout
       - run: ci/config_auth.sh

From 1fa654e97c4248a6d9c7c2191d66535ec45e7d1c Mon Sep 17 00:00:00 2001
From: aribray <45905583+aribray@users.noreply.github.com>
Date: Wed, 26 Oct 2022 16:01:17 -0500
Subject: [PATCH 17/61] rename conda requirements file

---
 ...requirements-3.7-0.24.2.conda => requirements-3.7-1.1.4.conda} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename ci/{requirements-3.7-0.24.2.conda => requirements-3.7-1.1.4.conda} (100%)

diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-1.1.4.conda
similarity index 100%
rename from ci/requirements-3.7-0.24.2.conda
rename to ci/requirements-3.7-1.1.4.conda

From 0407ae56fa9787231a1e0d49d060d436bee36017 Mon Sep 17 00:00:00 2001
From: aribray <45905583+aribray@users.noreply.github.com>
Date: Wed, 26 Oct 2022 16:12:47 -0500
Subject: [PATCH 18/61] reset circleci config pandas version

---
 .circleci/config.yml                                            | 2 +-
 ...quirements-3.7-1.1.4.conda => requirements-3.7-0.24.2.conda} | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename ci/{requirements-3.7-1.1.4.conda => requirements-3.7-0.24.2.conda} (100%)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index f953c79e..e008054c 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -10,7 +10,7 @@ jobs:
       - image: mambaorg/micromamba
         environment:
           PYTHON: "3.7"
-          PANDAS: "1.1.4"
+          PANDAS: "0.24.2"
     steps:
       - checkout
       - run: ci/config_auth.sh
diff --git a/ci/requirements-3.7-1.1.4.conda b/ci/requirements-3.7-0.24.2.conda
similarity index 100%
rename from ci/requirements-3.7-1.1.4.conda
rename to ci/requirements-3.7-0.24.2.conda

From 368070292008c3f964ca835cf56430aa4c564131 Mon Sep 17 00:00:00 2001
From: aribray <45905583+aribray@users.noreply.github.com>
Date: Wed, 26 Oct 2022 16:22:33 -0500
Subject: [PATCH 19/61] reset pandas version

---
 setup.py                    | 2 +-
 testing/constraints-3.7.txt | 2 +-
 testing/constraints-3.8.txt | 2 +-
 testing/constraints-3.9.txt | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/setup.py b/setup.py
index 6b71fa34..6468291f 100644
--- a/setup.py
+++ b/setup.py
@@ -25,7 +25,7 @@
     "setuptools",
     "db-dtypes >=1.0.4,<2.0.0",
     "numpy >=1.16.6",
-    "pandas >=1.1.4",
+    "pandas >=0.24.2",
     "pyarrow >=3.0.0, <10.0dev",
     "pydata-google-auth",
     # Note: google-api-core and google-auth are also included via transitive
diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt
index 569287ad..9970f6ad 100644
--- a/testing/constraints-3.7.txt
+++ b/testing/constraints-3.7.txt
@@ -12,7 +12,7 @@ google-auth-oauthlib==0.7.0
 google-cloud-bigquery==3.3.5
 google-cloud-bigquery-storage==2.16.2
 numpy==1.16.6
-pandas==1.1.4
+pandas==0.24.2
 pyarrow==3.0.0
 pydata-google-auth==1.4.0
 tqdm==4.23.0
diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt
index 58ed21d4..9c67e95e 100644
--- a/testing/constraints-3.8.txt
+++ b/testing/constraints-3.8.txt
@@ -1 +1 @@
-numpy==1.16.6
+numpy==1.17.5
diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt
index f8a353b0..76864a66 100644
--- a/testing/constraints-3.9.txt
+++ b/testing/constraints-3.9.txt
@@ -1,2 +1,2 @@
-numpy==1.16.6
+numpy==1.19.4
 pandas==1.1.4

From 846a44d9fc76c2c04dcfba0a736676732bbcc3fd Mon Sep 17 00:00:00 2001
From: aribray <45905583+aribray@users.noreply.github.com>
Date: Wed, 26 Oct 2022 16:49:27 -0500
Subject: [PATCH 20/61] readjust constraints-3.7 versions

---
 ci/requirements-3.7-0.24.2.conda |
2 +- setup.py | 2 +- testing/constraints-3.7.txt | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda index b756f1e7..bdc162be 100644 --- a/ci/requirements-3.7-0.24.2.conda +++ b/ci/requirements-3.7-0.24.2.conda @@ -1,6 +1,6 @@ codecov coverage -db-dtypes==1.0.4 +db-dtypes==0.3.1 fastavro flake8 freezegun diff --git a/setup.py b/setup.py index 6468291f..6b71fa34 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ "setuptools", "db-dtypes >=1.0.4,<2.0.0", "numpy >=1.16.6", - "pandas >=0.24.2", + "pandas >=1.1.4", "pyarrow >=3.0.0, <10.0dev", "pydata-google-auth", # Note: google-api-core and google-auth are also included via transitive diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 9970f6ad..8f2ad162 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -12,8 +12,8 @@ google-auth-oauthlib==0.7.0 google-cloud-bigquery==3.3.5 google-cloud-bigquery-storage==2.16.2 numpy==1.16.6 -pandas==0.24.2 +pandas==1.1.4 pyarrow==3.0.0 -pydata-google-auth==1.4.0 +pydata-google-auth==0.1.2 tqdm==4.23.0 protobuf==3.19.5 From f44704b32d2d4d5036fc25da9886662bf13e1d7a Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 09:16:14 -0500 Subject: [PATCH 21/61] test adding to kokoro --- .kokoro/continuous/prerelease-deps-3.10.cfg | 7 +++++++ .kokoro/continuous/prerelease-deps-3.7.cfg | 7 +++++++ .kokoro/continuous/prerelease-deps-3.8.cfg | 7 +++++++ .kokoro/continuous/prerelease-deps-3.9.cfg | 7 +++++++ .kokoro/presubmit/prerelease-deps-3.10.cfg | 7 +++++++ .kokoro/presubmit/prerelease-deps-3.7.cfg | 7 +++++++ .kokoro/presubmit/prerelease-deps-3.8.cfg | 7 +++++++ .kokoro/presubmit/prerelease-deps-3.9.cfg | 7 +++++++ .kokoro/presubmit/presubmit.cfg | 12 +++++++++++- .kokoro/presubmit/system-3.10.cfg | 7 +++++++ .kokoro/presubmit/system-3.7.cfg | 7 +++++++ .kokoro/presubmit/system-3.8.cfg | 7 +++++++ .kokoro/presubmit/system-3.9.cfg | 7 +++++++ 13 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 .kokoro/continuous/prerelease-deps-3.10.cfg create mode 100644 .kokoro/continuous/prerelease-deps-3.7.cfg create mode 100644 .kokoro/continuous/prerelease-deps-3.8.cfg create mode 100644 .kokoro/continuous/prerelease-deps-3.9.cfg create mode 100644 .kokoro/presubmit/prerelease-deps-3.10.cfg create mode 100644 .kokoro/presubmit/prerelease-deps-3.7.cfg create mode 100644 .kokoro/presubmit/prerelease-deps-3.8.cfg create mode 100644 .kokoro/presubmit/prerelease-deps-3.9.cfg create mode 100644 .kokoro/presubmit/system-3.10.cfg create mode 100644 .kokoro/presubmit/system-3.7.cfg create mode 100644 .kokoro/presubmit/system-3.8.cfg create mode 100644 .kokoro/presubmit/system-3.9.cfg diff --git a/.kokoro/continuous/prerelease-deps-3.10.cfg b/.kokoro/continuous/prerelease-deps-3.10.cfg new file mode 100644 index 00000000..339980bd --- /dev/null +++ b/.kokoro/continuous/prerelease-deps-3.10.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps-3.10" +} diff --git a/.kokoro/continuous/prerelease-deps-3.7.cfg b/.kokoro/continuous/prerelease-deps-3.7.cfg new file mode 100644 index 00000000..e63f8e85 --- /dev/null +++ b/.kokoro/continuous/prerelease-deps-3.7.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. 
+env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps-3.7" +} diff --git a/.kokoro/continuous/prerelease-deps-3.8.cfg b/.kokoro/continuous/prerelease-deps-3.8.cfg new file mode 100644 index 00000000..fabe3e34 --- /dev/null +++ b/.kokoro/continuous/prerelease-deps-3.8.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps-3.8" +} diff --git a/.kokoro/continuous/prerelease-deps-3.9.cfg b/.kokoro/continuous/prerelease-deps-3.9.cfg new file mode 100644 index 00000000..f3fcc8d8 --- /dev/null +++ b/.kokoro/continuous/prerelease-deps-3.9.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps-3.9" +} diff --git a/.kokoro/presubmit/prerelease-deps-3.10.cfg b/.kokoro/presubmit/prerelease-deps-3.10.cfg new file mode 100644 index 00000000..339980bd --- /dev/null +++ b/.kokoro/presubmit/prerelease-deps-3.10.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps-3.10" +} diff --git a/.kokoro/presubmit/prerelease-deps-3.7.cfg b/.kokoro/presubmit/prerelease-deps-3.7.cfg new file mode 100644 index 00000000..e63f8e85 --- /dev/null +++ b/.kokoro/presubmit/prerelease-deps-3.7.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps-3.7" +} diff --git a/.kokoro/presubmit/prerelease-deps-3.8.cfg b/.kokoro/presubmit/prerelease-deps-3.8.cfg new file mode 100644 index 00000000..fabe3e34 --- /dev/null +++ b/.kokoro/presubmit/prerelease-deps-3.8.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps-3.8" +} diff --git a/.kokoro/presubmit/prerelease-deps-3.9.cfg b/.kokoro/presubmit/prerelease-deps-3.9.cfg new file mode 100644 index 00000000..f3fcc8d8 --- /dev/null +++ b/.kokoro/presubmit/prerelease-deps-3.9.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps-3.9" +} diff --git a/.kokoro/presubmit/presubmit.cfg b/.kokoro/presubmit/presubmit.cfg index 8f43917d..17d071ca 100644 --- a/.kokoro/presubmit/presubmit.cfg +++ b/.kokoro/presubmit/presubmit.cfg @@ -1 +1,11 @@ -# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file +# Format: //devtools/kokoro/config/proto/build.proto + +# Disable system tests. +env_vars: { + key: "RUN_SYSTEM_TESTS" + value: "false" +} +env_vars: { + key: "RUN_SNIPPETS_TESTS" + value: "false" +} diff --git a/.kokoro/presubmit/system-3.10.cfg b/.kokoro/presubmit/system-3.10.cfg new file mode 100644 index 00000000..f1c7c569 --- /dev/null +++ b/.kokoro/presubmit/system-3.10.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "system-3.10" +} \ No newline at end of file diff --git a/.kokoro/presubmit/system-3.7.cfg b/.kokoro/presubmit/system-3.7.cfg new file mode 100644 index 00000000..461537b3 --- /dev/null +++ b/.kokoro/presubmit/system-3.7.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. 
+env_vars: { + key: "NOX_SESSION" + value: "system-3.7" +} \ No newline at end of file diff --git a/.kokoro/presubmit/system-3.8.cfg b/.kokoro/presubmit/system-3.8.cfg new file mode 100644 index 00000000..f4bcee3d --- /dev/null +++ b/.kokoro/presubmit/system-3.8.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "system-3.8" +} \ No newline at end of file diff --git a/.kokoro/presubmit/system-3.9.cfg b/.kokoro/presubmit/system-3.9.cfg new file mode 100644 index 00000000..b8ae66b3 --- /dev/null +++ b/.kokoro/presubmit/system-3.9.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "system-3.9" +} \ No newline at end of file From f3ced372b4735aeab2e897bc7289cf7a4d90bb57 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 09:19:54 -0500 Subject: [PATCH 22/61] test removing circleci --- .kokoro/trampoline_v2.sh | 46 ++++++++++++++++++++-------------------- tests/system/conftest.py | 18 ++++++++-------- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/.kokoro/trampoline_v2.sh b/.kokoro/trampoline_v2.sh index 4af6cdc2..40a005c9 100755 --- a/.kokoro/trampoline_v2.sh +++ b/.kokoro/trampoline_v2.sh @@ -203,29 +203,29 @@ elif [[ -n "${GITHUB_RUN_ID:-}" ]]; then "GITHUB_HEAD_REF" "GITHUB_BASE_REF" ) -elif [[ "${CIRCLECI:-}" == "true" ]]; then - RUNNING_IN_CI="true" - TRAMPOLINE_CI="circleci" - pass_down_envvars+=( - "CIRCLE_BRANCH" - "CIRCLE_BUILD_NUM" - "CIRCLE_BUILD_URL" - "CIRCLE_COMPARE_URL" - "CIRCLE_JOB" - "CIRCLE_NODE_INDEX" - "CIRCLE_NODE_TOTAL" - "CIRCLE_PREVIOUS_BUILD_NUM" - "CIRCLE_PROJECT_REPONAME" - "CIRCLE_PROJECT_USERNAME" - "CIRCLE_REPOSITORY_URL" - "CIRCLE_SHA1" - "CIRCLE_STAGE" - "CIRCLE_USERNAME" - "CIRCLE_WORKFLOW_ID" - "CIRCLE_WORKFLOW_JOB_ID" - "CIRCLE_WORKFLOW_UPSTREAM_JOB_IDS" - "CIRCLE_WORKFLOW_WORKSPACE_ID" - ) +# elif [[ "${CIRCLECI:-}" == "true" ]]; then +# RUNNING_IN_CI="true" +# TRAMPOLINE_CI="circleci" +# pass_down_envvars+=( +# "CIRCLE_BRANCH" +# "CIRCLE_BUILD_NUM" +# "CIRCLE_BUILD_URL" +# "CIRCLE_COMPARE_URL" +# "CIRCLE_JOB" +# "CIRCLE_NODE_INDEX" +# "CIRCLE_NODE_TOTAL" +# "CIRCLE_PREVIOUS_BUILD_NUM" +# "CIRCLE_PROJECT_REPONAME" +# "CIRCLE_PROJECT_USERNAME" +# "CIRCLE_REPOSITORY_URL" +# "CIRCLE_SHA1" +# "CIRCLE_STAGE" +# "CIRCLE_USERNAME" +# "CIRCLE_WORKFLOW_ID" +# "CIRCLE_WORKFLOW_JOB_ID" +# "CIRCLE_WORKFLOW_UPSTREAM_JOB_IDS" +# "CIRCLE_WORKFLOW_WORKSPACE_ID" +# ) fi # Configure the service account for pulling the docker image. diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 4ba8bf31..1d7ae681 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -16,15 +16,15 @@ REPO_DIR = pathlib.Path(__file__).parent.parent.parent -# TODO: remove when fully migrated off of Circle CI -@pytest.fixture(scope="session", autouse=True) -def default_credentials(): - """Setup application default credentials for use in code samples.""" - # Written by the 'ci/config_auth.sh' script. 
- path = REPO_DIR / "ci" / "service_account.json" - - if path.is_file() and "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ: - os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = str(path) +# # TODO: remove when fully migrated off of Circle CI +# @pytest.fixture(scope="session", autouse=True) +# def default_credentials(): +# """Setup application default credentials for use in code samples.""" +# # Written by the 'ci/config_auth.sh' script. +# path = REPO_DIR / "ci" / "service_account.json" + +# if path.is_file() and "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ: +# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = str(path) @pytest.fixture(scope="session", autouse=True) From ad45c6d43f488472400947e3ad3bc4dda6c88500 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 09:26:43 -0500 Subject: [PATCH 23/61] test removing circleci --- .circleci/config.yml | 34 -------------------------------- .github/sync-repo-settings.yaml | 4 ++++ .kokoro/trampoline_v2.sh | 23 --------------------- ci/config_auth.sh | 13 ------------ ci/requirements-3.7-0.24.2.conda | 14 ------------- ci/requirements-3.9-1.3.4.conda | 14 ------------- ci/run_conda.sh | 23 --------------------- ci/run_tests.sh | 15 -------------- tests/system/test_auth.py | 2 +- 9 files changed, 5 insertions(+), 137 deletions(-) delete mode 100644 .circleci/config.yml delete mode 100755 ci/config_auth.sh delete mode 100644 ci/requirements-3.7-0.24.2.conda delete mode 100644 ci/requirements-3.9-1.3.4.conda delete mode 100755 ci/run_conda.sh delete mode 100755 ci/run_tests.sh diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index e008054c..00000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) 2017 pandas-gbq Authors All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. 
- -version: 2 -jobs: - # Conda - "conda-3.7": - docker: - - image: mambaorg/micromamba - environment: - PYTHON: "3.7" - PANDAS: "0.24.2" - steps: - - checkout - - run: ci/config_auth.sh - - run: ci/run_conda.sh - "conda-3.9": - docker: - - image: mambaorg/micromamba - environment: - PYTHON: "3.9" - PANDAS: "1.3.4" - steps: - - checkout - - run: ci/config_auth.sh - - run: ci/run_conda.sh - -workflows: - version: 2 - build: - jobs: - - "conda-3.7" - - "conda-3.9" diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index 590dda51..e1e8137e 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -15,6 +15,10 @@ branchProtectionRules: - 'unit (3.8)' - 'unit (3.9)' - 'unit (3.10)' + - 'system (3.7)' + - 'system (3.8)' + - 'system (3.9)' + - 'system (3.10)' - 'cover' - 'Kokoro' - 'Samples - Lint' diff --git a/.kokoro/trampoline_v2.sh b/.kokoro/trampoline_v2.sh index 40a005c9..f7b6ba86 100755 --- a/.kokoro/trampoline_v2.sh +++ b/.kokoro/trampoline_v2.sh @@ -203,29 +203,6 @@ elif [[ -n "${GITHUB_RUN_ID:-}" ]]; then "GITHUB_HEAD_REF" "GITHUB_BASE_REF" ) -# elif [[ "${CIRCLECI:-}" == "true" ]]; then -# RUNNING_IN_CI="true" -# TRAMPOLINE_CI="circleci" -# pass_down_envvars+=( -# "CIRCLE_BRANCH" -# "CIRCLE_BUILD_NUM" -# "CIRCLE_BUILD_URL" -# "CIRCLE_COMPARE_URL" -# "CIRCLE_JOB" -# "CIRCLE_NODE_INDEX" -# "CIRCLE_NODE_TOTAL" -# "CIRCLE_PREVIOUS_BUILD_NUM" -# "CIRCLE_PROJECT_REPONAME" -# "CIRCLE_PROJECT_USERNAME" -# "CIRCLE_REPOSITORY_URL" -# "CIRCLE_SHA1" -# "CIRCLE_STAGE" -# "CIRCLE_USERNAME" -# "CIRCLE_WORKFLOW_ID" -# "CIRCLE_WORKFLOW_JOB_ID" -# "CIRCLE_WORKFLOW_UPSTREAM_JOB_IDS" -# "CIRCLE_WORKFLOW_WORKSPACE_ID" -# ) fi # Configure the service account for pulling the docker image. diff --git a/ci/config_auth.sh b/ci/config_auth.sh deleted file mode 100755 index cde115c7..00000000 --- a/ci/config_auth.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -# Copyright (c) 2017 pandas-gbq Authors All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. - -set -e -# Don't set -x, because we don't want to leak keys. -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" - -# Write key to file if present. -if [ ! -z "$SERVICE_ACCOUNT_KEY" ] ; then - echo "$SERVICE_ACCOUNT_KEY" | base64 --decode > "$DIR"/service_account.json -fi diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda deleted file mode 100644 index bdc162be..00000000 --- a/ci/requirements-3.7-0.24.2.conda +++ /dev/null @@ -1,14 +0,0 @@ -codecov -coverage -db-dtypes==0.3.1 -fastavro -flake8 -freezegun -numpy==1.16.6 -google-cloud-bigquery==3.3.5 -google-cloud-bigquery-storage==2.16.2 -pyarrow==3.0.0 -pydata-google-auth -pytest -pytest-cov -tqdm==4.23.0 diff --git a/ci/requirements-3.9-1.3.4.conda b/ci/requirements-3.9-1.3.4.conda deleted file mode 100644 index 1411fe5b..00000000 --- a/ci/requirements-3.9-1.3.4.conda +++ /dev/null @@ -1,14 +0,0 @@ -codecov -coverage -db-dtypes -fastavro -flake8 -freezegun -google-cloud-bigquery -google-cloud-bigquery-storage -numpy -pyarrow -pydata-google-auth -pytest -pytest-cov -tqdm diff --git a/ci/run_conda.sh b/ci/run_conda.sh deleted file mode 100755 index 11b5b569..00000000 --- a/ci/run_conda.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -# Copyright (c) 2017 pandas-gbq Authors All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. 
- -set -e -x -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" - -eval "$(micromamba shell hook --shell=bash)" -micromamba activate - -# Install dependencies using (micro)mamba -# https://github.com/mamba-org/micromamba-docker -REQ="ci/requirements-${PYTHON}-${PANDAS}" -micromamba install -q pandas=$PANDAS python=${PYTHON} -c conda-forge; -micromamba install -q --file "$REQ.conda" -c conda-forge; -micromamba list -micromamba info - -python setup.py develop --no-deps - -# Run the tests -$DIR/run_tests.sh diff --git a/ci/run_tests.sh b/ci/run_tests.sh deleted file mode 100755 index 8a1d7f91..00000000 --- a/ci/run_tests.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -# Copyright (c) 2017 pandas-gbq Authors All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. - -set -e -x -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" - -if [ -f "$DIR/service_account.json" ]; then - export GOOGLE_APPLICATION_CREDENTIALS="$DIR/service_account.json" -fi - -# Install test requirements -pip install coverage pytest pytest-cov flake8 codecov google-cloud-testutils -pytest -v -m "not local_auth" --cov=pandas_gbq --cov-report xml:/tmp/pytest-cov.xml --cov-fail-under=0 tests diff --git a/tests/system/test_auth.py b/tests/system/test_auth.py index d9f7d096..ec2650f8 100644 --- a/tests/system/test_auth.py +++ b/tests/system/test_auth.py @@ -12,7 +12,7 @@ from pandas_gbq import auth -IS_RUNNING_ON_CI = "CIRCLE_BUILD_NUM" in os.environ or "KOKORO_BUILD_ID" in os.environ +"KOKORO_BUILD_ID" in os.environ def mock_default_credentials(scopes=None, request=None): From 6c771d9b6f526ffa8059da2b220d004755018f12 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 09:29:10 -0500 Subject: [PATCH 24/61] Revert "test removing circleci" This reverts commit ad45c6d43f488472400947e3ad3bc4dda6c88500. --- .circleci/config.yml | 34 ++++++++++++++++++++++++++++++++ .github/sync-repo-settings.yaml | 4 ---- .kokoro/trampoline_v2.sh | 23 +++++++++++++++++++++ ci/config_auth.sh | 13 ++++++++++++ ci/requirements-3.7-0.24.2.conda | 14 +++++++++++++ ci/requirements-3.9-1.3.4.conda | 14 +++++++++++++ ci/run_conda.sh | 23 +++++++++++++++++++++ ci/run_tests.sh | 15 ++++++++++++++ tests/system/test_auth.py | 2 +- 9 files changed, 137 insertions(+), 5 deletions(-) create mode 100644 .circleci/config.yml create mode 100755 ci/config_auth.sh create mode 100644 ci/requirements-3.7-0.24.2.conda create mode 100644 ci/requirements-3.9-1.3.4.conda create mode 100755 ci/run_conda.sh create mode 100755 ci/run_tests.sh diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 00000000..e008054c --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,34 @@ +# Copyright (c) 2017 pandas-gbq Authors All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. 
+ +version: 2 +jobs: + # Conda + "conda-3.7": + docker: + - image: mambaorg/micromamba + environment: + PYTHON: "3.7" + PANDAS: "0.24.2" + steps: + - checkout + - run: ci/config_auth.sh + - run: ci/run_conda.sh + "conda-3.9": + docker: + - image: mambaorg/micromamba + environment: + PYTHON: "3.9" + PANDAS: "1.3.4" + steps: + - checkout + - run: ci/config_auth.sh + - run: ci/run_conda.sh + +workflows: + version: 2 + build: + jobs: + - "conda-3.7" + - "conda-3.9" diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index e1e8137e..590dda51 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -15,10 +15,6 @@ branchProtectionRules: - 'unit (3.8)' - 'unit (3.9)' - 'unit (3.10)' - - 'system (3.7)' - - 'system (3.8)' - - 'system (3.9)' - - 'system (3.10)' - 'cover' - 'Kokoro' - 'Samples - Lint' diff --git a/.kokoro/trampoline_v2.sh b/.kokoro/trampoline_v2.sh index f7b6ba86..40a005c9 100755 --- a/.kokoro/trampoline_v2.sh +++ b/.kokoro/trampoline_v2.sh @@ -203,6 +203,29 @@ elif [[ -n "${GITHUB_RUN_ID:-}" ]]; then "GITHUB_HEAD_REF" "GITHUB_BASE_REF" ) +# elif [[ "${CIRCLECI:-}" == "true" ]]; then +# RUNNING_IN_CI="true" +# TRAMPOLINE_CI="circleci" +# pass_down_envvars+=( +# "CIRCLE_BRANCH" +# "CIRCLE_BUILD_NUM" +# "CIRCLE_BUILD_URL" +# "CIRCLE_COMPARE_URL" +# "CIRCLE_JOB" +# "CIRCLE_NODE_INDEX" +# "CIRCLE_NODE_TOTAL" +# "CIRCLE_PREVIOUS_BUILD_NUM" +# "CIRCLE_PROJECT_REPONAME" +# "CIRCLE_PROJECT_USERNAME" +# "CIRCLE_REPOSITORY_URL" +# "CIRCLE_SHA1" +# "CIRCLE_STAGE" +# "CIRCLE_USERNAME" +# "CIRCLE_WORKFLOW_ID" +# "CIRCLE_WORKFLOW_JOB_ID" +# "CIRCLE_WORKFLOW_UPSTREAM_JOB_IDS" +# "CIRCLE_WORKFLOW_WORKSPACE_ID" +# ) fi # Configure the service account for pulling the docker image. diff --git a/ci/config_auth.sh b/ci/config_auth.sh new file mode 100755 index 00000000..cde115c7 --- /dev/null +++ b/ci/config_auth.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# Copyright (c) 2017 pandas-gbq Authors All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +set -e +# Don't set -x, because we don't want to leak keys. +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" + +# Write key to file if present. +if [ ! -z "$SERVICE_ACCOUNT_KEY" ] ; then + echo "$SERVICE_ACCOUNT_KEY" | base64 --decode > "$DIR"/service_account.json +fi diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda new file mode 100644 index 00000000..bdc162be --- /dev/null +++ b/ci/requirements-3.7-0.24.2.conda @@ -0,0 +1,14 @@ +codecov +coverage +db-dtypes==0.3.1 +fastavro +flake8 +freezegun +numpy==1.16.6 +google-cloud-bigquery==3.3.5 +google-cloud-bigquery-storage==2.16.2 +pyarrow==3.0.0 +pydata-google-auth +pytest +pytest-cov +tqdm==4.23.0 diff --git a/ci/requirements-3.9-1.3.4.conda b/ci/requirements-3.9-1.3.4.conda new file mode 100644 index 00000000..1411fe5b --- /dev/null +++ b/ci/requirements-3.9-1.3.4.conda @@ -0,0 +1,14 @@ +codecov +coverage +db-dtypes +fastavro +flake8 +freezegun +google-cloud-bigquery +google-cloud-bigquery-storage +numpy +pyarrow +pydata-google-auth +pytest +pytest-cov +tqdm diff --git a/ci/run_conda.sh b/ci/run_conda.sh new file mode 100755 index 00000000..11b5b569 --- /dev/null +++ b/ci/run_conda.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# Copyright (c) 2017 pandas-gbq Authors All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. 
+ +set -e -x +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" + +eval "$(micromamba shell hook --shell=bash)" +micromamba activate + +# Install dependencies using (micro)mamba +# https://github.com/mamba-org/micromamba-docker +REQ="ci/requirements-${PYTHON}-${PANDAS}" +micromamba install -q pandas=$PANDAS python=${PYTHON} -c conda-forge; +micromamba install -q --file "$REQ.conda" -c conda-forge; +micromamba list +micromamba info + +python setup.py develop --no-deps + +# Run the tests +$DIR/run_tests.sh diff --git a/ci/run_tests.sh b/ci/run_tests.sh new file mode 100755 index 00000000..8a1d7f91 --- /dev/null +++ b/ci/run_tests.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# Copyright (c) 2017 pandas-gbq Authors All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +set -e -x +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" + +if [ -f "$DIR/service_account.json" ]; then + export GOOGLE_APPLICATION_CREDENTIALS="$DIR/service_account.json" +fi + +# Install test requirements +pip install coverage pytest pytest-cov flake8 codecov google-cloud-testutils +pytest -v -m "not local_auth" --cov=pandas_gbq --cov-report xml:/tmp/pytest-cov.xml --cov-fail-under=0 tests diff --git a/tests/system/test_auth.py b/tests/system/test_auth.py index ec2650f8..d9f7d096 100644 --- a/tests/system/test_auth.py +++ b/tests/system/test_auth.py @@ -12,7 +12,7 @@ from pandas_gbq import auth -"KOKORO_BUILD_ID" in os.environ +IS_RUNNING_ON_CI = "CIRCLE_BUILD_NUM" in os.environ or "KOKORO_BUILD_ID" in os.environ def mock_default_credentials(scopes=None, request=None): From 5f23bd02e52c6070c86de488d924b67150ed7875 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 09:32:51 -0500 Subject: [PATCH 25/61] add system tests to github workflow --- .github/workflows/system-test.yml | 57 +++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 .github/workflows/system-test.yml diff --git a/.github/workflows/system-test.yml b/.github/workflows/system-test.yml new file mode 100644 index 00000000..fa3080a0 --- /dev/null +++ b/.github/workflows/system-test.yml @@ -0,0 +1,57 @@ +on: + pull_request: + branches: + - main +name: system-test +jobs: + system: + runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.7', '3.8', '3.9', '3.10'] + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python }} + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run system tests + env: + COVERAGE_FILE: .coverage-${{ matrix.python }} + run: | + nox -s system-${{ matrix.python }} + - name: Upload coverage results + uses: actions/upload-artifact@v3 + with: + name: coverage-artifacts + path: .coverage-${{ matrix.python }} + + cover: + runs-on: ubuntu-latest + needs: + - system + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: Install coverage + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install coverage + - name: Download coverage results + uses: actions/download-artifact@v3 + with: + name: coverage-artifacts + path: .coverage-results/ + - name: Report coverage results + run: | + coverage combine .coverage-results/.coverage* + coverage report --show-missing --fail-under=96 
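
The next patch (PATCH 26/61) swaps the public ``write_disposition`` argument back to the legacy ``if_exists`` values and translates them to BigQuery write dispositions inside ``to_gbq``. A minimal standalone sketch of that translation, mirroring the ``dict(zip(...))`` construction the patch inlines (the helper name ``to_write_disposition`` is illustrative here, not part of the patch):

    # Map the legacy if_exists values onto BigQuery write dispositions.
    # A plain dict literal stands in for the patch's dict(zip(...)) call.
    DISPOSITIONS = {
        "fail": "WRITE_EMPTY",        # load job errors if the table already has data
        "replace": "WRITE_TRUNCATE",  # existing rows are dropped before the load
        "append": "WRITE_APPEND",     # rows are appended; table is created if missing
    }

    def to_write_disposition(if_exists: str) -> str:
        """Translate an if_exists value, rejecting unknown values up front."""
        if if_exists not in DISPOSITIONS:
            raise ValueError("'{0}' is not valid for if_exists".format(if_exists))
        return DISPOSITIONS[if_exists]

    assert to_write_disposition("replace") == "WRITE_TRUNCATE"

Passing the resulting disposition straight through to ``LoadJobConfig.write_disposition`` (as the earlier ``pandas_gbq/load.py`` hunks already do) lets the load job enforce the fail/replace/append semantics server-side.
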
From 0a96c98c35945f46563c620b2eb7d2bb727eb784 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 10:19:23 -0500 Subject: [PATCH 26/61] refactor to_gbq to map 'if_exists' to write_disposition --- .kokoro/trampoline_v2.sh | 46 +++++++++++++++++++-------------------- pandas_gbq/gbq.py | 30 +++++++++++++++---------- tests/system/test_gbq.py | 28 ++++++++++++------------ tests/unit/test_gbq.py | 4 ++-- tests/unit/test_to_gbq.py | 20 +++++++---------- 5 files changed, 65 insertions(+), 63 deletions(-) diff --git a/.kokoro/trampoline_v2.sh b/.kokoro/trampoline_v2.sh index 40a005c9..4af6cdc2 100755 --- a/.kokoro/trampoline_v2.sh +++ b/.kokoro/trampoline_v2.sh @@ -203,29 +203,29 @@ elif [[ -n "${GITHUB_RUN_ID:-}" ]]; then "GITHUB_HEAD_REF" "GITHUB_BASE_REF" ) -# elif [[ "${CIRCLECI:-}" == "true" ]]; then -# RUNNING_IN_CI="true" -# TRAMPOLINE_CI="circleci" -# pass_down_envvars+=( -# "CIRCLE_BRANCH" -# "CIRCLE_BUILD_NUM" -# "CIRCLE_BUILD_URL" -# "CIRCLE_COMPARE_URL" -# "CIRCLE_JOB" -# "CIRCLE_NODE_INDEX" -# "CIRCLE_NODE_TOTAL" -# "CIRCLE_PREVIOUS_BUILD_NUM" -# "CIRCLE_PROJECT_REPONAME" -# "CIRCLE_PROJECT_USERNAME" -# "CIRCLE_REPOSITORY_URL" -# "CIRCLE_SHA1" -# "CIRCLE_STAGE" -# "CIRCLE_USERNAME" -# "CIRCLE_WORKFLOW_ID" -# "CIRCLE_WORKFLOW_JOB_ID" -# "CIRCLE_WORKFLOW_UPSTREAM_JOB_IDS" -# "CIRCLE_WORKFLOW_WORKSPACE_ID" -# ) +elif [[ "${CIRCLECI:-}" == "true" ]]; then + RUNNING_IN_CI="true" + TRAMPOLINE_CI="circleci" + pass_down_envvars+=( + "CIRCLE_BRANCH" + "CIRCLE_BUILD_NUM" + "CIRCLE_BUILD_URL" + "CIRCLE_COMPARE_URL" + "CIRCLE_JOB" + "CIRCLE_NODE_INDEX" + "CIRCLE_NODE_TOTAL" + "CIRCLE_PREVIOUS_BUILD_NUM" + "CIRCLE_PROJECT_REPONAME" + "CIRCLE_PROJECT_USERNAME" + "CIRCLE_REPOSITORY_URL" + "CIRCLE_SHA1" + "CIRCLE_STAGE" + "CIRCLE_USERNAME" + "CIRCLE_WORKFLOW_ID" + "CIRCLE_WORKFLOW_JOB_ID" + "CIRCLE_WORKFLOW_UPSTREAM_JOB_IDS" + "CIRCLE_WORKFLOW_WORKSPACE_ID" + ) fi # Configure the service account for pulling the docker image. diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index daf38398..69a0a415 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -960,6 +960,7 @@ def to_gbq( project_id=None, chunksize=None, reauth=False, + if_exists="fail", auth_local_webserver=True, table_schema=None, location=None, @@ -968,7 +969,6 @@ def to_gbq( api_method: str = "default", verbose=None, private_key=None, - write_disposition: str = "WRITE_EMPTY", ): """Write a DataFrame to a Google BigQuery table. @@ -998,7 +998,14 @@ def to_gbq( reauth : bool, default False Force Google BigQuery to re-authenticate the user. This is useful if multiple accounts are used. - + if_exists : str, default 'fail' + Behavior when the destination table exists. Value can be one of: + ``'fail'`` + If table exists, do nothing. + ``'replace'`` + If table exists, drop it, recreate it, and insert data. + ``'append'`` + If table exists, insert data. Create if does not exist. auth_local_webserver : bool, default True Use the `local webserver flow `_ @@ -1065,16 +1072,6 @@ def to_gbq( or :func:`google.oauth2.service_account.Credentials.from_service_account_file` instead. - - write_disposition: str, default "WRITE_EMPTY" - Behavior when the destination table exists. Value can be one of: - - ``'WRITE_EMPTY'`` - If table exists, do nothing. - ``'WRITE_TRUNCATE'`` - If table exists, drop it, recreate it, and insert data. - ``'WRITE_APPEND'`` - If table exists, insert data. Create if does not exist. 
""" _test_google_api_imports() @@ -1119,6 +1116,15 @@ def to_gbq( "'projectId.datasetId.tableId'" ) + if if_exists not in ("fail", "replace", "append"): + raise ValueError("'{0}' is not valid for if_exists".format(if_exists)) + + if_exists_list = ["fail", "replace", "append"] + dispositions = ["WRITE_EMPTY", "WRITE_TRUNCATE", "WRITE_APPEND"] + dispositions_dict = dict(zip(if_exists_list, dispositions)) + + write_disposition = dispositions_dict[if_exists] + connector = GbqConnector( project_id, reauth=reauth, diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py index a0f59b3d..3f85158b 100644 --- a/tests/system/test_gbq.py +++ b/tests/system/test_gbq.py @@ -683,7 +683,7 @@ def test_upload_data_if_table_exists_fail(self, project_id): credentials=self.credentials, ) - # Test the default value of write_disposition == 'WRITE_EMPTY' + # Test the default value of if_exists == 'fail' with pytest.raises(gbq.TableCreationError): gbq.to_gbq( df, @@ -692,13 +692,13 @@ def test_upload_data_if_table_exists_fail(self, project_id): credentials=self.credentials, ) - # Test the write_disposition parameter with value 'WRITE_EMPTY' + # Test the if_exists parameter with value 'replace' with pytest.raises(gbq.TableCreationError): gbq.to_gbq( df, self.destination_table + test_id, project_id, - write_disposition="WRITE_EMPTY", + if_exists="fail", credentials=self.credentials, ) @@ -717,12 +717,12 @@ def test_upload_data_if_table_exists_append(self, project_id): credentials=self.credentials, ) - # Test the write_disposition parameter with value 'WRITE_APPEND + # Test the if_exists parameter with value 'append' gbq.to_gbq( df, self.destination_table + test_id, project_id, - write_disposition="WRITE_APPEND", + if_exists="append", credentials=self.credentials, ) @@ -742,7 +742,7 @@ def test_upload_data_if_table_exists_append(self, project_id): df_different_schema, self.destination_table + test_id, project_id, - write_disposition="WRITE_APPEND", + if_exists="append", credentials=self.credentials, ) @@ -763,12 +763,12 @@ def test_upload_subset_columns_if_table_exists_append(self, project_id): credentials=self.credentials, ) - # Test the write_disposition parameter with value 'WRITE_APPEND' + # Test the if_exists parameter with value 'append' gbq.to_gbq( df_subset_cols, self.destination_table + test_id, project_id, - write_disposition="WRITE_APPEND", + if_exists="append", credentials=self.credentials, ) @@ -797,12 +797,12 @@ def test_upload_data_if_table_exists_replace(self, project_id): credentials=self.credentials, ) - # Test the write_disposition parameter with the value 'WRITE_TRUNCATE'. + # Test the if_exists parameter with the value 'replace'. 
gbq.to_gbq( df_different_schema, self.destination_table + test_id, project_id, - write_disposition="WRITE_TRUNCATE", + if_exists="replace", credentials=self.credentials, ) @@ -821,13 +821,13 @@ def test_upload_data_if_table_exists_raises_value_error(self, project_id): test_size = 10 df = make_mixed_dataframe_v2(test_size) - # Test invalid value for write_disposition parameter raises value error + # Test invalid value for if_exists parameter raises value error with pytest.raises(ValueError): gbq.to_gbq( df, self.destination_table + test_id, project_id, - write_disposition="WRITE_DISPOSITION_UNSPECIFIED", + if_exists="xxxxx", credentials=self.credentials, ) @@ -932,7 +932,7 @@ def test_upload_data_flexible_column_order(self, project_id): df_columns_reversed, self.destination_table + test_id, project_id, - write_disposition="WRITE_APPEND", + if_exists="append", credentials=self.credentials, ) @@ -1332,7 +1332,7 @@ def test_to_gbq_does_not_override_mode(gbq_table, gbq_connector): pandas.DataFrame({"A": [1.0], "B": [2.0], "C": ["a"]}), "{0}.{1}".format(gbq_table.dataset_id, table_id), project_id=gbq_connector.project_id, - write_disposition="WRITE_APPEND", + if_exists="append", ) assert verify_schema(gbq_connector, gbq_table.dataset_id, table_id, table_schema) diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index a22b60bf..5184562a 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -628,7 +628,7 @@ def test_load_does_not_modify_schema_arg(mock_bigquery_client): "dataset.schematest", project_id="my-project", table_schema=original_schema, - write_disposition="WRITE_EMPTY", + if_exists="fail", ) assert original_schema == original_schema_cp @@ -645,7 +645,7 @@ def test_load_does_not_modify_schema_arg(mock_bigquery_client): "dataset.schematest", project_id="my-project", table_schema=original_schema, - write_disposition="WRITE_APPEND", + if_exists="append", ) assert original_schema == original_schema_cp diff --git a/tests/unit/test_to_gbq.py b/tests/unit/test_to_gbq.py index 42367266..2509d2b6 100644 --- a/tests/unit/test_to_gbq.py +++ b/tests/unit/test_to_gbq.py @@ -68,9 +68,7 @@ def test_to_gbq_load_method_translates_exception( expected_load_method.assert_called_once() -def test_to_gbq_with_write_disposition_append( - mock_bigquery_client, expected_load_method -): +def test_to_gbq_with_if_exists_append(mock_bigquery_client, expected_load_method): from google.cloud.bigquery import SchemaField mock_bigquery_client.get_table.return_value = google.cloud.bigquery.Table( @@ -84,12 +82,12 @@ def test_to_gbq_with_write_disposition_append( DataFrame({"col_a": [0.25, 1.5, -1.0], "col_b": ["a", "b", "c"]}), "my_dataset.my_table", project_id="myproj", - write_disposition="WRITE_APPEND", + if_exists="append", ) expected_load_method.assert_called_once() -def test_to_gbq_with_write_disposition_append_mismatch(mock_bigquery_client): +def test_to_gbq_with_if_exists_append_mismatch(mock_bigquery_client): from google.cloud.bigquery import SchemaField mock_bigquery_client.get_table.return_value = google.cloud.bigquery.Table( @@ -105,15 +103,13 @@ def test_to_gbq_with_write_disposition_append_mismatch(mock_bigquery_client): DataFrame({"col_a": [0.25, 1.5, -1.0]}), "my_dataset.my_table", project_id="myproj", - write_disposition="WRITE_APPEND", + if_exists="append", ) exc = exception_block.value assert exc.message == r"Provided Schema does not match Table *" -def test_to_gbq_with_write_disposition_truncate( - mock_bigquery_client, expected_load_method -): +def 
test_to_gbq_with_if_exists_truncate(mock_bigquery_client, expected_load_method): mock_bigquery_client.get_table.side_effect = ( # Initial check google.cloud.bigquery.Table("myproj.my_dataset.my_table"), @@ -124,12 +120,12 @@ def test_to_gbq_with_write_disposition_truncate( DataFrame([[1]]), "my_dataset.my_table", project_id="myproj", - write_disposition="WRITE_TRUNCATE", + if_exists="replace", ) expected_load_method.assert_called_once() -def test_to_gbq_with_write_disposition_truncate_cross_project( +def test_to_gbq_with_if_exists_truncate_cross_project( mock_bigquery_client, expected_load_method ): mock_bigquery_client.get_table.side_effect = ( @@ -142,7 +138,7 @@ def test_to_gbq_with_write_disposition_truncate_cross_project( DataFrame([[1]]), "data-project.my_dataset.my_table", project_id="billing-project", - write_disposition="WRITE_TRUNCATE", + if_exists="replace", ) expected_load_method.assert_called_once() From 8f92fef2afac3fe3fc318523c1de13890c60d423 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 10:30:28 -0500 Subject: [PATCH 27/61] fix docstring --- pandas_gbq/gbq.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 69a0a415..699d51f0 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -975,8 +975,7 @@ def to_gbq( The main method a user calls to export pandas DataFrame contents to Google BigQuery table. - This method uses the Google Cloud client library to make requests to - Google BigQuery, documented `here + This method uses the Google Cloud client library to make requests to Google BigQuery, documented `here `__. See the :ref:`How to authenticate with Google BigQuery ` From 77b2d945a1f1c15521e24534e8ac780c31a7181b Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 10:40:35 -0500 Subject: [PATCH 28/61] fix docstring --- pandas_gbq/gbq.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 699d51f0..65af9a89 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -972,8 +972,7 @@ def to_gbq( ): """Write a DataFrame to a Google BigQuery table. - The main method a user calls to export pandas DataFrame contents to - Google BigQuery table. + The main method a user calls to export pandas DataFrame contents to Google BigQuery table. This method uses the Google Cloud client library to make requests to Google BigQuery, documented `here `__. @@ -999,6 +998,7 @@ def to_gbq( if multiple accounts are used. if_exists : str, default 'fail' Behavior when the destination table exists. Value can be one of: + ``'fail'`` If table exists, do nothing. 
``'replace'`` From 7b3efc1fe8ed82a6c94e74189b5b85b278ebedd5 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 10:46:54 -0500 Subject: [PATCH 29/61] fix conda requirements --- ci/requirements-3.7-0.24.2.conda | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda index bdc162be..b756f1e7 100644 --- a/ci/requirements-3.7-0.24.2.conda +++ b/ci/requirements-3.7-0.24.2.conda @@ -1,6 +1,6 @@ codecov coverage -db-dtypes==0.3.1 +db-dtypes==1.0.4 fastavro flake8 freezegun From 3fc748daab48e600baafdba89996884c17823870 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 10:49:17 -0500 Subject: [PATCH 30/61] remove system tests from github workflow --- .github/workflows/system-test.yml | 57 --------------------- .kokoro/continuous/prerelease-deps-3.10.cfg | 7 --- .kokoro/continuous/prerelease-deps-3.7.cfg | 7 --- .kokoro/continuous/prerelease-deps-3.8.cfg | 7 --- .kokoro/continuous/prerelease-deps-3.9.cfg | 7 --- .kokoro/presubmit/prerelease-deps-3.10.cfg | 7 --- .kokoro/presubmit/prerelease-deps-3.7.cfg | 7 --- .kokoro/presubmit/prerelease-deps-3.8.cfg | 7 --- .kokoro/presubmit/prerelease-deps-3.9.cfg | 7 --- .kokoro/presubmit/presubmit.cfg | 12 +---- .kokoro/presubmit/system-3.10.cfg | 7 --- .kokoro/presubmit/system-3.7.cfg | 7 --- .kokoro/presubmit/system-3.8.cfg | 7 --- .kokoro/presubmit/system-3.9.cfg | 7 --- 14 files changed, 1 insertion(+), 152 deletions(-) delete mode 100644 .github/workflows/system-test.yml delete mode 100644 .kokoro/continuous/prerelease-deps-3.10.cfg delete mode 100644 .kokoro/continuous/prerelease-deps-3.7.cfg delete mode 100644 .kokoro/continuous/prerelease-deps-3.8.cfg delete mode 100644 .kokoro/continuous/prerelease-deps-3.9.cfg delete mode 100644 .kokoro/presubmit/prerelease-deps-3.10.cfg delete mode 100644 .kokoro/presubmit/prerelease-deps-3.7.cfg delete mode 100644 .kokoro/presubmit/prerelease-deps-3.8.cfg delete mode 100644 .kokoro/presubmit/prerelease-deps-3.9.cfg delete mode 100644 .kokoro/presubmit/system-3.10.cfg delete mode 100644 .kokoro/presubmit/system-3.7.cfg delete mode 100644 .kokoro/presubmit/system-3.8.cfg delete mode 100644 .kokoro/presubmit/system-3.9.cfg diff --git a/.github/workflows/system-test.yml b/.github/workflows/system-test.yml deleted file mode 100644 index fa3080a0..00000000 --- a/.github/workflows/system-test.yml +++ /dev/null @@ -1,57 +0,0 @@ -on: - pull_request: - branches: - - main -name: system-test -jobs: - system: - runs-on: ubuntu-latest - strategy: - matrix: - python: ['3.7', '3.8', '3.9', '3.10'] - steps: - - name: Checkout - uses: actions/checkout@v3 - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python }} - - name: Install nox - run: | - python -m pip install --upgrade setuptools pip wheel - python -m pip install nox - - name: Run system tests - env: - COVERAGE_FILE: .coverage-${{ matrix.python }} - run: | - nox -s system-${{ matrix.python }} - - name: Upload coverage results - uses: actions/upload-artifact@v3 - with: - name: coverage-artifacts - path: .coverage-${{ matrix.python }} - - cover: - runs-on: ubuntu-latest - needs: - - system - steps: - - name: Checkout - uses: actions/checkout@v3 - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: "3.10" - - name: Install coverage - run: | - python -m pip install --upgrade setuptools pip wheel - python -m pip install 
coverage - - name: Download coverage results - uses: actions/download-artifact@v3 - with: - name: coverage-artifacts - path: .coverage-results/ - - name: Report coverage results - run: | - coverage combine .coverage-results/.coverage* - coverage report --show-missing --fail-under=96 diff --git a/.kokoro/continuous/prerelease-deps-3.10.cfg b/.kokoro/continuous/prerelease-deps-3.10.cfg deleted file mode 100644 index 339980bd..00000000 --- a/.kokoro/continuous/prerelease-deps-3.10.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps-3.10" -} diff --git a/.kokoro/continuous/prerelease-deps-3.7.cfg b/.kokoro/continuous/prerelease-deps-3.7.cfg deleted file mode 100644 index e63f8e85..00000000 --- a/.kokoro/continuous/prerelease-deps-3.7.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps-3.7" -} diff --git a/.kokoro/continuous/prerelease-deps-3.8.cfg b/.kokoro/continuous/prerelease-deps-3.8.cfg deleted file mode 100644 index fabe3e34..00000000 --- a/.kokoro/continuous/prerelease-deps-3.8.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps-3.8" -} diff --git a/.kokoro/continuous/prerelease-deps-3.9.cfg b/.kokoro/continuous/prerelease-deps-3.9.cfg deleted file mode 100644 index f3fcc8d8..00000000 --- a/.kokoro/continuous/prerelease-deps-3.9.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps-3.9" -} diff --git a/.kokoro/presubmit/prerelease-deps-3.10.cfg b/.kokoro/presubmit/prerelease-deps-3.10.cfg deleted file mode 100644 index 339980bd..00000000 --- a/.kokoro/presubmit/prerelease-deps-3.10.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps-3.10" -} diff --git a/.kokoro/presubmit/prerelease-deps-3.7.cfg b/.kokoro/presubmit/prerelease-deps-3.7.cfg deleted file mode 100644 index e63f8e85..00000000 --- a/.kokoro/presubmit/prerelease-deps-3.7.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps-3.7" -} diff --git a/.kokoro/presubmit/prerelease-deps-3.8.cfg b/.kokoro/presubmit/prerelease-deps-3.8.cfg deleted file mode 100644 index fabe3e34..00000000 --- a/.kokoro/presubmit/prerelease-deps-3.8.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps-3.8" -} diff --git a/.kokoro/presubmit/prerelease-deps-3.9.cfg b/.kokoro/presubmit/prerelease-deps-3.9.cfg deleted file mode 100644 index f3fcc8d8..00000000 --- a/.kokoro/presubmit/prerelease-deps-3.9.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. 
-env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps-3.9" -} diff --git a/.kokoro/presubmit/presubmit.cfg b/.kokoro/presubmit/presubmit.cfg index 17d071ca..8f43917d 100644 --- a/.kokoro/presubmit/presubmit.cfg +++ b/.kokoro/presubmit/presubmit.cfg @@ -1,11 +1 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Disable system tests. -env_vars: { - key: "RUN_SYSTEM_TESTS" - value: "false" -} -env_vars: { - key: "RUN_SNIPPETS_TESTS" - value: "false" -} +# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file diff --git a/.kokoro/presubmit/system-3.10.cfg b/.kokoro/presubmit/system-3.10.cfg deleted file mode 100644 index f1c7c569..00000000 --- a/.kokoro/presubmit/system-3.10.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "system-3.10" -} \ No newline at end of file diff --git a/.kokoro/presubmit/system-3.7.cfg b/.kokoro/presubmit/system-3.7.cfg deleted file mode 100644 index 461537b3..00000000 --- a/.kokoro/presubmit/system-3.7.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "system-3.7" -} \ No newline at end of file diff --git a/.kokoro/presubmit/system-3.8.cfg b/.kokoro/presubmit/system-3.8.cfg deleted file mode 100644 index f4bcee3d..00000000 --- a/.kokoro/presubmit/system-3.8.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "system-3.8" -} \ No newline at end of file diff --git a/.kokoro/presubmit/system-3.9.cfg b/.kokoro/presubmit/system-3.9.cfg deleted file mode 100644 index b8ae66b3..00000000 --- a/.kokoro/presubmit/system-3.9.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "system-3.9" -} \ No newline at end of file From 0bdef404f26261c5a8a99a02370cabab7bcd6ea9 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 11:00:44 -0500 Subject: [PATCH 31/61] adjust circle ci dependencies --- ci/requirements-3.7-0.24.2.conda | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda index b756f1e7..2facfb2c 100644 --- a/ci/requirements-3.7-0.24.2.conda +++ b/ci/requirements-3.7-0.24.2.conda @@ -1,12 +1,12 @@ codecov coverage -db-dtypes==1.0.4 +db-dtypes==0.3.1 fastavro flake8 freezegun numpy==1.16.6 -google-cloud-bigquery==3.3.5 -google-cloud-bigquery-storage==2.16.2 +google-cloud-bigquery==1.27.2 +google-cloud-bigquery-storage==1.1.0 pyarrow==3.0.0 pydata-google-auth pytest From 972ce57bb9bfb51f110236ad7be6194d3f0a48e8 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 13:36:23 -0500 Subject: [PATCH 32/61] drop versions for circleci build --- setup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 6b71fa34..f22b8ab3 100644 --- a/setup.py +++ b/setup.py @@ -31,16 +31,16 @@ # Note: google-api-core and google-auth are also included via transitive # dependency on google-cloud-bigquery, but this library also uses them # directly. 
- "google-api-core >= 2.10.2, <3.0.0dev", - "google-auth >=2.13.0", - "google-auth-oauthlib >=0.7.0", + "google-api-core >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", + "google-auth >=1.25.0", + "google-auth-oauthlib >=0.0.1", # Require 1.27.* because it has a fix for out-of-bounds timestamps. See: # https://github.com/googleapis/python-bigquery/pull/209 and # https://github.com/googleapis/python-bigquery-pandas/issues/365 # Exclude 2.4.* because it has a bug where waiting for the query can hang # indefinitely. https://github.com/pydata/pandas-gbq/issues/343 "google-cloud-bigquery >=3.3.5,<4.0.0dev,!=2.4.*", - "google-cloud-bigquery-storage >=2.16.2,<3.0.0dev", + "google-cloud-bigquery-storage >=1.1.0,<3.0.0dev", ] extras = { "tqdm": "tqdm>=4.23.0", From 18b0273df7c2821cc01f87e8a9a9dcc4c89496a5 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 13:44:05 -0500 Subject: [PATCH 33/61] add circleci fixture back to conftest.py --- tests/system/conftest.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 1d7ae681..4ba8bf31 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -16,15 +16,15 @@ REPO_DIR = pathlib.Path(__file__).parent.parent.parent -# # TODO: remove when fully migrated off of Circle CI -# @pytest.fixture(scope="session", autouse=True) -# def default_credentials(): -# """Setup application default credentials for use in code samples.""" -# # Written by the 'ci/config_auth.sh' script. -# path = REPO_DIR / "ci" / "service_account.json" - -# if path.is_file() and "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ: -# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = str(path) +# TODO: remove when fully migrated off of Circle CI +@pytest.fixture(scope="session", autouse=True) +def default_credentials(): + """Setup application default credentials for use in code samples.""" + # Written by the 'ci/config_auth.sh' script. 
+ path = REPO_DIR / "ci" / "service_account.json" + + if path.is_file() and "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ: + os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = str(path) @pytest.fixture(scope="session", autouse=True) From 43731c23cc3a2cb6931cc08dab845832ce24d169 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 13:51:28 -0500 Subject: [PATCH 34/61] match versions --- testing/constraints-3.7.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 8f2ad162..a5d720f1 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -6,11 +6,11 @@ # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 db-dtypes==1.0.4 -google-api-core==2.10.2 -google-auth==2.13.0 -google-auth-oauthlib==0.7.0 +google-api-core==1.31.5 +google-auth==1.25.0 +google-auth-oauthlib==0.0.1 google-cloud-bigquery==3.3.5 -google-cloud-bigquery-storage==2.16.2 +google-cloud-bigquery-storage==1.1.0 numpy==1.16.6 pandas==1.1.4 pyarrow==3.0.0 From 4eccfc95c477f665fbba00e072c8b50ef4212bb9 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 13:53:43 -0500 Subject: [PATCH 35/61] match versions --- setup.py | 2 +- testing/constraints-3.7.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index f22b8ab3..f25da383 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ # Exclude 2.4.* because it has a bug where waiting for the query can hang # indefinitely. https://github.com/pydata/pandas-gbq/issues/343 "google-cloud-bigquery >=3.3.5,<4.0.0dev,!=2.4.*", - "google-cloud-bigquery-storage >=1.1.0,<3.0.0dev", + "google-cloud-bigquery-storage >=2.16.2,<3.0.0dev", ] extras = { "tqdm": "tqdm>=4.23.0", diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index a5d720f1..13505c4f 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -10,7 +10,7 @@ google-api-core==1.31.5 google-auth==1.25.0 google-auth-oauthlib==0.0.1 google-cloud-bigquery==3.3.5 -google-cloud-bigquery-storage==1.1.0 +google-cloud-bigquery-storage==2.16.2 numpy==1.16.6 pandas==1.1.4 pyarrow==3.0.0 From 4bf2a1cd783a843e2e6baf25d71bfa588e038809 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 14:01:58 -0500 Subject: [PATCH 36/61] reset dependency versions --- samples/snippets/requirements.txt | 4 ++-- setup.py | 8 ++++---- testing/constraints-3.7.txt | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 3329e69a..6a681437 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ -google-cloud-bigquery-storage==2.16.2 -google-cloud-bigquery==3.3.5 +google-cloud-bigquery-storage==2.15.0 +google-cloud-bigquery==3.3.2 pandas-gbq==0.17.8 pandas===1.3.5; python_version == '3.7' pandas==1.4.4; python_version >= '3.8' diff --git a/setup.py b/setup.py index f25da383..0bf0c7b2 100644 --- a/setup.py +++ b/setup.py @@ -23,9 +23,9 @@ release_status = "Development Status :: 4 - Beta" dependencies = [ "setuptools", - "db-dtypes >=1.0.4,<2.0.0", + "db-dtypes >=0.3.1,<2.0.0", "numpy >=1.16.6", - "pandas >=1.1.4", + "pandas >=0.24.2", "pyarrow >=3.0.0, <10.0dev", "pydata-google-auth", # Note: google-api-core and google-auth are also included via 
transitive @@ -39,8 +39,8 @@ # https://github.com/googleapis/python-bigquery-pandas/issues/365 # Exclude 2.4.* because it has a bug where waiting for the query can hang # indefinitely. https://github.com/pydata/pandas-gbq/issues/343 - "google-cloud-bigquery >=3.3.5,<4.0.0dev,!=2.4.*", - "google-cloud-bigquery-storage >=2.16.2,<3.0.0dev", + "google-cloud-bigquery >=1.27.2,<4.0.0dev,!=2.4.*", + "google-cloud-bigquery-storage >=1.1.0,<3.0.0dev", ] extras = { "tqdm": "tqdm>=4.23.0", diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 13505c4f..1d9efed7 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -5,14 +5,14 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 -db-dtypes==1.0.4 +db-dtypes==0.3.1 google-api-core==1.31.5 google-auth==1.25.0 google-auth-oauthlib==0.0.1 -google-cloud-bigquery==3.3.5 -google-cloud-bigquery-storage==2.16.2 +google-cloud-bigquery==1.27.2 +google-cloud-bigquery-storage==1.1.0 numpy==1.16.6 -pandas==1.1.4 +pandas==0.24.2 pyarrow==3.0.0 pydata-google-auth==0.1.2 tqdm==4.23.0 From e664423df1a8bdab4e0793e461d47703c8f547d0 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 14:28:41 -0500 Subject: [PATCH 37/61] reset dependency versions --- setup.py | 14 +++++++------- testing/constraints-3.7.txt | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/setup.py b/setup.py index 0bf0c7b2..6b71fa34 100644 --- a/setup.py +++ b/setup.py @@ -23,24 +23,24 @@ release_status = "Development Status :: 4 - Beta" dependencies = [ "setuptools", - "db-dtypes >=0.3.1,<2.0.0", + "db-dtypes >=1.0.4,<2.0.0", "numpy >=1.16.6", - "pandas >=0.24.2", + "pandas >=1.1.4", "pyarrow >=3.0.0, <10.0dev", "pydata-google-auth", # Note: google-api-core and google-auth are also included via transitive # dependency on google-cloud-bigquery, but this library also uses them # directly. - "google-api-core >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", - "google-auth >=1.25.0", - "google-auth-oauthlib >=0.0.1", + "google-api-core >= 2.10.2, <3.0.0dev", + "google-auth >=2.13.0", + "google-auth-oauthlib >=0.7.0", # Require 1.27.* because it has a fix for out-of-bounds timestamps. See: # https://github.com/googleapis/python-bigquery/pull/209 and # https://github.com/googleapis/python-bigquery-pandas/issues/365 # Exclude 2.4.* because it has a bug where waiting for the query can hang # indefinitely. 
https://github.com/pydata/pandas-gbq/issues/343 - "google-cloud-bigquery >=1.27.2,<4.0.0dev,!=2.4.*", - "google-cloud-bigquery-storage >=1.1.0,<3.0.0dev", + "google-cloud-bigquery >=3.3.5,<4.0.0dev,!=2.4.*", + "google-cloud-bigquery-storage >=2.16.2,<3.0.0dev", ] extras = { "tqdm": "tqdm>=4.23.0", diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 1d9efed7..8f2ad162 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -5,14 +5,14 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 -db-dtypes==0.3.1 -google-api-core==1.31.5 -google-auth==1.25.0 -google-auth-oauthlib==0.0.1 -google-cloud-bigquery==1.27.2 -google-cloud-bigquery-storage==1.1.0 +db-dtypes==1.0.4 +google-api-core==2.10.2 +google-auth==2.13.0 +google-auth-oauthlib==0.7.0 +google-cloud-bigquery==3.3.5 +google-cloud-bigquery-storage==2.16.2 numpy==1.16.6 -pandas==0.24.2 +pandas==1.1.4 pyarrow==3.0.0 pydata-google-auth==0.1.2 tqdm==4.23.0 From 02ebb1f7eca0eb5f11ab4632c5f1a1ba4a644dfa Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 14:34:33 -0500 Subject: [PATCH 38/61] reset dependency versions --- ci/requirements-3.7-0.24.2.conda | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda index 2facfb2c..b756f1e7 100644 --- a/ci/requirements-3.7-0.24.2.conda +++ b/ci/requirements-3.7-0.24.2.conda @@ -1,12 +1,12 @@ codecov coverage -db-dtypes==0.3.1 +db-dtypes==1.0.4 fastavro flake8 freezegun numpy==1.16.6 -google-cloud-bigquery==1.27.2 -google-cloud-bigquery-storage==1.1.0 +google-cloud-bigquery==3.3.5 +google-cloud-bigquery-storage==2.16.2 pyarrow==3.0.0 pydata-google-auth pytest From 591cb85863e50ffc7cad94aabec96c964851c604 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 14:54:54 -0500 Subject: [PATCH 39/61] reset dependency versions --- ci/requirements-3.7-0.24.2.conda | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda index b756f1e7..5efe4247 100644 --- a/ci/requirements-3.7-0.24.2.conda +++ b/ci/requirements-3.7-0.24.2.conda @@ -1,14 +1,14 @@ codecov coverage -db-dtypes==1.0.4 +db-dtypes==0.3.1 fastavro flake8 freezegun numpy==1.16.6 -google-cloud-bigquery==3.3.5 -google-cloud-bigquery-storage==2.16.2 +google-cloud-bigquery==1.27.2 +google-cloud-bigquery-storage==1.1.0 pyarrow==3.0.0 pydata-google-auth pytest pytest-cov -tqdm==4.23.0 +tqdm==4.23.0 \ No newline at end of file From 78b4687c23a6db5fffb3ab66175f0d8d737790dd Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 15:19:41 -0500 Subject: [PATCH 40/61] bump pydata-google-auth version --- testing/constraints-3.7.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 8f2ad162..569287ad 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -14,6 +14,6 @@ google-cloud-bigquery-storage==2.16.2 numpy==1.16.6 pandas==1.1.4 pyarrow==3.0.0 -pydata-google-auth==0.1.2 +pydata-google-auth==1.4.0 tqdm==4.23.0 protobuf==3.19.5 From 86cb721d08f45a6e8c7184fb4b82c350cebe1cc8 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 15:34:40 -0500 Subject: [PATCH 41/61] try pinning dependencies in circleci 
requirements

---
 ci/requirements-3.7-0.24.2.conda | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda
index 5efe4247..e9d67d12 100644
--- a/ci/requirements-3.7-0.24.2.conda
+++ b/ci/requirements-3.7-0.24.2.conda
@@ -5,10 +5,11 @@ fastavro
 flake8
 freezegun
 numpy==1.16.6
+google-auth==2.13.0
 google-cloud-bigquery==1.27.2
 google-cloud-bigquery-storage==1.1.0
 pyarrow==3.0.0
-pydata-google-auth
+pydata-google-auth==1.4.0
 pytest
 pytest-cov
 tqdm==4.23.0
\ No newline at end of file

From 96d2f83dedd6a2917c3f958b0e76cd7267fb5208 Mon Sep 17 00:00:00 2001
From: aribray <45905583+aribray@users.noreply.github.com>
Date: Thu, 27 Oct 2022 15:38:59 -0500
Subject: [PATCH 42/61] unpin dependencies

---
 ci/requirements-3.7-0.24.2.conda | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda
index e9d67d12..c13e90a8 100644
--- a/ci/requirements-3.7-0.24.2.conda
+++ b/ci/requirements-3.7-0.24.2.conda
@@ -5,11 +5,11 @@ fastavro
 flake8
 freezegun
 numpy==1.16.6
-google-auth==2.13.0
+google-auth
 google-cloud-bigquery==1.27.2
 google-cloud-bigquery-storage==1.1.0
 pyarrow==3.0.0
-pydata-google-auth==1.4.0
+pydata-google-auth
 pytest
 pytest-cov
 tqdm==4.23.0
\ No newline at end of file

From e7c43357490a667c2eb8ea2116df167e96c31f38 Mon Sep 17 00:00:00 2001
From: aribray <45905583+aribray@users.noreply.github.com>
Date: Thu, 27 Oct 2022 15:43:14 -0500
Subject: [PATCH 43/61] try adding dependency

---
 ci/requirements-3.7-0.24.2.conda | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda
index c13e90a8..6f2faa8e 100644
--- a/ci/requirements-3.7-0.24.2.conda
+++ b/ci/requirements-3.7-0.24.2.conda
@@ -5,7 +5,7 @@ fastavro
 flake8
 freezegun
 numpy==1.16.6
-google-auth
+google-auth==2.13.0
 google-cloud-bigquery==1.27.2
 google-cloud-bigquery-storage==1.1.0
 pyarrow==3.0.0

From fc1e91a1b093148599e4a2bc7857af344400ef2d Mon Sep 17 00:00:00 2001
From: aribray <45905583+aribray@users.noreply.github.com>
Date: Thu, 27 Oct 2022 15:49:35 -0500
Subject: [PATCH 44/61] reset requirements.conda

---
 ci/requirements-3.7-0.24.2.conda | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda
index 6f2faa8e..5efe4247 100644
--- a/ci/requirements-3.7-0.24.2.conda
+++ b/ci/requirements-3.7-0.24.2.conda
@@ -5,7 +5,6 @@ fastavro
 flake8
 freezegun
 numpy==1.16.6
-google-auth==2.13.0
 google-cloud-bigquery==1.27.2
 google-cloud-bigquery-storage==1.1.0
 pyarrow==3.0.0

From ea71b8bd5f428ede68fdbfa2d0af58e9e667b65b Mon Sep 17 00:00:00 2001
From: aribray <45905583+aribray@users.noreply.github.com>
Date: Thu, 27 Oct 2022 16:59:12 -0500
Subject: [PATCH 45/61] add oauthlib dependency

---
 ci/requirements-3.7-0.24.2.conda | 1 +
 ci/requirements-3.9-1.3.4.conda  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda
index 5efe4247..11962626 100644
--- a/ci/requirements-3.7-0.24.2.conda
+++ b/ci/requirements-3.7-0.24.2.conda
@@ -5,6 +5,7 @@ fastavro
 flake8
freezegun +google-auth-oauthlib google-cloud-bigquery google-cloud-bigquery-storage numpy From d66da9ae90593642b5639aa6dfb5842fd8e7e4be Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Thu, 27 Oct 2022 17:16:55 -0500 Subject: [PATCH 46/61] pin pydata google auth version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6b71fa34..12c8443c 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ "numpy >=1.16.6", "pandas >=1.1.4", "pyarrow >=3.0.0, <10.0dev", - "pydata-google-auth", + "pydata-google-auth >=1.4.0", # Note: google-api-core and google-auth are also included via transitive # dependency on google-cloud-bigquery, but this library also uses them # directly. From 3b006c5f90337aabb4057f59e8aabbd5bdc2789e Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Fri, 28 Oct 2022 10:26:01 -0500 Subject: [PATCH 47/61] add google-auth dependency to circleci requirements --- ci/requirements-3.7-0.24.2.conda | 1 + ci/requirements-3.9-1.3.4.conda | 1 + 2 files changed, 2 insertions(+) diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda index 11962626..05d77f70 100644 --- a/ci/requirements-3.7-0.24.2.conda +++ b/ci/requirements-3.7-0.24.2.conda @@ -5,6 +5,7 @@ fastavro flake8 freezegun numpy==1.16.6 +google-auth==2.13.0 google-auth-oauthlib==0.7.0 google-cloud-bigquery==1.27.2 google-cloud-bigquery-storage==1.1.0 diff --git a/ci/requirements-3.9-1.3.4.conda b/ci/requirements-3.9-1.3.4.conda index cdda67c8..82d9e784 100644 --- a/ci/requirements-3.9-1.3.4.conda +++ b/ci/requirements-3.9-1.3.4.conda @@ -4,6 +4,7 @@ db-dtypes fastavro flake8 freezegun +google-auth google-auth-oauthlib google-cloud-bigquery google-cloud-bigquery-storage From ed04c3c5fcf1cc0cdd3875682e2e7be97bbecd23 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Fri, 28 Oct 2022 10:27:53 -0500 Subject: [PATCH 48/61] unpin dependency --- ci/requirements-3.7-0.24.2.conda | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda index 05d77f70..bc8ddf75 100644 --- a/ci/requirements-3.7-0.24.2.conda +++ b/ci/requirements-3.7-0.24.2.conda @@ -5,7 +5,7 @@ fastavro flake8 freezegun numpy==1.16.6 -google-auth==2.13.0 +google-auth google-auth-oauthlib==0.7.0 google-cloud-bigquery==1.27.2 google-cloud-bigquery-storage==1.1.0 From bda78e88d058f085a00be7318d98ca8c40e6d58c Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Fri, 28 Oct 2022 11:46:01 -0500 Subject: [PATCH 49/61] use requirements.txt created by pip-compile --- requirements.txt | 129 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 124 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index 7b3ede97..b2e5da5b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,124 @@ -pandas -google-auth -google-auth-oauthlib -google-cloud-bigquery -tqdm +# +# This file is autogenerated by pip-compile with python 3.8 +# To update, run: +# +# pip-compile +# +cachetools==5.2.0 + # via google-auth +certifi==2022.9.24 + # via requests +charset-normalizer==2.1.1 + # via requests +db-dtypes==1.0.4 + # via pandas-gbq (setup.py) +google-api-core[grpc]==2.10.2 + # via + # google-cloud-bigquery + # google-cloud-bigquery-storage + # google-cloud-core + # pandas-gbq (setup.py) +google-auth==2.13.0 + # via + # google-api-core + # google-auth-oauthlib + 
# google-cloud-core
+    # pandas-gbq (setup.py)
+    # pydata-google-auth
+google-auth-oauthlib==0.7.0
+    # via
+    # pandas-gbq (setup.py)
+    # pydata-google-auth
+google-cloud-bigquery==3.3.5
+    # via pandas-gbq (setup.py)
+google-cloud-bigquery-storage==2.16.2
+    # via
+    # google-cloud-bigquery
+    # pandas-gbq (setup.py)
+google-cloud-core==2.3.2
+    # via google-cloud-bigquery
+google-crc32c==1.5.0
+    # via google-resumable-media
+google-resumable-media==2.4.0
+    # via google-cloud-bigquery
+googleapis-common-protos==1.56.4
+    # via
+    # google-api-core
+    # grpcio-status
+grpcio==1.50.0
+    # via
+    # google-api-core
+    # google-cloud-bigquery
+    # grpcio-status
+grpcio-status==1.50.0
+    # via google-api-core
+idna==3.4
+    # via requests
+numpy==1.23.4
+    # via
+    # db-dtypes
+    # pandas
+    # pandas-gbq (setup.py)
+    # pyarrow
+oauthlib==3.2.2
+    # via requests-oauthlib
+packaging==21.3
+    # via
+    # db-dtypes
+    # google-cloud-bigquery
+pandas==1.5.1
+    # via
+    # db-dtypes
+    # pandas-gbq (setup.py)
+proto-plus==1.22.1
+    # via
+    # google-cloud-bigquery
+    # google-cloud-bigquery-storage
+protobuf==4.21.9
+    # via
+    # google-api-core
+    # google-cloud-bigquery
+    # google-cloud-bigquery-storage
+    # googleapis-common-protos
+    # grpcio-status
+    # proto-plus
+pyarrow==9.0.0
+    # via
+    # db-dtypes
+    # google-cloud-bigquery
+    # pandas-gbq (setup.py)
+pyasn1==0.4.8
+    # via
+    # pyasn1-modules
+    # rsa
+pyasn1-modules==0.2.8
+    # via google-auth
+pydata-google-auth==1.4.0
+    # via pandas-gbq (setup.py)
+pyparsing==3.0.9
+    # via packaging
+python-dateutil==2.8.2
+    # via
+    # google-cloud-bigquery
+    # pandas
+pytz==2022.5
+    # via pandas
+requests==2.28.1
+    # via
+    # google-api-core
+    # google-cloud-bigquery
+    # requests-oauthlib
+requests-oauthlib==1.3.1
+    # via google-auth-oauthlib
+rsa==4.9
+    # via google-auth
+six==1.16.0
+    # via
+    # google-auth
+    # grpcio
+    # python-dateutil
+urllib3==1.26.12
+    # via requests
+
+# The following packages are considered to be unsafe in a requirements file:
+# setuptools

From f5dd1e7bba311bf64e3367a6cc6ffff6fb7046b6 Mon Sep 17 00:00:00 2001
From: aribray <45905583+aribray@users.noreply.github.com>
Date: Fri, 28 Oct 2022 15:28:42 -0500
Subject: [PATCH 50/61] pin dependencies in circleci requirements

---
 ci/requirements-3.7-0.24.2.conda | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda
index bc8ddf75..edfa2b10 100644
--- a/ci/requirements-3.7-0.24.2.conda
+++ b/ci/requirements-3.7-0.24.2.conda
@@ -4,12 +4,13 @@ db-dtypes==0.3.1
 fastavro
 flake8
 freezegun
-numpy==1.16.6
-google-auth
+numpy==1.23.4
+google-api-core==2.10.2
+google-auth==2.13.0
 google-auth-oauthlib==0.7.0
 google-cloud-bigquery==1.27.2
-google-cloud-bigquery-storage==1.1.0
-pyarrow==3.0.0
+google-cloud-bigquery-storage==2.11.0
+pyarrow==9.0.0
 pydata-google-auth
 pytest
 pytest-cov

From 6527fd742790b3e4e007bf855d30d03afff0ffdb Mon Sep 17 00:00:00 2001
From: aribray <45905583+aribray@users.noreply.github.com>
Date: Sat, 29 Oct 2022 19:49:12 -0500
Subject: [PATCH 51/61] unpin dependencies in CircleCI requirements

---
 .circleci/config.yml             |   2 +-
 ci/requirements-3.7-0.24.2.conda |  17 ----
 ci/requirements-3.7-1.1.4.conda  |  18 +++++
 requirements.txt                 | 129 ++------------------------
 4 files changed, 24 insertions(+), 142 deletions(-)
 delete mode 100644 ci/requirements-3.7-0.24.2.conda
 create mode 100644 ci/requirements-3.7-1.1.4.conda

diff --git a/.circleci/config.yml b/.circleci/config.yml
index e008054c..f953c79e 100644
--- a/.circleci/config.yml
+++ 
b/.circleci/config.yml @@ -10,7 +10,7 @@ jobs: - image: mambaorg/micromamba environment: PYTHON: "3.7" - PANDAS: "0.24.2" + PANDAS: "1.1.4" steps: - checkout - run: ci/config_auth.sh diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda deleted file mode 100644 index edfa2b10..00000000 --- a/ci/requirements-3.7-0.24.2.conda +++ /dev/null @@ -1,17 +0,0 @@ -codecov -coverage -db-dtypes==0.3.1 -fastavro -flake8 -freezegun -numpy==1.23.4 -google-api-core==2.10.2 -google-auth==2.13.0 -google-auth-oauthlib==0.7.0 -google-cloud-bigquery==1.27.2 -google-cloud-bigquery-storage==2.11.0 -pyarrow==9.0.0 -pydata-google-auth -pytest -pytest-cov -tqdm==4.23.0 \ No newline at end of file diff --git a/ci/requirements-3.7-1.1.4.conda b/ci/requirements-3.7-1.1.4.conda new file mode 100644 index 00000000..3ae0c4d4 --- /dev/null +++ b/ci/requirements-3.7-1.1.4.conda @@ -0,0 +1,18 @@ +codecov +coverage +db-dtypes +fastavro +flake8 +freezegun +numpy +google-api-core +google-auth +google-auth-oauthlib +google-cloud-bigquery +google-cloud-bigquery-storage +pyarrow +pydata-google-auth +pytest +pytest-cov +requests-oauthlib +tqdm \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index b2e5da5b..bf23435f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,124 +1,5 @@ -# -# This file is autogenerated by pip-compile with python 3.8 -# To update, run: -# -# pip-compile -# -cachetools==5.2.0 - # via google-auth -certifi==2022.9.24 - # via requests -charset-normalizer==2.1.1 - # via requests -db-dtypes==1.0.4 - # via pandas-gbq (setup.py) -google-api-core[grpc]==2.10.2 - # via - # google-cloud-bigquery - # google-cloud-bigquery-storage - # google-cloud-core - # pandas-gbq (setup.py) -google-auth==2.13.0 - # via - # google-api-core - # google-auth-oauthlib - # google-cloud-core - # pandas-gbq (setup.py) - # pydata-google-auth -google-auth-oauthlib==0.7.0 - # via - # pandas-gbq (setup.py) - # pydata-google-auth -google-cloud-bigquery==3.3.5 - # via pandas-gbq (setup.py) -google-cloud-bigquery-storage==2.16.2 - # via - # google-cloud-bigquery - # pandas-gbq (setup.py) -google-cloud-core==2.3.2 - # via google-cloud-bigquery -google-crc32c==1.5.0 - # via google-resumable-media -google-resumable-media==2.4.0 - # via google-cloud-bigquery -googleapis-common-protos==1.56.4 - # via - # google-api-core - # grpcio-status -grpcio==1.50.0 - # via - # google-api-core - # google-cloud-bigquery - # grpcio-status -grpcio-status==1.50.0 - # via google-api-core -idna==3.4 - # via requests -numpy==1.23.4 - # via - # db-dtypes - # pandas - # pandas-gbq (setup.py) - # pyarrow -oauthlib==3.2.2 - # via requests-oauthlib -packaging==21.3 - # via - # db-dtypes - # google-cloud-bigquery -pandas==1.5.1 - # via - # db-dtypes - # pandas-gbq (setup.py) -proto-plus==1.22.1 - # via - # google-cloud-bigquery - # google-cloud-bigquery-storage -protobuf==4.21.9 - # via - # google-api-core - # google-cloud-bigquery - # google-cloud-bigquery-storage - # googleapis-common-protos - # grpcio-status - # proto-plus -pyarrow==9.0.0 - # via - # db-dtypes - # google-cloud-bigquery - # pandas-gbq (setup.py) -pyasn1==0.4.8 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.2.8 - # via google-auth -pydata-google-auth==1.4.0 - # via pandas-gbq (setup.py) -pyparsing==3.0.9 - # via packaging -python-dateutil==2.8.2 - # via - # google-cloud-bigquery - # pandas -pytz==2022.5 - # via pandas -requests==2.28.1 - # via - # google-api-core - # google-cloud-bigquery - # requests-oauthlib -requests-oauthlib==1.3.1 - # 
via google-auth-oauthlib -rsa==4.9 - # via google-auth -six==1.16.0 - # via - # google-auth - # grpcio - # python-dateutil -urllib3==1.26.12 - # via requests - -# The following packages are considered to be unsafe in a requirements file: -# setuptools +pandas +google-auth +google-auth-oauthlib +google-cloud-bigquery +tqdm \ No newline at end of file From 330e34d287994e3981fc2c2b32fefea625f4c633 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Sat, 29 Oct 2022 20:25:59 -0500 Subject: [PATCH 52/61] repin some dependencies --- .circleci/config.yml | 2 +- ci/requirements-3.7-0.24.2.conda | 16 ++++++++++++++++ ci/requirements-3.7-1.1.4.conda | 18 ------------------ ci/requirements-3.9-1.3.4.conda | 2 -- pandas_gbq/gbq.py | 4 +--- 5 files changed, 18 insertions(+), 24 deletions(-) create mode 100644 ci/requirements-3.7-0.24.2.conda delete mode 100644 ci/requirements-3.7-1.1.4.conda diff --git a/.circleci/config.yml b/.circleci/config.yml index f953c79e..e008054c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -10,7 +10,7 @@ jobs: - image: mambaorg/micromamba environment: PYTHON: "3.7" - PANDAS: "1.1.4" + PANDAS: "0.24.2" steps: - checkout - run: ci/config_auth.sh diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda new file mode 100644 index 00000000..a1643049 --- /dev/null +++ b/ci/requirements-3.7-0.24.2.conda @@ -0,0 +1,16 @@ +codecov +coverage +db-dtypes==0.3.1 +fastavro +flake8 +freezegun +numpy==1.16.6 +google-api-core +google-cloud-bigquery==1.27.2 +google-cloud-bigquery-storage==1.1.0 +pyarrow==3.0.0 +pydata-google-auth +pytest +pytest-cov +requests-oauthlib +tqdm==4.23.0 \ No newline at end of file diff --git a/ci/requirements-3.7-1.1.4.conda b/ci/requirements-3.7-1.1.4.conda deleted file mode 100644 index 3ae0c4d4..00000000 --- a/ci/requirements-3.7-1.1.4.conda +++ /dev/null @@ -1,18 +0,0 @@ -codecov -coverage -db-dtypes -fastavro -flake8 -freezegun -numpy -google-api-core -google-auth -google-auth-oauthlib -google-cloud-bigquery -google-cloud-bigquery-storage -pyarrow -pydata-google-auth -pytest -pytest-cov -requests-oauthlib -tqdm \ No newline at end of file diff --git a/ci/requirements-3.9-1.3.4.conda b/ci/requirements-3.9-1.3.4.conda index 82d9e784..1411fe5b 100644 --- a/ci/requirements-3.9-1.3.4.conda +++ b/ci/requirements-3.9-1.3.4.conda @@ -4,8 +4,6 @@ db-dtypes fastavro flake8 freezegun -google-auth -google-auth-oauthlib google-cloud-bigquery google-cloud-bigquery-storage numpy diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 65af9a89..d429f654 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -284,11 +284,9 @@ def __init__( global context from google.api_core.exceptions import GoogleAPIError from google.api_core.exceptions import ClientError - from google.api_core.exceptions import BadRequest - from pandas_gbq import auth - self.http_error = (ClientError, GoogleAPIError, BadRequest) + self.http_error = (ClientError, GoogleAPIError) self.project_id = project_id self.location = location self.reauth = reauth From e5eb48c56ff3ddf1fc3eed0574a48a9027488c3e Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Sat, 29 Oct 2022 20:43:13 -0500 Subject: [PATCH 53/61] remove quiet flag --- ci/run_conda.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run_conda.sh b/ci/run_conda.sh index 11b5b569..815140f9 100755 --- a/ci/run_conda.sh +++ b/ci/run_conda.sh @@ -13,7 +13,7 @@ micromamba activate # 
https://github.com/mamba-org/micromamba-docker
 REQ="ci/requirements-${PYTHON}-${PANDAS}"
 micromamba install -q pandas=$PANDAS python=${PYTHON} -c conda-forge;
-micromamba install -q --file "$REQ.conda" -c conda-forge;
+micromamba install --file "$REQ.conda" -c conda-forge;
 micromamba list
 micromamba info

From bc2253f495173b9de9464af9971881305648e604 Mon Sep 17 00:00:00 2001
From: aribray <45905583+aribray@users.noreply.github.com>
Date: Sat, 29 Oct 2022 20:53:34 -0500
Subject: [PATCH 54/61] unpin dependencies in CircleCI requirements

---
 ci/requirements-3.7-0.24.2.conda | 13 +++++++------
 ci/run_conda.sh                  |  2 +-
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda
index a1643049..9dacea7e 100644
--- a/ci/requirements-3.7-0.24.2.conda
+++ b/ci/requirements-3.7-0.24.2.conda
@@ -1,16 +1,17 @@
 codecov
 coverage
-db-dtypes==0.3.1
+db-dtype
 fastavro
 flake8
 freezegun
-numpy==1.16.6
+numpy
 google-api-core
-google-cloud-bigquery==1.27.2
-google-cloud-bigquery-storage==1.1.0
-pyarrow==3.0.0
+google-auth
+google-cloud-bigquery
+google-cloud-bigquery-storage
+pyarrow
 pydata-google-auth
 pytest
 pytest-cov
 requests-oauthlib
-tqdm==4.23.0
\ No newline at end of file
+tqdm
\ No newline at end of file
diff --git a/ci/run_conda.sh b/ci/run_conda.sh
index 815140f9..11b5b569 100755
--- a/ci/run_conda.sh
+++ b/ci/run_conda.sh
@@ -13,7 +13,7 @@ micromamba activate
 # https://github.com/mamba-org/micromamba-docker
 REQ="ci/requirements-${PYTHON}-${PANDAS}"
 micromamba install -q pandas=$PANDAS python=${PYTHON} -c conda-forge;
-micromamba install --file "$REQ.conda" -c conda-forge;
+micromamba install -q --file "$REQ.conda" -c conda-forge;
 micromamba list
 micromamba info

From c47aa5f2fb484d2126cb360e3435b9253f2f91a3 Mon Sep 17 00:00:00 2001
From: aribray <45905583+aribray@users.noreply.github.com>
Date: Sat, 29 Oct 2022 21:02:44 -0500
Subject: [PATCH 55/61] bump pandas version

---
 .circleci/config.yml                                            | 2 +-
 ...quirements-3.7-0.24.2.conda => requirements-3.7-1.1.4.conda} | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename ci/{requirements-3.7-0.24.2.conda => requirements-3.7-1.1.4.conda} (100%)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index e008054c..f953c79e 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -10,7 +10,7 @@ jobs:
       - image: mambaorg/micromamba
         environment:
           PYTHON: "3.7"
-          PANDAS: "0.24.2"
+          PANDAS: "1.1.4"
     steps:
       - checkout
       - run: ci/config_auth.sh
diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-1.1.4.conda
similarity index 100%
rename from ci/requirements-3.7-0.24.2.conda
rename to ci/requirements-3.7-1.1.4.conda

From 3a813a03b9c7362ecfa0a07421cb47bff97a8d61 Mon Sep 17 00:00:00 2001
From: aribray <45905583+aribray@users.noreply.github.com>
Date: Sat, 29 Oct 2022 21:11:33 -0500
Subject: [PATCH 56/61] drop pandas version, fix package name

---
 .circleci/config.yml                                            | 2 +-
 ...quirements-3.7-1.1.4.conda => requirements-3.7-0.24.2.conda} | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
 rename ci/{requirements-3.7-1.1.4.conda => requirements-3.7-0.24.2.conda} (93%)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index f953c79e..e008054c 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -10,7 +10,7 @@ jobs:
       - image: mambaorg/micromamba
         environment:
           PYTHON: "3.7"
-          PANDAS: "1.1.4"
+          PANDAS: "0.24.2"
     steps:
       - checkout
      - run: ci/config_auth.sh
diff --git a/ci/requirements-3.7-1.1.4.conda b/ci/requirements-3.7-0.24.2.conda
similarity index 93%
rename from 
ci/requirements-3.7-1.1.4.conda rename to ci/requirements-3.7-0.24.2.conda index 9dacea7e..e2faf088 100644 --- a/ci/requirements-3.7-1.1.4.conda +++ b/ci/requirements-3.7-0.24.2.conda @@ -1,6 +1,6 @@ codecov coverage -db-dtype +db-dtypes fastavro flake8 freezegun From 5c26e43978e8600e4dc433354ed72a9da148bda1 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Sat, 29 Oct 2022 21:41:28 -0500 Subject: [PATCH 57/61] fix linting, add test --- pandas_gbq/gbq.py | 2 ++ tests/system/test_gbq.py | 2 +- tests/unit/test_to_gbq.py | 10 ++++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index d429f654..2ac460bc 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -1275,6 +1275,7 @@ def exists(self, table_id): def create(self, table_id, schema): """Create a table in Google BigQuery given a table and schema + Parameters ---------- table : str @@ -1312,6 +1313,7 @@ def create(self, table_id, schema): def delete(self, table_id): """Delete a table in Google BigQuery + Parameters ---------- table : str diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py index 3f85158b..5b90e8ba 100644 --- a/tests/system/test_gbq.py +++ b/tests/system/test_gbq.py @@ -692,7 +692,7 @@ def test_upload_data_if_table_exists_fail(self, project_id): credentials=self.credentials, ) - # Test the if_exists parameter with value 'replace' + # Test the if_exists parameter with value 'fail' with pytest.raises(gbq.TableCreationError): gbq.to_gbq( df, diff --git a/tests/unit/test_to_gbq.py b/tests/unit/test_to_gbq.py index 2509d2b6..2f9a9fa5 100644 --- a/tests/unit/test_to_gbq.py +++ b/tests/unit/test_to_gbq.py @@ -150,3 +150,13 @@ def test_to_gbq_with_if_exists_truncate_cross_project( assert table_destination.dataset_id == "my_dataset" assert table_destination.table_id == "my_table" assert load_kwargs["project"] == "billing-project" + + +def test_to_gbq_with_if_exists_unknown(): + with pytest.raises(ValueError): + gbq.to_gbq( + DataFrame([[1]]), + "my_dataset.my_table", + project_id="myproj", + if_exists="unknown", + ) From fba8921a1d1cc6513adbbd3d31039c55609058fc Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Sat, 29 Oct 2022 21:47:02 -0500 Subject: [PATCH 58/61] rename tests --- tests/unit/test_to_gbq.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_to_gbq.py b/tests/unit/test_to_gbq.py index 2f9a9fa5..5510188f 100644 --- a/tests/unit/test_to_gbq.py +++ b/tests/unit/test_to_gbq.py @@ -109,7 +109,7 @@ def test_to_gbq_with_if_exists_append_mismatch(mock_bigquery_client): assert exc.message == r"Provided Schema does not match Table *" -def test_to_gbq_with_if_exists_truncate(mock_bigquery_client, expected_load_method): +def test_to_gbq_with_if_exists_replace(mock_bigquery_client, expected_load_method): mock_bigquery_client.get_table.side_effect = ( # Initial check google.cloud.bigquery.Table("myproj.my_dataset.my_table"), @@ -125,7 +125,7 @@ def test_to_gbq_with_if_exists_truncate(mock_bigquery_client, expected_load_meth expected_load_method.assert_called_once() -def test_to_gbq_with_if_exists_truncate_cross_project( +def test_to_gbq_with_if_exists_replace_cross_project( mock_bigquery_client, expected_load_method ): mock_bigquery_client.get_table.side_effect = ( From ac61ee4e1f6799cd792b685b0a4b59abcb25eb38 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Sat, 29 Oct 2022 22:14:16 -0500 Subject: [PATCH 
59/61] formatting --- pandas_gbq/load.py | 2 +- tests/unit/test_to_gbq.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas_gbq/load.py b/pandas_gbq/load.py index 5efac314..bad99584 100644 --- a/pandas_gbq/load.py +++ b/pandas_gbq/load.py @@ -126,6 +126,7 @@ def load_parquet( schema = pandas_gbq.schema.remove_policy_tags(schema) job_config.schema = pandas_gbq.schema.to_google_cloud_bigquery(schema) dataframe = cast_dataframe_for_parquet(dataframe, schema) + try: client.load_table_from_dataframe( dataframe, @@ -148,7 +149,6 @@ def load_csv( load_chunk: Callable, ): job_config = bigquery.LoadJobConfig() - job_config.write_disposition = write_disposition job_config.source_format = "CSV" job_config.allow_quoted_newlines = True diff --git a/tests/unit/test_to_gbq.py b/tests/unit/test_to_gbq.py index 5510188f..4456df0e 100644 --- a/tests/unit/test_to_gbq.py +++ b/tests/unit/test_to_gbq.py @@ -105,6 +105,7 @@ def test_to_gbq_with_if_exists_append_mismatch(mock_bigquery_client): project_id="myproj", if_exists="append", ) + exc = exception_block.value assert exc.message == r"Provided Schema does not match Table *" From 6664df6f15ac676122f7ba734900f6e32ea8cb71 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Sat, 29 Oct 2022 22:17:30 -0500 Subject: [PATCH 60/61] add newline --- ci/requirements-3.7-0.24.2.conda | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda index e2faf088..2d61383e 100644 --- a/ci/requirements-3.7-0.24.2.conda +++ b/ci/requirements-3.7-0.24.2.conda @@ -14,4 +14,4 @@ pydata-google-auth pytest pytest-cov requests-oauthlib -tqdm \ No newline at end of file +tqdm From 06ee43c734fb3f4783dfc562856a2ee896dd1006 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Sat, 29 Oct 2022 22:21:56 -0500 Subject: [PATCH 61/61] formatting --- pandas_gbq/gbq.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 2ac460bc..82099998 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -373,6 +373,7 @@ def get_client(self): def process_http_error(ex): # See `BigQuery Troubleshooting Errors # `__ + if "cancelled" in ex.message: raise QueryTimeout("Reason: {0}".format(ex)) elif "Provided Schema does not match" in ex.message: @@ -586,6 +587,7 @@ def load_data( from pandas_gbq import load total_rows = len(dataframe) + try: chunks = load.load_chunks( self.client,
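
Net effect of the series: `if_exists` stays the public parameter on `to_gbq`, its validation is restored (an unknown value raises ValueError, as exercised by the new test_to_gbq_with_if_exists_unknown in patch 57), and the internal load path keeps taking a BigQuery write disposition. Below is a minimal sketch of how the two could relate, for illustration only; the helper name `if_exists_to_write_disposition` is an assumption rather than code from these patches, and the value pairs come from the test changes earlier in the series (fail/WRITE_EMPTY, replace/WRITE_TRUNCATE, append/WRITE_APPEND):

def if_exists_to_write_disposition(if_exists: str) -> str:
    # Hypothetical helper: translate the public if_exists value into the
    # BigQuery write disposition that the internal load step receives.
    dispositions = {
        "fail": "WRITE_EMPTY",        # load only if the table has no rows
        "replace": "WRITE_TRUNCATE",  # overwrite any existing rows
        "append": "WRITE_APPEND",     # add rows to the existing table
    }
    if if_exists not in dispositions:
        # Unknown values fail fast, matching the ValueError asserted by
        # test_to_gbq_with_if_exists_unknown (patch 57).
        raise ValueError(f"{if_exists!r} is not a valid value for if_exists")
    return dispositions[if_exists]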