20
20
if typing .TYPE_CHECKING : # pragma: NO COVER
21
21
import pandas
22
22
23
- from pandas_gbq .exceptions import (
24
- AccessDenied ,
25
- GenericGBQException ,
26
- )
23
+ from pandas_gbq .exceptions import AccessDenied , GenericGBQException
27
24
from pandas_gbq .features import FEATURES
28
25
import pandas_gbq .schema
29
26
import pandas_gbq .timestamp
@@ -116,20 +113,12 @@ class InvalidSchema(ValueError):
116
113
table in BigQuery.
117
114
"""
118
115
119
- def __init__ (
120
- self , message : str , local_schema : Dict [str , Any ], remote_schema : Dict [str , Any ]
121
- ):
122
- super ().__init__ (message )
123
- self ._local_schema = local_schema
124
- self ._remote_schema = remote_schema
125
-
126
- @property
127
- def local_schema (self ) -> Dict [str , Any ]:
128
- return self ._local_schema
116
+ def __init__ (self , message : str ):
117
+ self ._message = message
129
118
130
119
@property
131
- def remote_schema (self ) -> Dict [ str , Any ] :
132
- return self ._remote_schema
120
+ def message (self ) -> str :
121
+ return self ._message
133
122
134
123
135
124
class NotFoundException (ValueError ):
@@ -155,7 +144,12 @@ class TableCreationError(ValueError):
155
144
Raised when the create table method fails
156
145
"""
157
146
158
- pass
147
+ def __init__ (self , message : str ):
148
+ self ._message = message
149
+
150
+ @property
151
+ def message (self ) -> str :
152
+ return self ._message
159
153
160
154
161
155
class Context (object ):
@@ -382,8 +376,14 @@ def process_http_error(ex):
382
376
383
377
if "cancelled" in ex .message :
384
378
raise QueryTimeout ("Reason: {0}" .format (ex ))
385
-
386
- raise GenericGBQException ("Reason: {0}" .format (ex ))
379
+ elif "Provided Schema does not match" in ex .message :
380
+ error_message = ex .errors [0 ]["message" ]
381
+ raise InvalidSchema (f"Reason: { error_message } " )
382
+ elif "Already Exists: Table" in ex .message :
383
+ error_message = ex .errors [0 ]["message" ]
384
+ raise TableCreationError (f"Reason: { error_message } " )
385
+ else :
386
+ raise GenericGBQException ("Reason: {0}" .format (ex ))
387
387
388
388
def download_table (
389
389
self ,
@@ -577,6 +577,7 @@ def load_data(
577
577
self ,
578
578
dataframe ,
579
579
destination_table_ref ,
580
+ write_disposition ,
580
581
chunksize = None ,
581
582
schema = None ,
582
583
progress_bar = True ,
@@ -596,6 +597,7 @@ def load_data(
596
597
schema = schema ,
597
598
location = self .location ,
598
599
api_method = api_method ,
600
+ write_disposition = write_disposition ,
599
601
billing_project = billing_project ,
600
602
)
601
603
if progress_bar and tqdm :
@@ -609,11 +611,6 @@ def load_data(
609
611
except self .http_error as ex :
610
612
self .process_http_error (ex )
611
613
612
- def delete_and_recreate_table (self , project_id , dataset_id , table_id , table_schema ):
613
- table = _Table (project_id , dataset_id , credentials = self .credentials )
614
- table .delete (table_id )
615
- table .create (table_id , table_schema )
616
-
617
614
618
615
def _bqschema_to_nullsafe_dtypes (schema_fields ):
619
616
"""Specify explicit dtypes based on BigQuery schema.
@@ -975,11 +972,9 @@ def to_gbq(
975
972
):
976
973
"""Write a DataFrame to a Google BigQuery table.
977
974
978
- The main method a user calls to export pandas DataFrame contents to
979
- Google BigQuery table.
975
+ The main method a user calls to export pandas DataFrame contents to a Google BigQuery table.
980
976
981
- This method uses the Google Cloud client library to make requests to
982
- Google BigQuery, documented `here
977
+ This method uses the Google Cloud client library to make requests to Google BigQuery, documented `here
983
978
<https://googleapis.dev/python/bigquery/latest/index.html>`__.
984
979
985
980
See the :ref:`How to authenticate with Google BigQuery <authentication>`
@@ -1114,15 +1109,21 @@ def to_gbq(
1114
1109
stacklevel = 2 ,
1115
1110
)
1116
1111
1117
- if if_exists not in ("fail" , "replace" , "append" ):
1118
- raise ValueError ("'{0}' is not valid for if_exists" .format (if_exists ))
1119
-
1120
1112
if "." not in destination_table :
1121
1113
raise NotFoundException (
1122
1114
"Invalid Table Name. Should be of the form 'datasetId.tableId' or "
1123
1115
"'projectId.datasetId.tableId'"
1124
1116
)
1125
1117
1118
+ if if_exists not in ("fail" , "replace" , "append" ):
1119
+ raise ValueError ("'{0}' is not valid for if_exists" .format (if_exists ))
1120
+
1121
+ if_exists_list = ["fail" , "replace" , "append" ]
1122
+ dispositions = ["WRITE_EMPTY" , "WRITE_TRUNCATE" , "WRITE_APPEND" ]
1123
+ dispositions_dict = dict (zip (if_exists_list , dispositions ))
1124
+
1125
+ write_disposition = dispositions_dict [if_exists ]
1126
+
1126
1127
connector = GbqConnector (
1127
1128
project_id ,
1128
1129
reauth = reauth ,
@@ -1142,17 +1143,20 @@ def to_gbq(
1142
1143
table_id = destination_table_ref .table_id
1143
1144
1144
1145
default_schema = _generate_bq_schema (dataframe )
1146
+ # If table_schema isn't provided, we'll create one for you
1145
1147
if not table_schema :
1146
1148
table_schema = default_schema
1149
+ # If table_schema is provided, we'll update the default_schema to the provided table_schema
1147
1150
else :
1148
1151
table_schema = pandas_gbq .schema .update_schema (
1149
1152
default_schema , dict (fields = table_schema )
1150
1153
)
1151
1154
1152
- # If table exists, check if_exists parameter
1153
1155
try :
1156
+ # Try to get the table
1154
1157
table = bqclient .get_table (destination_table_ref )
1155
1158
except google_exceptions .NotFound :
1159
+ # If the table doesn't already exist, create it
1156
1160
table_connector = _Table (
1157
1161
project_id_table ,
1158
1162
dataset_id ,
@@ -1161,34 +1165,12 @@ def to_gbq(
1161
1165
)
1162
1166
table_connector .create (table_id , table_schema )
1163
1167
else :
1168
+ # Convert original schema (the schema that already exists) to pandas-gbq API format
1164
1169
original_schema = pandas_gbq .schema .to_pandas_gbq (table .schema )
1165
1170
1166
- if if_exists == "fail" :
1167
- raise TableCreationError (
1168
- "Could not create the table because it "
1169
- "already exists. "
1170
- "Change the if_exists parameter to "
1171
- "'append' or 'replace' data."
1172
- )
1173
- elif if_exists == "replace" :
1174
- connector .delete_and_recreate_table (
1175
- project_id_table , dataset_id , table_id , table_schema
1176
- )
1177
- else :
1178
- if not pandas_gbq .schema .schema_is_subset (original_schema , table_schema ):
1179
- raise InvalidSchema (
1180
- "Please verify that the structure and "
1181
- "data types in the DataFrame match the "
1182
- "schema of the destination table." ,
1183
- table_schema ,
1184
- original_schema ,
1185
- )
1186
-
1187
- # Update the local `table_schema` so mode (NULLABLE/REQUIRED)
1188
- # matches. See: https://github.com/pydata/pandas-gbq/issues/315
1189
- table_schema = pandas_gbq .schema .update_schema (
1190
- table_schema , original_schema
1191
- )
1171
+ # Update the local `table_schema` so mode (NULLABLE/REQUIRED)
1172
+ # matches. See: https://github.com/pydata/pandas-gbq/issues/315
1173
+ table_schema = pandas_gbq .schema .update_schema (table_schema , original_schema )
1192
1174
1193
1175
if dataframe .empty :
1194
1176
# Create the table (if needed), but don't try to run a load job with an
@@ -1198,6 +1180,7 @@ def to_gbq(
1198
1180
connector .load_data (
1199
1181
dataframe ,
1200
1182
destination_table_ref ,
1183
+ write_disposition = write_disposition ,
1201
1184
chunksize = chunksize ,
1202
1185
schema = table_schema ,
1203
1186
progress_bar = progress_bar ,
0 commit comments