diff --git a/bigquery/dataset.go b/bigquery/dataset.go index bd33d5d2bf6..a10158f2d11 100644 --- a/bigquery/dataset.go +++ b/bigquery/dataset.go @@ -49,6 +49,15 @@ type DatasetMetadata struct { // all newly created partitioned tables in the dataset. DefaultPartitionExpiration time.Duration + // Defines the default collation specification of future tables + // created in the dataset. If a table is created in this dataset without + // table-level default collation, then the table inherits the dataset default + // collation, which is applied to the string fields that do not have explicit + // collation specified. A change to this field affects only tables created + // afterwards, and does not alter the existing tables. + // More information: https://cloud.google.com/bigquery/docs/reference/standard-sql/collation-concepts + DefaultCollation string + // These fields are read-only. CreationTime time.Time LastModifiedTime time.Time // When the dataset or any of its tables were modified. @@ -104,6 +113,10 @@ type DatasetMetadataToUpdate struct { // in the dataset. DefaultEncryptionConfig *EncryptionConfig + // Defines the default collation specification of future tables + // created in the dataset. + DefaultCollation optional.String + // The entire access list. It is not possible to replace individual entries. 
Access []*AccessEntry
 
@@ -174,6 +187,7 @@ func (dm *DatasetMetadata) toBQ() (*bq.Dataset, error) {
 	ds.Location = dm.Location
 	ds.DefaultTableExpirationMs = int64(dm.DefaultTableExpiration / time.Millisecond)
 	ds.DefaultPartitionExpirationMs = int64(dm.DefaultPartitionExpiration / time.Millisecond)
+	ds.DefaultCollation = string(dm.DefaultCollation)
 	ds.Labels = dm.Labels
 	var err error
 	ds.Access, err = accessListToBQ(dm.Access)
@@ -259,6 +273,7 @@ func bqToDatasetMetadata(d *bq.Dataset, c *Client) (*DatasetMetadata, error) {
 		LastModifiedTime:           unixMillisToTime(d.LastModifiedTime),
 		DefaultTableExpiration:     time.Duration(d.DefaultTableExpirationMs) * time.Millisecond,
 		DefaultPartitionExpiration: time.Duration(d.DefaultPartitionExpirationMs) * time.Millisecond,
+		DefaultCollation:           d.DefaultCollation,
 		DefaultEncryptionConfig:    bqToEncryptionConfig(d.DefaultEncryptionConfiguration),
 		Description:                d.Description,
 		Name:                       d.FriendlyName,
@@ -344,6 +359,10 @@ func (dm *DatasetMetadataToUpdate) toBQ() (*bq.Dataset, error) {
 			ds.DefaultPartitionExpirationMs = int64(dur / time.Millisecond)
 		}
 	}
+	if dm.DefaultCollation != nil {
+		ds.DefaultCollation = optional.ToString(dm.DefaultCollation)
+		forceSend("DefaultCollation")
+	}
 	if dm.DefaultEncryptionConfig != nil {
 		ds.DefaultEncryptionConfiguration = dm.DefaultEncryptionConfig.toBQ()
 		ds.DefaultEncryptionConfiguration.ForceSendFields = []string{"KmsKeyName"}
diff --git a/bigquery/dataset_integration_test.go b/bigquery/dataset_integration_test.go
index 932e7d82683..12746502300 100644
--- a/bigquery/dataset_integration_test.go
+++ b/bigquery/dataset_integration_test.go
@@ -229,6 +229,49 @@ func TestIntegration_DatasetUpdateDefaultPartitionExpiration(t *testing.T) {
 	}
 }
 
+func TestIntegration_DatasetUpdateDefaultCollation(t *testing.T) {
+	if client == nil {
+		t.Skip("Integration tests skipped")
+	}
+	ctx := context.Background()
+	// Read the current metadata so the shared dataset can be restored afterwards.
+	orig, err := dataset.Metadata(ctx)
+	if err != nil {
+		t.Fatal(err)
+	}
+	// The package-level dataset is also used by other integration tests; tables
+	// created in it would inherit a leftover default collation, so put the
+	// original name and default collation back when this test finishes.
+	defer func() {
+		restore := DatasetMetadataToUpdate{
+			Name:             orig.Name,
+			DefaultCollation: orig.DefaultCollation,
+		}
+		if _, err := dataset.Update(ctx, restore, ""); err != nil {
+			t.Errorf("restoring dataset metadata: %v", err)
+		}
+	}()
+	caseInsensitiveCollation := "und:ci"
+	// Set the default collation.
+	md, err := dataset.Update(ctx, DatasetMetadataToUpdate{
+		DefaultCollation: caseInsensitiveCollation,
+	}, "")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if md.DefaultCollation != caseInsensitiveCollation {
+		t.Fatalf("got %q, want %q", md.DefaultCollation, caseInsensitiveCollation)
+	}
+	// Omitting DefaultCollation doesn't change it.
+	md, err = dataset.Update(ctx, DatasetMetadataToUpdate{Name: "xyz"}, "")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if md.DefaultCollation != caseInsensitiveCollation {
+		t.Fatalf("got %q, want %q", md.DefaultCollation, caseInsensitiveCollation)
+	}
+}
+
 func TestIntegration_DatasetUpdateAccess(t *testing.T) {
 	if client == nil {
 		t.Skip("Integration tests skipped")
diff --git a/bigquery/schema.go b/bigquery/schema.go
index c35ac17f298..d6350d17aa2 100644
--- a/bigquery/schema.go
+++ b/bigquery/schema.go
@@ -141,6 +141,13 @@ type FieldSchema struct {
 	// - Struct or array composed with the above allowed functions, for example:
 	// [CURRENT_DATE(), DATE '2020-01-01']"
 	DefaultValueExpression string
+
+	// Collation can be set only when the type of field is STRING.
+	// The following values are supported:
+	// - 'und:ci': undetermined locale, case insensitive.
+	// - '': empty string. Default to case-sensitive behavior.
+ // More information: https://cloud.google.com/bigquery/docs/reference/standard-sql/collation-concepts + Collation string } func (fs *FieldSchema) toBQ() *bq.TableFieldSchema { @@ -153,6 +160,7 @@ func (fs *FieldSchema) toBQ() *bq.TableFieldSchema { Precision: fs.Precision, Scale: fs.Scale, DefaultValueExpression: fs.DefaultValueExpression, + Collation: string(fs.Collation), } if fs.Repeated { @@ -212,6 +220,7 @@ func bqToFieldSchema(tfs *bq.TableFieldSchema) *FieldSchema { Precision: tfs.Precision, Scale: tfs.Scale, DefaultValueExpression: tfs.DefaultValueExpression, + Collation: tfs.Collation, } for _, f := range tfs.Fields { diff --git a/bigquery/schema_test.go b/bigquery/schema_test.go index d3ca8f7fa70..a0225616211 100644 --- a/bigquery/schema_test.go +++ b/bigquery/schema_test.go @@ -348,6 +348,34 @@ func TestSchemaConversion(t *testing.T) { }, }, }, + { + // collation values + bqSchema: &bq.TableSchema{ + Fields: []*bq.TableFieldSchema{ + { + Name: "name", + Type: "STRING", + Collation: "und:ci", + }, + { + Name: "another_name", + Type: "STRING", + Collation: "", + }, + }}, + schema: Schema{ + { + Name: "name", + Type: StringFieldType, + Collation: "und:ci", + }, + { + Name: "another_name", + Type: StringFieldType, + Collation: "", + }, + }, + }, { // policy tags bqSchema: &bq.TableSchema{ diff --git a/bigquery/table.go b/bigquery/table.go index 3bfad2344e7..642476d18da 100644 --- a/bigquery/table.go +++ b/bigquery/table.go @@ -136,6 +136,17 @@ type TableMetadata struct { // ETag is the ETag obtained when reading metadata. Pass it to Table.Update to // ensure that the metadata hasn't changed since it was read. ETag string + + // Defines the default collation specification of new STRING fields + // in the table. During table creation or update, if a STRING field is added + // to this table without explicit collation specified, then the field inherits + // the table default collation. 
A change to this field affects only fields + // added afterwards, and does not alter the existing fields. + // The following values are supported: + // - 'und:ci': undetermined locale, case insensitive. + // - '': empty string. Default to case-sensitive behavior. + // More information: https://cloud.google.com/bigquery/docs/reference/standard-sql/collation-concepts + DefaultCollation string } // TableCreateDisposition specifies the circumstances under which destination table will be created. @@ -663,6 +674,7 @@ func (tm *TableMetadata) toBQ() (*bq.Table, error) { if tm.ETag != "" { return nil, errors.New("cannot set ETag on create") } + t.DefaultCollation = string(tm.DefaultCollation) return t, nil } @@ -743,6 +755,7 @@ func bqToTableMetadata(t *bq.Table, c *Client) (*TableMetadata, error) { CreationTime: unixMillisToTime(t.CreationTime), LastModifiedTime: unixMillisToTime(int64(t.LastModifiedTime)), ETag: t.Etag, + DefaultCollation: t.DefaultCollation, EncryptionConfig: bqToEncryptionConfig(t.EncryptionConfiguration), RequirePartitionFilter: t.RequirePartitionFilter, SnapshotDefinition: bqToSnapshotDefinition(t.SnapshotDefinition, c), @@ -924,6 +937,10 @@ func (tm *TableMetadataToUpdate) toBQ() (*bq.Table, error) { t.View.UseLegacySql = optional.ToBool(tm.UseLegacySQL) t.View.ForceSendFields = append(t.View.ForceSendFields, "UseLegacySql") } + if tm.DefaultCollation != nil { + t.DefaultCollation = optional.ToString(tm.DefaultCollation) + forceSend("DefaultCollation") + } labels, forces, nulls := tm.update() t.Labels = labels t.ForceSendFields = append(t.ForceSendFields, forces...) @@ -997,6 +1014,10 @@ type TableMetadataToUpdate struct { // elimination when referenced in a query. RequirePartitionFilter optional.Bool + // Defines the default collation specification of new STRING fields + // in the table. 
+	DefaultCollation optional.String
+
 	labelUpdater
 }
 
diff --git a/bigquery/table_integration_test.go b/bigquery/table_integration_test.go
index 689c911cccc..d770b23ca15 100644
--- a/bigquery/table_integration_test.go
+++ b/bigquery/table_integration_test.go
@@ -592,3 +592,83 @@ func TestIntegration_TableUseLegacySQL(t *testing.T) {
 		_ = view.Delete(ctx)
 	}
 }
+
+func TestIntegration_TableDefaultCollation(t *testing.T) {
+	// Test DefaultCollation for Table.Create and Table.Update
+	if client == nil {
+		t.Skip("Integration tests skipped")
+	}
+	ctx := context.Background()
+	table := dataset.Table(tableIDs.New())
+	caseInsensitiveCollation := "und:ci"
+	caseSensitiveCollation := ""
+	addedFieldName := "another_name"
+	// checkStringFields fails the test unless every STRING field in md's schema
+	// carries the case-insensitive collation.
+	checkStringFields := func(md *TableMetadata) {
+		for _, field := range md.Schema {
+			if field.Type == StringFieldType && field.Collation != caseInsensitiveCollation {
+				t.Fatalf("expected all columns to have collation %q, but found %q on field :%v", caseInsensitiveCollation, field.Collation, field.Name)
+			}
+		}
+	}
+
+	err := table.Create(ctx, &TableMetadata{
+		Schema:           schema,
+		DefaultCollation: caseInsensitiveCollation,
+		ExpirationTime:   testTableExpiration,
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer table.Delete(ctx)
+	md, err := table.Metadata(ctx)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if md.DefaultCollation != caseInsensitiveCollation {
+		t.Fatalf("expected default collation to be %q, but found %q", caseInsensitiveCollation, md.DefaultCollation)
+	}
+	checkStringFields(md)
+
+	// Update table DefaultCollation to case-sensitive
+	md, err = table.Update(ctx, TableMetadataToUpdate{
+		DefaultCollation: caseSensitiveCollation,
+	}, "")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if md.DefaultCollation != caseSensitiveCollation {
+		t.Fatalf("expected default collation to be %q, but found %q", caseSensitiveCollation, md.DefaultCollation)
+	}
+
+	// Add a field with different case-insensitive collation
+	updatedSchema := md.Schema
+	updatedSchema = append(updatedSchema, &FieldSchema{
+		Name:      addedFieldName,
+		Type:      StringFieldType,
+		Collation: caseInsensitiveCollation,
+	})
+	md, err = table.Update(ctx, TableMetadataToUpdate{
+		Schema: updatedSchema,
+	}, "")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if md.DefaultCollation != caseSensitiveCollation {
+		t.Fatalf("expected default collation to be %q, but found %q", caseSensitiveCollation, md.DefaultCollation)
+	}
+	checkStringFields(md)
+	// checkStringFields passes vacuously if the update dropped the new column, so
+	// make sure it is actually present in the returned schema.
+	found := false
+	for _, field := range md.Schema {
+		if field.Name == addedFieldName {
+			found = true
+			break
+		}
+	}
+	if !found {
+		t.Fatalf("expected field %q in updated schema, but it is missing", addedFieldName)
+	}
+}