Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(bigquery): add dataset/table collation #7235

Merged
merged 5 commits into from
Jan 13, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
18 changes: 18 additions & 0 deletions bigquery/dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ type DatasetMetadata struct {
// all newly created partitioned tables in the dataset.
DefaultPartitionExpiration time.Duration

// Defines the default collation specification of future tables
// created in the dataset. If a table is created in this dataset without
// table-level default collation, then the table inherits the dataset default
// collation, which is applied to the string fields that do not have explicit
// collation specified. A change to this field affects only tables created
// afterwards, and does not alter the existing tables.
DefaultCollation string

// These fields are read-only.
CreationTime time.Time
LastModifiedTime time.Time // When the dataset or any of its tables were modified.
Expand Down Expand Up @@ -104,6 +112,10 @@ type DatasetMetadataToUpdate struct {
// in the dataset.
DefaultEncryptionConfig *EncryptionConfig

// Defines the default collation specification of future tables
// created in the dataset.
DefaultCollation optional.String

// The entire access list. It is not possible to replace individual entries.
Access []*AccessEntry

Expand Down Expand Up @@ -174,6 +186,7 @@ func (dm *DatasetMetadata) toBQ() (*bq.Dataset, error) {
ds.Location = dm.Location
ds.DefaultTableExpirationMs = int64(dm.DefaultTableExpiration / time.Millisecond)
ds.DefaultPartitionExpirationMs = int64(dm.DefaultPartitionExpiration / time.Millisecond)
ds.DefaultCollation = string(dm.DefaultCollation)
ds.Labels = dm.Labels
var err error
ds.Access, err = accessListToBQ(dm.Access)
Expand Down Expand Up @@ -259,6 +272,7 @@ func bqToDatasetMetadata(d *bq.Dataset, c *Client) (*DatasetMetadata, error) {
LastModifiedTime: unixMillisToTime(d.LastModifiedTime),
DefaultTableExpiration: time.Duration(d.DefaultTableExpirationMs) * time.Millisecond,
DefaultPartitionExpiration: time.Duration(d.DefaultPartitionExpirationMs) * time.Millisecond,
DefaultCollation: d.DefaultCollation,
DefaultEncryptionConfig: bqToEncryptionConfig(d.DefaultEncryptionConfiguration),
Description: d.Description,
Name: d.FriendlyName,
Expand Down Expand Up @@ -344,6 +358,10 @@ func (dm *DatasetMetadataToUpdate) toBQ() (*bq.Dataset, error) {
ds.DefaultPartitionExpirationMs = int64(dur / time.Millisecond)
}
}
if dm.DefaultCollation != nil {
ds.DefaultCollation = optional.ToString(dm.DefaultCollation)
forceSend("DefaultCollation")
}
if dm.DefaultEncryptionConfig != nil {
ds.DefaultEncryptionConfiguration = dm.DefaultEncryptionConfig.toBQ()
ds.DefaultEncryptionConfiguration.ForceSendFields = []string{"KmsKeyName"}
Expand Down
30 changes: 30 additions & 0 deletions bigquery/dataset_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,36 @@ func TestIntegration_DatasetUpdateDefaultPartitionExpiration(t *testing.T) {
}
}

// TestIntegration_DatasetUpdateDefaultCollation checks that Dataset.Update can
// set DefaultCollation, and that a subsequent update which omits the field
// leaves the previously stored value in place.
func TestIntegration_DatasetUpdateDefaultCollation(t *testing.T) {
	if client == nil {
		t.Skip("Integration tests skipped")
	}
	ctx := context.Background()
	orig, err := dataset.Metadata(ctx)
	if err != nil {
		t.Fatal(err)
	}
	// dataset is declared outside this test and is used by other tests as
	// well. Tables created in a dataset inherit its default collation, so put
	// the original value back when we are done. This is best effort: a failed
	// restore is logged rather than failing an otherwise passing test.
	defer func() {
		_, restoreErr := dataset.Update(ctx, DatasetMetadataToUpdate{
			DefaultCollation: orig.DefaultCollation,
		}, "")
		if restoreErr != nil {
			t.Logf("restoring dataset default collation: %v", restoreErr)
		}
	}()

	caseInsensitiveCollation := "und:ci"
	// Set the default collation.
	md, err := dataset.Update(ctx, DatasetMetadataToUpdate{
		DefaultCollation: caseInsensitiveCollation,
	}, "")
	if err != nil {
		t.Fatal(err)
	}
	if md.DefaultCollation != caseInsensitiveCollation {
		t.Fatalf("got `%v`, want und:ci", md.DefaultCollation)
	}
	// Omitting DefaultCollation doesn't change it.
	md, err = dataset.Update(ctx, DatasetMetadataToUpdate{Name: "xyz"}, "")
	if err != nil {
		t.Fatal(err)
	}
	if md.DefaultCollation != caseInsensitiveCollation {
		t.Fatalf("got `%v`, want und:ci", md.DefaultCollation)
	}
}

func TestIntegration_DatasetUpdateAccess(t *testing.T) {
if client == nil {
t.Skip("Integration tests skipped")
Expand Down
8 changes: 8 additions & 0 deletions bigquery/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,12 @@ type FieldSchema struct {
// - Struct or array composed with the above allowed functions, for example:
// [CURRENT_DATE(), DATE '2020-01-01']"
DefaultValueExpression string

// Collation can be set only when the type of field is STRING.
// The following values are supported:
// - 'und:ci': undetermined locale, case insensitive.
// - '': empty string. Default to case-sensitive behavior.
Collation string
}

func (fs *FieldSchema) toBQ() *bq.TableFieldSchema {
Expand All @@ -153,6 +159,7 @@ func (fs *FieldSchema) toBQ() *bq.TableFieldSchema {
Precision: fs.Precision,
Scale: fs.Scale,
DefaultValueExpression: fs.DefaultValueExpression,
Collation: string(fs.Collation),
}

if fs.Repeated {
Expand Down Expand Up @@ -212,6 +219,7 @@ func bqToFieldSchema(tfs *bq.TableFieldSchema) *FieldSchema {
Precision: tfs.Precision,
Scale: tfs.Scale,
DefaultValueExpression: tfs.DefaultValueExpression,
Collation: tfs.Collation,
}

for _, f := range tfs.Fields {
Expand Down
28 changes: 28 additions & 0 deletions bigquery/schema_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,34 @@ func TestSchemaConversion(t *testing.T) {
},
},
},
{
// collation values
bqSchema: &bq.TableSchema{
Fields: []*bq.TableFieldSchema{
{
Name: "name",
Type: "STRING",
Collation: "und:ci",
},
{
Name: "another_name",
Type: "STRING",
Collation: "",
},
}},
schema: Schema{
{
Name: "name",
Type: StringFieldType,
Collation: "und:ci",
},
{
Name: "another_name",
Type: StringFieldType,
Collation: "",
},
},
},
{
// policy tags
bqSchema: &bq.TableSchema{
Expand Down
20 changes: 20 additions & 0 deletions bigquery/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,16 @@ type TableMetadata struct {
// ETag is the ETag obtained when reading metadata. Pass it to Table.Update to
// ensure that the metadata hasn't changed since it was read.
ETag string

// Defines the default collation specification of new STRING fields
// in the table. During table creation or update, if a STRING field is added
// to this table without explicit collation specified, then the field inherits
// the table default collation. A change to this field affects only fields
// added afterwards, and does not alter the existing fields.
// The following values are supported:
// - 'und:ci': undetermined locale, case insensitive.
// - '': empty string. Default to case-sensitive behavior.
DefaultCollation string
}

// TableCreateDisposition specifies the circumstances under which destination table will be created.
Expand Down Expand Up @@ -663,6 +673,7 @@ func (tm *TableMetadata) toBQ() (*bq.Table, error) {
if tm.ETag != "" {
return nil, errors.New("cannot set ETag on create")
}
t.DefaultCollation = string(tm.DefaultCollation)
return t, nil
}

Expand Down Expand Up @@ -743,6 +754,7 @@ func bqToTableMetadata(t *bq.Table, c *Client) (*TableMetadata, error) {
CreationTime: unixMillisToTime(t.CreationTime),
LastModifiedTime: unixMillisToTime(int64(t.LastModifiedTime)),
ETag: t.Etag,
DefaultCollation: t.DefaultCollation,
EncryptionConfig: bqToEncryptionConfig(t.EncryptionConfiguration),
RequirePartitionFilter: t.RequirePartitionFilter,
SnapshotDefinition: bqToSnapshotDefinition(t.SnapshotDefinition, c),
Expand Down Expand Up @@ -924,6 +936,10 @@ func (tm *TableMetadataToUpdate) toBQ() (*bq.Table, error) {
t.View.UseLegacySql = optional.ToBool(tm.UseLegacySQL)
t.View.ForceSendFields = append(t.View.ForceSendFields, "UseLegacySql")
}
if tm.DefaultCollation != nil {
t.DefaultCollation = optional.ToString(tm.DefaultCollation)
forceSend("DefaultCollation")
}
labels, forces, nulls := tm.update()
t.Labels = labels
t.ForceSendFields = append(t.ForceSendFields, forces...)
Expand Down Expand Up @@ -997,6 +1013,10 @@ type TableMetadataToUpdate struct {
// elimination when referenced in a query.
RequirePartitionFilter optional.Bool

// Defines the default collation specification of new STRING fields
// in the table.
DefaultCollation optional.String

labelUpdater
}

Expand Down
69 changes: 69 additions & 0 deletions bigquery/table_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -592,3 +592,72 @@ func TestIntegration_TableUseLegacySQL(t *testing.T) {
_ = view.Delete(ctx)
}
}

func TestIntegration_TableDefaultCollation(t *testing.T) {
// Test DefaultCollation for Table.Create and Table.Update
if client == nil {
t.Skip("Integration tests skipped")
}
ctx := context.Background()
table := dataset.Table(tableIDs.New())
caseInsensitiveCollation := "und:ci"
caseSensitiveCollation := ""
err := table.Create(context.Background(), &TableMetadata{
Schema: schema,
DefaultCollation: caseInsensitiveCollation,
ExpirationTime: testTableExpiration,
})
if err != nil {
t.Fatal(err)
}
defer table.Delete(ctx)
md, err := table.Metadata(ctx)
if err != nil {
t.Fatal(err)
}
if md.DefaultCollation != caseInsensitiveCollation {
t.Fatalf("expected default collation to be `%v`, but found `%v`", caseInsensitiveCollation, md.DefaultCollation)
}
for _, field := range md.Schema {
if field.Type == StringFieldType {
if field.Collation != caseInsensitiveCollation {
t.Fatalf("expected all columns to have collation `%v`, but found `%v` on field :%v", caseInsensitiveCollation, field.Collation, field.Name)
alvarowolfx marked this conversation as resolved.
Show resolved Hide resolved
}
}
}

// Update table DefaultCollation to case-sensitive
md, err = table.Update(ctx, TableMetadataToUpdate{
DefaultCollation: caseSensitiveCollation,
}, "")
if err != nil {
t.Fatal(err)
}
if md.DefaultCollation != caseSensitiveCollation {
t.Fatalf("expected default collation to be %q, but found %q", caseSensitiveCollation, md.DefaultCollation)
}

// Add a field with different case-insensitive collation
updatedSchema := md.Schema
updatedSchema = append(updatedSchema, &FieldSchema{
Name: "another_name",
Type: StringFieldType,
Collation: caseInsensitiveCollation,
})
md, err = table.Update(ctx, TableMetadataToUpdate{
Schema: updatedSchema,
}, "")
if err != nil {
t.Fatal(err)
}
if md.DefaultCollation != caseSensitiveCollation {
t.Fatalf("expected default collation to be %q, but found %q", caseSensitiveCollation, md.DefaultCollation)
}
for _, field := range md.Schema {
if field.Type == StringFieldType {
if field.Collation != caseInsensitiveCollation {
t.Fatalf("expected all columns to have collation %q, but found %q on field :%v", caseInsensitiveCollation, field.Collation, field.Name)
}
}
}
}