Skip to content

Commit

Permalink
fix(bigquery): dependency detection on proto conversion (#8566)
Browse files Browse the repository at this point in the history
  • Loading branch information
alvarowolfx committed Sep 19, 2023
1 parent 8e53787 commit 763ab5d
Show file tree
Hide file tree
Showing 2 changed files with 192 additions and 2 deletions.
25 changes: 23 additions & 2 deletions bigquery/storage/managedwriter/adapt/protoconversion.go
Expand Up @@ -185,8 +185,8 @@ func storageSchemaToDescriptorInternal(inSchema *storagepb.TableSchema, scope st
if foundDesc != nil {
// check to see if we already have this in current dependency list
haveDep := false
for _, curDep := range deps {
if foundDesc.ParentFile().FullName() == curDep.FullName() {
for _, dep := range deps {
if messageDependsOnFile(foundDesc, dep) {
haveDep = true
break
}
Expand Down Expand Up @@ -279,6 +279,27 @@ func storageSchemaToDescriptorInternal(inSchema *storagepb.TableSchema, scope st
return found.(protoreflect.MessageDescriptor), nil
}

// messageDependsOnFile checks if the given message descriptor already belongs to the file descriptor.
// To check for that, first we check if the message descriptor parent file is the same as the file descriptor.
// If not, check if the message descriptor belongs is contained as a child of the file descriptor.
func messageDependsOnFile(msg protoreflect.MessageDescriptor, file protoreflect.FileDescriptor) bool {
parentFile := msg.ParentFile()
parentFileName := parentFile.FullName()
if parentFileName != "" {
if parentFileName == file.FullName() {
return true
}
}
fileMessages := file.Messages()
for i := 0; i < fileMessages.Len(); i++ {
childMsg := fileMessages.Get(i)
if msg.FullName() == childMsg.FullName() {
return true
}
}
return false
}

// tableFieldSchemaToFieldDescriptorProto builds individual field descriptors for a proto message.
//
// For proto3, in cases where the mode is nullable we use the well known wrapper types.
Expand Down
169 changes: 169 additions & 0 deletions bigquery/storage/managedwriter/adapt/protoconversion_test.go
Expand Up @@ -360,6 +360,175 @@ func TestSchemaToProtoConversion(t *testing.T) {
},
},
},
{
description: "nested with reused submessage in different levels",
bq: &storagepb.TableSchema{
Fields: []*storagepb.TableFieldSchema{
{
Name: "reused_inner_struct",
Type: storagepb.TableFieldSchema_STRUCT,
Mode: storagepb.TableFieldSchema_REQUIRED,
Fields: []*storagepb.TableFieldSchema{
{
Name: "leaf",
Type: storagepb.TableFieldSchema_STRING,
Mode: storagepb.TableFieldSchema_REQUIRED,
},
},
},
{
Name: "outer_struct",
Type: storagepb.TableFieldSchema_STRUCT,
Mode: storagepb.TableFieldSchema_REQUIRED,
Fields: []*storagepb.TableFieldSchema{
{
Name: "another_inner_struct",
Type: storagepb.TableFieldSchema_STRUCT,
Mode: storagepb.TableFieldSchema_REQUIRED,
Fields: []*storagepb.TableFieldSchema{
{
Name: "another_leaf",
Type: storagepb.TableFieldSchema_STRING,
Mode: storagepb.TableFieldSchema_REQUIRED,
},
},
},
{
Name: "reused_inner_struct_one",
Type: storagepb.TableFieldSchema_STRUCT,
Mode: storagepb.TableFieldSchema_REQUIRED,
Fields: []*storagepb.TableFieldSchema{
{
Name: "leaf",
Type: storagepb.TableFieldSchema_STRING,
Mode: storagepb.TableFieldSchema_REQUIRED,
},
},
},
{
Name: "reused_inner_struct_two",
Type: storagepb.TableFieldSchema_STRUCT,
Mode: storagepb.TableFieldSchema_REQUIRED,
Fields: []*storagepb.TableFieldSchema{
{
Name: "leaf",
Type: storagepb.TableFieldSchema_STRING,
Mode: storagepb.TableFieldSchema_REQUIRED,
},
},
},
},
},
},
},
wantProto2: &descriptorpb.DescriptorProto{
Name: proto.String("root"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("reused_inner_struct"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
TypeName: proto.String(".root__reused_inner_struct"),
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
},
{
Name: proto.String("outer_struct"),
Number: proto.Int32(2),
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
TypeName: proto.String(".root__outer_struct"),
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
},
},
},
wantProto2Normalized: &descriptorpb.DescriptorProto{
Name: proto.String("root"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("reused_inner_struct"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
TypeName: proto.String("root__reused_inner_struct"),
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
},
{
Name: proto.String("outer_struct"),
Number: proto.Int32(2),
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
TypeName: proto.String("root__outer_struct"),
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
},
},
NestedType: []*descriptorpb.DescriptorProto{
{
Name: proto.String("root__reused_inner_struct"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("leaf"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
},
},
},
{
Name: proto.String("root__outer_struct__another_inner_struct"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("another_leaf"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
},
},
},
{
Name: proto.String("root__outer_struct"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("another_inner_struct"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
TypeName: proto.String("root__outer_struct__another_inner_struct"),
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
},
{
Name: proto.String("reused_inner_struct_one"),
Number: proto.Int32(2),
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
TypeName: proto.String("root__reused_inner_struct"),
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
},
{
Name: proto.String("reused_inner_struct_two"),
Number: proto.Int32(3),
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
TypeName: proto.String("root__reused_inner_struct"),
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
},
},
},
},
},
wantProto3: &descriptorpb.DescriptorProto{
Name: proto.String("root"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("reused_inner_struct"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
TypeName: proto.String(".root__reused_inner_struct"),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
},
{
Name: proto.String("outer_struct"),
Number: proto.Int32(2),
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
TypeName: proto.String(".root__outer_struct"),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
},
},
},
},
{
description: "multiple nesting levels",
bq: &storagepb.TableSchema{
Expand Down

0 comments on commit 763ab5d

Please sign in to comment.