Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add support for FileSetSpec #2888

Merged
merged 2 commits into from Sep 25, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -79,8 +79,20 @@ public Builder setSourceUris(List<String> sourceUris) {
return setSourceUrisImmut(ImmutableList.copyOf(sourceUris));
}

/**
* Underlying setter for the file set spec type; the public {@code setFileSetSpecType(String)}
* forwards its argument here unchanged.
*/
abstract Builder setFileSetSpecTypeInner(String spec);

/**
* Underlying setter for the source URIs; the public {@code setSourceUris(List)} calls this with
* an {@code ImmutableList} copy of the list it was given.
*/
abstract Builder setSourceUrisImmut(ImmutableList<String> sourceUris);

/**
* Defines how to interpret files denoted by URIs. By default the files are assumed to be data
* files (this can be specified explicitly via {@code FILE_SET_SPEC_TYPE_FILE_SYSTEM_MATCH}). A
* second option is {@code FILE_SET_SPEC_TYPE_NEW_LINE_DELIMITED_MANIFEST}, which interprets each
* file as a manifest file, where each line is a reference to a file.
*
* <p>The value is forwarded as given to {@code setFileSetSpecTypeInner}; this method performs no
* validation of its own.
*
* @param fileSetSpecType the file set specification type to use
* @return the builder returned by the underlying setter, for call chaining
*/
public Builder setFileSetSpecType(String fileSetSpecType) {
return setFileSetSpecTypeInner(fileSetSpecType);
}

/**
* Sets the source format, and possibly some parsing options, of the external data. Supported
* formats are {@code CSV} and {@code NEWLINE_DELIMITED_JSON}.
Expand Down Expand Up @@ -232,6 +244,14 @@ public List<String> getSourceUris() {
return getSourceUrisImmut();
}

/**
* Returns the file set specification type of this external table definition, which defines how
* files denoted by the source URIs are interpreted.
*
* @return the file set specification type; may be {@code null} (callers in this class check for
*     {@code null} before copying the value into the API model)
*/
@Nullable
public String getFileSetSpecType() {
return getFileSetSpecTypeInner();
}

/**
* Underlying accessor for the file set spec type; the public {@code getFileSetSpecType()} returns
* this value directly. May be {@code null}.
*/
@Nullable
abstract String getFileSetSpecTypeInner();

/**
* Underlying accessor for the source URIs as an {@code ImmutableList}; the public {@code
* getSourceUris()} returns this value directly. May be {@code null}.
*/
@Nullable
public abstract ImmutableList<String> getSourceUrisImmut();

Expand Down Expand Up @@ -338,6 +358,10 @@ com.google.api.services.bigquery.model.ExternalDataConfiguration toExternalDataC
if (getHivePartitioningOptions() != null) {
externalConfigurationPb.setHivePartitioningOptions(getHivePartitioningOptions().toPb());
}
if (getFileSetSpecType() != null) {
externalConfigurationPb.setFileSetSpecType(getFileSetSpecType());
}

return externalConfigurationPb;
}

Expand Down Expand Up @@ -507,6 +531,9 @@ static ExternalTableDefinition fromPb(Table tablePb) {
if (externalDataConfiguration.getReferenceFileSchemaUri() != null) {
builder.setReferenceFileSchemaUri(externalDataConfiguration.getReferenceFileSchemaUri());
}
if (externalDataConfiguration.getFileSetSpecType() != null) {
builder.setFileSetSpecType(externalDataConfiguration.getFileSetSpecType());
}
}
return builder.build();
}
Expand Down Expand Up @@ -566,6 +593,9 @@ static ExternalTableDefinition fromExternalDataConfiguration(
builder.setHivePartitioningOptions(
HivePartitioningOptions.fromPb(externalDataConfiguration.getHivePartitioningOptions()));
}
if (externalDataConfiguration.getFileSetSpecType() != null) {
builder.setFileSetSpecType(externalDataConfiguration.getFileSetSpecType());
}

return builder.build();
}
Expand Down
Expand Up @@ -38,6 +38,7 @@ public final class LoadJobConfiguration extends JobConfiguration implements Load
private static final long serialVersionUID = -2673554846792429829L;

private final List<String> sourceUris;
private final String fileSetSpecType;
private final TableId destinationTable;
private final List<String> decimalTargetTypes;
private final EncryptionConfiguration destinationEncryptionConfiguration;
Expand Down Expand Up @@ -67,6 +68,7 @@ public static final class Builder extends JobConfiguration.Builder<LoadJobConfig
implements LoadConfiguration.Builder {

private List<String> sourceUris;
private String fileSetSpecType;
private TableId destinationTable;
private List<String> decimalTargetTypes;
private EncryptionConfiguration destinationEncryptionConfiguration;
Expand Down Expand Up @@ -107,6 +109,7 @@ private Builder(LoadJobConfiguration loadConfiguration) {
this.schema = loadConfiguration.schema;
this.ignoreUnknownValues = loadConfiguration.ignoreUnknownValues;
this.sourceUris = loadConfiguration.sourceUris;
this.fileSetSpecType = loadConfiguration.fileSetSpecType;
this.schemaUpdateOptions = loadConfiguration.schemaUpdateOptions;
this.autodetect = loadConfiguration.autodetect;
this.destinationEncryptionConfiguration =
Expand Down Expand Up @@ -175,6 +178,9 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur
if (loadConfigurationPb.getSourceUris() != null) {
this.sourceUris = ImmutableList.copyOf(configurationPb.getLoad().getSourceUris());
}
if (loadConfigurationPb.getFileSetSpecType() != null) {
this.fileSetSpecType = loadConfigurationPb.getFileSetSpecType();
}
if (loadConfigurationPb.getSchemaUpdateOptions() != null) {
ImmutableList.Builder<JobInfo.SchemaUpdateOption> schemaUpdateOptionsBuilder =
new ImmutableList.Builder<>();
Expand Down Expand Up @@ -306,6 +312,17 @@ public Builder setSourceUris(List<String> sourceUris) {
return this;
}

/**
* Defines how to interpret files denoted by URIs. By default the files are assumed to be data
* files (this can be specified explicitly via {@code FILE_SET_SPEC_TYPE_FILE_SYSTEM_MATCH}). A
* second option is {@code FILE_SET_SPEC_TYPE_NEW_LINE_DELIMITED_MANIFEST}, which interprets each
* file as a manifest file, where each line is a reference to a file.
*
* <p>The value is stored as given; this method performs no validation of its own.
*
* @param fileSetSpecType the file set specification type to use
* @return this builder, for call chaining
*/
public Builder setFileSetSpecType(String fileSetSpecType) {
this.fileSetSpecType = fileSetSpecType;
return this;
}

/**
* Defines the list of possible SQL data types to which the source decimal values are converted.
* This list and the precision and the scale parameters of the decimal field determine the
Expand Down Expand Up @@ -403,6 +420,7 @@ public LoadJobConfiguration build() {
private LoadJobConfiguration(Builder builder) {
super(builder);
this.sourceUris = builder.sourceUris;
this.fileSetSpecType = builder.fileSetSpecType;
this.destinationTable = builder.destinationTable;
this.decimalTargetTypes = builder.decimalTargetTypes;
this.createDisposition = builder.createDisposition;
Expand Down Expand Up @@ -497,6 +515,10 @@ public List<String> getSourceUris() {
return sourceUris;
}

/**
* Returns the file set specification type of this load configuration, which defines how files
* denoted by the source URIs are interpreted.
*
* @return the stored file set specification type; may be {@code null} ({@code toPb()} only
*     copies it into the API model when it is non-null)
*/
public String getFileSetSpecType() {
return fileSetSpecType;
}

/**
* Returns the list of possible SQL data types to which the source decimal values are converted.
*
* @return the stored list, returned as-is without copying; may be {@code null} ({@code toPb()}
*     only copies it into the API model when it is non-null)
*/
public List<String> getDecimalTargetTypes() {
return decimalTargetTypes;
}
Expand Down Expand Up @@ -575,6 +597,7 @@ ToStringHelper toStringHelper() {
.add("schema", schema)
.add("ignoreUnknownValue", ignoreUnknownValues)
.add("sourceUris", sourceUris)
.add("fileSetSpecType", fileSetSpecType)
.add("schemaUpdateOptions", schemaUpdateOptions)
.add("autodetect", autodetect)
.add("timePartitioning", timePartitioning)
Expand Down Expand Up @@ -655,6 +678,9 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
if (sourceUris != null) {
loadConfigurationPb.setSourceUris(ImmutableList.copyOf(sourceUris));
}
if (fileSetSpecType != null) {
loadConfigurationPb.setFileSetSpecType(fileSetSpecType);
}
if (decimalTargetTypes != null) {
loadConfigurationPb.setDecimalTargetTypes(ImmutableList.copyOf(decimalTargetTypes));
}
Expand Down
Expand Up @@ -60,6 +60,7 @@ public class ExternalTableDefinitionTest {
.build();
private static final ExternalTableDefinition EXTERNAL_TABLE_DEFINITION =
ExternalTableDefinition.newBuilder(SOURCE_URIS, TABLE_SCHEMA, CSV_OPTIONS)
.setFileSetSpecType("FILE_SET_SPEC_TYPE_FILE_SYSTEM_MATCH")
.setDecimalTargetTypes(DECIMAL_TARGET_TYPES)
.setCompression(COMPRESSION)
.setConnectionId(CONNECTION_ID)
Expand Down Expand Up @@ -154,6 +155,7 @@ public void testToAndFromPbParquet() {
private void compareExternalTableDefinition(
ExternalTableDefinition expected, ExternalTableDefinition value) {
assertEquals(expected, value);
assertEquals(expected.getFileSetSpecType(), value.getFileSetSpecType());
assertEquals(expected.getDecimalTargetTypes(), value.getDecimalTargetTypes());
assertEquals(expected.getCompression(), value.getCompression());
assertEquals(expected.getConnectionId(), value.getConnectionId());
Expand Down
Expand Up @@ -91,6 +91,7 @@ public class LoadJobConfigurationTest {
.setCreateDisposition(CREATE_DISPOSITION)
.setWriteDisposition(WRITE_DISPOSITION)
.setFormatOptions(CSV_OPTIONS)
.setFileSetSpecType("FILE_SET_SPEC_TYPE_FILE_SYSTEM_MATCH")
.setIgnoreUnknownValues(IGNORE_UNKNOWN_VALUES)
.setMaxBadRecords(MAX_BAD_RECORDS)
.setSchema(TABLE_SCHEMA)
Expand Down Expand Up @@ -240,6 +241,7 @@ private void compareLoadJobConfiguration(
LoadJobConfiguration expected, LoadJobConfiguration value) {
assertEquals(expected, value);
assertEquals(expected.hashCode(), value.hashCode());
assertEquals(expected.getFileSetSpecType(), value.getFileSetSpecType());
assertEquals(expected.toString(), value.toString());
assertEquals(expected.getDestinationTable(), value.getDestinationTable());
assertEquals(expected.getDecimalTargetTypes(), value.getDecimalTargetTypes());
Expand Down