Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modified parameters for BigtableToParquet #1490

Merged
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
b31bd4b
Modified parameters for BigQueryToParquet
Supriya-Koppa Apr 30, 2024
997dfd2
Modified parameters for BigQueryToParquet
Supriya-Koppa Apr 30, 2024
ae391ca
Modified parameters for BigtableToParquet
Supriya-Koppa Apr 30, 2024
0c932f8
Modified parameters for SpannerVectorEmbeddingExport and TextIOToBigQ…
Supriya-Koppa May 1, 2024
ec32669
Modified parameters for SpannerVectorEmbeddingExport and TextIOToBigQ…
Supriya-Koppa May 1, 2024
bd943b2
Modified parameters for DataStream to BigQuery template
Supriya-Koppa May 1, 2024
19e36d6
Modified parameters for MQTT to Pub/Sub template
Supriya-Koppa May 3, 2024
a7d68c1
Modified parameters for Pub/Sub to Splunk template
Supriya-Koppa May 6, 2024
cde388b
Modified parameters for Pub/Sub to Datadog template
Supriya-Koppa May 6, 2024
1feef9e
Modified parameters for MongoDB to BigQuery template
Supriya-Koppa May 6, 2024
5de7fde
Modified parameters for Datastore to Cloud Storage template
Supriya-Koppa May 6, 2024
a75b30e
Modified parameters for MySQL CDC to BigQuery template
Supriya-Koppa May 6, 2024
1342c6c
Modified parameters for Bigtable to Parquet template (revised)
Supriya-Koppa May 6, 2024
948545e
Modified parameters for BigQuery to Parquet template (revised)
Supriya-Koppa May 6, 2024
704eb35
Merge branch 'main' into koppas-dataflowTemplates
sharan-malyala May 6, 2024
a1d02b5
Apply suggestions from code review
Supriya-Koppa May 7, 2024
acf5569
maven spotless
Supriya-Koppa May 7, 2024
b48de7a
Removing optional from helptext
gauravjain6633 May 8, 2024
ffd8d07
Update v1/src/main/java/com/google/cloud/teleport/templates/common/Da…
Supriya-Koppa May 8, 2024
a0744c0
Apply suggestions from code review
Supriya-Koppa May 8, 2024
6422bf7
Update v1/src/main/java/com/google/cloud/teleport/templates/common/Da…
Supriya-Koppa May 8, 2024
cf990d3
Modified parameters for Cloud Storage to BigQuery - re-done
Supriya-Koppa May 9, 2024
3276300
Modified parameters for Pub/Sub to Splunk - re-done
Supriya-Koppa May 9, 2024
f56eac6
spotless
Supriya-Koppa May 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public interface Options extends PipelineOptions {
order = 1,
description = "Project ID",
helpText =
"The ID of the Google Cloud project of the Cloud Bigtable instance that you want to read data from")
"The ID of the Google Cloud project of the Bigtable instance that you want to read data from.")
ValueProvider<String> getBigtableProjectId();

@SuppressWarnings("unused")
Expand All @@ -87,7 +87,7 @@ public interface Options extends PipelineOptions {
order = 2,
regexes = {"[a-z][a-z0-9\\-]+[a-z0-9]"},
description = "Instance ID",
helpText = "The ID of the Cloud Bigtable instance that contains the table")
helpText = "The ID of the Bigtable instance that contains the table")
Supriya-Koppa marked this conversation as resolved.
Show resolved Hide resolved
ValueProvider<String> getBigtableInstanceId();

@SuppressWarnings("unused")
Expand All @@ -97,7 +97,7 @@ public interface Options extends PipelineOptions {
order = 3,
regexes = {"[_a-zA-Z0-9][-_.a-zA-Z0-9]*"},
description = "Table ID",
helpText = "The ID of the Cloud Bigtable table to export")
helpText = "The ID of the Bigtable table to export.")
ValueProvider<String> getBigtableTableId();

@SuppressWarnings("unused")
Expand All @@ -107,8 +107,7 @@ public interface Options extends PipelineOptions {
order = 4,
description = "Output file directory in Cloud Storage",
helpText =
"The path and filename prefix for writing output files. Must end with a slash. DateTime formatting is used to parse directory path for date & time formatters.",
example = "gs://your-bucket/your-path")
"The Cloud Storage path where data is written. For example, gs://mybucket/somefolder.")
ValueProvider<String> getOutputDirectory();

@SuppressWarnings("unused")
Expand All @@ -117,7 +116,7 @@ public interface Options extends PipelineOptions {
@TemplateParameter.Text(
order = 5,
description = "Parquet file prefix",
helpText = "The prefix of the Parquet file name. For example, \"table1-\"")
helpText = "The prefix of the Parquet filename. For example, output-")
@Default.String("part")
ValueProvider<String> getFilenamePrefix();

Expand All @@ -129,10 +128,7 @@ public interface Options extends PipelineOptions {
optional = true,
description = "Maximum output shards",
helpText =
"The maximum number of output shards produced when writing. A higher number of "
+ "shards means higher throughput for writing to Cloud Storage, but potentially higher "
+ "data aggregation cost across shards when processing output Cloud Storage files. "
+ "Default value is decided by Dataflow.")
"The number of output file shards. For example 2.")
@Default.Integer(0)
ValueProvider<Integer> getNumShards();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ public interface BigQueryToParquetOptions extends PipelineOptions {
@TemplateParameter.BigQueryTable(
order = 1,
description = "BigQuery table to export",
helpText = "BigQuery table location to export in the format <project>:<dataset>.<table>.",
helpText = "The BigQuery input table location.",
example = "your-project:your-dataset.your-table-name")
@Required
String getTableRef();
Expand All @@ -156,7 +156,7 @@ public interface BigQueryToParquetOptions extends PipelineOptions {
@TemplateParameter.GcsWriteFile(
order = 2,
description = "Output Cloud Storage file(s)",
helpText = "Path and filename prefix for writing output files.",
helpText = "The Cloud Storage folder in which to write the Parquet files",
example = "gs://your-bucket/export/")
@Required
String getBucket();
Expand All @@ -168,10 +168,7 @@ public interface BigQueryToParquetOptions extends PipelineOptions {
optional = true,
description = "Maximum output shards",
helpText =
"The maximum number of output shards produced when writing. A higher number of shards"
+ " means higher throughput for writing to Cloud Storage, but potentially higher"
+ " data aggregation cost across shards when processing output Cloud Storage"
+ " files.")
"(Optional) The number of output file shards. The default value is 1.")
Supriya-Koppa marked this conversation as resolved.
Show resolved Hide resolved
@Default.Integer(0)
Integer getNumShards();

Expand All @@ -181,7 +178,7 @@ public interface BigQueryToParquetOptions extends PipelineOptions {
order = 4,
optional = true,
description = "List of field names",
helpText = "Comma separated list of fields to select from the table.")
helpText = "(Optional) A comma-separated list of fields to select from the input BigQuery table.")
String getFields();

void setFields(String fields);
Expand Down