diff --git a/docs/dyn/dataproc_v1.projects.locations.batches.html b/docs/dyn/dataproc_v1.projects.locations.batches.html index cdf05f6aa05..e0fbf364efa 100644 --- a/docs/dyn/dataproc_v1.projects.locations.batches.html +++ b/docs/dyn/dataproc_v1.projects.locations.batches.html @@ -112,7 +112,7 @@

Method Details

"creator": "A String", # Output only. The email address of the user who created the batch. "environmentConfig": { # Environment configuration for a workload. # Optional. Environment configuration for the batch execution. "executionConfig": { # Execution configuration for a workload. # Optional. Execution configuration for a workload. - "idleTtl": "A String", # Optional. The duration to keep the session alive while it's idling. Passing this threshold will cause the session to be terminated. Minimum value is 10 minutes; maximum value is 14 days (see JSON representation of Duration (https://developers.google.com/protocol-buffers/docs/proto3#json)). Defaults to 10 minutes if not set. + "idleTtl": "A String", # Optional. The duration to keep the session alive while it's idling. Passing this threshold will cause the session to be terminated. Minimum value is 10 minutes; maximum value is 14 days (see JSON representation of Duration (https://developers.google.com/protocol-buffers/docs/proto3#json)). Defaults to 4 hours if not set. "kmsKey": "A String", # Optional. The Cloud KMS key to use for encryption. "networkTags": [ # Optional. Tags used for network traffic control. "A String", @@ -163,6 +163,11 @@

Method Details

"milliDcuSeconds": "A String", # Optional. DCU (Dataproc Compute Units) usage in (milliDCU x seconds) (see Dataproc Serverless pricing (https://cloud.google.com/dataproc-serverless/pricing)). "shuffleStorageGbSeconds": "A String", # Optional. Shuffle storage usage in (GB x seconds) (see Dataproc Serverless pricing (https://cloud.google.com/dataproc-serverless/pricing)). }, + "currentUsage": { # The usage snapshot represents the resources consumed by a workload at a specified time. # Output only. Snapshot of current workload resource usage. + "milliDcu": "A String", # Optional. Milli (one-thousandth) Dataproc Compute Units (DCUs) (see Dataproc Serverless pricing (https://cloud.google.com/dataproc-serverless/pricing)). + "shuffleStorageGb": "A String", # Optional. Shuffle Storage in gigabytes (GB) (see Dataproc Serverless pricing (https://cloud.google.com/dataproc-serverless/pricing)). + "snapshotTime": "A String", # Optional. The timestamp of the usage snapshot. + }, "diagnosticOutputUri": "A String", # Output only. A URI pointing to the location of the diagnostics tarball. "endpoints": { # Output only. Map of remote access endpoints (such as web interfaces and APIs) to their URIs. "a_key": "A String", @@ -287,7 +292,7 @@

Method Details

"creator": "A String", # Output only. The email address of the user who created the batch. "environmentConfig": { # Environment configuration for a workload. # Optional. Environment configuration for the batch execution. "executionConfig": { # Execution configuration for a workload. # Optional. Execution configuration for a workload. - "idleTtl": "A String", # Optional. The duration to keep the session alive while it's idling. Passing this threshold will cause the session to be terminated. Minimum value is 10 minutes; maximum value is 14 days (see JSON representation of Duration (https://developers.google.com/protocol-buffers/docs/proto3#json)). Defaults to 10 minutes if not set. + "idleTtl": "A String", # Optional. The duration to keep the session alive while it's idling. Passing this threshold will cause the session to be terminated. Minimum value is 10 minutes; maximum value is 14 days (see JSON representation of Duration (https://developers.google.com/protocol-buffers/docs/proto3#json)). Defaults to 4 hours if not set. "kmsKey": "A String", # Optional. The Cloud KMS key to use for encryption. "networkTags": [ # Optional. Tags used for network traffic control. "A String", @@ -338,6 +343,11 @@

Method Details

"milliDcuSeconds": "A String", # Optional. DCU (Dataproc Compute Units) usage in (milliDCU x seconds) (see Dataproc Serverless pricing (https://cloud.google.com/dataproc-serverless/pricing)). "shuffleStorageGbSeconds": "A String", # Optional. Shuffle storage usage in (GB x seconds) (see Dataproc Serverless pricing (https://cloud.google.com/dataproc-serverless/pricing)). }, + "currentUsage": { # The usage snapshot represents the resources consumed by a workload at a specified time. # Output only. Snapshot of current workload resource usage. + "milliDcu": "A String", # Optional. Milli (one-thousandth) Dataproc Compute Units (DCUs) (see Dataproc Serverless pricing (https://cloud.google.com/dataproc-serverless/pricing)). + "shuffleStorageGb": "A String", # Optional. Shuffle Storage in gigabytes (GB) (see Dataproc Serverless pricing (https://cloud.google.com/dataproc-serverless/pricing)). + "snapshotTime": "A String", # Optional. The timestamp of the usage snapshot. + }, "diagnosticOutputUri": "A String", # Output only. A URI pointing to the location of the diagnostics tarball. "endpoints": { # Output only. Map of remote access endpoints (such as web interfaces and APIs) to their URIs. "a_key": "A String", @@ -418,7 +428,7 @@

Method Details

"creator": "A String", # Output only. The email address of the user who created the batch. "environmentConfig": { # Environment configuration for a workload. # Optional. Environment configuration for the batch execution. "executionConfig": { # Execution configuration for a workload. # Optional. Execution configuration for a workload. - "idleTtl": "A String", # Optional. The duration to keep the session alive while it's idling. Passing this threshold will cause the session to be terminated. Minimum value is 10 minutes; maximum value is 14 days (see JSON representation of Duration (https://developers.google.com/protocol-buffers/docs/proto3#json)). Defaults to 10 minutes if not set. + "idleTtl": "A String", # Optional. The duration to keep the session alive while it's idling. Passing this threshold will cause the session to be terminated. Minimum value is 10 minutes; maximum value is 14 days (see JSON representation of Duration (https://developers.google.com/protocol-buffers/docs/proto3#json)). Defaults to 4 hours if not set. "kmsKey": "A String", # Optional. The Cloud KMS key to use for encryption. "networkTags": [ # Optional. Tags used for network traffic control. "A String", @@ -469,6 +479,11 @@

Method Details

"milliDcuSeconds": "A String", # Optional. DCU (Dataproc Compute Units) usage in (milliDCU x seconds) (see Dataproc Serverless pricing (https://cloud.google.com/dataproc-serverless/pricing)). "shuffleStorageGbSeconds": "A String", # Optional. Shuffle storage usage in (GB x seconds) (see Dataproc Serverless pricing (https://cloud.google.com/dataproc-serverless/pricing)). }, + "currentUsage": { # The usage snapshot represents the resources consumed by a workload at a specified time. # Output only. Snapshot of current workload resource usage. + "milliDcu": "A String", # Optional. Milli (one-thousandth) Dataproc Compute Units (DCUs) (see Dataproc Serverless pricing (https://cloud.google.com/dataproc-serverless/pricing)). + "shuffleStorageGb": "A String", # Optional. Shuffle Storage in gigabytes (GB) (see Dataproc Serverless pricing (https://cloud.google.com/dataproc-serverless/pricing)). + "snapshotTime": "A String", # Optional. The timestamp of the usage snapshot. + }, "diagnosticOutputUri": "A String", # Output only. A URI pointing to the location of the diagnostics tarball. "endpoints": { # Output only. Map of remote access endpoints (such as web interfaces and APIs) to their URIs. "a_key": "A String", diff --git a/docs/dyn/dataproc_v1.projects.locations.workflowTemplates.html b/docs/dyn/dataproc_v1.projects.locations.workflowTemplates.html index a18aa08bc68..476af759036 100644 --- a/docs/dyn/dataproc_v1.projects.locations.workflowTemplates.html +++ b/docs/dyn/dataproc_v1.projects.locations.workflowTemplates.html @@ -250,8 +250,8 @@

Method Details

], }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window. Maximum value is 10. Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. After the maximum number is reached, the job will be reported as failed. Maximum value is 240. Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). 
}, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -381,6 +381,55 @@

Method Details

"autoscalingConfig": { # Autoscaling Policy config associated with the cluster. # Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset. "policyUri": "A String", # Optional. The autoscaling policy used by the cluster.Only resource names including projectid and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]Note that the policy must be in the same project and Dataproc region. }, + "auxiliaryNodeGroups": [ # Optional. The node group settings. + { # Node group identification and configuration information. + "nodeGroup": { # Dataproc Node Group. The Dataproc NodeGroup resource is not related to the Dataproc NodeGroupAffinity resource. # Required. Node group configuration. + "labels": { # Optional. Node group labels. Label keys must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values can be empty. If specified, they must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). The node group must have no more than 32 labels. + "a_key": "A String", + }, + "name": "A String", # The Node group resource name (https://aip.dev/122). + "nodeGroupConfig": { # The config settings for Compute Engine resources in an instance group, such as a master or worker group. # Optional. The node group instance group configuration. + "accelerators": [ # Optional. The Compute Engine accelerator configuration for these instances. + { # Specifies the type and number of accelerator cards attached to the instances of an instance. See GPUs on Compute Engine (https://cloud.google.com/compute/docs/gpus/). + "acceleratorCount": 42, # The number of the accelerator cards of this type exposed to this instance. 
+ "acceleratorTypeUri": "A String", # Full URL, partial URI, or short name of the accelerator type resource to expose to this instance. See Compute Engine AcceleratorTypes (https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).Examples: https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 nvidia-tesla-k80Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, nvidia-tesla-k80. + }, + ], + "diskConfig": { # Specifies the config of disk options for a group of VM instances. # Optional. Disk option config settings. + "bootDiskSizeGb": 42, # Optional. Size in GB of the boot disk (default is 500GB). + "bootDiskType": "A String", # Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive), "pd-ssd" (Persistent Disk Solid State Drive), or "pd-standard" (Persistent Disk Hard Disk Drive). See Disk types (https://cloud.google.com/compute/docs/disks#disk-types). + "localSsdInterface": "A String", # Optional. Interface type of local SSDs (default is "scsi"). Valid values: "scsi" (Small Computer System Interface), "nvme" (Non-Volatile Memory Express). See local SSD performance (https://cloud.google.com/compute/docs/disks/local-ssd#performance). + "numLocalSsds": 42, # Optional. Number of attached SSDs, from 0 to 8 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and HDFS (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. 
If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries.Note: Local SSD options may vary by machine type and number of vCPUs selected. + }, + "imageUri": "A String", # Optional. The Compute Engine image resource used for cluster instances.The URI can represent an image or image family.Image examples: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id] projects/[project_id]/global/images/[image-id] image-idImage family examples. Dataproc will use the most recent image from the family: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name] projects/[project_id]/global/images/family/[custom-image-family-name]If the URI is unspecified, it will be inferred from SoftwareConfig.image_version or the system default. + "instanceNames": [ # Output only. The list of instance names. Dataproc derives the names from cluster_name, num_instances, and the instance group. + "A String", + ], + "instanceReferences": [ # Output only. List of references to Compute Engine instances. + { # A reference to a Compute Engine instance. + "instanceId": "A String", # The unique identifier of the Compute Engine instance. + "instanceName": "A String", # The user-friendly name of the Compute Engine instance. + "publicEciesKey": "A String", # The public ECIES key used for sharing data with this instance. + "publicKey": "A String", # The public RSA key used for sharing data with this instance. + }, + ], + "isPreemptible": True or False, # Output only. Specifies that this instance group contains preemptible instances. + "machineTypeUri": "A String", # Optional. The Compute Engine machine type used for cluster instances.A full URL, partial URI, or short name are valid. 
Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 n1-standard-2Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the machine type resource, for example, n1-standard-2. + "managedGroupConfig": { # Specifies the resources used to actively manage an instance group. # Output only. The config for Compute Engine Instance Group Manager that manages this group. This is only used for preemptible instance groups. + "instanceGroupManagerName": "A String", # Output only. The name of the Instance Group Manager for this group. + "instanceTemplateName": "A String", # Output only. The name of the Instance Template used for the Managed Instance Group. + }, + "minCpuPlatform": "A String", # Optional. Specifies the minimum cpu platform for the Instance Group. See Dataproc -> Minimum CPU Platform (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). + "numInstances": 42, # Optional. The number of VM instances in the instance group. For HA cluster master_config groups, must be set to 3. For standard cluster master_config groups, must be set to 1. + "preemptibility": "A String", # Optional. Specifies the preemptibility of the instance group.The default value for master and worker groups is NON_PREEMPTIBLE. This default cannot be changed.The default value for secondary instances is PREEMPTIBLE. + }, + "roles": [ # Required. Node group roles. + "A String", + ], + }, + "nodeGroupId": "A String", # Optional. A node group ID. Generated if not specified.The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of from 3 to 33 characters. 
+ }, + ], "configBucket": "A String", # Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. "dataprocMetricConfig": { # Dataproc metric config. # Optional. The config for Dataproc metrics. "metrics": [ # Required. Metrics sources to enable. @@ -410,7 +459,7 @@

Method Details

"a_key": "A String", }, "networkUri": "A String", # Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither network_uri nor subnetwork_uri is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see Using Subnetworks (https://cloud.google.com/compute/docs/subnetworks) for more information).A full URL, partial URI, or short name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default projects/[project_id]/regions/global/default default - "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. # Optional. Node Group Affinity for sole-tenant clusters. + "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. The Dataproc NodeGroupAffinity resource is not related to the Dataproc NodeGroup resource. # Optional. Node Group Affinity for sole-tenant clusters. "nodeGroupUri": "A String", # Required. The URI of a sole-tenant node group resource (https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on.A full URL, partial URI, or node group name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 node-group-1 }, "privateIpv6GoogleAccess": "A String", # Optional. The type of IPv6 access for a cluster. @@ -772,8 +821,8 @@

Method Details

], }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window. Maximum value is 10. Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. After the maximum number is reached, the job will be reported as failed. Maximum value is 240. Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). 
}, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -903,6 +952,55 @@

Method Details

"autoscalingConfig": { # Autoscaling Policy config associated with the cluster. # Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset. "policyUri": "A String", # Optional. The autoscaling policy used by the cluster.Only resource names including projectid and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]Note that the policy must be in the same project and Dataproc region. }, + "auxiliaryNodeGroups": [ # Optional. The node group settings. + { # Node group identification and configuration information. + "nodeGroup": { # Dataproc Node Group. The Dataproc NodeGroup resource is not related to the Dataproc NodeGroupAffinity resource. # Required. Node group configuration. + "labels": { # Optional. Node group labels. Label keys must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values can be empty. If specified, they must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). The node group must have no more than 32 labels. + "a_key": "A String", + }, + "name": "A String", # The Node group resource name (https://aip.dev/122). + "nodeGroupConfig": { # The config settings for Compute Engine resources in an instance group, such as a master or worker group. # Optional. The node group instance group configuration. + "accelerators": [ # Optional. The Compute Engine accelerator configuration for these instances. + { # Specifies the type and number of accelerator cards attached to the instances of an instance. See GPUs on Compute Engine (https://cloud.google.com/compute/docs/gpus/). + "acceleratorCount": 42, # The number of the accelerator cards of this type exposed to this instance. 
+ "acceleratorTypeUri": "A String", # Full URL, partial URI, or short name of the accelerator type resource to expose to this instance. See Compute Engine AcceleratorTypes (https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).Examples: https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 nvidia-tesla-k80Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, nvidia-tesla-k80. + }, + ], + "diskConfig": { # Specifies the config of disk options for a group of VM instances. # Optional. Disk option config settings. + "bootDiskSizeGb": 42, # Optional. Size in GB of the boot disk (default is 500GB). + "bootDiskType": "A String", # Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive), "pd-ssd" (Persistent Disk Solid State Drive), or "pd-standard" (Persistent Disk Hard Disk Drive). See Disk types (https://cloud.google.com/compute/docs/disks#disk-types). + "localSsdInterface": "A String", # Optional. Interface type of local SSDs (default is "scsi"). Valid values: "scsi" (Small Computer System Interface), "nvme" (Non-Volatile Memory Express). See local SSD performance (https://cloud.google.com/compute/docs/disks/local-ssd#performance). + "numLocalSsds": 42, # Optional. Number of attached SSDs, from 0 to 8 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and HDFS (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. 
If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries.Note: Local SSD options may vary by machine type and number of vCPUs selected. + }, + "imageUri": "A String", # Optional. The Compute Engine image resource used for cluster instances.The URI can represent an image or image family.Image examples: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id] projects/[project_id]/global/images/[image-id] image-idImage family examples. Dataproc will use the most recent image from the family: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name] projects/[project_id]/global/images/family/[custom-image-family-name]If the URI is unspecified, it will be inferred from SoftwareConfig.image_version or the system default. + "instanceNames": [ # Output only. The list of instance names. Dataproc derives the names from cluster_name, num_instances, and the instance group. + "A String", + ], + "instanceReferences": [ # Output only. List of references to Compute Engine instances. + { # A reference to a Compute Engine instance. + "instanceId": "A String", # The unique identifier of the Compute Engine instance. + "instanceName": "A String", # The user-friendly name of the Compute Engine instance. + "publicEciesKey": "A String", # The public ECIES key used for sharing data with this instance. + "publicKey": "A String", # The public RSA key used for sharing data with this instance. + }, + ], + "isPreemptible": True or False, # Output only. Specifies that this instance group contains preemptible instances. + "machineTypeUri": "A String", # Optional. The Compute Engine machine type used for cluster instances.A full URL, partial URI, or short name are valid. 
Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 n1-standard-2Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the machine type resource, for example, n1-standard-2. + "managedGroupConfig": { # Specifies the resources used to actively manage an instance group. # Output only. The config for Compute Engine Instance Group Manager that manages this group. This is only used for preemptible instance groups. + "instanceGroupManagerName": "A String", # Output only. The name of the Instance Group Manager for this group. + "instanceTemplateName": "A String", # Output only. The name of the Instance Template used for the Managed Instance Group. + }, + "minCpuPlatform": "A String", # Optional. Specifies the minimum cpu platform for the Instance Group. See Dataproc -> Minimum CPU Platform (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). + "numInstances": 42, # Optional. The number of VM instances in the instance group. For HA cluster master_config groups, must be set to 3. For standard cluster master_config groups, must be set to 1. + "preemptibility": "A String", # Optional. Specifies the preemptibility of the instance group.The default value for master and worker groups is NON_PREEMPTIBLE. This default cannot be changed.The default value for secondary instances is PREEMPTIBLE. + }, + "roles": [ # Required. Node group roles. + "A String", + ], + }, + "nodeGroupId": "A String", # Optional. A node group ID. Generated if not specified.The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of from 3 to 33 characters. 
+ }, + ], "configBucket": "A String", # Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. "dataprocMetricConfig": { # Dataproc metric config. # Optional. The config for Dataproc metrics. "metrics": [ # Required. Metrics sources to enable. @@ -932,7 +1030,7 @@

Method Details

"a_key": "A String", }, "networkUri": "A String", # Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither network_uri nor subnetwork_uri is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see Using Subnetworks (https://cloud.google.com/compute/docs/subnetworks) for more information).A full URL, partial URI, or short name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default projects/[project_id]/regions/global/default default - "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. # Optional. Node Group Affinity for sole-tenant clusters. + "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. The Dataproc NodeGroupAffinity resource is not related to the Dataproc NodeGroup resource. # Optional. Node Group Affinity for sole-tenant clusters. "nodeGroupUri": "A String", # Required. The URI of a sole-tenant node group resource (https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on.A full URL, partial URI, or node group name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 node-group-1 }, "privateIpv6GoogleAccess": "A String", # Optional. The type of IPv6 access for a cluster. @@ -1321,8 +1419,8 @@

Method Details

], }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window.Maximum value is 10.Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. After the maximum number is reached, the job will be reported as failed.Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). 
}, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -1452,6 +1550,55 @@

Method Details

"autoscalingConfig": { # Autoscaling Policy config associated with the cluster. # Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset. "policyUri": "A String", # Optional. The autoscaling policy used by the cluster.Only resource names including projectid and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]Note that the policy must be in the same project and Dataproc region. }, + "auxiliaryNodeGroups": [ # Optional. The node group settings. + { # Node group identification and configuration information. + "nodeGroup": { # Dataproc Node Group. The Dataproc NodeGroup resource is not related to the Dataproc NodeGroupAffinity resource. # Required. Node group configuration. + "labels": { # Optional. Node group labels. Label keys must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values can be empty. If specified, they must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). The node group must have no more than 32 labelsn. + "a_key": "A String", + }, + "name": "A String", # The Node group resource name (https://aip.dev/122). + "nodeGroupConfig": { # The config settings for Compute Engine resources in an instance group, such as a master or worker group. # Optional. The node group instance group configuration. + "accelerators": [ # Optional. The Compute Engine accelerator configuration for these instances. + { # Specifies the type and number of accelerator cards attached to the instances of an instance. See GPUs on Compute Engine (https://cloud.google.com/compute/docs/gpus/). + "acceleratorCount": 42, # The number of the accelerator cards of this type exposed to this instance. 
+ "acceleratorTypeUri": "A String", # Full URL, partial URI, or short name of the accelerator type resource to expose to this instance. See Compute Engine AcceleratorTypes (https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).Examples: https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 nvidia-tesla-k80Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, nvidia-tesla-k80. + }, + ], + "diskConfig": { # Specifies the config of disk options for a group of VM instances. # Optional. Disk option config settings. + "bootDiskSizeGb": 42, # Optional. Size in GB of the boot disk (default is 500GB). + "bootDiskType": "A String", # Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive), "pd-ssd" (Persistent Disk Solid State Drive), or "pd-standard" (Persistent Disk Hard Disk Drive). See Disk types (https://cloud.google.com/compute/docs/disks#disk-types). + "localSsdInterface": "A String", # Optional. Interface type of local SSDs (default is "scsi"). Valid values: "scsi" (Small Computer System Interface), "nvme" (Non-Volatile Memory Express). See local SSD performance (https://cloud.google.com/compute/docs/disks/local-ssd#performance). + "numLocalSsds": 42, # Optional. Number of attached SSDs, from 0 to 8 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and HDFS (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. 
If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries.Note: Local SSD options may vary by machine type and number of vCPUs selected. + }, + "imageUri": "A String", # Optional. The Compute Engine image resource used for cluster instances.The URI can represent an image or image family.Image examples: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id] projects/[project_id]/global/images/[image-id] image-idImage family examples. Dataproc will use the most recent image from the family: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name] projects/[project_id]/global/images/family/[custom-image-family-name]If the URI is unspecified, it will be inferred from SoftwareConfig.image_version or the system default. + "instanceNames": [ # Output only. The list of instance names. Dataproc derives the names from cluster_name, num_instances, and the instance group. + "A String", + ], + "instanceReferences": [ # Output only. List of references to Compute Engine instances. + { # A reference to a Compute Engine instance. + "instanceId": "A String", # The unique identifier of the Compute Engine instance. + "instanceName": "A String", # The user-friendly name of the Compute Engine instance. + "publicEciesKey": "A String", # The public ECIES key used for sharing data with this instance. + "publicKey": "A String", # The public RSA key used for sharing data with this instance. + }, + ], + "isPreemptible": True or False, # Output only. Specifies that this instance group contains preemptible instances. + "machineTypeUri": "A String", # Optional. The Compute Engine machine type used for cluster instances.A full URL, partial URI, or short name are valid. 
Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 n1-standard-2Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the machine type resource, for example, n1-standard-2. + "managedGroupConfig": { # Specifies the resources used to actively manage an instance group. # Output only. The config for Compute Engine Instance Group Manager that manages this group. This is only used for preemptible instance groups. + "instanceGroupManagerName": "A String", # Output only. The name of the Instance Group Manager for this group. + "instanceTemplateName": "A String", # Output only. The name of the Instance Template used for the Managed Instance Group. + }, + "minCpuPlatform": "A String", # Optional. Specifies the minimum cpu platform for the Instance Group. See Dataproc -> Minimum CPU Platform (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). + "numInstances": 42, # Optional. The number of VM instances in the instance group. For HA cluster master_config groups, must be set to 3. For standard cluster master_config groups, must be set to 1. + "preemptibility": "A String", # Optional. Specifies the preemptibility of the instance group.The default value for master and worker groups is NON_PREEMPTIBLE. This default cannot be changed.The default value for secondary instances is PREEMPTIBLE. + }, + "roles": [ # Required. Node group roles. + "A String", + ], + }, + "nodeGroupId": "A String", # Optional. A node group ID. Generated if not specified.The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of from 3 to 33 characters. 
+ }, + ], "configBucket": "A String", # Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. "dataprocMetricConfig": { # Dataproc metric config. # Optional. The config for Dataproc metrics. "metrics": [ # Required. Metrics sources to enable. @@ -1481,7 +1628,7 @@

Method Details

"a_key": "A String", }, "networkUri": "A String", # Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither network_uri nor subnetwork_uri is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see Using Subnetworks (https://cloud.google.com/compute/docs/subnetworks) for more information).A full URL, partial URI, or short name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default projects/[project_id]/regions/global/default default - "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. # Optional. Node Group Affinity for sole-tenant clusters. + "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. The Dataproc NodeGroupAffinity resource is not related to the Dataproc NodeGroup resource. # Optional. Node Group Affinity for sole-tenant clusters. "nodeGroupUri": "A String", # Required. The URI of a sole-tenant node group resource (https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on.A full URL, partial URI, or node group name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 node-group-1 }, "privateIpv6GoogleAccess": "A String", # Optional. The type of IPv6 access for a cluster. @@ -1934,8 +2081,8 @@

Method Details

], }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window.Maximum value is 10.Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. After the maximum number is reached, the job will be reported as failed.Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). 
}, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -2065,6 +2212,55 @@

Method Details

"autoscalingConfig": { # Autoscaling Policy config associated with the cluster. # Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset. "policyUri": "A String", # Optional. The autoscaling policy used by the cluster.Only resource names including projectid and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]Note that the policy must be in the same project and Dataproc region. }, + "auxiliaryNodeGroups": [ # Optional. The node group settings. + { # Node group identification and configuration information. + "nodeGroup": { # Dataproc Node Group. The Dataproc NodeGroup resource is not related to the Dataproc NodeGroupAffinity resource. # Required. Node group configuration. + "labels": { # Optional. Node group labels. Label keys must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values can be empty. If specified, they must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). The node group must have no more than 32 labelsn. + "a_key": "A String", + }, + "name": "A String", # The Node group resource name (https://aip.dev/122). + "nodeGroupConfig": { # The config settings for Compute Engine resources in an instance group, such as a master or worker group. # Optional. The node group instance group configuration. + "accelerators": [ # Optional. The Compute Engine accelerator configuration for these instances. + { # Specifies the type and number of accelerator cards attached to the instances of an instance. See GPUs on Compute Engine (https://cloud.google.com/compute/docs/gpus/). + "acceleratorCount": 42, # The number of the accelerator cards of this type exposed to this instance. 
+ "acceleratorTypeUri": "A String", # Full URL, partial URI, or short name of the accelerator type resource to expose to this instance. See Compute Engine AcceleratorTypes (https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).Examples: https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 nvidia-tesla-k80Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, nvidia-tesla-k80. + }, + ], + "diskConfig": { # Specifies the config of disk options for a group of VM instances. # Optional. Disk option config settings. + "bootDiskSizeGb": 42, # Optional. Size in GB of the boot disk (default is 500GB). + "bootDiskType": "A String", # Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive), "pd-ssd" (Persistent Disk Solid State Drive), or "pd-standard" (Persistent Disk Hard Disk Drive). See Disk types (https://cloud.google.com/compute/docs/disks#disk-types). + "localSsdInterface": "A String", # Optional. Interface type of local SSDs (default is "scsi"). Valid values: "scsi" (Small Computer System Interface), "nvme" (Non-Volatile Memory Express). See local SSD performance (https://cloud.google.com/compute/docs/disks/local-ssd#performance). + "numLocalSsds": 42, # Optional. Number of attached SSDs, from 0 to 8 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and HDFS (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. 
If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries.Note: Local SSD options may vary by machine type and number of vCPUs selected. + }, + "imageUri": "A String", # Optional. The Compute Engine image resource used for cluster instances.The URI can represent an image or image family.Image examples: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id] projects/[project_id]/global/images/[image-id] image-idImage family examples. Dataproc will use the most recent image from the family: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name] projects/[project_id]/global/images/family/[custom-image-family-name]If the URI is unspecified, it will be inferred from SoftwareConfig.image_version or the system default. + "instanceNames": [ # Output only. The list of instance names. Dataproc derives the names from cluster_name, num_instances, and the instance group. + "A String", + ], + "instanceReferences": [ # Output only. List of references to Compute Engine instances. + { # A reference to a Compute Engine instance. + "instanceId": "A String", # The unique identifier of the Compute Engine instance. + "instanceName": "A String", # The user-friendly name of the Compute Engine instance. + "publicEciesKey": "A String", # The public ECIES key used for sharing data with this instance. + "publicKey": "A String", # The public RSA key used for sharing data with this instance. + }, + ], + "isPreemptible": True or False, # Output only. Specifies that this instance group contains preemptible instances. + "machineTypeUri": "A String", # Optional. The Compute Engine machine type used for cluster instances.A full URL, partial URI, or short name are valid. 
Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 n1-standard-2Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the machine type resource, for example, n1-standard-2. + "managedGroupConfig": { # Specifies the resources used to actively manage an instance group. # Output only. The config for Compute Engine Instance Group Manager that manages this group. This is only used for preemptible instance groups. + "instanceGroupManagerName": "A String", # Output only. The name of the Instance Group Manager for this group. + "instanceTemplateName": "A String", # Output only. The name of the Instance Template used for the Managed Instance Group. + }, + "minCpuPlatform": "A String", # Optional. Specifies the minimum cpu platform for the Instance Group. See Dataproc -> Minimum CPU Platform (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). + "numInstances": 42, # Optional. The number of VM instances in the instance group. For HA cluster master_config groups, must be set to 3. For standard cluster master_config groups, must be set to 1. + "preemptibility": "A String", # Optional. Specifies the preemptibility of the instance group.The default value for master and worker groups is NON_PREEMPTIBLE. This default cannot be changed.The default value for secondary instances is PREEMPTIBLE. + }, + "roles": [ # Required. Node group roles. + "A String", + ], + }, + "nodeGroupId": "A String", # Optional. A node group ID. Generated if not specified.The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of from 3 to 33 characters. 
+ }, + ], "configBucket": "A String", # Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. "dataprocMetricConfig": { # Dataproc metric config. # Optional. The config for Dataproc metrics. "metrics": [ # Required. Metrics sources to enable. @@ -2094,7 +2290,7 @@

Method Details

"a_key": "A String", }, "networkUri": "A String", # Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither network_uri nor subnetwork_uri is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see Using Subnetworks (https://cloud.google.com/compute/docs/subnetworks) for more information).A full URL, partial URI, or short name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default projects/[project_id]/regions/global/default default - "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. # Optional. Node Group Affinity for sole-tenant clusters. + "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. The Dataproc NodeGroupAffinity resource is not related to the Dataproc NodeGroup resource. # Optional. Node Group Affinity for sole-tenant clusters. "nodeGroupUri": "A String", # Required. The URI of a sole-tenant node group resource (https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on.A full URL, partial URI, or node group name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 node-group-1 }, "privateIpv6GoogleAccess": "A String", # Optional. The type of IPv6 access for a cluster. @@ -2497,8 +2693,8 @@

Method Details

], }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window.Maximum value is 10.Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. After the maximum number is reached, the job will be reported as failed.Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). 
}, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -2628,6 +2824,55 @@

Method Details

"autoscalingConfig": { # Autoscaling Policy config associated with the cluster. # Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset. "policyUri": "A String", # Optional. The autoscaling policy used by the cluster.Only resource names including projectid and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]Note that the policy must be in the same project and Dataproc region. }, + "auxiliaryNodeGroups": [ # Optional. The node group settings. + { # Node group identification and configuration information. + "nodeGroup": { # Dataproc Node Group. The Dataproc NodeGroup resource is not related to the Dataproc NodeGroupAffinity resource. # Required. Node group configuration. + "labels": { # Optional. Node group labels. Label keys must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values can be empty. If specified, they must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). The node group must have no more than 32 labelsn. + "a_key": "A String", + }, + "name": "A String", # The Node group resource name (https://aip.dev/122). + "nodeGroupConfig": { # The config settings for Compute Engine resources in an instance group, such as a master or worker group. # Optional. The node group instance group configuration. + "accelerators": [ # Optional. The Compute Engine accelerator configuration for these instances. + { # Specifies the type and number of accelerator cards attached to the instances of an instance. See GPUs on Compute Engine (https://cloud.google.com/compute/docs/gpus/). + "acceleratorCount": 42, # The number of the accelerator cards of this type exposed to this instance. 
+ "acceleratorTypeUri": "A String", # Full URL, partial URI, or short name of the accelerator type resource to expose to this instance. See Compute Engine AcceleratorTypes (https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).Examples: https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 nvidia-tesla-k80Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, nvidia-tesla-k80. + }, + ], + "diskConfig": { # Specifies the config of disk options for a group of VM instances. # Optional. Disk option config settings. + "bootDiskSizeGb": 42, # Optional. Size in GB of the boot disk (default is 500GB). + "bootDiskType": "A String", # Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive), "pd-ssd" (Persistent Disk Solid State Drive), or "pd-standard" (Persistent Disk Hard Disk Drive). See Disk types (https://cloud.google.com/compute/docs/disks#disk-types). + "localSsdInterface": "A String", # Optional. Interface type of local SSDs (default is "scsi"). Valid values: "scsi" (Small Computer System Interface), "nvme" (Non-Volatile Memory Express). See local SSD performance (https://cloud.google.com/compute/docs/disks/local-ssd#performance). + "numLocalSsds": 42, # Optional. Number of attached SSDs, from 0 to 8 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and HDFS (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. 
If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries.Note: Local SSD options may vary by machine type and number of vCPUs selected. + }, + "imageUri": "A String", # Optional. The Compute Engine image resource used for cluster instances.The URI can represent an image or image family.Image examples: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id] projects/[project_id]/global/images/[image-id] image-idImage family examples. Dataproc will use the most recent image from the family: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name] projects/[project_id]/global/images/family/[custom-image-family-name]If the URI is unspecified, it will be inferred from SoftwareConfig.image_version or the system default. + "instanceNames": [ # Output only. The list of instance names. Dataproc derives the names from cluster_name, num_instances, and the instance group. + "A String", + ], + "instanceReferences": [ # Output only. List of references to Compute Engine instances. + { # A reference to a Compute Engine instance. + "instanceId": "A String", # The unique identifier of the Compute Engine instance. + "instanceName": "A String", # The user-friendly name of the Compute Engine instance. + "publicEciesKey": "A String", # The public ECIES key used for sharing data with this instance. + "publicKey": "A String", # The public RSA key used for sharing data with this instance. + }, + ], + "isPreemptible": True or False, # Output only. Specifies that this instance group contains preemptible instances. + "machineTypeUri": "A String", # Optional. The Compute Engine machine type used for cluster instances.A full URL, partial URI, or short name are valid. 
Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 n1-standard-2Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the machine type resource, for example, n1-standard-2. + "managedGroupConfig": { # Specifies the resources used to actively manage an instance group. # Output only. The config for Compute Engine Instance Group Manager that manages this group. This is only used for preemptible instance groups. + "instanceGroupManagerName": "A String", # Output only. The name of the Instance Group Manager for this group. + "instanceTemplateName": "A String", # Output only. The name of the Instance Template used for the Managed Instance Group. + }, + "minCpuPlatform": "A String", # Optional. Specifies the minimum cpu platform for the Instance Group. See Dataproc -> Minimum CPU Platform (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). + "numInstances": 42, # Optional. The number of VM instances in the instance group. For HA cluster master_config groups, must be set to 3. For standard cluster master_config groups, must be set to 1. + "preemptibility": "A String", # Optional. Specifies the preemptibility of the instance group.The default value for master and worker groups is NON_PREEMPTIBLE. This default cannot be changed.The default value for secondary instances is PREEMPTIBLE. + }, + "roles": [ # Required. Node group roles. + "A String", + ], + }, + "nodeGroupId": "A String", # Optional. A node group ID. Generated if not specified.The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of from 3 to 33 characters. 
+ }, + ], "configBucket": "A String", # Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. "dataprocMetricConfig": { # Dataproc metric config. # Optional. The config for Dataproc metrics. "metrics": [ # Required. Metrics sources to enable. @@ -2657,7 +2902,7 @@

Method Details

"a_key": "A String", }, "networkUri": "A String", # Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither network_uri nor subnetwork_uri is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see Using Subnetworks (https://cloud.google.com/compute/docs/subnetworks) for more information).A full URL, partial URI, or short name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default projects/[project_id]/regions/global/default default - "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. # Optional. Node Group Affinity for sole-tenant clusters. + "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. The Dataproc NodeGroupAffinity resource is not related to the Dataproc NodeGroup resource. # Optional. Node Group Affinity for sole-tenant clusters. "nodeGroupUri": "A String", # Required. The URI of a sole-tenant node group resource (https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on.A full URL, partial URI, or node group name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 node-group-1 }, "privateIpv6GoogleAccess": "A String", # Optional. The type of IPv6 access for a cluster. @@ -3125,8 +3370,8 @@

Method Details

], }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window.Maximum value is 10.Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. After the maximum number is reached, the job will be reported as failed.Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). 
}, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -3256,6 +3501,55 @@

Method Details

"autoscalingConfig": { # Autoscaling Policy config associated with the cluster. # Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset. "policyUri": "A String", # Optional. The autoscaling policy used by the cluster.Only resource names including projectid and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]Note that the policy must be in the same project and Dataproc region. }, + "auxiliaryNodeGroups": [ # Optional. The node group settings. + { # Node group identification and configuration information. + "nodeGroup": { # Dataproc Node Group. The Dataproc NodeGroup resource is not related to the Dataproc NodeGroupAffinity resource. # Required. Node group configuration. + "labels": { # Optional. Node group labels. Label keys must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values can be empty. If specified, they must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). The node group must have no more than 32 labels. + "a_key": "A String", + }, + "name": "A String", # The Node group resource name (https://aip.dev/122). + "nodeGroupConfig": { # The config settings for Compute Engine resources in an instance group, such as a master or worker group. # Optional. The node group instance group configuration. + "accelerators": [ # Optional. The Compute Engine accelerator configuration for these instances. + { # Specifies the type and number of accelerator cards attached to the instances of an instance. See GPUs on Compute Engine (https://cloud.google.com/compute/docs/gpus/). + "acceleratorCount": 42, # The number of the accelerator cards of this type exposed to this instance. 
+ "acceleratorTypeUri": "A String", # Full URL, partial URI, or short name of the accelerator type resource to expose to this instance. See Compute Engine AcceleratorTypes (https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).Examples: https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 nvidia-tesla-k80Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, nvidia-tesla-k80. + }, + ], + "diskConfig": { # Specifies the config of disk options for a group of VM instances. # Optional. Disk option config settings. + "bootDiskSizeGb": 42, # Optional. Size in GB of the boot disk (default is 500GB). + "bootDiskType": "A String", # Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive), "pd-ssd" (Persistent Disk Solid State Drive), or "pd-standard" (Persistent Disk Hard Disk Drive). See Disk types (https://cloud.google.com/compute/docs/disks#disk-types). + "localSsdInterface": "A String", # Optional. Interface type of local SSDs (default is "scsi"). Valid values: "scsi" (Small Computer System Interface), "nvme" (Non-Volatile Memory Express). See local SSD performance (https://cloud.google.com/compute/docs/disks/local-ssd#performance). + "numLocalSsds": 42, # Optional. Number of attached SSDs, from 0 to 8 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and HDFS (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. 
If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries.Note: Local SSD options may vary by machine type and number of vCPUs selected. + }, + "imageUri": "A String", # Optional. The Compute Engine image resource used for cluster instances.The URI can represent an image or image family.Image examples: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id] projects/[project_id]/global/images/[image-id] image-idImage family examples. Dataproc will use the most recent image from the family: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name] projects/[project_id]/global/images/family/[custom-image-family-name]If the URI is unspecified, it will be inferred from SoftwareConfig.image_version or the system default. + "instanceNames": [ # Output only. The list of instance names. Dataproc derives the names from cluster_name, num_instances, and the instance group. + "A String", + ], + "instanceReferences": [ # Output only. List of references to Compute Engine instances. + { # A reference to a Compute Engine instance. + "instanceId": "A String", # The unique identifier of the Compute Engine instance. + "instanceName": "A String", # The user-friendly name of the Compute Engine instance. + "publicEciesKey": "A String", # The public ECIES key used for sharing data with this instance. + "publicKey": "A String", # The public RSA key used for sharing data with this instance. + }, + ], + "isPreemptible": True or False, # Output only. Specifies that this instance group contains preemptible instances. + "machineTypeUri": "A String", # Optional. The Compute Engine machine type used for cluster instances.A full URL, partial URI, or short name are valid. 
Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 n1-standard-2Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the machine type resource, for example, n1-standard-2. + "managedGroupConfig": { # Specifies the resources used to actively manage an instance group. # Output only. The config for Compute Engine Instance Group Manager that manages this group. This is only used for preemptible instance groups. + "instanceGroupManagerName": "A String", # Output only. The name of the Instance Group Manager for this group. + "instanceTemplateName": "A String", # Output only. The name of the Instance Template used for the Managed Instance Group. + }, + "minCpuPlatform": "A String", # Optional. Specifies the minimum cpu platform for the Instance Group. See Dataproc -> Minimum CPU Platform (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). + "numInstances": 42, # Optional. The number of VM instances in the instance group. For HA cluster master_config groups, must be set to 3. For standard cluster master_config groups, must be set to 1. + "preemptibility": "A String", # Optional. Specifies the preemptibility of the instance group.The default value for master and worker groups is NON_PREEMPTIBLE. This default cannot be changed.The default value for secondary instances is PREEMPTIBLE. + }, + "roles": [ # Required. Node group roles. + "A String", + ], + }, + "nodeGroupId": "A String", # Optional. A node group ID. Generated if not specified.The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of from 3 to 33 characters. 
+ }, + ], "configBucket": "A String", # Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. "dataprocMetricConfig": { # Dataproc metric config. # Optional. The config for Dataproc metrics. "metrics": [ # Required. Metrics sources to enable. @@ -3285,7 +3579,7 @@

Method Details

"a_key": "A String", }, "networkUri": "A String", # Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither network_uri nor subnetwork_uri is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see Using Subnetworks (https://cloud.google.com/compute/docs/subnetworks) for more information).A full URL, partial URI, or short name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default projects/[project_id]/regions/global/default default - "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. # Optional. Node Group Affinity for sole-tenant clusters. + "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. The Dataproc NodeGroupAffinity resource is not related to the Dataproc NodeGroup resource. # Optional. Node Group Affinity for sole-tenant clusters. "nodeGroupUri": "A String", # Required. The URI of a sole-tenant node group resource (https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on.A full URL, partial URI, or node group name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 node-group-1 }, "privateIpv6GoogleAccess": "A String", # Optional. The type of IPv6 access for a cluster. @@ -3647,8 +3941,8 @@

Method Details

], }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window.Maximum value is 10.Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. After the maximum number is reached, the job will be reported as failed.Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). 
}, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -3778,6 +4072,55 @@

Method Details

"autoscalingConfig": { # Autoscaling Policy config associated with the cluster. # Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset. "policyUri": "A String", # Optional. The autoscaling policy used by the cluster.Only resource names including projectid and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]Note that the policy must be in the same project and Dataproc region. }, + "auxiliaryNodeGroups": [ # Optional. The node group settings. + { # Node group identification and configuration information. + "nodeGroup": { # Dataproc Node Group. The Dataproc NodeGroup resource is not related to the Dataproc NodeGroupAffinity resource. # Required. Node group configuration. + "labels": { # Optional. Node group labels. Label keys must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values can be empty. If specified, they must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). The node group must have no more than 32 labels. + "a_key": "A String", + }, + "name": "A String", # The Node group resource name (https://aip.dev/122). + "nodeGroupConfig": { # The config settings for Compute Engine resources in an instance group, such as a master or worker group. # Optional. The node group instance group configuration. + "accelerators": [ # Optional. The Compute Engine accelerator configuration for these instances. + { # Specifies the type and number of accelerator cards attached to the instances of an instance. See GPUs on Compute Engine (https://cloud.google.com/compute/docs/gpus/). + "acceleratorCount": 42, # The number of the accelerator cards of this type exposed to this instance. 
+ "acceleratorTypeUri": "A String", # Full URL, partial URI, or short name of the accelerator type resource to expose to this instance. See Compute Engine AcceleratorTypes (https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).Examples: https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 nvidia-tesla-k80Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, nvidia-tesla-k80. + }, + ], + "diskConfig": { # Specifies the config of disk options for a group of VM instances. # Optional. Disk option config settings. + "bootDiskSizeGb": 42, # Optional. Size in GB of the boot disk (default is 500GB). + "bootDiskType": "A String", # Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive), "pd-ssd" (Persistent Disk Solid State Drive), or "pd-standard" (Persistent Disk Hard Disk Drive). See Disk types (https://cloud.google.com/compute/docs/disks#disk-types). + "localSsdInterface": "A String", # Optional. Interface type of local SSDs (default is "scsi"). Valid values: "scsi" (Small Computer System Interface), "nvme" (Non-Volatile Memory Express). See local SSD performance (https://cloud.google.com/compute/docs/disks/local-ssd#performance). + "numLocalSsds": 42, # Optional. Number of attached SSDs, from 0 to 8 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and HDFS (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. 
If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries.Note: Local SSD options may vary by machine type and number of vCPUs selected. + }, + "imageUri": "A String", # Optional. The Compute Engine image resource used for cluster instances.The URI can represent an image or image family.Image examples: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id] projects/[project_id]/global/images/[image-id] image-idImage family examples. Dataproc will use the most recent image from the family: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name] projects/[project_id]/global/images/family/[custom-image-family-name]If the URI is unspecified, it will be inferred from SoftwareConfig.image_version or the system default. + "instanceNames": [ # Output only. The list of instance names. Dataproc derives the names from cluster_name, num_instances, and the instance group. + "A String", + ], + "instanceReferences": [ # Output only. List of references to Compute Engine instances. + { # A reference to a Compute Engine instance. + "instanceId": "A String", # The unique identifier of the Compute Engine instance. + "instanceName": "A String", # The user-friendly name of the Compute Engine instance. + "publicEciesKey": "A String", # The public ECIES key used for sharing data with this instance. + "publicKey": "A String", # The public RSA key used for sharing data with this instance. + }, + ], + "isPreemptible": True or False, # Output only. Specifies that this instance group contains preemptible instances. + "machineTypeUri": "A String", # Optional. The Compute Engine machine type used for cluster instances.A full URL, partial URI, or short name are valid. 
Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 n1-standard-2Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the machine type resource, for example, n1-standard-2. + "managedGroupConfig": { # Specifies the resources used to actively manage an instance group. # Output only. The config for Compute Engine Instance Group Manager that manages this group. This is only used for preemptible instance groups. + "instanceGroupManagerName": "A String", # Output only. The name of the Instance Group Manager for this group. + "instanceTemplateName": "A String", # Output only. The name of the Instance Template used for the Managed Instance Group. + }, + "minCpuPlatform": "A String", # Optional. Specifies the minimum cpu platform for the Instance Group. See Dataproc -> Minimum CPU Platform (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). + "numInstances": 42, # Optional. The number of VM instances in the instance group. For HA cluster master_config groups, must be set to 3. For standard cluster master_config groups, must be set to 1. + "preemptibility": "A String", # Optional. Specifies the preemptibility of the instance group.The default value for master and worker groups is NON_PREEMPTIBLE. This default cannot be changed.The default value for secondary instances is PREEMPTIBLE. + }, + "roles": [ # Required. Node group roles. + "A String", + ], + }, + "nodeGroupId": "A String", # Optional. A node group ID. Generated if not specified.The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of from 3 to 33 characters. 
+ }, + ], "configBucket": "A String", # Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. "dataprocMetricConfig": { # Dataproc metric config. # Optional. The config for Dataproc metrics. "metrics": [ # Required. Metrics sources to enable. @@ -3807,7 +4150,7 @@

Method Details

"a_key": "A String", }, "networkUri": "A String", # Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither network_uri nor subnetwork_uri is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see Using Subnetworks (https://cloud.google.com/compute/docs/subnetworks) for more information).A full URL, partial URI, or short name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default projects/[project_id]/regions/global/default default - "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. # Optional. Node Group Affinity for sole-tenant clusters. + "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. The Dataproc NodeGroupAffinity resource is not related to the Dataproc NodeGroup resource. # Optional. Node Group Affinity for sole-tenant clusters. "nodeGroupUri": "A String", # Required. The URI of a sole-tenant node group resource (https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on.A full URL, partial URI, or node group name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 node-group-1 }, "privateIpv6GoogleAccess": "A String", # Optional. The type of IPv6 access for a cluster. diff --git a/docs/dyn/dataproc_v1.projects.regions.clusters.html b/docs/dyn/dataproc_v1.projects.regions.clusters.html index 08c9e25d8d8..85be6087077 100644 --- a/docs/dyn/dataproc_v1.projects.regions.clusters.html +++ b/docs/dyn/dataproc_v1.projects.regions.clusters.html @@ -74,6 +74,11 @@

Cloud Dataproc API . projects . regions . clusters

Instance Methods

+

+ nodeGroups() +

+

Returns the nodeGroups Resource.

+

close()

Close httplib2 connections.

@@ -142,6 +147,55 @@

Method Details

"autoscalingConfig": { # Autoscaling Policy config associated with the cluster. # Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset. "policyUri": "A String", # Optional. The autoscaling policy used by the cluster.Only resource names including projectid and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]Note that the policy must be in the same project and Dataproc region. }, + "auxiliaryNodeGroups": [ # Optional. The node group settings. + { # Node group identification and configuration information. + "nodeGroup": { # Dataproc Node Group. The Dataproc NodeGroup resource is not related to the Dataproc NodeGroupAffinity resource. # Required. Node group configuration. + "labels": { # Optional. Node group labels. Label keys must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values can be empty. If specified, they must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). The node group must have no more than 32 labels. + "a_key": "A String", + }, + "name": "A String", # The Node group resource name (https://aip.dev/122). + "nodeGroupConfig": { # The config settings for Compute Engine resources in an instance group, such as a master or worker group. # Optional. The node group instance group configuration. + "accelerators": [ # Optional. The Compute Engine accelerator configuration for these instances. + { # Specifies the type and number of accelerator cards attached to the instances of an instance. See GPUs on Compute Engine (https://cloud.google.com/compute/docs/gpus/). + "acceleratorCount": 42, # The number of the accelerator cards of this type exposed to this instance. 
+ "acceleratorTypeUri": "A String", # Full URL, partial URI, or short name of the accelerator type resource to expose to this instance. See Compute Engine AcceleratorTypes (https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).Examples: https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 nvidia-tesla-k80Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, nvidia-tesla-k80. + }, + ], + "diskConfig": { # Specifies the config of disk options for a group of VM instances. # Optional. Disk option config settings. + "bootDiskSizeGb": 42, # Optional. Size in GB of the boot disk (default is 500GB). + "bootDiskType": "A String", # Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive), "pd-ssd" (Persistent Disk Solid State Drive), or "pd-standard" (Persistent Disk Hard Disk Drive). See Disk types (https://cloud.google.com/compute/docs/disks#disk-types). + "localSsdInterface": "A String", # Optional. Interface type of local SSDs (default is "scsi"). Valid values: "scsi" (Small Computer System Interface), "nvme" (Non-Volatile Memory Express). See local SSD performance (https://cloud.google.com/compute/docs/disks/local-ssd#performance). + "numLocalSsds": 42, # Optional. Number of attached SSDs, from 0 to 8 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and HDFS (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. 
If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries.Note: Local SSD options may vary by machine type and number of vCPUs selected. + }, + "imageUri": "A String", # Optional. The Compute Engine image resource used for cluster instances.The URI can represent an image or image family.Image examples: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id] projects/[project_id]/global/images/[image-id] image-idImage family examples. Dataproc will use the most recent image from the family: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name] projects/[project_id]/global/images/family/[custom-image-family-name]If the URI is unspecified, it will be inferred from SoftwareConfig.image_version or the system default. + "instanceNames": [ # Output only. The list of instance names. Dataproc derives the names from cluster_name, num_instances, and the instance group. + "A String", + ], + "instanceReferences": [ # Output only. List of references to Compute Engine instances. + { # A reference to a Compute Engine instance. + "instanceId": "A String", # The unique identifier of the Compute Engine instance. + "instanceName": "A String", # The user-friendly name of the Compute Engine instance. + "publicEciesKey": "A String", # The public ECIES key used for sharing data with this instance. + "publicKey": "A String", # The public RSA key used for sharing data with this instance. + }, + ], + "isPreemptible": True or False, # Output only. Specifies that this instance group contains preemptible instances. + "machineTypeUri": "A String", # Optional. The Compute Engine machine type used for cluster instances.A full URL, partial URI, or short name are valid. 
Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 n1-standard-2Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the machine type resource, for example, n1-standard-2. + "managedGroupConfig": { # Specifies the resources used to actively manage an instance group. # Output only. The config for Compute Engine Instance Group Manager that manages this group. This is only used for preemptible instance groups. + "instanceGroupManagerName": "A String", # Output only. The name of the Instance Group Manager for this group. + "instanceTemplateName": "A String", # Output only. The name of the Instance Template used for the Managed Instance Group. + }, + "minCpuPlatform": "A String", # Optional. Specifies the minimum cpu platform for the Instance Group. See Dataproc -> Minimum CPU Platform (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). + "numInstances": 42, # Optional. The number of VM instances in the instance group. For HA cluster master_config groups, must be set to 3. For standard cluster master_config groups, must be set to 1. + "preemptibility": "A String", # Optional. Specifies the preemptibility of the instance group.The default value for master and worker groups is NON_PREEMPTIBLE. This default cannot be changed.The default value for secondary instances is PREEMPTIBLE. + }, + "roles": [ # Required. Node group roles. + "A String", + ], + }, + "nodeGroupId": "A String", # Optional. A node group ID. Generated if not specified.The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of from 3 to 33 characters. 
+ }, + ], "configBucket": "A String", # Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. "dataprocMetricConfig": { # Dataproc metric config. # Optional. The config for Dataproc metrics. "metrics": [ # Required. Metrics sources to enable. @@ -171,7 +225,7 @@

Method Details

"a_key": "A String", }, "networkUri": "A String", # Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither network_uri nor subnetwork_uri is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see Using Subnetworks (https://cloud.google.com/compute/docs/subnetworks) for more information).A full URL, partial URI, or short name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default projects/[project_id]/regions/global/default default - "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. # Optional. Node Group Affinity for sole-tenant clusters. + "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. The Dataproc NodeGroupAffinity resource is not related to the Dataproc NodeGroup resource. # Optional. Node Group Affinity for sole-tenant clusters. "nodeGroupUri": "A String", # Required. The URI of a sole-tenant node group resource (https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on.A full URL, partial URI, or node group name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 node-group-1 }, "privateIpv6GoogleAccess": "A String", # Optional. The type of IPv6 access for a cluster. @@ -620,6 +674,55 @@

Method Details

"autoscalingConfig": { # Autoscaling Policy config associated with the cluster. # Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset. "policyUri": "A String", # Optional. The autoscaling policy used by the cluster.Only resource names including projectid and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]Note that the policy must be in the same project and Dataproc region. }, + "auxiliaryNodeGroups": [ # Optional. The node group settings. + { # Node group identification and configuration information. + "nodeGroup": { # Dataproc Node Group. The Dataproc NodeGroup resource is not related to the Dataproc NodeGroupAffinity resource. # Required. Node group configuration. + "labels": { # Optional. Node group labels. Label keys must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values can be empty. If specified, they must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). The node group must have no more than 32 labels. + "a_key": "A String", + }, + "name": "A String", # The Node group resource name (https://aip.dev/122). + "nodeGroupConfig": { # The config settings for Compute Engine resources in an instance group, such as a master or worker group. # Optional. The node group instance group configuration. + "accelerators": [ # Optional. The Compute Engine accelerator configuration for these instances. + { # Specifies the type and number of accelerator cards attached to the instances of an instance. See GPUs on Compute Engine (https://cloud.google.com/compute/docs/gpus/). + "acceleratorCount": 42, # The number of the accelerator cards of this type exposed to this instance. 
+ "acceleratorTypeUri": "A String", # Full URL, partial URI, or short name of the accelerator type resource to expose to this instance. See Compute Engine AcceleratorTypes (https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).Examples: https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 nvidia-tesla-k80Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, nvidia-tesla-k80. + }, + ], + "diskConfig": { # Specifies the config of disk options for a group of VM instances. # Optional. Disk option config settings. + "bootDiskSizeGb": 42, # Optional. Size in GB of the boot disk (default is 500GB). + "bootDiskType": "A String", # Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive), "pd-ssd" (Persistent Disk Solid State Drive), or "pd-standard" (Persistent Disk Hard Disk Drive). See Disk types (https://cloud.google.com/compute/docs/disks#disk-types). + "localSsdInterface": "A String", # Optional. Interface type of local SSDs (default is "scsi"). Valid values: "scsi" (Small Computer System Interface), "nvme" (Non-Volatile Memory Express). See local SSD performance (https://cloud.google.com/compute/docs/disks/local-ssd#performance). + "numLocalSsds": 42, # Optional. Number of attached SSDs, from 0 to 8 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and HDFS (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. 
If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries.Note: Local SSD options may vary by machine type and number of vCPUs selected. + }, + "imageUri": "A String", # Optional. The Compute Engine image resource used for cluster instances.The URI can represent an image or image family.Image examples: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id] projects/[project_id]/global/images/[image-id] image-idImage family examples. Dataproc will use the most recent image from the family: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name] projects/[project_id]/global/images/family/[custom-image-family-name]If the URI is unspecified, it will be inferred from SoftwareConfig.image_version or the system default. + "instanceNames": [ # Output only. The list of instance names. Dataproc derives the names from cluster_name, num_instances, and the instance group. + "A String", + ], + "instanceReferences": [ # Output only. List of references to Compute Engine instances. + { # A reference to a Compute Engine instance. + "instanceId": "A String", # The unique identifier of the Compute Engine instance. + "instanceName": "A String", # The user-friendly name of the Compute Engine instance. + "publicEciesKey": "A String", # The public ECIES key used for sharing data with this instance. + "publicKey": "A String", # The public RSA key used for sharing data with this instance. + }, + ], + "isPreemptible": True or False, # Output only. Specifies that this instance group contains preemptible instances. + "machineTypeUri": "A String", # Optional. The Compute Engine machine type used for cluster instances.A full URL, partial URI, or short name are valid. 
Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 n1-standard-2Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the machine type resource, for example, n1-standard-2. + "managedGroupConfig": { # Specifies the resources used to actively manage an instance group. # Output only. The config for Compute Engine Instance Group Manager that manages this group. This is only used for preemptible instance groups. + "instanceGroupManagerName": "A String", # Output only. The name of the Instance Group Manager for this group. + "instanceTemplateName": "A String", # Output only. The name of the Instance Template used for the Managed Instance Group. + }, + "minCpuPlatform": "A String", # Optional. Specifies the minimum cpu platform for the Instance Group. See Dataproc -> Minimum CPU Platform (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). + "numInstances": 42, # Optional. The number of VM instances in the instance group. For HA cluster master_config groups, must be set to 3. For standard cluster master_config groups, must be set to 1. + "preemptibility": "A String", # Optional. Specifies the preemptibility of the instance group.The default value for master and worker groups is NON_PREEMPTIBLE. This default cannot be changed.The default value for secondary instances is PREEMPTIBLE. + }, + "roles": [ # Required. Node group roles. + "A String", + ], + }, + "nodeGroupId": "A String", # Optional. A node group ID. Generated if not specified.The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of from 3 to 33 characters. 
+ }, + ], "configBucket": "A String", # Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. "dataprocMetricConfig": { # Dataproc metric config. # Optional. The config for Dataproc metrics. "metrics": [ # Required. Metrics sources to enable. @@ -649,7 +752,7 @@

Method Details

"a_key": "A String", }, "networkUri": "A String", # Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither network_uri nor subnetwork_uri is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see Using Subnetworks (https://cloud.google.com/compute/docs/subnetworks) for more information).A full URL, partial URI, or short name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default projects/[project_id]/regions/global/default default - "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. # Optional. Node Group Affinity for sole-tenant clusters. + "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. The Dataproc NodeGroupAffinity resource is not related to the Dataproc NodeGroup resource. # Optional. Node Group Affinity for sole-tenant clusters. "nodeGroupUri": "A String", # Required. The URI of a sole-tenant node group resource (https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on.A full URL, partial URI, or node group name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 node-group-1 }, "privateIpv6GoogleAccess": "A String", # Optional. The type of IPv6 access for a cluster. @@ -1074,6 +1177,55 @@

Method Details

"autoscalingConfig": { # Autoscaling Policy config associated with the cluster. # Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset. "policyUri": "A String", # Optional. The autoscaling policy used by the cluster.Only resource names including projectid and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]Note that the policy must be in the same project and Dataproc region. }, + "auxiliaryNodeGroups": [ # Optional. The node group settings. + { # Node group identification and configuration information. + "nodeGroup": { # Dataproc Node Group. The Dataproc NodeGroup resource is not related to the Dataproc NodeGroupAffinity resource. # Required. Node group configuration. + "labels": { # Optional. Node group labels. Label keys must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values can be empty. If specified, they must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). The node group must have no more than 32 labels. + "a_key": "A String", + }, + "name": "A String", # The Node group resource name (https://aip.dev/122). + "nodeGroupConfig": { # The config settings for Compute Engine resources in an instance group, such as a master or worker group. # Optional. The node group instance group configuration. + "accelerators": [ # Optional. The Compute Engine accelerator configuration for these instances. + { # Specifies the type and number of accelerator cards attached to the instances of an instance. See GPUs on Compute Engine (https://cloud.google.com/compute/docs/gpus/). + "acceleratorCount": 42, # The number of the accelerator cards of this type exposed to this instance. 
+ "acceleratorTypeUri": "A String", # Full URL, partial URI, or short name of the accelerator type resource to expose to this instance. See Compute Engine AcceleratorTypes (https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).Examples: https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 nvidia-tesla-k80Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, nvidia-tesla-k80. + }, + ], + "diskConfig": { # Specifies the config of disk options for a group of VM instances. # Optional. Disk option config settings. + "bootDiskSizeGb": 42, # Optional. Size in GB of the boot disk (default is 500GB). + "bootDiskType": "A String", # Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive), "pd-ssd" (Persistent Disk Solid State Drive), or "pd-standard" (Persistent Disk Hard Disk Drive). See Disk types (https://cloud.google.com/compute/docs/disks#disk-types). + "localSsdInterface": "A String", # Optional. Interface type of local SSDs (default is "scsi"). Valid values: "scsi" (Small Computer System Interface), "nvme" (Non-Volatile Memory Express). See local SSD performance (https://cloud.google.com/compute/docs/disks/local-ssd#performance). + "numLocalSsds": 42, # Optional. Number of attached SSDs, from 0 to 8 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and HDFS (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. 
If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries.Note: Local SSD options may vary by machine type and number of vCPUs selected. + }, + "imageUri": "A String", # Optional. The Compute Engine image resource used for cluster instances.The URI can represent an image or image family.Image examples: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id] projects/[project_id]/global/images/[image-id] image-idImage family examples. Dataproc will use the most recent image from the family: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name] projects/[project_id]/global/images/family/[custom-image-family-name]If the URI is unspecified, it will be inferred from SoftwareConfig.image_version or the system default. + "instanceNames": [ # Output only. The list of instance names. Dataproc derives the names from cluster_name, num_instances, and the instance group. + "A String", + ], + "instanceReferences": [ # Output only. List of references to Compute Engine instances. + { # A reference to a Compute Engine instance. + "instanceId": "A String", # The unique identifier of the Compute Engine instance. + "instanceName": "A String", # The user-friendly name of the Compute Engine instance. + "publicEciesKey": "A String", # The public ECIES key used for sharing data with this instance. + "publicKey": "A String", # The public RSA key used for sharing data with this instance. + }, + ], + "isPreemptible": True or False, # Output only. Specifies that this instance group contains preemptible instances. + "machineTypeUri": "A String", # Optional. The Compute Engine machine type used for cluster instances.A full URL, partial URI, or short name are valid. 
Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 n1-standard-2Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the machine type resource, for example, n1-standard-2. + "managedGroupConfig": { # Specifies the resources used to actively manage an instance group. # Output only. The config for Compute Engine Instance Group Manager that manages this group. This is only used for preemptible instance groups. + "instanceGroupManagerName": "A String", # Output only. The name of the Instance Group Manager for this group. + "instanceTemplateName": "A String", # Output only. The name of the Instance Template used for the Managed Instance Group. + }, + "minCpuPlatform": "A String", # Optional. Specifies the minimum cpu platform for the Instance Group. See Dataproc -> Minimum CPU Platform (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). + "numInstances": 42, # Optional. The number of VM instances in the instance group. For HA cluster master_config groups, must be set to 3. For standard cluster master_config groups, must be set to 1. + "preemptibility": "A String", # Optional. Specifies the preemptibility of the instance group.The default value for master and worker groups is NON_PREEMPTIBLE. This default cannot be changed.The default value for secondary instances is PREEMPTIBLE. + }, + "roles": [ # Required. Node group roles. + "A String", + ], + }, + "nodeGroupId": "A String", # Optional. A node group ID. Generated if not specified.The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of from 3 to 33 characters. 
+ }, + ], "configBucket": "A String", # Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. "dataprocMetricConfig": { # Dataproc metric config. # Optional. The config for Dataproc metrics. "metrics": [ # Required. Metrics sources to enable. @@ -1103,7 +1255,7 @@

Method Details

"a_key": "A String", }, "networkUri": "A String", # Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither network_uri nor subnetwork_uri is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see Using Subnetworks (https://cloud.google.com/compute/docs/subnetworks) for more information).A full URL, partial URI, or short name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default projects/[project_id]/regions/global/default default - "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. # Optional. Node Group Affinity for sole-tenant clusters. + "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. The Dataproc NodeGroupAffinity resource is not related to the Dataproc NodeGroup resource. # Optional. Node Group Affinity for sole-tenant clusters. "nodeGroupUri": "A String", # Required. The URI of a sole-tenant node group resource (https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on.A full URL, partial URI, or node group name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 node-group-1 }, "privateIpv6GoogleAccess": "A String", # Optional. The type of IPv6 access for a cluster. @@ -1448,6 +1600,55 @@

Method Details

"autoscalingConfig": { # Autoscaling Policy config associated with the cluster. # Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset. "policyUri": "A String", # Optional. The autoscaling policy used by the cluster.Only resource names including projectid and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]Note that the policy must be in the same project and Dataproc region. }, + "auxiliaryNodeGroups": [ # Optional. The node group settings. + { # Node group identification and configuration information. + "nodeGroup": { # Dataproc Node Group. The Dataproc NodeGroup resource is not related to the Dataproc NodeGroupAffinity resource. # Required. Node group configuration. + "labels": { # Optional. Node group labels. Label keys must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values can be empty. If specified, they must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). The node group must have no more than 32 labels. + "a_key": "A String", + }, + "name": "A String", # The Node group resource name (https://aip.dev/122). + "nodeGroupConfig": { # The config settings for Compute Engine resources in an instance group, such as a master or worker group. # Optional. The node group instance group configuration. + "accelerators": [ # Optional. The Compute Engine accelerator configuration for these instances. + { # Specifies the type and number of accelerator cards attached to the instances of an instance. See GPUs on Compute Engine (https://cloud.google.com/compute/docs/gpus/). + "acceleratorCount": 42, # The number of the accelerator cards of this type exposed to this instance. 
+ "acceleratorTypeUri": "A String", # Full URL, partial URI, or short name of the accelerator type resource to expose to this instance. See Compute Engine AcceleratorTypes (https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).Examples: https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 nvidia-tesla-k80Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, nvidia-tesla-k80. + }, + ], + "diskConfig": { # Specifies the config of disk options for a group of VM instances. # Optional. Disk option config settings. + "bootDiskSizeGb": 42, # Optional. Size in GB of the boot disk (default is 500GB). + "bootDiskType": "A String", # Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive), "pd-ssd" (Persistent Disk Solid State Drive), or "pd-standard" (Persistent Disk Hard Disk Drive). See Disk types (https://cloud.google.com/compute/docs/disks#disk-types). + "localSsdInterface": "A String", # Optional. Interface type of local SSDs (default is "scsi"). Valid values: "scsi" (Small Computer System Interface), "nvme" (Non-Volatile Memory Express). See local SSD performance (https://cloud.google.com/compute/docs/disks/local-ssd#performance). + "numLocalSsds": 42, # Optional. Number of attached SSDs, from 0 to 8 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and HDFS (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. 
If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries.Note: Local SSD options may vary by machine type and number of vCPUs selected. + }, + "imageUri": "A String", # Optional. The Compute Engine image resource used for cluster instances.The URI can represent an image or image family.Image examples: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id] projects/[project_id]/global/images/[image-id] image-idImage family examples. Dataproc will use the most recent image from the family: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name] projects/[project_id]/global/images/family/[custom-image-family-name]If the URI is unspecified, it will be inferred from SoftwareConfig.image_version or the system default. + "instanceNames": [ # Output only. The list of instance names. Dataproc derives the names from cluster_name, num_instances, and the instance group. + "A String", + ], + "instanceReferences": [ # Output only. List of references to Compute Engine instances. + { # A reference to a Compute Engine instance. + "instanceId": "A String", # The unique identifier of the Compute Engine instance. + "instanceName": "A String", # The user-friendly name of the Compute Engine instance. + "publicEciesKey": "A String", # The public ECIES key used for sharing data with this instance. + "publicKey": "A String", # The public RSA key used for sharing data with this instance. + }, + ], + "isPreemptible": True or False, # Output only. Specifies that this instance group contains preemptible instances. + "machineTypeUri": "A String", # Optional. The Compute Engine machine type used for cluster instances.A full URL, partial URI, or short name are valid. 
Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 n1-standard-2Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the machine type resource, for example, n1-standard-2. + "managedGroupConfig": { # Specifies the resources used to actively manage an instance group. # Output only. The config for Compute Engine Instance Group Manager that manages this group. This is only used for preemptible instance groups. + "instanceGroupManagerName": "A String", # Output only. The name of the Instance Group Manager for this group. + "instanceTemplateName": "A String", # Output only. The name of the Instance Template used for the Managed Instance Group. + }, + "minCpuPlatform": "A String", # Optional. Specifies the minimum cpu platform for the Instance Group. See Dataproc -> Minimum CPU Platform (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). + "numInstances": 42, # Optional. The number of VM instances in the instance group. For HA cluster master_config groups, must be set to 3. For standard cluster master_config groups, must be set to 1. + "preemptibility": "A String", # Optional. Specifies the preemptibility of the instance group.The default value for master and worker groups is NON_PREEMPTIBLE. This default cannot be changed.The default value for secondary instances is PREEMPTIBLE. + }, + "roles": [ # Required. Node group roles. + "A String", + ], + }, + "nodeGroupId": "A String", # Optional. A node group ID. Generated if not specified.The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of from 3 to 33 characters. 
+ }, + ], "configBucket": "A String", # Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. "dataprocMetricConfig": { # Dataproc metric config. # Optional. The config for Dataproc metrics. "metrics": [ # Required. Metrics sources to enable. @@ -1477,7 +1678,7 @@

Method Details

"a_key": "A String", }, "networkUri": "A String", # Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither network_uri nor subnetwork_uri is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see Using Subnetworks (https://cloud.google.com/compute/docs/subnetworks) for more information).A full URL, partial URI, or short name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default projects/[project_id]/regions/global/default default - "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. # Optional. Node Group Affinity for sole-tenant clusters. + "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. The Dataproc NodeGroupAffinity resource is not related to the Dataproc NodeGroup resource. # Optional. Node Group Affinity for sole-tenant clusters. "nodeGroupUri": "A String", # Required. The URI of a sole-tenant node group resource (https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on.A full URL, partial URI, or node group name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 node-group-1 }, "privateIpv6GoogleAccess": "A String", # Optional. The type of IPv6 access for a cluster. diff --git a/docs/dyn/dataproc_v1.projects.regions.clusters.nodeGroups.html b/docs/dyn/dataproc_v1.projects.regions.clusters.nodeGroups.html new file mode 100644 index 00000000000..14760bee204 --- /dev/null +++ b/docs/dyn/dataproc_v1.projects.regions.clusters.nodeGroups.html @@ -0,0 +1,196 @@ + + + +

Cloud Dataproc API . projects . regions . clusters . nodeGroups

+

Instance Methods

+

+ close()

+

Close httplib2 connections.

+

+ get(name, x__xgafv=None)

+

Gets the resource representation for a node group in a cluster.

+

+ resize(name, body=None, x__xgafv=None)

+

Resizes a node group in a cluster. The returned Operation.metadata is NodeGroupOperationMetadata (https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#nodegroupoperationmetadata).

+

Method Details

+
+ close() +
Close httplib2 connections.
+
+ +
+ get(name, x__xgafv=None) +
Gets the resource representation for a node group in a cluster.
+
+Args:
+  name: string, Required. The name of the node group to retrieve. Format: projects/{project}/regions/{region}/clusters/{cluster}/nodeGroups/{nodeGroup} (required)
+  x__xgafv: string, V1 error format.
+    Allowed values
+      1 - v1 error format
+      2 - v2 error format
+
+Returns:
+  An object of the form:
+
+    { # Dataproc Node Group. The Dataproc NodeGroup resource is not related to the Dataproc NodeGroupAffinity resource.
+  "labels": { # Optional. Node group labels. Label keys must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values can be empty. If specified, they must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). The node group must have no more than 32 labels.
+    "a_key": "A String",
+  },
+  "name": "A String", # The Node group resource name (https://aip.dev/122).
+  "nodeGroupConfig": { # The config settings for Compute Engine resources in an instance group, such as a master or worker group. # Optional. The node group instance group configuration.
+    "accelerators": [ # Optional. The Compute Engine accelerator configuration for these instances.
+      { # Specifies the type and number of accelerator cards attached to the instances of an instance. See GPUs on Compute Engine (https://cloud.google.com/compute/docs/gpus/).
+        "acceleratorCount": 42, # The number of the accelerator cards of this type exposed to this instance.
+        "acceleratorTypeUri": "A String", # Full URL, partial URI, or short name of the accelerator type resource to expose to this instance. See Compute Engine AcceleratorTypes (https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).Examples: https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 nvidia-tesla-k80Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, nvidia-tesla-k80.
+      },
+    ],
+    "diskConfig": { # Specifies the config of disk options for a group of VM instances. # Optional. Disk option config settings.
+      "bootDiskSizeGb": 42, # Optional. Size in GB of the boot disk (default is 500GB).
+      "bootDiskType": "A String", # Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive), "pd-ssd" (Persistent Disk Solid State Drive), or "pd-standard" (Persistent Disk Hard Disk Drive). See Disk types (https://cloud.google.com/compute/docs/disks#disk-types).
+      "localSsdInterface": "A String", # Optional. Interface type of local SSDs (default is "scsi"). Valid values: "scsi" (Small Computer System Interface), "nvme" (Non-Volatile Memory Express). See local SSD performance (https://cloud.google.com/compute/docs/disks/local-ssd#performance).
+      "numLocalSsds": 42, # Optional. Number of attached SSDs, from 0 to 8 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and HDFS (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries. Note: Local SSD options may vary by machine type and number of vCPUs selected.
+    },
+    "imageUri": "A String", # Optional. The Compute Engine image resource used for cluster instances.The URI can represent an image or image family.Image examples: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id] projects/[project_id]/global/images/[image-id] image-idImage family examples. Dataproc will use the most recent image from the family: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name] projects/[project_id]/global/images/family/[custom-image-family-name]If the URI is unspecified, it will be inferred from SoftwareConfig.image_version or the system default.
+    "instanceNames": [ # Output only. The list of instance names. Dataproc derives the names from cluster_name, num_instances, and the instance group.
+      "A String",
+    ],
+    "instanceReferences": [ # Output only. List of references to Compute Engine instances.
+      { # A reference to a Compute Engine instance.
+        "instanceId": "A String", # The unique identifier of the Compute Engine instance.
+        "instanceName": "A String", # The user-friendly name of the Compute Engine instance.
+        "publicEciesKey": "A String", # The public ECIES key used for sharing data with this instance.
+        "publicKey": "A String", # The public RSA key used for sharing data with this instance.
+      },
+    ],
+    "isPreemptible": True or False, # Output only. Specifies that this instance group contains preemptible instances.
+    "machineTypeUri": "A String", # Optional. The Compute Engine machine type used for cluster instances.A full URL, partial URI, or short name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 n1-standard-2Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the machine type resource, for example, n1-standard-2.
+    "managedGroupConfig": { # Specifies the resources used to actively manage an instance group. # Output only. The config for Compute Engine Instance Group Manager that manages this group. This is only used for preemptible instance groups.
+      "instanceGroupManagerName": "A String", # Output only. The name of the Instance Group Manager for this group.
+      "instanceTemplateName": "A String", # Output only. The name of the Instance Template used for the Managed Instance Group.
+    },
+    "minCpuPlatform": "A String", # Optional. Specifies the minimum cpu platform for the Instance Group. See Dataproc -> Minimum CPU Platform (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
+    "numInstances": 42, # Optional. The number of VM instances in the instance group. For HA cluster master_config groups, must be set to 3. For standard cluster master_config groups, must be set to 1.
+    "preemptibility": "A String", # Optional. Specifies the preemptibility of the instance group. The default value for master and worker groups is NON_PREEMPTIBLE. This default cannot be changed. The default value for secondary instances is PREEMPTIBLE.
+  },
+  "roles": [ # Required. Node group roles.
+    "A String",
+  ],
+}
+
+ +
+ resize(name, body=None, x__xgafv=None) +
Resizes a node group in a cluster. The returned Operation.metadata is NodeGroupOperationMetadata (https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#nodegroupoperationmetadata).
+
+Args:
+  name: string, Required. The name of the node group to resize. Format: projects/{project}/regions/{region}/clusters/{cluster}/nodeGroups/{nodeGroup} (required)
+  body: object, The request body.
+    The object takes the form of:
+
+{ # A request to resize a node group.
+  "gracefulDecommissionTimeout": "A String", # Optional. Timeout for graceful YARN decommissioning. Graceful decommissioning (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/scaling-clusters#graceful_decommissioning) allows the removal of nodes from the Compute Engine node group without interrupting jobs in progress. This timeout specifies how long to wait for jobs in progress to finish before forcefully removing nodes (and potentially interrupting jobs). Default timeout is 0 (for forceful decommission), and the maximum allowed timeout is 1 day (see JSON representation of Duration (https://developers.google.com/protocol-buffers/docs/proto3#json)). Only supported on Dataproc image versions 1.2 and higher.
+  "requestId": "A String", # Optional. A unique ID used to identify the request. If the server receives two ResizeNodeGroupRequests (https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.ResizeNodeGroupRequests) with the same ID, the second request is ignored and the first google.longrunning.Operation created and stored in the backend is returned. Recommendation: Set this value to a UUID (https://en.wikipedia.org/wiki/Universally_unique_identifier). The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). The maximum length is 40 characters.
+  "size": 42, # Required. The number of running instances for the node group to maintain. The group adds or removes instances to maintain the number of instances specified by this parameter.
+}
+
+  x__xgafv: string, V1 error format.
+    Allowed values
+      1 - v1 error format
+      2 - v2 error format
+
+Returns:
+  An object of the form:
+
+    { # This resource represents a long-running operation that is the result of a network API call.
+  "done": True or False, # If the value is false, it means the operation is still in progress. If true, the operation is completed, and either error or response is available.
+  "error": { # The Status type defines a logical error model that is suitable for different programming environments, including REST APIs and RPC APIs. It is used by gRPC (https://github.com/grpc). Each Status message contains three pieces of data: error code, error message, and error details.You can find out more about this error model and how to work with it in the API Design Guide (https://cloud.google.com/apis/design/errors). # The error result of the operation in case of failure or cancellation.
+    "code": 42, # The status code, which should be an enum value of google.rpc.Code.
+    "details": [ # A list of messages that carry the error details. There is a common set of message types for APIs to use.
+      {
+        "a_key": "", # Properties of the object. Contains field @type with type URL.
+      },
+    ],
+    "message": "A String", # A developer-facing error message, which should be in English. Any user-facing error message should be localized and sent in the google.rpc.Status.details field, or localized by the client.
+  },
+  "metadata": { # Service-specific metadata associated with the operation. It typically contains progress information and common metadata such as create time. Some services might not provide such metadata. Any method that returns a long-running operation should document the metadata type, if any.
+    "a_key": "", # Properties of the object. Contains field @type with type URL.
+  },
+  "name": "A String", # The server-assigned name, which is only unique within the same service that originally returns it. If you use the default HTTP mapping, the name should be a resource name ending with operations/{unique_id}.
+  "response": { # The normal response of the operation in case of success. If the original method returns no data on success, such as Delete, the response is google.protobuf.Empty. If the original method is standard Get/Create/Update, the response should be the resource. For other methods, the response should have the type XxxResponse, where Xxx is the original method name. For example, if the original method name is TakeSnapshot(), the inferred response type is TakeSnapshotResponse.
+    "a_key": "", # Properties of the object. Contains field @type with type URL.
+  },
+}
+
+ + \ No newline at end of file diff --git a/docs/dyn/dataproc_v1.projects.regions.jobs.html b/docs/dyn/dataproc_v1.projects.regions.jobs.html index 83e380dc144..d351f87f02c 100644 --- a/docs/dyn/dataproc_v1.projects.regions.jobs.html +++ b/docs/dyn/dataproc_v1.projects.regions.jobs.html @@ -137,6 +137,10 @@

Method Details

"done": True or False, # Output only. Indicates whether the job is completed. If the value is false, the job is still in progress. If true, the job is completed, and status.state field will indicate if it was successful, failed, or cancelled. "driverControlFilesUri": "A String", # Output only. If present, the location of miscellaneous control files which may be used as part of job setup and handling. If not present, control files may be placed in the same location as driver_output_uri. "driverOutputResourceUri": "A String", # Output only. A URI pointing to the location of the stdout of the job's driver program. + "driverSchedulingConfig": { # Driver scheduling configuration. # Optional. Driver scheduling configuration. + "memoryMb": 42, # Required. The amount of memory in MB the driver is requesting. + "vcores": 42, # Required. The number of vCPUs the driver is requesting. + }, "hadoopJob": { # A Dataproc job for running Apache Hadoop MapReduce (https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html) jobs on Apache Hadoop YARN (https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html). # Optional. Job is a Hadoop job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip. "A String", @@ -265,8 +269,8 @@

Method Details

"projectId": "A String", # Optional. The ID of the Google Cloud Platform project that the job belongs to. If specified, must match the request project ID. }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window.Maximum value is 10.Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. 
After the maximum number is reached, the job will be reported as failed.Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). }, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -425,6 +429,10 @@

Method Details

"done": True or False, # Output only. Indicates whether the job is completed. If the value is false, the job is still in progress. If true, the job is completed, and status.state field will indicate if it was successful, failed, or cancelled. "driverControlFilesUri": "A String", # Output only. If present, the location of miscellaneous control files which may be used as part of job setup and handling. If not present, control files may be placed in the same location as driver_output_uri. "driverOutputResourceUri": "A String", # Output only. A URI pointing to the location of the stdout of the job's driver program. + "driverSchedulingConfig": { # Driver scheduling configuration. # Optional. Driver scheduling configuration. + "memoryMb": 42, # Required. The amount of memory in MB the driver is requesting. + "vcores": 42, # Required. The number of vCPUs the driver is requesting. + }, "hadoopJob": { # A Dataproc job for running Apache Hadoop MapReduce (https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html) jobs on Apache Hadoop YARN (https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html). # Optional. Job is a Hadoop job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip. "A String", @@ -553,8 +561,8 @@

Method Details

"projectId": "A String", # Optional. The ID of the Google Cloud Platform project that the job belongs to. If specified, must match the request project ID. }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window.Maximum value is 10.Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. 
After the maximum number is reached, the job will be reported as failed.Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). }, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -741,6 +749,10 @@

Method Details

"done": True or False, # Output only. Indicates whether the job is completed. If the value is false, the job is still in progress. If true, the job is completed, and status.state field will indicate if it was successful, failed, or cancelled. "driverControlFilesUri": "A String", # Output only. If present, the location of miscellaneous control files which may be used as part of job setup and handling. If not present, control files may be placed in the same location as driver_output_uri. "driverOutputResourceUri": "A String", # Output only. A URI pointing to the location of the stdout of the job's driver program. + "driverSchedulingConfig": { # Driver scheduling configuration. # Optional. Driver scheduling configuration. + "memoryMb": 42, # Required. The amount of memory in MB the driver is requesting. + "vcores": 42, # Required. The number of vCPUs the driver is requesting. + }, "hadoopJob": { # A Dataproc job for running Apache Hadoop MapReduce (https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html) jobs on Apache Hadoop YARN (https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html). # Optional. Job is a Hadoop job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip. "A String", @@ -869,8 +881,8 @@

Method Details

"projectId": "A String", # Optional. The ID of the Google Cloud Platform project that the job belongs to. If specified, must match the request project ID. }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window.Maximum value is 10.Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. 
After the maximum number is reached, the job will be reported as failed.Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). }, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -1016,6 +1028,10 @@

Method Details

"done": True or False, # Output only. Indicates whether the job is completed. If the value is false, the job is still in progress. If true, the job is completed, and status.state field will indicate if it was successful, failed, or cancelled. "driverControlFilesUri": "A String", # Output only. If present, the location of miscellaneous control files which may be used as part of job setup and handling. If not present, control files may be placed in the same location as driver_output_uri. "driverOutputResourceUri": "A String", # Output only. A URI pointing to the location of the stdout of the job's driver program. + "driverSchedulingConfig": { # Driver scheduling configuration. # Optional. Driver scheduling configuration. + "memoryMb": 42, # Required. The amount of memory in MB the driver is requesting. + "vcores": 42, # Required. The number of vCPUs the driver is requesting. + }, "hadoopJob": { # A Dataproc job for running Apache Hadoop MapReduce (https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html) jobs on Apache Hadoop YARN (https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html). # Optional. Job is a Hadoop job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip. "A String", @@ -1144,8 +1160,8 @@

Method Details

"projectId": "A String", # Optional. The ID of the Google Cloud Platform project that the job belongs to. If specified, must match the request project ID. }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window.Maximum value is 10.Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. 
After the maximum number is reached, the job will be reported as failed. Maximum value is 240. Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). }, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -1271,6 +1287,10 @@

Method Details

"done": True or False, # Output only. Indicates whether the job is completed. If the value is false, the job is still in progress. If true, the job is completed, and status.state field will indicate if it was successful, failed, or cancelled. "driverControlFilesUri": "A String", # Output only. If present, the location of miscellaneous control files which may be used as part of job setup and handling. If not present, control files may be placed in the same location as driver_output_uri. "driverOutputResourceUri": "A String", # Output only. A URI pointing to the location of the stdout of the job's driver program. + "driverSchedulingConfig": { # Driver scheduling configuration. # Optional. Driver scheduling configuration. + "memoryMb": 42, # Required. The amount of memory in MB the driver is requesting. + "vcores": 42, # Required. The number of vCPUs the driver is requesting. + }, "hadoopJob": { # A Dataproc job for running Apache Hadoop MapReduce (https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html) jobs on Apache Hadoop YARN (https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html). # Optional. Job is a Hadoop job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip. "A String", @@ -1399,8 +1419,8 @@

Method Details

"projectId": "A String", # Optional. The ID of the Google Cloud Platform project that the job belongs to. If specified, must match the request project ID. }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window.Maximum value is 10.Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. 
After the maximum number is reached, the job will be reported as failed. Maximum value is 240. Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). }, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -1587,6 +1607,10 @@

Method Details

"done": True or False, # Output only. Indicates whether the job is completed. If the value is false, the job is still in progress. If true, the job is completed, and status.state field will indicate if it was successful, failed, or cancelled. "driverControlFilesUri": "A String", # Output only. If present, the location of miscellaneous control files which may be used as part of job setup and handling. If not present, control files may be placed in the same location as driver_output_uri. "driverOutputResourceUri": "A String", # Output only. A URI pointing to the location of the stdout of the job's driver program. + "driverSchedulingConfig": { # Driver scheduling configuration. # Optional. Driver scheduling configuration. + "memoryMb": 42, # Required. The amount of memory in MB the driver is requesting. + "vcores": 42, # Required. The number of vCPUs the driver is requesting. + }, "hadoopJob": { # A Dataproc job for running Apache Hadoop MapReduce (https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html) jobs on Apache Hadoop YARN (https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html). # Optional. Job is a Hadoop job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip. "A String", @@ -1715,8 +1739,8 @@

Method Details

"projectId": "A String", # Optional. The ID of the Google Cloud Platform project that the job belongs to. If specified, must match the request project ID. }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window.Maximum value is 10.Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. 
After the maximum number is reached, the job will be reported as failed. Maximum value is 240. Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). }, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -1843,6 +1867,10 @@

Method Details

"done": True or False, # Output only. Indicates whether the job is completed. If the value is false, the job is still in progress. If true, the job is completed, and status.state field will indicate if it was successful, failed, or cancelled. "driverControlFilesUri": "A String", # Output only. If present, the location of miscellaneous control files which may be used as part of job setup and handling. If not present, control files may be placed in the same location as driver_output_uri. "driverOutputResourceUri": "A String", # Output only. A URI pointing to the location of the stdout of the job's driver program. + "driverSchedulingConfig": { # Driver scheduling configuration. # Optional. Driver scheduling configuration. + "memoryMb": 42, # Required. The amount of memory in MB the driver is requesting. + "vcores": 42, # Required. The number of vCPUs the driver is requesting. + }, "hadoopJob": { # A Dataproc job for running Apache Hadoop MapReduce (https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html) jobs on Apache Hadoop YARN (https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html). # Optional. Job is a Hadoop job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip. "A String", @@ -1971,8 +1999,8 @@

Method Details

"projectId": "A String", # Optional. The ID of the Google Cloud Platform project that the job belongs to. If specified, must match the request project ID. }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window.Maximum value is 10.Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. 
After the maximum number is reached, the job will be reported as failed. Maximum value is 240. Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). }, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -2101,6 +2129,10 @@

Method Details

"done": True or False, # Output only. Indicates whether the job is completed. If the value is false, the job is still in progress. If true, the job is completed, and status.state field will indicate if it was successful, failed, or cancelled. "driverControlFilesUri": "A String", # Output only. If present, the location of miscellaneous control files which may be used as part of job setup and handling. If not present, control files may be placed in the same location as driver_output_uri. "driverOutputResourceUri": "A String", # Output only. A URI pointing to the location of the stdout of the job's driver program. + "driverSchedulingConfig": { # Driver scheduling configuration. # Optional. Driver scheduling configuration. + "memoryMb": 42, # Required. The amount of memory in MB the driver is requesting. + "vcores": 42, # Required. The number of vCPUs the driver is requesting. + }, "hadoopJob": { # A Dataproc job for running Apache Hadoop MapReduce (https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html) jobs on Apache Hadoop YARN (https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html). # Optional. Job is a Hadoop job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip. "A String", @@ -2229,8 +2261,8 @@

Method Details

"projectId": "A String", # Optional. The ID of the Google Cloud Platform project that the job belongs to. If specified, must match the request project ID. }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window.Maximum value is 10.Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. 
After the maximum number is reached, the job will be reported as failed. Maximum value is 240. Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). }, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. diff --git a/docs/dyn/dataproc_v1.projects.regions.workflowTemplates.html b/docs/dyn/dataproc_v1.projects.regions.workflowTemplates.html index afddae02c3b..163712037a5 100644 --- a/docs/dyn/dataproc_v1.projects.regions.workflowTemplates.html +++ b/docs/dyn/dataproc_v1.projects.regions.workflowTemplates.html @@ -250,8 +250,8 @@

Method Details

], }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window.Maximum value is 10.Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. After the maximum number is reached, the job will be reported as failed.Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). 
}, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -381,6 +381,55 @@

Method Details

"autoscalingConfig": { # Autoscaling Policy config associated with the cluster. # Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset. "policyUri": "A String", # Optional. The autoscaling policy used by the cluster.Only resource names including projectid and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]Note that the policy must be in the same project and Dataproc region. }, + "auxiliaryNodeGroups": [ # Optional. The node group settings. + { # Node group identification and configuration information. + "nodeGroup": { # Dataproc Node Group. The Dataproc NodeGroup resource is not related to the Dataproc NodeGroupAffinity resource. # Required. Node group configuration. + "labels": { # Optional. Node group labels. Label keys must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values can be empty. If specified, they must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). The node group must have no more than 32 labels. + "a_key": "A String", + }, + "name": "A String", # The Node group resource name (https://aip.dev/122). + "nodeGroupConfig": { # The config settings for Compute Engine resources in an instance group, such as a master or worker group. # Optional. The node group instance group configuration. + "accelerators": [ # Optional. The Compute Engine accelerator configuration for these instances. + { # Specifies the type and number of accelerator cards attached to the instances of an instance. See GPUs on Compute Engine (https://cloud.google.com/compute/docs/gpus/). + "acceleratorCount": 42, # The number of the accelerator cards of this type exposed to this instance. 
+ "acceleratorTypeUri": "A String", # Full URL, partial URI, or short name of the accelerator type resource to expose to this instance. See Compute Engine AcceleratorTypes (https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).Examples: https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 nvidia-tesla-k80Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, nvidia-tesla-k80. + }, + ], + "diskConfig": { # Specifies the config of disk options for a group of VM instances. # Optional. Disk option config settings. + "bootDiskSizeGb": 42, # Optional. Size in GB of the boot disk (default is 500GB). + "bootDiskType": "A String", # Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive), "pd-ssd" (Persistent Disk Solid State Drive), or "pd-standard" (Persistent Disk Hard Disk Drive). See Disk types (https://cloud.google.com/compute/docs/disks#disk-types). + "localSsdInterface": "A String", # Optional. Interface type of local SSDs (default is "scsi"). Valid values: "scsi" (Small Computer System Interface), "nvme" (Non-Volatile Memory Express). See local SSD performance (https://cloud.google.com/compute/docs/disks/local-ssd#performance). + "numLocalSsds": 42, # Optional. Number of attached SSDs, from 0 to 8 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and HDFS (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. 
If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries.Note: Local SSD options may vary by machine type and number of vCPUs selected. + }, + "imageUri": "A String", # Optional. The Compute Engine image resource used for cluster instances.The URI can represent an image or image family.Image examples: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id] projects/[project_id]/global/images/[image-id] image-idImage family examples. Dataproc will use the most recent image from the family: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name] projects/[project_id]/global/images/family/[custom-image-family-name]If the URI is unspecified, it will be inferred from SoftwareConfig.image_version or the system default. + "instanceNames": [ # Output only. The list of instance names. Dataproc derives the names from cluster_name, num_instances, and the instance group. + "A String", + ], + "instanceReferences": [ # Output only. List of references to Compute Engine instances. + { # A reference to a Compute Engine instance. + "instanceId": "A String", # The unique identifier of the Compute Engine instance. + "instanceName": "A String", # The user-friendly name of the Compute Engine instance. + "publicEciesKey": "A String", # The public ECIES key used for sharing data with this instance. + "publicKey": "A String", # The public RSA key used for sharing data with this instance. + }, + ], + "isPreemptible": True or False, # Output only. Specifies that this instance group contains preemptible instances. + "machineTypeUri": "A String", # Optional. The Compute Engine machine type used for cluster instances.A full URL, partial URI, or short name are valid. 
Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 n1-standard-2Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the machine type resource, for example, n1-standard-2. + "managedGroupConfig": { # Specifies the resources used to actively manage an instance group. # Output only. The config for Compute Engine Instance Group Manager that manages this group. This is only used for preemptible instance groups. + "instanceGroupManagerName": "A String", # Output only. The name of the Instance Group Manager for this group. + "instanceTemplateName": "A String", # Output only. The name of the Instance Template used for the Managed Instance Group. + }, + "minCpuPlatform": "A String", # Optional. Specifies the minimum cpu platform for the Instance Group. See Dataproc -> Minimum CPU Platform (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). + "numInstances": 42, # Optional. The number of VM instances in the instance group. For HA cluster master_config groups, must be set to 3. For standard cluster master_config groups, must be set to 1. + "preemptibility": "A String", # Optional. Specifies the preemptibility of the instance group.The default value for master and worker groups is NON_PREEMPTIBLE. This default cannot be changed.The default value for secondary instances is PREEMPTIBLE. + }, + "roles": [ # Required. Node group roles. + "A String", + ], + }, + "nodeGroupId": "A String", # Optional. A node group ID. Generated if not specified.The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of from 3 to 33 characters. 
+ }, + ], "configBucket": "A String", # Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. "dataprocMetricConfig": { # Dataproc metric config. # Optional. The config for Dataproc metrics. "metrics": [ # Required. Metrics sources to enable. @@ -410,7 +459,7 @@

Method Details

"a_key": "A String", }, "networkUri": "A String", # Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither network_uri nor subnetwork_uri is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see Using Subnetworks (https://cloud.google.com/compute/docs/subnetworks) for more information).A full URL, partial URI, or short name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default projects/[project_id]/regions/global/default default - "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. # Optional. Node Group Affinity for sole-tenant clusters. + "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. The Dataproc NodeGroupAffinity resource is not related to the Dataproc NodeGroup resource. # Optional. Node Group Affinity for sole-tenant clusters. "nodeGroupUri": "A String", # Required. The URI of a sole-tenant node group resource (https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on.A full URL, partial URI, or node group name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 node-group-1 }, "privateIpv6GoogleAccess": "A String", # Optional. The type of IPv6 access for a cluster. @@ -772,8 +821,8 @@

Method Details

], }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window.Maximum value is 10.Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. After the maximum number is reached, the job will be reported as failed.Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). 
}, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -903,6 +952,55 @@

Method Details

"autoscalingConfig": { # Autoscaling Policy config associated with the cluster. # Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset. "policyUri": "A String", # Optional. The autoscaling policy used by the cluster.Only resource names including projectid and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]Note that the policy must be in the same project and Dataproc region. }, + "auxiliaryNodeGroups": [ # Optional. The node group settings. + { # Node group identification and configuration information. + "nodeGroup": { # Dataproc Node Group. The Dataproc NodeGroup resource is not related to the Dataproc NodeGroupAffinity resource. # Required. Node group configuration. + "labels": { # Optional. Node group labels. Label keys must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values can be empty. If specified, they must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). The node group must have no more than 32 labelsn. + "a_key": "A String", + }, + "name": "A String", # The Node group resource name (https://aip.dev/122). + "nodeGroupConfig": { # The config settings for Compute Engine resources in an instance group, such as a master or worker group. # Optional. The node group instance group configuration. + "accelerators": [ # Optional. The Compute Engine accelerator configuration for these instances. + { # Specifies the type and number of accelerator cards attached to the instances of an instance. See GPUs on Compute Engine (https://cloud.google.com/compute/docs/gpus/). + "acceleratorCount": 42, # The number of the accelerator cards of this type exposed to this instance. 
+ "acceleratorTypeUri": "A String", # Full URL, partial URI, or short name of the accelerator type resource to expose to this instance. See Compute Engine AcceleratorTypes (https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).Examples: https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 nvidia-tesla-k80Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, nvidia-tesla-k80. + }, + ], + "diskConfig": { # Specifies the config of disk options for a group of VM instances. # Optional. Disk option config settings. + "bootDiskSizeGb": 42, # Optional. Size in GB of the boot disk (default is 500GB). + "bootDiskType": "A String", # Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive), "pd-ssd" (Persistent Disk Solid State Drive), or "pd-standard" (Persistent Disk Hard Disk Drive). See Disk types (https://cloud.google.com/compute/docs/disks#disk-types). + "localSsdInterface": "A String", # Optional. Interface type of local SSDs (default is "scsi"). Valid values: "scsi" (Small Computer System Interface), "nvme" (Non-Volatile Memory Express). See local SSD performance (https://cloud.google.com/compute/docs/disks/local-ssd#performance). + "numLocalSsds": 42, # Optional. Number of attached SSDs, from 0 to 8 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and HDFS (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. 
If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries.Note: Local SSD options may vary by machine type and number of vCPUs selected. + }, + "imageUri": "A String", # Optional. The Compute Engine image resource used for cluster instances.The URI can represent an image or image family.Image examples: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id] projects/[project_id]/global/images/[image-id] image-idImage family examples. Dataproc will use the most recent image from the family: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name] projects/[project_id]/global/images/family/[custom-image-family-name]If the URI is unspecified, it will be inferred from SoftwareConfig.image_version or the system default. + "instanceNames": [ # Output only. The list of instance names. Dataproc derives the names from cluster_name, num_instances, and the instance group. + "A String", + ], + "instanceReferences": [ # Output only. List of references to Compute Engine instances. + { # A reference to a Compute Engine instance. + "instanceId": "A String", # The unique identifier of the Compute Engine instance. + "instanceName": "A String", # The user-friendly name of the Compute Engine instance. + "publicEciesKey": "A String", # The public ECIES key used for sharing data with this instance. + "publicKey": "A String", # The public RSA key used for sharing data with this instance. + }, + ], + "isPreemptible": True or False, # Output only. Specifies that this instance group contains preemptible instances. + "machineTypeUri": "A String", # Optional. The Compute Engine machine type used for cluster instances.A full URL, partial URI, or short name are valid. 
Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 n1-standard-2Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the machine type resource, for example, n1-standard-2. + "managedGroupConfig": { # Specifies the resources used to actively manage an instance group. # Output only. The config for Compute Engine Instance Group Manager that manages this group. This is only used for preemptible instance groups. + "instanceGroupManagerName": "A String", # Output only. The name of the Instance Group Manager for this group. + "instanceTemplateName": "A String", # Output only. The name of the Instance Template used for the Managed Instance Group. + }, + "minCpuPlatform": "A String", # Optional. Specifies the minimum cpu platform for the Instance Group. See Dataproc -> Minimum CPU Platform (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). + "numInstances": 42, # Optional. The number of VM instances in the instance group. For HA cluster master_config groups, must be set to 3. For standard cluster master_config groups, must be set to 1. + "preemptibility": "A String", # Optional. Specifies the preemptibility of the instance group.The default value for master and worker groups is NON_PREEMPTIBLE. This default cannot be changed.The default value for secondary instances is PREEMPTIBLE. + }, + "roles": [ # Required. Node group roles. + "A String", + ], + }, + "nodeGroupId": "A String", # Optional. A node group ID. Generated if not specified.The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of from 3 to 33 characters. 
+ }, + ], "configBucket": "A String", # Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. "dataprocMetricConfig": { # Dataproc metric config. # Optional. The config for Dataproc metrics. "metrics": [ # Required. Metrics sources to enable. @@ -932,7 +1030,7 @@

Method Details

"a_key": "A String", }, "networkUri": "A String", # Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither network_uri nor subnetwork_uri is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see Using Subnetworks (https://cloud.google.com/compute/docs/subnetworks) for more information).A full URL, partial URI, or short name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default projects/[project_id]/regions/global/default default - "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. # Optional. Node Group Affinity for sole-tenant clusters. + "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. The Dataproc NodeGroupAffinity resource is not related to the Dataproc NodeGroup resource. # Optional. Node Group Affinity for sole-tenant clusters. "nodeGroupUri": "A String", # Required. The URI of a sole-tenant node group resource (https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on.A full URL, partial URI, or node group name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 node-group-1 }, "privateIpv6GoogleAccess": "A String", # Optional. The type of IPv6 access for a cluster. @@ -1321,8 +1419,8 @@

Method Details

], }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window.Maximum value is 10.Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. After the maximum number is reached, the job will be reported as failed.Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). 
}, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -1452,6 +1550,55 @@

Method Details

"autoscalingConfig": { # Autoscaling Policy config associated with the cluster. # Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset. "policyUri": "A String", # Optional. The autoscaling policy used by the cluster.Only resource names including projectid and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]Note that the policy must be in the same project and Dataproc region. }, + "auxiliaryNodeGroups": [ # Optional. The node group settings. + { # Node group identification and configuration information. + "nodeGroup": { # Dataproc Node Group. The Dataproc NodeGroup resource is not related to the Dataproc NodeGroupAffinity resource. # Required. Node group configuration. + "labels": { # Optional. Node group labels. Label keys must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values can be empty. If specified, they must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). The node group must have no more than 32 labelsn. + "a_key": "A String", + }, + "name": "A String", # The Node group resource name (https://aip.dev/122). + "nodeGroupConfig": { # The config settings for Compute Engine resources in an instance group, such as a master or worker group. # Optional. The node group instance group configuration. + "accelerators": [ # Optional. The Compute Engine accelerator configuration for these instances. + { # Specifies the type and number of accelerator cards attached to the instances of an instance. See GPUs on Compute Engine (https://cloud.google.com/compute/docs/gpus/). + "acceleratorCount": 42, # The number of the accelerator cards of this type exposed to this instance. 
+ "acceleratorTypeUri": "A String", # Full URL, partial URI, or short name of the accelerator type resource to expose to this instance. See Compute Engine AcceleratorTypes (https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).Examples: https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 nvidia-tesla-k80Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, nvidia-tesla-k80. + }, + ], + "diskConfig": { # Specifies the config of disk options for a group of VM instances. # Optional. Disk option config settings. + "bootDiskSizeGb": 42, # Optional. Size in GB of the boot disk (default is 500GB). + "bootDiskType": "A String", # Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive), "pd-ssd" (Persistent Disk Solid State Drive), or "pd-standard" (Persistent Disk Hard Disk Drive). See Disk types (https://cloud.google.com/compute/docs/disks#disk-types). + "localSsdInterface": "A String", # Optional. Interface type of local SSDs (default is "scsi"). Valid values: "scsi" (Small Computer System Interface), "nvme" (Non-Volatile Memory Express). See local SSD performance (https://cloud.google.com/compute/docs/disks/local-ssd#performance). + "numLocalSsds": 42, # Optional. Number of attached SSDs, from 0 to 8 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and HDFS (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. 
If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries.Note: Local SSD options may vary by machine type and number of vCPUs selected. + }, + "imageUri": "A String", # Optional. The Compute Engine image resource used for cluster instances.The URI can represent an image or image family.Image examples: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id] projects/[project_id]/global/images/[image-id] image-idImage family examples. Dataproc will use the most recent image from the family: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name] projects/[project_id]/global/images/family/[custom-image-family-name]If the URI is unspecified, it will be inferred from SoftwareConfig.image_version or the system default. + "instanceNames": [ # Output only. The list of instance names. Dataproc derives the names from cluster_name, num_instances, and the instance group. + "A String", + ], + "instanceReferences": [ # Output only. List of references to Compute Engine instances. + { # A reference to a Compute Engine instance. + "instanceId": "A String", # The unique identifier of the Compute Engine instance. + "instanceName": "A String", # The user-friendly name of the Compute Engine instance. + "publicEciesKey": "A String", # The public ECIES key used for sharing data with this instance. + "publicKey": "A String", # The public RSA key used for sharing data with this instance. + }, + ], + "isPreemptible": True or False, # Output only. Specifies that this instance group contains preemptible instances. + "machineTypeUri": "A String", # Optional. The Compute Engine machine type used for cluster instances.A full URL, partial URI, or short name are valid. 
Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 n1-standard-2Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the machine type resource, for example, n1-standard-2. + "managedGroupConfig": { # Specifies the resources used to actively manage an instance group. # Output only. The config for Compute Engine Instance Group Manager that manages this group. This is only used for preemptible instance groups. + "instanceGroupManagerName": "A String", # Output only. The name of the Instance Group Manager for this group. + "instanceTemplateName": "A String", # Output only. The name of the Instance Template used for the Managed Instance Group. + }, + "minCpuPlatform": "A String", # Optional. Specifies the minimum cpu platform for the Instance Group. See Dataproc -> Minimum CPU Platform (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). + "numInstances": 42, # Optional. The number of VM instances in the instance group. For HA cluster master_config groups, must be set to 3. For standard cluster master_config groups, must be set to 1. + "preemptibility": "A String", # Optional. Specifies the preemptibility of the instance group.The default value for master and worker groups is NON_PREEMPTIBLE. This default cannot be changed.The default value for secondary instances is PREEMPTIBLE. + }, + "roles": [ # Required. Node group roles. + "A String", + ], + }, + "nodeGroupId": "A String", # Optional. A node group ID. Generated if not specified.The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of from 3 to 33 characters. 
+ }, + ], "configBucket": "A String", # Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. "dataprocMetricConfig": { # Dataproc metric config. # Optional. The config for Dataproc metrics. "metrics": [ # Required. Metrics sources to enable. @@ -1481,7 +1628,7 @@

Method Details

"a_key": "A String", }, "networkUri": "A String", # Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither network_uri nor subnetwork_uri is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see Using Subnetworks (https://cloud.google.com/compute/docs/subnetworks) for more information).A full URL, partial URI, or short name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default projects/[project_id]/regions/global/default default - "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. # Optional. Node Group Affinity for sole-tenant clusters. + "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. The Dataproc NodeGroupAffinity resource is not related to the Dataproc NodeGroup resource. # Optional. Node Group Affinity for sole-tenant clusters. "nodeGroupUri": "A String", # Required. The URI of a sole-tenant node group resource (https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on.A full URL, partial URI, or node group name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 node-group-1 }, "privateIpv6GoogleAccess": "A String", # Optional. The type of IPv6 access for a cluster. @@ -1934,8 +2081,8 @@

Method Details

], }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window.Maximum value is 10.Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. After the maximum number is reached, the job will be reported as failed.Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). 
}, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -2065,6 +2212,55 @@

Method Details

"autoscalingConfig": { # Autoscaling Policy config associated with the cluster. # Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset. "policyUri": "A String", # Optional. The autoscaling policy used by the cluster.Only resource names including projectid and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]Note that the policy must be in the same project and Dataproc region. }, + "auxiliaryNodeGroups": [ # Optional. The node group settings. + { # Node group identification and configuration information. + "nodeGroup": { # Dataproc Node Group. The Dataproc NodeGroup resource is not related to the Dataproc NodeGroupAffinity resource. # Required. Node group configuration. + "labels": { # Optional. Node group labels. Label keys must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values can be empty. If specified, they must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). The node group must have no more than 32 labelsn. + "a_key": "A String", + }, + "name": "A String", # The Node group resource name (https://aip.dev/122). + "nodeGroupConfig": { # The config settings for Compute Engine resources in an instance group, such as a master or worker group. # Optional. The node group instance group configuration. + "accelerators": [ # Optional. The Compute Engine accelerator configuration for these instances. + { # Specifies the type and number of accelerator cards attached to the instances of an instance. See GPUs on Compute Engine (https://cloud.google.com/compute/docs/gpus/). + "acceleratorCount": 42, # The number of the accelerator cards of this type exposed to this instance. 
+ "acceleratorTypeUri": "A String", # Full URL, partial URI, or short name of the accelerator type resource to expose to this instance. See Compute Engine AcceleratorTypes (https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).Examples: https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 nvidia-tesla-k80Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, nvidia-tesla-k80. + }, + ], + "diskConfig": { # Specifies the config of disk options for a group of VM instances. # Optional. Disk option config settings. + "bootDiskSizeGb": 42, # Optional. Size in GB of the boot disk (default is 500GB). + "bootDiskType": "A String", # Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive), "pd-ssd" (Persistent Disk Solid State Drive), or "pd-standard" (Persistent Disk Hard Disk Drive). See Disk types (https://cloud.google.com/compute/docs/disks#disk-types). + "localSsdInterface": "A String", # Optional. Interface type of local SSDs (default is "scsi"). Valid values: "scsi" (Small Computer System Interface), "nvme" (Non-Volatile Memory Express). See local SSD performance (https://cloud.google.com/compute/docs/disks/local-ssd#performance). + "numLocalSsds": 42, # Optional. Number of attached SSDs, from 0 to 8 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and HDFS (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. 
If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries.Note: Local SSD options may vary by machine type and number of vCPUs selected. + }, + "imageUri": "A String", # Optional. The Compute Engine image resource used for cluster instances.The URI can represent an image or image family.Image examples: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id] projects/[project_id]/global/images/[image-id] image-idImage family examples. Dataproc will use the most recent image from the family: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name] projects/[project_id]/global/images/family/[custom-image-family-name]If the URI is unspecified, it will be inferred from SoftwareConfig.image_version or the system default. + "instanceNames": [ # Output only. The list of instance names. Dataproc derives the names from cluster_name, num_instances, and the instance group. + "A String", + ], + "instanceReferences": [ # Output only. List of references to Compute Engine instances. + { # A reference to a Compute Engine instance. + "instanceId": "A String", # The unique identifier of the Compute Engine instance. + "instanceName": "A String", # The user-friendly name of the Compute Engine instance. + "publicEciesKey": "A String", # The public ECIES key used for sharing data with this instance. + "publicKey": "A String", # The public RSA key used for sharing data with this instance. + }, + ], + "isPreemptible": True or False, # Output only. Specifies that this instance group contains preemptible instances. + "machineTypeUri": "A String", # Optional. The Compute Engine machine type used for cluster instances.A full URL, partial URI, or short name are valid. 
Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 n1-standard-2Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the machine type resource, for example, n1-standard-2. + "managedGroupConfig": { # Specifies the resources used to actively manage an instance group. # Output only. The config for Compute Engine Instance Group Manager that manages this group. This is only used for preemptible instance groups. + "instanceGroupManagerName": "A String", # Output only. The name of the Instance Group Manager for this group. + "instanceTemplateName": "A String", # Output only. The name of the Instance Template used for the Managed Instance Group. + }, + "minCpuPlatform": "A String", # Optional. Specifies the minimum cpu platform for the Instance Group. See Dataproc -> Minimum CPU Platform (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). + "numInstances": 42, # Optional. The number of VM instances in the instance group. For HA cluster master_config groups, must be set to 3. For standard cluster master_config groups, must be set to 1. + "preemptibility": "A String", # Optional. Specifies the preemptibility of the instance group.The default value for master and worker groups is NON_PREEMPTIBLE. This default cannot be changed.The default value for secondary instances is PREEMPTIBLE. + }, + "roles": [ # Required. Node group roles. + "A String", + ], + }, + "nodeGroupId": "A String", # Optional. A node group ID. Generated if not specified.The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of from 3 to 33 characters. 
+ }, + ], "configBucket": "A String", # Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. "dataprocMetricConfig": { # Dataproc metric config. # Optional. The config for Dataproc metrics. "metrics": [ # Required. Metrics sources to enable. @@ -2094,7 +2290,7 @@

Method Details

"a_key": "A String", }, "networkUri": "A String", # Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither network_uri nor subnetwork_uri is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see Using Subnetworks (https://cloud.google.com/compute/docs/subnetworks) for more information).A full URL, partial URI, or short name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default projects/[project_id]/regions/global/default default - "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. # Optional. Node Group Affinity for sole-tenant clusters. + "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. The Dataproc NodeGroupAffinity resource is not related to the Dataproc NodeGroup resource. # Optional. Node Group Affinity for sole-tenant clusters. "nodeGroupUri": "A String", # Required. The URI of a sole-tenant node group resource (https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on.A full URL, partial URI, or node group name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 node-group-1 }, "privateIpv6GoogleAccess": "A String", # Optional. The type of IPv6 access for a cluster. @@ -2497,8 +2693,8 @@

Method Details

], }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window. Maximum value is 10. Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. After the maximum number is reached, the job will be reported as failed. Maximum value is 240. Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). 
}, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -2628,6 +2824,55 @@

Method Details

"autoscalingConfig": { # Autoscaling Policy config associated with the cluster. # Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset. "policyUri": "A String", # Optional. The autoscaling policy used by the cluster.Only resource names including projectid and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]Note that the policy must be in the same project and Dataproc region. }, + "auxiliaryNodeGroups": [ # Optional. The node group settings. + { # Node group identification and configuration information. + "nodeGroup": { # Dataproc Node Group. The Dataproc NodeGroup resource is not related to the Dataproc NodeGroupAffinity resource. # Required. Node group configuration. + "labels": { # Optional. Node group labels. Label keys must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values can be empty. If specified, they must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). The node group must have no more than 32 labelsn. + "a_key": "A String", + }, + "name": "A String", # The Node group resource name (https://aip.dev/122). + "nodeGroupConfig": { # The config settings for Compute Engine resources in an instance group, such as a master or worker group. # Optional. The node group instance group configuration. + "accelerators": [ # Optional. The Compute Engine accelerator configuration for these instances. + { # Specifies the type and number of accelerator cards attached to the instances of an instance. See GPUs on Compute Engine (https://cloud.google.com/compute/docs/gpus/). + "acceleratorCount": 42, # The number of the accelerator cards of this type exposed to this instance. 
+ "acceleratorTypeUri": "A String", # Full URL, partial URI, or short name of the accelerator type resource to expose to this instance. See Compute Engine AcceleratorTypes (https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).Examples: https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 nvidia-tesla-k80Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, nvidia-tesla-k80. + }, + ], + "diskConfig": { # Specifies the config of disk options for a group of VM instances. # Optional. Disk option config settings. + "bootDiskSizeGb": 42, # Optional. Size in GB of the boot disk (default is 500GB). + "bootDiskType": "A String", # Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive), "pd-ssd" (Persistent Disk Solid State Drive), or "pd-standard" (Persistent Disk Hard Disk Drive). See Disk types (https://cloud.google.com/compute/docs/disks#disk-types). + "localSsdInterface": "A String", # Optional. Interface type of local SSDs (default is "scsi"). Valid values: "scsi" (Small Computer System Interface), "nvme" (Non-Volatile Memory Express). See local SSD performance (https://cloud.google.com/compute/docs/disks/local-ssd#performance). + "numLocalSsds": 42, # Optional. Number of attached SSDs, from 0 to 8 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and HDFS (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. 
If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries.Note: Local SSD options may vary by machine type and number of vCPUs selected. + }, + "imageUri": "A String", # Optional. The Compute Engine image resource used for cluster instances.The URI can represent an image or image family.Image examples: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id] projects/[project_id]/global/images/[image-id] image-idImage family examples. Dataproc will use the most recent image from the family: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name] projects/[project_id]/global/images/family/[custom-image-family-name]If the URI is unspecified, it will be inferred from SoftwareConfig.image_version or the system default. + "instanceNames": [ # Output only. The list of instance names. Dataproc derives the names from cluster_name, num_instances, and the instance group. + "A String", + ], + "instanceReferences": [ # Output only. List of references to Compute Engine instances. + { # A reference to a Compute Engine instance. + "instanceId": "A String", # The unique identifier of the Compute Engine instance. + "instanceName": "A String", # The user-friendly name of the Compute Engine instance. + "publicEciesKey": "A String", # The public ECIES key used for sharing data with this instance. + "publicKey": "A String", # The public RSA key used for sharing data with this instance. + }, + ], + "isPreemptible": True or False, # Output only. Specifies that this instance group contains preemptible instances. + "machineTypeUri": "A String", # Optional. The Compute Engine machine type used for cluster instances.A full URL, partial URI, or short name are valid. 
Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 n1-standard-2Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the machine type resource, for example, n1-standard-2. + "managedGroupConfig": { # Specifies the resources used to actively manage an instance group. # Output only. The config for Compute Engine Instance Group Manager that manages this group. This is only used for preemptible instance groups. + "instanceGroupManagerName": "A String", # Output only. The name of the Instance Group Manager for this group. + "instanceTemplateName": "A String", # Output only. The name of the Instance Template used for the Managed Instance Group. + }, + "minCpuPlatform": "A String", # Optional. Specifies the minimum cpu platform for the Instance Group. See Dataproc -> Minimum CPU Platform (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). + "numInstances": 42, # Optional. The number of VM instances in the instance group. For HA cluster master_config groups, must be set to 3. For standard cluster master_config groups, must be set to 1. + "preemptibility": "A String", # Optional. Specifies the preemptibility of the instance group.The default value for master and worker groups is NON_PREEMPTIBLE. This default cannot be changed.The default value for secondary instances is PREEMPTIBLE. + }, + "roles": [ # Required. Node group roles. + "A String", + ], + }, + "nodeGroupId": "A String", # Optional. A node group ID. Generated if not specified.The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of from 3 to 33 characters. 
+ }, + ], "configBucket": "A String", # Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. "dataprocMetricConfig": { # Dataproc metric config. # Optional. The config for Dataproc metrics. "metrics": [ # Required. Metrics sources to enable. @@ -2657,7 +2902,7 @@

Method Details

"a_key": "A String", }, "networkUri": "A String", # Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither network_uri nor subnetwork_uri is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see Using Subnetworks (https://cloud.google.com/compute/docs/subnetworks) for more information).A full URL, partial URI, or short name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default projects/[project_id]/regions/global/default default - "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. # Optional. Node Group Affinity for sole-tenant clusters. + "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. The Dataproc NodeGroupAffinity resource is not related to the Dataproc NodeGroup resource. # Optional. Node Group Affinity for sole-tenant clusters. "nodeGroupUri": "A String", # Required. The URI of a sole-tenant node group resource (https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on.A full URL, partial URI, or node group name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 node-group-1 }, "privateIpv6GoogleAccess": "A String", # Optional. The type of IPv6 access for a cluster. @@ -3125,8 +3370,8 @@

Method Details

], }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window. Maximum value is 10. Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. After the maximum number is reached, the job will be reported as failed. Maximum value is 240. Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). 
}, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -3256,6 +3501,55 @@

Method Details

"autoscalingConfig": { # Autoscaling Policy config associated with the cluster. # Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset. "policyUri": "A String", # Optional. The autoscaling policy used by the cluster.Only resource names including projectid and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]Note that the policy must be in the same project and Dataproc region. }, + "auxiliaryNodeGroups": [ # Optional. The node group settings. + { # Node group identification and configuration information. + "nodeGroup": { # Dataproc Node Group. The Dataproc NodeGroup resource is not related to the Dataproc NodeGroupAffinity resource. # Required. Node group configuration. + "labels": { # Optional. Node group labels. Label keys must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values can be empty. If specified, they must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). The node group must have no more than 32 labelsn. + "a_key": "A String", + }, + "name": "A String", # The Node group resource name (https://aip.dev/122). + "nodeGroupConfig": { # The config settings for Compute Engine resources in an instance group, such as a master or worker group. # Optional. The node group instance group configuration. + "accelerators": [ # Optional. The Compute Engine accelerator configuration for these instances. + { # Specifies the type and number of accelerator cards attached to the instances of an instance. See GPUs on Compute Engine (https://cloud.google.com/compute/docs/gpus/). + "acceleratorCount": 42, # The number of the accelerator cards of this type exposed to this instance. 
+ "acceleratorTypeUri": "A String", # Full URL, partial URI, or short name of the accelerator type resource to expose to this instance. See Compute Engine AcceleratorTypes (https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).Examples: https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 nvidia-tesla-k80Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, nvidia-tesla-k80. + }, + ], + "diskConfig": { # Specifies the config of disk options for a group of VM instances. # Optional. Disk option config settings. + "bootDiskSizeGb": 42, # Optional. Size in GB of the boot disk (default is 500GB). + "bootDiskType": "A String", # Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive), "pd-ssd" (Persistent Disk Solid State Drive), or "pd-standard" (Persistent Disk Hard Disk Drive). See Disk types (https://cloud.google.com/compute/docs/disks#disk-types). + "localSsdInterface": "A String", # Optional. Interface type of local SSDs (default is "scsi"). Valid values: "scsi" (Small Computer System Interface), "nvme" (Non-Volatile Memory Express). See local SSD performance (https://cloud.google.com/compute/docs/disks/local-ssd#performance). + "numLocalSsds": 42, # Optional. Number of attached SSDs, from 0 to 8 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and HDFS (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. 
If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries.Note: Local SSD options may vary by machine type and number of vCPUs selected. + }, + "imageUri": "A String", # Optional. The Compute Engine image resource used for cluster instances.The URI can represent an image or image family.Image examples: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id] projects/[project_id]/global/images/[image-id] image-idImage family examples. Dataproc will use the most recent image from the family: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name] projects/[project_id]/global/images/family/[custom-image-family-name]If the URI is unspecified, it will be inferred from SoftwareConfig.image_version or the system default. + "instanceNames": [ # Output only. The list of instance names. Dataproc derives the names from cluster_name, num_instances, and the instance group. + "A String", + ], + "instanceReferences": [ # Output only. List of references to Compute Engine instances. + { # A reference to a Compute Engine instance. + "instanceId": "A String", # The unique identifier of the Compute Engine instance. + "instanceName": "A String", # The user-friendly name of the Compute Engine instance. + "publicEciesKey": "A String", # The public ECIES key used for sharing data with this instance. + "publicKey": "A String", # The public RSA key used for sharing data with this instance. + }, + ], + "isPreemptible": True or False, # Output only. Specifies that this instance group contains preemptible instances. + "machineTypeUri": "A String", # Optional. The Compute Engine machine type used for cluster instances.A full URL, partial URI, or short name are valid. 
Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 n1-standard-2Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the machine type resource, for example, n1-standard-2. + "managedGroupConfig": { # Specifies the resources used to actively manage an instance group. # Output only. The config for Compute Engine Instance Group Manager that manages this group. This is only used for preemptible instance groups. + "instanceGroupManagerName": "A String", # Output only. The name of the Instance Group Manager for this group. + "instanceTemplateName": "A String", # Output only. The name of the Instance Template used for the Managed Instance Group. + }, + "minCpuPlatform": "A String", # Optional. Specifies the minimum cpu platform for the Instance Group. See Dataproc -> Minimum CPU Platform (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). + "numInstances": 42, # Optional. The number of VM instances in the instance group. For HA cluster master_config groups, must be set to 3. For standard cluster master_config groups, must be set to 1. + "preemptibility": "A String", # Optional. Specifies the preemptibility of the instance group.The default value for master and worker groups is NON_PREEMPTIBLE. This default cannot be changed.The default value for secondary instances is PREEMPTIBLE. + }, + "roles": [ # Required. Node group roles. + "A String", + ], + }, + "nodeGroupId": "A String", # Optional. A node group ID. Generated if not specified.The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of from 3 to 33 characters. 
+ }, + ], "configBucket": "A String", # Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. "dataprocMetricConfig": { # Dataproc metric config. # Optional. The config for Dataproc metrics. "metrics": [ # Required. Metrics sources to enable. @@ -3285,7 +3579,7 @@

Method Details

"a_key": "A String", }, "networkUri": "A String", # Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither network_uri nor subnetwork_uri is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see Using Subnetworks (https://cloud.google.com/compute/docs/subnetworks) for more information).A full URL, partial URI, or short name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default projects/[project_id]/regions/global/default default - "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. # Optional. Node Group Affinity for sole-tenant clusters. + "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. The Dataproc NodeGroupAffinity resource is not related to the Dataproc NodeGroup resource. # Optional. Node Group Affinity for sole-tenant clusters. "nodeGroupUri": "A String", # Required. The URI of a sole-tenant node group resource (https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on.A full URL, partial URI, or node group name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 node-group-1 }, "privateIpv6GoogleAccess": "A String", # Optional. The type of IPv6 access for a cluster. @@ -3647,8 +3941,8 @@

Method Details

], }, "scheduling": { # Job scheduling options. # Optional. Job scheduling configuration. - "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. - "maxFailuresTotal": 42, # Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs. + "maxFailuresPerHour": 42, # Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window. Maximum value is 10. Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). + "maxFailuresTotal": 42, # Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. After the maximum number is reached, the job will be reported as failed. Maximum value is 240. Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). 
}, "sparkJob": { # A Dataproc job for running Apache Spark (https://spark.apache.org/) applications on YARN. # Optional. Job is a Spark job. "archiveUris": [ # Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. @@ -3778,6 +4072,55 @@

Method Details

"autoscalingConfig": { # Autoscaling Policy config associated with the cluster. # Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset. "policyUri": "A String", # Optional. The autoscaling policy used by the cluster.Only resource names including projectid and location (region) are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id] projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]Note that the policy must be in the same project and Dataproc region. }, + "auxiliaryNodeGroups": [ # Optional. The node group settings. + { # Node group identification and configuration information. + "nodeGroup": { # Dataproc Node Group. The Dataproc NodeGroup resource is not related to the Dataproc NodeGroupAffinity resource. # Required. Node group configuration. + "labels": { # Optional. Node group labels. Label keys must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values can be empty. If specified, they must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). The node group must have no more than 32 labelsn. + "a_key": "A String", + }, + "name": "A String", # The Node group resource name (https://aip.dev/122). + "nodeGroupConfig": { # The config settings for Compute Engine resources in an instance group, such as a master or worker group. # Optional. The node group instance group configuration. + "accelerators": [ # Optional. The Compute Engine accelerator configuration for these instances. + { # Specifies the type and number of accelerator cards attached to the instances of an instance. See GPUs on Compute Engine (https://cloud.google.com/compute/docs/gpus/). + "acceleratorCount": 42, # The number of the accelerator cards of this type exposed to this instance. 
+ "acceleratorTypeUri": "A String", # Full URL, partial URI, or short name of the accelerator type resource to expose to this instance. See Compute Engine AcceleratorTypes (https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).Examples: https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80 nvidia-tesla-k80Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, nvidia-tesla-k80. + }, + ], + "diskConfig": { # Specifies the config of disk options for a group of VM instances. # Optional. Disk option config settings. + "bootDiskSizeGb": 42, # Optional. Size in GB of the boot disk (default is 500GB). + "bootDiskType": "A String", # Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive), "pd-ssd" (Persistent Disk Solid State Drive), or "pd-standard" (Persistent Disk Hard Disk Drive). See Disk types (https://cloud.google.com/compute/docs/disks#disk-types). + "localSsdInterface": "A String", # Optional. Interface type of local SSDs (default is "scsi"). Valid values: "scsi" (Small Computer System Interface), "nvme" (Non-Volatile Memory Express). See local SSD performance (https://cloud.google.com/compute/docs/disks/local-ssd#performance). + "numLocalSsds": 42, # Optional. Number of attached SSDs, from 0 to 8 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and HDFS (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. 
If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries.Note: Local SSD options may vary by machine type and number of vCPUs selected. + }, + "imageUri": "A String", # Optional. The Compute Engine image resource used for cluster instances.The URI can represent an image or image family.Image examples: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id] projects/[project_id]/global/images/[image-id] image-idImage family examples. Dataproc will use the most recent image from the family: https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name] projects/[project_id]/global/images/family/[custom-image-family-name]If the URI is unspecified, it will be inferred from SoftwareConfig.image_version or the system default. + "instanceNames": [ # Output only. The list of instance names. Dataproc derives the names from cluster_name, num_instances, and the instance group. + "A String", + ], + "instanceReferences": [ # Output only. List of references to Compute Engine instances. + { # A reference to a Compute Engine instance. + "instanceId": "A String", # The unique identifier of the Compute Engine instance. + "instanceName": "A String", # The user-friendly name of the Compute Engine instance. + "publicEciesKey": "A String", # The public ECIES key used for sharing data with this instance. + "publicKey": "A String", # The public RSA key used for sharing data with this instance. + }, + ], + "isPreemptible": True or False, # Output only. Specifies that this instance group contains preemptible instances. + "machineTypeUri": "A String", # Optional. The Compute Engine machine type used for cluster instances.A full URL, partial URI, or short name are valid. 
Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2 n1-standard-2Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the machine type resource, for example, n1-standard-2. + "managedGroupConfig": { # Specifies the resources used to actively manage an instance group. # Output only. The config for Compute Engine Instance Group Manager that manages this group. This is only used for preemptible instance groups. + "instanceGroupManagerName": "A String", # Output only. The name of the Instance Group Manager for this group. + "instanceTemplateName": "A String", # Output only. The name of the Instance Template used for the Managed Instance Group. + }, + "minCpuPlatform": "A String", # Optional. Specifies the minimum cpu platform for the Instance Group. See Dataproc -> Minimum CPU Platform (https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). + "numInstances": 42, # Optional. The number of VM instances in the instance group. For HA cluster master_config groups, must be set to 3. For standard cluster master_config groups, must be set to 1. + "preemptibility": "A String", # Optional. Specifies the preemptibility of the instance group.The default value for master and worker groups is NON_PREEMPTIBLE. This default cannot be changed.The default value for secondary instances is PREEMPTIBLE. + }, + "roles": [ # Required. Node group roles. + "A String", + ], + }, + "nodeGroupId": "A String", # Optional. A node group ID. Generated if not specified.The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of from 3 to 33 characters. 
+ }, + ], "configBucket": "A String", # Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. "dataprocMetricConfig": { # Dataproc metric config. # Optional. The config for Dataproc metrics. "metrics": [ # Required. Metrics sources to enable. @@ -3807,7 +4150,7 @@

Method Details

"a_key": "A String", }, "networkUri": "A String", # Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither network_uri nor subnetwork_uri is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see Using Subnetworks (https://cloud.google.com/compute/docs/subnetworks) for more information).A full URL, partial URI, or short name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default projects/[project_id]/regions/global/default default - "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. # Optional. Node Group Affinity for sole-tenant clusters. + "nodeGroupAffinity": { # Node Group Affinity for clusters using sole-tenant node groups. The Dataproc NodeGroupAffinity resource is not related to the Dataproc NodeGroup resource. # Optional. Node Group Affinity for sole-tenant clusters. "nodeGroupUri": "A String", # Required. The URI of a sole-tenant node group resource (https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on.A full URL, partial URI, or node group name are valid. Examples: https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1 node-group-1 }, "privateIpv6GoogleAccess": "A String", # Optional. The type of IPv6 access for a cluster. 
diff --git a/googleapiclient/discovery_cache/documents/dataproc.v1.json b/googleapiclient/discovery_cache/documents/dataproc.v1.json index 44ea23ecc5c..8f5e91fd60f 100644 --- a/googleapiclient/discovery_cache/documents/dataproc.v1.json +++ b/googleapiclient/discovery_cache/documents/dataproc.v1.json @@ -1663,6 +1663,65 @@ "https://www.googleapis.com/auth/cloud-platform" ] } + }, + "resources": { + "nodeGroups": { + "methods": { + "get": { + "description": "Gets the resource representation for a node group in a cluster.", + "flatPath": "v1/projects/{projectsId}/regions/{regionsId}/clusters/{clustersId}/nodeGroups/{nodeGroupsId}", + "httpMethod": "GET", + "id": "dataproc.projects.regions.clusters.nodeGroups.get", + "parameterOrder": [ + "name" + ], + "parameters": { + "name": { + "description": "Required. The name of the node group to retrieve. Format: projects/{project}/regions/{region}/clusters/{cluster}/nodeGroups/{nodeGroup}", + "location": "path", + "pattern": "^projects/[^/]+/regions/[^/]+/clusters/[^/]+/nodeGroups/[^/]+$", + "required": true, + "type": "string" + } + }, + "path": "v1/{+name}", + "response": { + "$ref": "NodeGroup" + }, + "scopes": [ + "https://www.googleapis.com/auth/cloud-platform" + ] + }, + "resize": { + "description": "Resizes a node group in a cluster. The returned Operation.metadata is NodeGroupOperationMetadata (https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#nodegroupoperationmetadata).", + "flatPath": "v1/projects/{projectsId}/regions/{regionsId}/clusters/{clustersId}/nodeGroups/{nodeGroupsId}:resize", + "httpMethod": "POST", + "id": "dataproc.projects.regions.clusters.nodeGroups.resize", + "parameterOrder": [ + "name" + ], + "parameters": { + "name": { + "description": "Required. The name of the node group to resize. 
Format: projects/{project}/regions/{region}/clusters/{cluster}/nodeGroups/{nodeGroup}", + "location": "path", + "pattern": "^projects/[^/]+/regions/[^/]+/clusters/[^/]+/nodeGroups/[^/]+$", + "required": true, + "type": "string" + } + }, + "path": "v1/{+name}:resize", + "request": { + "$ref": "ResizeNodeGroupRequest" + }, + "response": { + "$ref": "Operation" + }, + "scopes": [ + "https://www.googleapis.com/auth/cloud-platform" + ] + } + } + } } }, "jobs": { @@ -2564,7 +2623,7 @@ } } }, - "revision": "20221114", + "revision": "20221130", "rootUrl": "https://dataproc.googleapis.com/", "schemas": { "AcceleratorConfig": { @@ -2628,6 +2687,21 @@ }, "type": "object" }, + "AuxiliaryNodeGroup": { + "description": "Node group identification and configuration information.", + "id": "AuxiliaryNodeGroup", + "properties": { + "nodeGroup": { + "$ref": "NodeGroup", + "description": "Required. Node group configuration." + }, + "nodeGroupId": { + "description": "Optional. A node group ID. Generated if not specified.The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of from 3 to 33 characters.", + "type": "string" + } + }, + "type": "object" + }, "AuxiliaryServicesConfig": { "description": "Auxiliary services configuration for a Cluster.", "id": "AuxiliaryServicesConfig", @@ -2950,6 +3024,13 @@ "$ref": "AutoscalingConfig", "description": "Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset." }, + "auxiliaryNodeGroups": { + "description": "Optional. The node group settings.", + "items": { + "$ref": "AuxiliaryNodeGroup" + }, + "type": "array" + }, "configBucket": { "description": "Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. 
If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket.", "type": "string" @@ -3315,6 +3396,23 @@ }, "type": "object" }, + "DriverSchedulingConfig": { + "description": "Driver scheduling configuration.", + "id": "DriverSchedulingConfig", + "properties": { + "memoryMb": { + "description": "Required. The amount of memory in MB the driver is requesting.", + "format": "int32", + "type": "integer" + }, + "vcores": { + "description": "Required. The number of vCPUs the driver is requesting.", + "format": "int32", + "type": "integer" + } + }, + "type": "object" + }, "Empty": { "description": "A generic empty message that you can re-use to avoid defining duplicated empty messages in your APIs. A typical example is to use it as the request or the response type of an API method. For instance: service Foo { rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty); } ", "id": "Empty", @@ -3371,7 +3469,7 @@ "id": "ExecutionConfig", "properties": { "idleTtl": { - "description": "Optional. The duration to keep the session alive while it's idling. Passing this threshold will cause the session to be terminated. Minimum value is 10 minutes; maximum value is 14 days (see JSON representation of Duration (https://developers.google.com/protocol-buffers/docs/proto3#json)). Defaults to 10 minutes if not set.", + "description": "Optional. The duration to keep the session alive while it's idling. Passing this threshold will cause the session to be terminated. 
Minimum value is 10 minutes; maximum value is 14 days (see JSON representation of Duration (https://developers.google.com/protocol-buffers/docs/proto3#json)). Defaults to 4 hours if not set.", "format": "google-duration", "type": "string" }, @@ -3970,6 +4068,10 @@ "readOnly": true, "type": "string" }, + "driverSchedulingConfig": { + "$ref": "DriverSchedulingConfig", + "description": "Optional. Driver scheduling configuration." + }, "hadoopJob": { "$ref": "HadoopJob", "description": "Optional. Job is a Hadoop job." @@ -4125,12 +4227,12 @@ "id": "JobScheduling", "properties": { "maxFailuresPerHour": { - "description": "Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs.", + "description": "Optional. Maximum number of times per hour a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed.A job may be reported as thrashing if the driver exits with a non-zero code four times within a 10-minute window.Maximum value is 10.Note: This restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template).", "format": "int32", "type": "integer" }, "maxFailuresTotal": { - "description": "Optional. Maximum number of times in total a driver may be restarted as a result of driver exiting with non-zero code before job is reported failed. 
Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow template (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template) jobs.", + "description": "Optional. Maximum total number of times a driver may be restarted as a result of the driver exiting with a non-zero code. After the maximum number is reached, the job will be reported as failed.Maximum value is 240.Note: Currently, this restartable job option is not supported in Dataproc workflow templates (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template).", "format": "int32", "type": "integer" } @@ -4589,8 +4691,45 @@ }, "type": "object" }, + "NodeGroup": { + "description": "Dataproc Node Group. The Dataproc NodeGroup resource is not related to the Dataproc NodeGroupAffinity resource.", + "id": "NodeGroup", + "properties": { + "labels": { + "additionalProperties": { + "type": "string" + }, + "description": "Optional. Node group labels. Label keys must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values can be empty. If specified, they must consist of from 1 to 63 characters and conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). The node group must have no more than 32 labels.", + "type": "object" + }, + "name": { + "description": "The Node group resource name (https://aip.dev/122).", + "type": "string" + }, + "nodeGroupConfig": { + "$ref": "InstanceGroupConfig", + "description": "Optional. The node group instance group configuration." + }, + "roles": { + "description": "Required. Node group roles.", + "items": { + "enum": [ + "ROLE_UNSPECIFIED", + "DRIVER" + ], + "enumDescriptions": [ + "Required unspecified role.", + "Job drivers run on the node pool." 
+ ], + "type": "string" + }, + "type": "array" + } + }, + "type": "object" + }, "NodeGroupAffinity": { - "description": "Node Group Affinity for clusters using sole-tenant node groups.", + "description": "Node Group Affinity for clusters using sole-tenant node groups. The Dataproc NodeGroupAffinity resource is not related to the Dataproc NodeGroup resource.", "id": "NodeGroupAffinity", "properties": { "nodeGroupUri": { @@ -5150,6 +5289,27 @@ }, "type": "object" }, + "ResizeNodeGroupRequest": { + "description": "A request to resize a node group.", + "id": "ResizeNodeGroupRequest", + "properties": { + "gracefulDecommissionTimeout": { + "description": "Optional. Timeout for graceful YARN decommissioning. Graceful decommissioning (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/scaling-clusters#graceful_decommissioning) allows the removal of nodes from the Compute Engine node group without interrupting jobs in progress. This timeout specifies how long to wait for jobs in progress to finish before forcefully removing nodes (and potentially interrupting jobs). Default timeout is 0 (for forceful decommission), and the maximum allowed timeout is 1 day. (see JSON representation of Duration (https://developers.google.com/protocol-buffers/docs/proto3#json)).Only supported on Dataproc image versions 1.2 and higher.", + "format": "google-duration", + "type": "string" + }, + "requestId": { + "description": "Optional. A unique ID used to identify the request. 
If the server receives two ResizeNodeGroupRequest (https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.ResizeNodeGroupRequests) with the same ID, the second request is ignored and the first google.longrunning.Operation created and stored in the backend is returned.Recommendation: Set this value to a UUID (https://en.wikipedia.org/wiki/Universally_unique_identifier).The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). The maximum length is 40 characters.", + "type": "string" + }, + "size": { + "description": "Required. The number of running instances for the node group to maintain. The group adds or removes instances to maintain the number of instances specified by this parameter.", + "format": "int32", + "type": "integer" + } + }, + "type": "object" + }, "RuntimeConfig": { "description": "Runtime configuration for a workload.", "id": "RuntimeConfig", @@ -5181,6 +5341,11 @@ "description": "Output only. Approximate workload resource usage calculated after workload finishes (see Dataproc Serverless pricing (https://cloud.google.com/dataproc-serverless/pricing)).", "readOnly": true }, + "currentUsage": { + "$ref": "UsageSnapshot", + "description": "Output only. Snapshot of current workload resource usage.", + "readOnly": true + }, "diagnosticOutputUri": { "description": "Output only. A URI pointing to the location of the diagnostics tarball.", "readOnly": true, @@ -5868,6 +6033,28 @@ }, "type": "object" }, + "UsageSnapshot": { + "description": "The usage snapshot represents the resources consumed by a workload at a specified time.", + "id": "UsageSnapshot", + "properties": { + "milliDcu": { + "description": "Optional. Milli (one-thousandth) Dataproc Compute Units (DCUs) (see Dataproc Serverless pricing (https://cloud.google.com/dataproc-serverless/pricing)).", + "format": "int64", + "type": "string" + }, + "shuffleStorageGb": { + "description": "Optional. 
Shuffle Storage in gigabytes (GB). (see Dataproc Serverless pricing (https://cloud.google.com/dataproc-serverless/pricing))", + "format": "int64", + "type": "string" + }, + "snapshotTime": { + "description": "Optional. The timestamp of the usage snapshot.", + "format": "google-datetime", + "type": "string" + } + }, + "type": "object" + }, "ValueValidation": { "description": "Validation based on a list of allowed values.", "id": "ValueValidation",