Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix deployment of multiple Batch jobs #2543

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion modules/scheduler/batch-job-template/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ limitations under the License.
| <a name="input_instance_image"></a> [instance\_image](#input\_instance\_image) | Google Cloud Batch compute node image. Ignored if `instance_template` is provided.<br><br>Expected Fields:<br>name: The name of the image. Mutually exclusive with family.<br>family: The image family to use. Mutually exclusive with name.<br>project: The project where the image is hosted. | `map(string)` | <pre>{<br> "family": "batch-hpc-centos-7-official",<br> "project": "batch-custom-image"<br>}</pre> | no |
| <a name="input_instance_template"></a> [instance\_template](#input\_instance\_template) | Compute VM instance template self-link to be used for Google Cloud Batch compute node. If provided, a number of other variables will be ignored as noted by `Ignored if instance_template is provided` in descriptions. | `string` | `null` | no |
| <a name="input_job_filename"></a> [job\_filename](#input\_job\_filename) | The filename of the generated job template file. Will default to `<job_id>.yaml` if not specified. | `string` | `null` | no |
| <a name="input_job_id"></a> [job\_id](#input\_job\_id) | An id for the Google Cloud Batch job. Used for output instructions and file naming. Defaults to deployment name. | `string` | `null` | no |
| <a name="input_job_id"></a> [job\_id](#input\_job\_id) | An id for the Google Cloud Batch job. Used for output instructions and file naming. Automatically populated by the module id if not set. If setting manually, ensure a unique value across all jobs. | `string` | n/a | yes |
| <a name="input_labels"></a> [labels](#input\_labels) | Labels to add to the Google Cloud Batch compute nodes. Key-value pairs. Ignored if `instance_template` is provided. | `map(string)` | n/a | yes |
| <a name="input_log_policy"></a> [log\_policy](#input\_log\_policy) | Create a block to define log policy.<br>When set to `CLOUD_LOGGING`, logs will be sent to Cloud Logging.<br>When set to `PATH`, path must be added to generated template.<br>When set to `DESTINATION_UNSPECIFIED`, logs will not be preserved. | `string` | `"CLOUD_LOGGING"` | no |
| <a name="input_machine_type"></a> [machine\_type](#input\_machine\_type) | Machine type to use for Google Cloud Batch compute nodes. Ignored if `instance_template` is provided. | `string` | `"n2-standard-4"` | no |
Expand Down
11 changes: 5 additions & 6 deletions modules/scheduler/batch-job-template/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,8 @@ locals {
}
)

job_id_base = coalesce(var.job_id, var.deployment_name)
submit_job_id = "${local.job_id_base}-${random_id.submit_job_suffix.hex}"
job_filename = coalesce(var.job_filename, "cloud-batch-${local.job_id_base}.yaml")
submit_job_id = "${var.job_id}-${random_id.submit_job_suffix.hex}"
job_filename = coalesce(var.job_filename, "${var.job_id}.yaml")
job_template_output_path = "${path.root}/${local.job_filename}"

submit_script_contents = templatefile(
Expand All @@ -54,7 +53,7 @@ locals {
submit_job_id = local.submit_job_id
}
)
submit_script_output_path = "${path.root}/submit-job.sh"
submit_script_output_path = "${path.root}/submit-${var.job_id}.sh"

subnetwork_name = var.subnetwork != null ? var.subnetwork.name : "default"
subnetwork_project = var.subnetwork != null ? var.subnetwork.project : var.project_id
Expand Down Expand Up @@ -82,7 +81,7 @@ module "instance_template" {
source = "terraform-google-modules/vm/google//modules/instance_template"
version = "~> 8.0"

name_prefix = var.instance_template == null ? "${local.job_id_base}-instance-template" : "unused-template"
name_prefix = var.instance_template == null ? "${var.job_id}-instance-template" : "unused-template"
project_id = var.project_id
subnetwork = local.subnetwork_name
subnetwork_project = local.subnetwork_project
Expand Down Expand Up @@ -117,7 +116,7 @@ resource "local_file" "submit_script" {
}

resource "null_resource" "submit_job" {
depends_on = [local_file.job_template]
depends_on = [local_file.job_template, local_file.submit_script]
count = var.submit ? 1 : 0

# A new deployment should always submit a new job. Old finished jobs aren't persistent parts of
Expand Down
2 changes: 2 additions & 0 deletions modules/scheduler/batch-job-template/metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@ spec:
services:
- batch.googleapis.com
- compute.googleapis.com
ghpc:
inject_module_id: job_id
3 changes: 1 addition & 2 deletions modules/scheduler/batch-job-template/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,8 @@ variable "labels" {
}

variable "job_id" {
description = "An id for the Google Cloud Batch job. Used for output instructions and file naming. Defaults to deployment name."
description = "An id for the Google Cloud Batch job. Used for output instructions and file naming. Automatically populated by the module id if not set. If setting manually, ensure a unique value across all jobs."
type = string
default = null
}

variable "job_filename" {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@
ansible.builtin.assert:
that:
- cli_deployment_vars.region is defined
- custom_vars.project is defined
- custom_vars.batch_job_id is defined

- name: Batch Job Block
block:
- name: Submit batch job
register: batch_submission
changed_when: batch_submission.rc == 0
ansible.builtin.command: gcloud alpha batch jobs submit {{ deployment_name }} --config=/home/batch-jobs/cloud-batch-{{ deployment_name }}.yaml --location={{ cli_deployment_vars.region }} --project={{ custom_vars.project }}
ansible.builtin.command: gcloud alpha batch jobs submit {{ deployment_name }} --config=/home/batch-jobs/{{ custom_vars.batch_job_id }}.yaml --location={{ cli_deployment_vars.region }} --project={{ custom_vars.project }}
- name: Wait for job to run
changed_when: false
ansible.builtin.command: gcloud alpha batch jobs describe {{ deployment_name }} --location={{ cli_deployment_vars.region }} --project={{ custom_vars.project }}
Expand Down
1 change: 1 addition & 0 deletions tools/cloud-build/daily-tests/tests/batch-mpi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ post_deploy_tests:
custom_vars:
project: "{{ project }}"
mounts: [/share]
batch_job_id: batch-job # batch-job-template ID
cli_deployment_vars:
region: us-west4
zone: "{{ zone }}"
1 change: 1 addition & 0 deletions tools/cloud-build/daily-tests/tests/cloud-batch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ post_deploy_tests:
custom_vars:
project: "{{ project }}"
mounts: [/sw]
batch_job_id: batch-job # batch-job-template ID
cli_deployment_vars:
region: us-central1
zone: "{{ zone }}"