diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c index cbf2feb07309d2ec0851c7b550a6133b37d492c0..37c1aa64495e81f9dc8d29cce98bbc4470ebbd81 100644 --- a/src/slurmctld/step_mgr.c +++ b/src/slurmctld/step_mgr.c @@ -1036,7 +1036,7 @@ step_create(job_step_create_request_msg_t *step_specs, struct step_record *step_ptr; struct job_record *job_ptr; bitstr_t *nodeset; - int cpus_per_task_work, cpus_per_task_step, node_count, ret_code; + int cpus_per_task, node_count, ret_code; time_t now = time(NULL); char *step_node_list = NULL; uint32_t orig_cpu_count; @@ -1104,13 +1104,15 @@ step_create(job_step_create_request_msg_t *step_specs, /* we can figure out the cpus_per_task here by reversing what happens * in srun, record argument in slurm v1.4 */ - cpus_per_task_work = step_specs->cpu_count / step_specs->num_tasks; - if (cpus_per_task_work < 1) - cpus_per_task_work = 1; - if (step_specs->cpu_count) - cpus_per_task_step = cpus_per_task_work; - else - cpus_per_task_step = 0; + if (step_specs->cpu_count == 0) + cpus_per_task = 0; + else if (step_specs->num_tasks < 1) + cpus_per_task = 1; + else { + cpus_per_task = step_specs->cpu_count / step_specs->num_tasks; + if (cpus_per_task < 1) + cpus_per_task = 1; + } /* if the overcommit flag is checked we 0 out the cpu_count * which makes it so we don't check to see the available cpus @@ -1184,7 +1186,7 @@ step_create(job_step_create_request_msg_t *step_specs, step_ptr->port = step_specs->port; step_ptr->host = xstrdup(step_specs->host); step_ptr->batch_step = batch_step; - step_ptr->cpus_per_task = cpus_per_task_step; + step_ptr->cpus_per_task = cpus_per_task; step_ptr->mem_per_task = step_specs->mem_per_task; step_ptr->ckpt_interval = step_specs->ckpt_interval; step_ptr->ckpt_time = now; @@ -1212,7 +1214,7 @@ step_create(job_step_create_request_msg_t *step_specs, step_node_list, step_specs->node_count, step_specs->num_tasks, - (uint16_t)cpus_per_task_work, + (uint16_t)cpus_per_task, step_specs->task_dist, step_specs->plane_size); if (!step_ptr->step_layout) { diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index 489e7b002a3360abac606160cd0d60d068a3d8a9..bcdbd34349acc038966399990610951990b6eb80 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -1240,7 +1240,7 @@ _wait_for_all_tasks(slurmd_job_t *job) rc = _wait_for_any_task(job, true); if (rc != -1) { i += rc; - if (i < job->ntasks) { + if (i < tasks_left) { /* To limit the amount of traffic back * we will sleep a bit to make sure we * have most if not all the tasks