diff --git a/slurm/slurm_errno.h b/slurm/slurm_errno.h index a2108f05fc0e6d4dabe71d845eb63b2434d9928d..2a788973fe3f83db13424dca5b2306d39c443971 100644 --- a/slurm/slurm_errno.h +++ b/slurm/slurm_errno.h @@ -170,6 +170,7 @@ enum { ESLURM_RESERVATION_OVERLAP, ESLURM_PORTS_BUSY, ESLURM_PORTS_INVALID, + ESLURM_PROLOG_RUNNING, /* switch specific error codes, specific values defined in plugin module */ ESLURM_SWITCH_MIN = 3000, diff --git a/src/common/slurm_errno.c b/src/common/slurm_errno.c index c4acb1adf85159fc8c366bf019ec6c599d981448..02e6071a6a6698ec639173d71c766ea0dadb9ea7 100644 --- a/src/common/slurm_errno.c +++ b/src/common/slurm_errno.c @@ -242,6 +242,8 @@ static slurm_errtab_t slurm_errtab[] = { "Requires ports are in use" }, { ESLURM_PORTS_INVALID, "Requires more ports than can be reserved" }, + { ESLURM_PROLOG_RUNNING, + "SlurmctldProlog is still running" }, /* slurmd error codes */ diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index b79076423353d44a315731c767d02119fbf858bb..3d1515a73fe3400e04af117332c11bbc3b6a68d0 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -1309,8 +1309,8 @@ static void _slurm_rpc_job_step_create(slurm_msg_t * msg) /* return result */ if (error_code) { unlock_slurmctld(job_write_lock); - error("_slurm_rpc_job_step_create: %s", - slurm_strerror(error_code)); + info("_slurm_rpc_job_step_create: %s", + slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { slurm_step_layout_t *layout = step_rec->step_layout; diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c index 24424c908711db9da7e4b4e334a7c77abbab1a6b..329860e47dc40dbed9b45ff3f05c3d10bd7c6eb8 100644 --- a/src/slurmctld/step_mgr.c +++ b/src/slurmctld/step_mgr.c @@ -472,7 +472,7 @@ _pick_step_nodes (struct job_record *job_ptr, if (job_ptr->next_step_id == 0) { if (job_ptr->details && job_ptr->details->prolog_running) { - *return_code = ESLURM_NODES_BUSY; + *return_code = ESLURM_PROLOG_RUNNING; return NULL; } for (i=bit_ffs(job_ptr->node_bitmap); i<node_record_count; diff --git a/src/squeue/print.c b/src/squeue/print.c index 797094558aaf7e3a586512d09c9e7e5387ca54a2..2a222df3fc786f3fee5e8731ca226a19a8791f82 100644 --- a/src/squeue/print.c +++ b/src/squeue/print.c @@ -642,7 +642,9 @@ int _print_job_num_procs(job_info_t * job, int width, bool right, char* suffix) if (job == NULL) /* Print the Header instead */ _print_str("CPUS", width, right, true); else { - if (job->job_state == JOB_RUNNING) { + if ((job->num_cpu_groups > 0) && + (job->cpus_per_node) && + (job->cpu_count_reps)) { uint32_t cnt = 0, i; for (i=0; i<job->num_cpu_groups; i++) { cnt += job->cpus_per_node[i] * diff --git a/src/srun/allocate.c b/src/srun/allocate.c index ec04aaba332e951ce81c6f8a913f008c8cdd4344..75994954eed78c1e781e1a3793b3d57685c0759a 100644 --- a/src/srun/allocate.c +++ b/src/srun/allocate.c @@ -604,6 +604,7 @@ create_job_step(srun_job_t *job, bool use_all_cpus) if (opt.immediate || ((rc != ESLURM_NODES_BUSY) && (rc != ESLURM_PORTS_BUSY) && + (rc != ESLURM_PROLOG_RUNNING) && (rc != ESLURM_DISABLED))) { error ("Unable to create job step: %m"); return -1; @@ -617,7 +618,7 @@ create_job_step(srun_job_t *job, bool use_all_cpus) oquitf = xsignal(SIGQUIT, _intr_handler); } else verbose("Job step creation still disabled, retrying"); - sleep(MIN((i*10), 60)); + sleep(MIN((i*10+1), 60)); } if (i > 0) { xsignal(SIGINT, ointf);