diff --git a/NEWS b/NEWS index e2b7c5d216d02d365696ffa4298751fb26c0df3d..8d6482c78da316ad4c1d8714ba4087f52b021617 100644 --- a/NEWS +++ b/NEWS @@ -18,6 +18,9 @@ documents those changes that are of interest to users and admins. ======================== -- Fix bug in scontrol show daemons if NodeName=localhost will work now to display slurmd as place where it is running. + -- Patch from HP: init node selection plugin before reset_job_bitmaps(). + -- Jobs whose srun is killed with ctrl-c are now recorded as cancelled + instead of completed. * Changes in SLURM 1.1.2 ======================== diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index c69f47a67a70b0b9585f703ed171bc8bd3dcf8f0..95754873da20bcd3bb9a354ac9d3402c07c659a5 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -1653,7 +1653,9 @@ extern int job_complete(uint32_t job_id, uid_t uid, bool requeue, job_ptr->end_time = now; job_completion_logger(job_ptr); } else { - if (job_return_code) + if (job_return_code == NO_VAL) + job_ptr->job_state = JOB_CANCELLED| job_comp_flag; + else if (job_return_code) job_ptr->job_state = JOB_FAILED | job_comp_flag; else if (job_comp_flag && /* job was running */ (job_ptr->end_time < now)) /* over time limit */ diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index 5edfdfa0db9593fc6710d0a452f432c70f66e528..489e3f383da84a29efd050dd5f0fd45ef94920bb 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -702,7 +702,17 @@ int read_slurm_conf(int recover) } reset_first_job_id(); } - reset_job_bitmaps(); + + if ((select_g_node_init(node_record_table_ptr, node_record_count) + != SLURM_SUCCESS) + || (select_g_block_init(part_list) != SLURM_SUCCESS) + || (select_g_job_init(job_list) != SLURM_SUCCESS)) { + error("failed to initialize node selection plugin state"); + abort(); + } + + reset_job_bitmaps(); /* must follow select_g_job_init() */ + (void) _sync_nodes_to_jobs(); (void) sync_job_files(); _purge_old_node_state(old_node_table_ptr, 
old_node_record_count); @@ -712,14 +722,7 @@ int read_slurm_conf(int recover) #ifdef HAVE_ELAN _validate_node_proc_count(); #endif - if ((select_g_node_init(node_record_table_ptr, node_record_count) - != SLURM_SUCCESS) - || (select_g_block_init(part_list) != SLURM_SUCCESS) - || (select_g_job_init(job_list) != SLURM_SUCCESS)) { - error("failed to initialize node selection plugin state"); - abort(); - } - (void) _sync_nodes_to_comp_job(); /* must follow select_g_node_init() */ + (void) _sync_nodes_to_comp_job();/* must follow select_g_node_init() */ load_part_uid_allow_list(1); /* sort config_list by weight for scheduling */ diff --git a/src/srun/signals.c b/src/srun/signals.c index 0b71eebdca80fa68edf94356f5dc152264dbff07..a10992c1f3c7dc4191303b8012f85458e44d1fe7 100644 --- a/src/srun/signals.c +++ b/src/srun/signals.c @@ -133,6 +133,7 @@ static void _handle_intr(srun_job_t *job, time_t *last_intr, time_t *last_intr_sent) { if (opt.quit_on_intr) { + update_job_state(job, SRUN_JOB_CANCELLED); job_force_termination(job); pthread_exit (0); } @@ -143,10 +144,9 @@ _handle_intr(srun_job_t *job, time_t *last_intr, time_t *last_intr_sent) report_task_status(job); *last_intr = time(NULL); } else { /* second Ctrl-C in half as many seconds */ - + update_job_state(job, SRUN_JOB_CANCELLED); /* terminate job */ if (job->state < SRUN_JOB_FORCETERM) { - if ((time(NULL) - *last_intr_sent) < 1) { job_force_termination(job); pthread_exit(0); diff --git a/src/srun/srun.c b/src/srun/srun.c index 785ebe6ecec9df482248a22b5c62d340b918d237..9cb91e4c1c74a34f1271cb7bd21f87f849e4001e 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -349,7 +349,10 @@ int srun(int ac, char **av) * If job is "forcefully terminated" exit immediately. 
* */ - if (job->state == SRUN_JOB_FAILED) { + if (job->state == SRUN_JOB_CANCELLED) { + info("Cancelling job"); + srun_job_destroy(job, NO_VAL); + } else if (job->state == SRUN_JOB_FAILED) { info("Terminating job"); srun_job_destroy(job, 0); } else if (job->state == SRUN_JOB_FORCETERM) { diff --git a/src/srun/srun_job.h b/src/srun/srun_job.h index 3f9d157b701970225796fe17e30b12d5e578ee1a..e7db2addb6322b2f225daa5c5a62176c6ab77cf4 100644 --- a/src/srun/srun_job.h +++ b/src/srun/srun_job.h @@ -69,6 +69,7 @@ typedef enum { SRUN_JOB_DONE, /* tasks and IO complete */ SRUN_JOB_DETACHED, /* Detached IO from job (Not used now) */ SRUN_JOB_FAILED, /* Job failed for some reason */ + SRUN_JOB_CANCELLED, /* CTRL-C cancelled */ SRUN_JOB_FORCETERM /* Forced termination of IO thread */ } srun_job_state_t;