From f2b1c35fb899bb9fad5187e95d01ed4463290afe Mon Sep 17 00:00:00 2001 From: Danny Auble <da@schedmd.com> Date: Thu, 14 Jul 2016 16:55:50 -0700 Subject: [PATCH] Move commit 30f4f81c1f4a7b8 to be above code that could call delete_step_records which would delete the steps without the killing flag set. --- src/slurmctld/node_scheduler.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 91a50e6fb99..03fc0ae07cb 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -3726,6 +3726,22 @@ extern void re_kill_job(struct job_record *job_ptr) job_ptr->spank_job_env); kill_job->spank_job_env_size = job_ptr->spank_job_env_size; + /* On a Cray system this will start the NHC early so it is + * able to gather any information it can from the apparent + * unkillable processes. + * NOTE: do not do a list_for_each here, that will hold on the list + * lock while processing the entire list which could + * potentially be needed to lock again in + * select_g_step_finish which could potentially call + * post_job_step which calls delete_step_record which locks + * the list to create a list_iterator on the same list and + * could cause deadlock :). + */ + step_iterator = list_iterator_create(job_ptr->step_list); + while ((step_ptr = list_next(step_iterator))) + select_g_step_finish(step_ptr, true); + list_iterator_destroy(step_iterator); + #ifdef HAVE_FRONT_END if (job_ptr->batch_host && (front_end_ptr = find_front_end_record(job_ptr->batch_host))) { @@ -3819,21 +3835,6 @@ extern void re_kill_job(struct job_record *job_ptr) job_ptr->job_id, host_str); } #endif - /* On a Cray system this will start the NHC early so it is - * able to gather any information it can from the apparent - * unkillable processes. - * NOTE: do not do a list_for_each here, that will hold on the list - * lock while processing the entire list which could - * potentially be needed to lock again in - * select_g_step_finish which could potentially call - * post_job_step which calls delete_step_record which locks - * the list to create a list_iterator on the same list and - * could cause deadlock :). - */ - step_iterator = list_iterator_create(job_ptr->step_list); - while ((step_ptr = list_next(step_iterator))) - select_g_step_finish(step_ptr, true); - list_iterator_destroy(step_iterator); xfree(host_str); last_job_id = job_ptr->job_id; -- GitLab