diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 143d1afd0fed8cd003c0d43240624dcdc31f94fc..e162f884fad06e39341acc475b993c28fc8c9925 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -2450,6 +2450,26 @@ static void _set_job_prio(struct job_record *job_ptr) } +/* After a node is returned to service, reset the priority of jobs + * which may have been held due to that node being unavailable */ +void reset_job_priority(void) +{ + ListIterator job_iterator; + struct job_record *job_ptr; + int count = 0; /* number of jobs whose priority was recomputed */ + + job_iterator = list_iterator_create(job_list); + while ((job_ptr = (struct job_record *) list_next(job_iterator))) { + if (job_ptr->priority == 1) { /* presumably the value given to jobs held for lack of nodes -- TODO confirm */ + _set_job_prio(job_ptr); + count++; + } + } + list_iterator_destroy(job_iterator); + if (count) /* refresh the job update timestamp only when a job changed */ + last_job_update = time(NULL); +} + /* * _top_priority - determine if any other job for this partition has a * higher priority than specified job diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 48e6731e39cf3e7faa5188aa3e2f9b3892664de3..4232091c11d4cbce9d302603931217542c39faf4 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -970,6 +970,7 @@ int update_node ( update_node_msg_t * update_node_msg ) else if (state_val == NODE_STATE_IDLE) { bit_set (avail_node_bitmap, node_inx); bit_set (idle_node_bitmap, node_inx); + reset_job_priority(); } else if (state_val == NODE_STATE_ALLOCATED) { bit_set (avail_node_bitmap, node_inx); @@ -1157,6 +1158,7 @@ validate_node_specs (char *node_name, uint32_t cpus, node_name); xfree(node_ptr->reason); resp_state = 1; /* just started responding */ + reset_job_priority(); /* node just started responding: recompute priority of jobs left at priority 1 */ } else if ((node_ptr->node_state == NODE_STATE_ALLOCATED) && (job_count == 0)) { /* job vanished */ node_ptr->node_state = NODE_STATE_IDLE; diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 2122a5d231e36a30e6db80a6d2cb4ee8c84f5589..94a5adef8680d35a27bc94bf03b003563352e6ee 100644 --- a/src/slurmctld/proc_req.c +++ 
b/src/slurmctld/proc_req.c @@ -1410,9 +1410,9 @@ static void _slurm_rpc_update_node(slurm_msg_t * msg) DEF_TIMERS; update_node_msg_t *update_node_msg_ptr = (update_node_msg_t *) msg->data; - /* Locks: Write node */ + /* Locks: Write job and write node (job write lock presumably required because the node update path can now reset job priorities -- TODO confirm call path) */ slurmctld_lock_t node_write_lock = { - NO_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK }; + NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK }; uid_t uid; START_TIMER; diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 675a565a7328ed00280d8fae25e1bcb16bd68b39..0fafe72fafac3b8019a085bb06921839facbce51 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -913,6 +913,10 @@ extern void reset_first_job_id(void); */ extern void reset_job_bitmaps (void); +/* After a node is returned to service, reset the priority of jobs + * which may have been held due to that node being unavailable */ +extern void reset_job_priority(void); + /* run_backup - this is the backup controller, it should run in standby * mode, assuming control when the primary controller stops responding */ extern void run_backup(void);