diff --git a/NEWS b/NEWS
index fe8ce36a111e897e387f04d71acae8f9bcf755a1..767f13d89fbc11370ea9f0fb28d5bd41d278f375 100644
--- a/NEWS
+++ b/NEWS
@@ -229,6 +229,8 @@ documents those changes that are of interest to users and admins.
     signal 1.
  -- Update the acct_gather.conf.5 man page removing the reference to
     InfinibandOFEDFrequency.
+ -- Fix gang scheduling for jobs submitted to multiple partitions.
+ -- Enable srun to submit job to multiple partitions.
 
 * Changes in Slurm 14.03.6
 ==========================
diff --git a/src/slurmctld/gang.c b/src/slurmctld/gang.c
index 76e8094d52fc9701b2b3dda23de003eae45f34cb..f08275dd40441ad7e629e20a68579ec41d37f49f 100644
--- a/src/slurmctld/gang.c
+++ b/src/slurmctld/gang.c
@@ -1045,6 +1045,7 @@ static void _scan_slurm_job_list(void)
 	struct gs_part *p_ptr;
 	int i;
 	ListIterator job_iterator;
+	char *part_name;
 
 	if (!job_list) {	/* no jobs */
 		if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
@@ -1064,10 +1065,15 @@ static void _scan_slurm_job_list(void)
 		if (IS_JOB_SUSPENDED(job_ptr) && (job_ptr->priority == 0))
 			continue;	/* not suspended by us */
 
+		if (job_ptr->part_ptr && job_ptr->part_ptr->name)
+			part_name = job_ptr->part_ptr->name;
+		else
+			part_name = job_ptr->partition;
+
 		if (IS_JOB_SUSPENDED(job_ptr) || IS_JOB_RUNNING(job_ptr)) {
 			/* are we tracking this job already? */
 			p_ptr = list_find_first(gs_part_list, _find_gs_part,
-						job_ptr->partition);
+						part_name);
 			if (!p_ptr) /* no partition */
 				continue;
 			i = _find_job_index(p_ptr, job_ptr->job_id);
@@ -1096,8 +1102,7 @@ static void _scan_slurm_job_list(void)
 		/* if the job is not pending, suspended, or running, then
 		 * it's completing or completed. Make sure we've released
 		 * this job */
-		p_ptr = list_find_first(gs_part_list, _find_gs_part,
-					job_ptr->partition);
+		p_ptr = list_find_first(gs_part_list, _find_gs_part, part_name);
 		if (!p_ptr) /* no partition */
 			continue;
 		_remove_job_from_part(job_ptr->job_id, p_ptr, false);
@@ -1218,13 +1223,17 @@ extern int gs_job_start(struct job_record *job_ptr)
 {
 	struct gs_part *p_ptr;
 	uint16_t job_state;
+	char *part_name;
 
 	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
 		info("gang: entering gs_job_start for job %u", job_ptr->job_id);
 	/* add job to partition */
+	if (job_ptr->part_ptr && job_ptr->part_ptr->name)
+		part_name = job_ptr->part_ptr->name;
+	else
+		part_name = job_ptr->partition;
 	pthread_mutex_lock(&data_mutex);
-	p_ptr = list_find_first(gs_part_list, _find_gs_part,
-				job_ptr->partition);
+	p_ptr = list_find_first(gs_part_list, _find_gs_part, part_name);
 	if (p_ptr) {
 		job_state = _add_job_to_part(p_ptr, job_ptr);
 		/* if this job is running then check for preemption */
@@ -1238,7 +1247,7 @@ extern int gs_job_start(struct job_record *job_ptr)
 		 * uninterupted (what else can we do?)
 		 */
 		error("gang: could not find partition %s for job %u",
-		      job_ptr->partition, job_ptr->job_id);
+		      part_name, job_ptr->job_id);
 	}
 	pthread_mutex_unlock(&data_mutex);
 	_preempt_job_dequeue();	/* MUST BE OUTSIDE OF data_mutex lock */
@@ -1289,12 +1298,16 @@ extern void gs_wake_jobs(void)
 extern int gs_job_fini(struct job_record *job_ptr)
 {
 	struct gs_part *p_ptr;
+	char *part_name;
 
 	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
 		info("gang: entering gs_job_fini for job %u", job_ptr->job_id);
+	if (job_ptr->part_ptr && job_ptr->part_ptr->name)
+		part_name = job_ptr->part_ptr->name;
+	else
+		part_name = job_ptr->partition;
 	pthread_mutex_lock(&data_mutex);
-	p_ptr = list_find_first(gs_part_list, _find_gs_part,
-				job_ptr->partition);
+	p_ptr = list_find_first(gs_part_list, _find_gs_part, part_name);
 	if (!p_ptr) {
 		pthread_mutex_unlock(&data_mutex);
 		if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index e1401944d806a3d3a96b54d2d86094ce3dbf20af..0833c12575b5a02bc19bfb3238e7e3d345ff04c6 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -3573,7 +3573,8 @@ static int _select_nodes_parts(struct job_record *job_ptr, bool test_only,
 		rc = select_nodes(job_ptr, test_only,
 				  select_node_bitmap, err_msg);
 		if ((rc != ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE) &&
-		    (rc != ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE))
+		    (rc != ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE) &&
+		    (rc != ESLURM_NODES_BUSY))
 			break;
 	}
 	list_iterator_destroy(iter);