diff --git a/NEWS b/NEWS
index afbd6d61d7316b9f57c9b790435d974c6b303cb1..ebc58971b60eea4a63a257344155c80a654246ac 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,7 @@ documents those changes that are of interest to users and administrators.
 ==============================
  -- Fix sbatch --export=ALL, it was treated by srun as a request to explicitly
     export only the environment variable named "ALL".
+ -- Improve scheduling of jobs in reservations that overlap other reservations.
 
 * Changes in Slurm 14.11.0pre4
 ==============================
diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c
index 274d2cafbb2b0a1059b9fc1ce04f42f6a486b7a5..93fcaf53d21f499e4a50baab9940d22bf69a683a 100644
--- a/src/plugins/sched/backfill/backfill.c
+++ b/src/plugins/sched/backfill/backfill.c
@@ -693,6 +693,7 @@ static int _attempt_backfill(void)
 	struct timeval start_tv;
 	uint32_t test_array_job_id = 0;
 	uint32_t test_array_count = 0;
+	bool resv_overlap = false;
 
 	bf_last_yields = 0;
 #ifdef HAVE_ALPS_CRAY
@@ -1056,7 +1057,7 @@ next_task:
 		start_res   = later_start;
 		later_start = 0;
 		j = job_test_resv(job_ptr, &start_res, true, &avail_bitmap,
-				  &exc_core_bitmap);
+				  &exc_core_bitmap, &resv_overlap);
 		if (j != SLURM_SUCCESS) {
 			if (debug_flags & DEBUG_FLAG_BACKFILL)
 				info("backfill: job %u reservation defer",
@@ -1198,7 +1199,8 @@ next_task:
 			time_limit = job_ptr->time_limit;
 		}
 
-		if (rc == ESLURM_ACCOUNTING_POLICY) {
+		if ((rc == ESLURM_ACCOUNTING_POLICY) ||
+		    (rc == ESLURM_RESERVATION_BUSY)) {
 			/* Unknown future start time, just skip job */
 			if (orig_start_time != 0) {
 				/* Can start in different partition */
diff --git a/src/plugins/sched/builtin/builtin.c b/src/plugins/sched/builtin/builtin.c
index b91fe9fd058521c0b77a63d0ac2721ddadb3639a..ccdadb1fbb5351e016fe6b584d27d0ef9280dcaa 100644
--- a/src/plugins/sched/builtin/builtin.c
+++ b/src/plugins/sched/builtin/builtin.c
@@ -157,6 +157,7 @@ static void _compute_start_times(void)
 	bitstr_t *exc_core_bitmap = NULL;
 	uint32_t max_nodes, min_nodes, req_nodes, time_limit;
 	time_t now = time(NULL), sched_start, last_job_alloc;
+	bool resv_overlap = false;
 
 	sched_start = now;
 	last_job_alloc = now - 1;
@@ -201,7 +202,7 @@ static void _compute_start_times(void)
 		}
 
 		j = job_test_resv(job_ptr, &now, true, &avail_bitmap,
-				  &exc_core_bitmap);
+				  &exc_core_bitmap, &resv_overlap);
 		if (j != SLURM_SUCCESS) {
 			FREE_NULL_BITMAP(avail_bitmap);
 			FREE_NULL_BITMAP(exc_core_bitmap);
diff --git a/src/plugins/sched/wiki2/job_will_run.c b/src/plugins/sched/wiki2/job_will_run.c
index 0fa067fafedacc47434389df2b2bffe6da453de7..4b59a29d0df221a41d739ff5b5399e4379004044 100644
--- a/src/plugins/sched/wiki2/job_will_run.c
+++ b/src/plugins/sched/wiki2/job_will_run.c
@@ -135,6 +135,7 @@ static char * _will_run_test(uint32_t jobid, time_t start_time,
 	int rc;
 	time_t start_res, orig_start_time;
 	List preemptee_candidates;
+	bool resv_overlap = false;
 
 	debug2("wiki2: will_run job_id=%u start_time=%u node_list=%s",
 	       jobid, (uint32_t)start_time, node_list);
@@ -175,7 +176,7 @@ static char * _will_run_test(uint32_t jobid, time_t start_time,
 	/* Enforce reservation: access control, time and nodes */
 	start_res = start_time;
 	rc = job_test_resv(job_ptr, &start_res, true, &resv_bitmap,
-			   &exc_core_bitmap);
+			   &exc_core_bitmap, &resv_overlap);
 	if (rc != SLURM_SUCCESS) {
 		*err_code = -730;
 		*err_msg = "Job denied access to reservation";
@@ -427,6 +428,7 @@ static char * _will_run_test2(uint32_t jobid, time_t start_time,
 	time_t orig_start_time;
 	char *reply_msg = NULL;
 	int i, rc;
+	bool resv_overlap = false;
 
 	xassert(node_list);
 	debug2("wiki2: will_run2 job_id=%u start_time=%u node_list=%s",
@@ -465,7 +467,7 @@ static char * _will_run_test2(uint32_t jobid, time_t start_time,
 	/* Enforce reservation: access control, time and nodes */
 	start_res = start_time;
 	rc = job_test_resv(job_ptr, &start_res, true, &resv_bitmap,
-			   &exc_core_bitmap);
+			   &exc_core_bitmap, &resv_overlap);
 	if (rc != SLURM_SUCCESS) {
 		*err_code = -730;
 		*err_msg = "Job denied access to reservation";
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 76d79db14fd46aca61e71d591a629831fa68319e..40b6b9fbfae737734e7339cad71e3beb513872cc 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -3665,6 +3665,7 @@ static int _select_nodes_parts(struct job_record *job_ptr, bool test_only,
 				       select_node_bitmap, err_msg);
 		if ((rc != ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE) &&
 		    (rc != ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE) &&
+		    (rc != ESLURM_RESERVATION_BUSY) &&
 		    (rc != ESLURM_NODES_BUSY))
 			break;
 		if ((job_ptr->preempt_in_progress) &&
@@ -3816,6 +3817,7 @@ extern int job_allocate(job_desc_msg_t * job_specs, int immediate,
 		acct_policy_add_job_submit(job_ptr);
 
 	if ((error_code == ESLURM_NODES_BUSY) ||
+	    (error_code == ESLURM_RESERVATION_BUSY) ||
 	    (error_code == ESLURM_JOB_HELD) ||
 	    (error_code == ESLURM_NODE_NOT_AVAIL) ||
 	    (error_code == ESLURM_QOS_THRES) ||
@@ -3833,6 +3835,7 @@ extern int job_allocate(job_desc_msg_t * job_specs, int immediate,
 	} else {	/* job remains queued */
 		_create_job_array(job_ptr, job_specs);
 		if ((error_code == ESLURM_NODES_BUSY) ||
+		    (error_code == ESLURM_RESERVATION_BUSY) ||
 		    (error_code == ESLURM_ACCOUNTING_POLICY)) {
 			error_code = SLURM_SUCCESS;
 		}
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index 3b8a69b8cbea26ea92d0f2bfcd552deefacd61c2..f61dada966428e5c2fc820466de06779c7e51dcf 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -1333,7 +1333,8 @@ next_task:
 					job_ptr->part_ptr->node_bitmap);
 			bit_not(job_ptr->part_ptr->node_bitmap);
 		}
-	} else if (error_code == ESLURM_RESERVATION_NOT_USABLE) {
+	} else if ((error_code == ESLURM_RESERVATION_BUSY) ||
+		   (error_code == ESLURM_RESERVATION_NOT_USABLE)) {
 		if (job_ptr->resv_ptr &&
 		    job_ptr->resv_ptr->node_bitmap) {
 			debug3("sched: JobId=%u. State=%s. "
@@ -2435,6 +2436,7 @@ extern int job_start_data(job_desc_msg_t *job_desc_msg,
 	int i, rc = SLURM_SUCCESS;
 	time_t now = time(NULL), start_res, orig_start_time = (time_t) 0;
 	List preemptee_candidates = NULL, preemptee_job_list = NULL;
+	bool resv_overlap = false;
 
 	job_ptr = find_job_record(job_desc_msg->job_id);
 	if (job_ptr == NULL)
@@ -2484,7 +2486,7 @@ extern int job_start_data(job_desc_msg_t *job_desc_msg,
 	else
 		start_res = now;
 	i = job_test_resv(job_ptr, &start_res, false, &resv_bitmap,
-			  &exc_core_bitmap);
+			  &exc_core_bitmap, &resv_overlap);
 	if (i != SLURM_SUCCESS)
 		return i;
 	bit_and(avail_bitmap, resv_bitmap);
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index 60ee38cf4348e19d2dd8b4a01823d1928c757b17..ee4bfe5d2e73cc942f5e5950857af17916c999f5 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -125,7 +125,7 @@ static int _pick_best_nodes(struct node_set *node_set_ptr,
 			    uint32_t req_nodes, bool test_only,
 			    List preemptee_candidates,
 			    List *preemptee_job_list, bool has_xand,
-			    bitstr_t *exc_node_bitmap);
+			    bitstr_t *exc_node_bitmap, bool resv_overlap);
 static void _set_err_msg(bool cpus_ok, bool mem_ok, bool disk_ok,
 			 bool job_mc_ok, char **err_msg);
 static bool _valid_feature_counts(struct job_details *detail_ptr,
@@ -757,6 +757,7 @@ _get_req_features(struct node_set *node_set_ptr, int node_set_size,
 	bitstr_t *exc_core_bitmap = NULL;
 	List preemptee_candidates = NULL;
 	bool has_xand = false;
+	bool resv_overlap = false;
 
 	/* Mark nodes reserved for other jobs as off limit for this job.
 	 * If the job has a reservation, we've already limited the contents
@@ -764,7 +765,7 @@ _get_req_features(struct node_set *node_set_ptr, int node_set_size,
 	if (job_ptr->resv_name == NULL) {
 		time_t start_res = time(NULL);
 		rc = job_test_resv(job_ptr, &start_res, false, &resv_bitmap,
-				   &exc_core_bitmap);
+				   &exc_core_bitmap, &resv_overlap);
 		if (rc == ESLURM_NODES_BUSY) {
 			save_avail_node_bitmap = avail_node_bitmap;
 			avail_node_bitmap = bit_alloc(node_record_count);
@@ -787,7 +788,7 @@ _get_req_features(struct node_set *node_set_ptr, int node_set_size,
 	} else {
 		time_t start_res = time(NULL);
 		rc = job_test_resv(job_ptr, &start_res, false, &resv_bitmap,
-				   &exc_core_bitmap);
+				   &exc_core_bitmap, &resv_overlap);
 		FREE_NULL_BITMAP(resv_bitmap);
 		/* We do not care about return value.
 		 * We are just interested in exc_core_bitmap creation */
@@ -864,7 +865,7 @@ _get_req_features(struct node_set *node_set_ptr, int node_set_size,
 					 max_nodes, req_nodes, test_only,
 					 preemptee_candidates,
 					 preemptee_job_list, false,
-					 exc_core_bitmap);
+					 exc_core_bitmap, resv_overlap);
 #if 0
 {
 	char *tmp_str = bitmap2node_name(feature_bitmap);
@@ -956,7 +957,7 @@ _get_req_features(struct node_set *node_set_ptr, int node_set_size,
 				 select_bitmap, job_ptr, part_ptr, min_nodes,
 				 max_nodes, req_nodes, test_only,
 				 preemptee_candidates, preemptee_job_list,
-				 has_xand, exc_core_bitmap);
+				 has_xand, exc_core_bitmap, resv_overlap);
 	}
 #if 0
 {
@@ -1002,13 +1003,15 @@
  *	NULL on first entry
  * IN has_xand - set of the constraint list includes XAND operators *and*
  *	we have already satisfied them all
+ * IN resv_overlap - designated reservation overlaps another reservation
  * RET SLURM_SUCCESS on success,
  *	ESLURM_NODES_BUSY if request can not be satisfied now,
  *	ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE if request can never
- *	be satisfied ,
+ *	be satisfied,
  *	ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE if the job can not be
  *	initiated until the parition's configuration changes or
  *	ESLURM_NODE_NOT_AVAIL if required nodes are DOWN or DRAINED
+ *	ESLURM_RESERVATION_BUSY if requested reservation overlaps another
  * NOTE: the caller must FREE_NULL_BITMAP memory pointed to by select_bitmap
  * Notes: The algorithm is
  *	1) If required node list is specified, determine implicitly required
@@ -1032,7 +1035,7 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 		 uint32_t min_nodes, uint32_t max_nodes, uint32_t req_nodes,
 		 bool test_only, List preemptee_candidates,
 		 List *preemptee_job_list, bool has_xand,
-		 bitstr_t *exc_core_bitmap)
+		 bitstr_t *exc_core_bitmap, bool resv_overlap)
 {
 	struct node_record *node_ptr;
 	int error_code = SLURM_SUCCESS, i, j, pick_code;
@@ -1422,6 +1425,10 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 
 	/* The job is not able to start right now, return a
 	 * value indicating when the job can start */
+	if (!runable_ever && resv_overlap) {
+		error_code = ESLURM_RESERVATION_BUSY;
+		return error_code;
+	}
 	if (!runable_ever) {
 		error_code = ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE;
 		info("_pick_best_nodes: job %u never runnable",
@@ -1743,7 +1750,8 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only,
 		job_ptr->state_reason = WAIT_NODE_NOT_AVAIL;
 		xfree(job_ptr->state_desc);
 		last_job_update = now;
-	} else if (error_code == ESLURM_RESERVATION_NOT_USABLE) {
+	} else if ((error_code == ESLURM_RESERVATION_NOT_USABLE) ||
+		   (error_code == ESLURM_RESERVATION_BUSY)) {
 		job_ptr->state_reason = WAIT_RESERVATION;
 		xfree(job_ptr->state_desc);
 	} else if ((job_ptr->state_reason == WAIT_BLOCK_MAX_ERR) ||
@@ -2277,12 +2285,13 @@ static int _build_node_list(struct job_record *job_ptr,
 	bitstr_t *tmp_feature;
 	uint32_t max_weight = 0;
 	bool has_xor = false;
+	bool resv_overlap = false;
 
 	if (job_ptr->resv_name) {
 		/* Limit node selection to those in selected reservation */
 		time_t start_res = time(NULL);
 		rc = job_test_resv(job_ptr, &start_res, false,
-				   &usable_node_mask, NULL);
+				   &usable_node_mask, NULL, &resv_overlap);
 		if (rc != SLURM_SUCCESS) {
 			job_ptr->state_reason = WAIT_RESERVATION;
 			xfree(job_ptr->state_desc);
diff --git a/src/slurmctld/reservation.c b/src/slurmctld/reservation.c
index ccbd758b9a35624933102b367fb46357704dda22..0b6fb55e1a16d3243d38f0d7518f41b75da4a808 100644
--- a/src/slurmctld/reservation.c
+++ b/src/slurmctld/reservation.c
@@ -3732,7 +3732,7 @@ extern int job_test_lic_resv(struct job_record *job_ptr, char *lic_name,
  */
 extern int job_test_resv(struct job_record *job_ptr, time_t *when,
 			 bool move_time, bitstr_t **node_bitmap,
-			 bitstr_t **exc_core_bitmap)
+			 bitstr_t **exc_core_bitmap, bool *resv_overlap)
 {
 	slurmctld_resv_t * resv_ptr, *res2_ptr;
 	time_t job_start_time, job_end_time, lic_resv_time;
@@ -3799,9 +3799,12 @@ extern int job_test_resv(struct job_record *job_ptr, time_t *when,
 			    (res2_ptr->end_time <= job_start_time) ||
 			    (!res2_ptr->full_nodes))
 				continue;
-			bit_not(res2_ptr->node_bitmap);
-			bit_and(*node_bitmap, res2_ptr->node_bitmap);
-			bit_not(res2_ptr->node_bitmap);
+			if (bit_overlap(*node_bitmap, res2_ptr->node_bitmap)) {
+				*resv_overlap = true;
+				bit_not(res2_ptr->node_bitmap);
+				bit_and(*node_bitmap, res2_ptr->node_bitmap);
+				bit_not(res2_ptr->node_bitmap);
+			}
 		}
 		list_iterator_destroy(iter);
 
diff --git a/src/slurmctld/reservation.h b/src/slurmctld/reservation.h
index f29320eeb9494278ec58c8ca4fd8df7607a7bb42..cb34c8a2917802f5b45ba6a0ca194c7bae2f43aa 100644
--- a/src/slurmctld/reservation.h
+++ b/src/slurmctld/reservation.h
@@ -144,7 +144,7 @@ extern int job_test_lic_resv(struct job_record *job_ptr, char *lic_name,
  */
 extern int job_test_resv(struct job_record *job_ptr, time_t *when,
 			 bool move_time, bitstr_t **node_bitmap,
-			 bitstr_t **exc_core_bitmap);
+			 bitstr_t **exc_core_bitmap, bool *resv_overlap);
 
 /*
  * Determine the time of the first reservation to end after some time.
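
Reviewer note (not part of the patch): the functional core of this change is in reservation.c and node_scheduler.c. job_test_resv() now reports, through the new resv_overlap out-parameter, whether nodes of the job's reservation are also claimed by another overlapping reservation, and _pick_best_nodes() uses that flag to return the retryable ESLURM_RESERVATION_BUSY instead of the permanent ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE, so the job waits for the competing reservation to end rather than being rejected as never runnable. The sketch below is a minimal, self-contained model of that decision flow using stand-in types (a uint64_t mask in place of bitstr_t, a small enum in place of the ESLURM_* codes); it is illustrative only and not Slurm source.

/* Model of the overlap-aware logic added by this patch.  Stand-ins:
 * uint64_t replaces bitstr_t, enum rc replaces Slurm's ESLURM_* codes.
 * Builds with GCC/Clang (uses __builtin_popcountll). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum rc { RC_SUCCESS, RC_RESERVATION_BUSY, RC_NODE_CONFIG_UNAVAILABLE };

/* Like the new job_test_resv(): remove nodes claimed by an overlapping
 * reservation from the job's usable set and record that this happened. */
static uint64_t test_resv(uint64_t resv_nodes, uint64_t other_resv_nodes,
			  bool *resv_overlap)
{
	if (resv_nodes & other_resv_nodes) {	 /* cf. bit_overlap() */
		*resv_overlap = true;
		resv_nodes &= ~other_resv_nodes; /* cf. bit_not()/bit_and() */
	}
	return resv_nodes;
}

/* Like the new test in _pick_best_nodes(): if the job only looks
 * "never runnable" because an overlapping reservation took its nodes,
 * report a busy (retryable) condition rather than a permanent failure. */
static enum rc pick_best_nodes(uint64_t avail_nodes, int nodes_needed,
			       bool resv_overlap)
{
	if (__builtin_popcountll(avail_nodes) >= nodes_needed)
		return RC_SUCCESS;
	if (resv_overlap)
		return RC_RESERVATION_BUSY;
	return RC_NODE_CONFIG_UNAVAILABLE;
}

int main(void)
{
	bool resv_overlap = false;
	/* Job's reservation holds nodes 0-3; another holds nodes 2-5. */
	uint64_t avail = test_resv(0x0fULL, 0x3cULL, &resv_overlap);
	enum rc rc = pick_best_nodes(avail, 4, resv_overlap);

	/* Prints "overlap=1 rc=1": the job stays queued as busy instead
	 * of being rejected with a never-runnable error. */
	printf("overlap=%d rc=%d\n", (int) resv_overlap, rc);
	return 0;
}

The remaining hunks follow from this: every place that decides whether a failed allocation leaves the job queued (_select_nodes_parts() and job_allocate() in job_mgr.c, select_nodes(), the main scheduling loop in job_scheduler.c, and the backfill and builtin schedulers) must treat ESLURM_RESERVATION_BUSY like the existing ESLURM_NODES_BUSY or ESLURM_RESERVATION_NOT_USABLE cases, which is why each of those error-code tests gains one line.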