From f559a55cba989dac7425cbde84950b04814fe2d3 Mon Sep 17 00:00:00 2001 From: Danny Auble <da@schedmd.com> Date: Wed, 6 Apr 2016 11:38:24 -0700 Subject: [PATCH] Fix situation on a heterogeneous memory cluster where the order of constraints mattered in a job. Details include: A job doesn't request memory but the system is running with CR_*MEMORY with no default memory limit and the job requests nodes with features of different sizes. Previously the order of constraints mattered where the smaller memory node would need to be requested first or the job would fail. Bug 2608 --- NEWS | 2 ++ src/slurmctld/node_scheduler.c | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/NEWS b/NEWS index e3534aaacde..5ad0e5507e5 100644 --- a/NEWS +++ b/NEWS @@ -19,6 +19,8 @@ documents those changes that are of interest to users and administrators. select/cons_res plugin. Bug introduced in 15.08.9. -- Avoid double calculation on partition QOS if the job is using the same QOS. -- Do not change a job's time limit when updating unrelated field in a job. + -- Fix situation on a heterogeneous memory cluster where the order of + constraints mattered in a job. 
* Changes in Slurm 15.08.9 ========================== diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index d81393526b5..9ad1dbc9369 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -1269,6 +1269,7 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, bool nodes_busy = false; int shared = 0, select_mode; List preemptee_cand; + uint32_t orig_req_mem = job_ptr->details->pn_min_memory; if (test_only) select_mode = SELECT_MODE_TEST_ONLY; @@ -1529,6 +1530,17 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, preemptee_cand, preemptee_job_list, exc_core_bitmap); + /* If no memory is requested but we are running with + * CR_*_MEMORY and the request is for + * nodes of different memory sizes we need to reset the + * pn_min_memory as select_g_job_test can + * alter that making it so the order of constraints + * matters since the first pass through this will set the + * pn_min_memory based on that first constraint and if + * it isn't smaller than all the other requests they + * will fail. + */ + job_ptr->details->pn_min_memory = orig_req_mem; #if 0 { char *tmp_str1 = bitmap2node_name(backup_bitmap); -- GitLab