From 1437bdda24d1598fab55da8c83de7ca5fb2cc36d Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Wed, 22 Aug 2007 20:42:53 +0000 Subject: [PATCH] svn merge -r12089:12093 https://eris.llnl.gov/svn/slurm/branches/slurm-1.2 --- NEWS | 2 ++ src/slurmctld/node_scheduler.c | 20 ++++++++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/NEWS b/NEWS index 6e5bb4eebb8..624af4ac699 100644 --- a/NEWS +++ b/NEWS @@ -48,6 +48,8 @@ documents those changes that are of interest to users and admins. -- Added PMI timing information to srun debug mode to aid in tuning. Use "srun -vv ..." to see the information. -- Added checkpoint/ompi (OpenMPI) plugin (still under development). + -- Fix bug in load leveling logic added to v1.2.13 which can cause an + infinite loop and hang slurmctld when sharing nodes between jobs. * Changes in SLURM 1.2.13 ========================= diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 8660faf2e4e..646316289a0 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -289,24 +289,28 @@ _pick_best_load(struct job_record *job_ptr, bitstr_t * bitmap, uint32_t req_nodes, bool test_only) { bitstr_t *basemap; - int i, error_code, node_cnt, prev_cnt = 0, equal = 0; - + int i, error_code = EINVAL, node_cnt = 0, prev_cnt = 0, set_cnt; + basemap = bit_copy(bitmap); if (basemap == NULL) fatal("bit_copy malloc failure"); - - for (i = 0; 1; i++) { + + set_cnt = bit_set_count(bitmap); + if ((set_cnt < min_nodes) || + ((req_nodes > min_nodes) && (set_cnt < req_nodes))) + return error_code; /* not usable */ + + for (i=0; node_cnt<set_cnt; i++) { node_cnt = _job_count_bitmap(basemap, bitmap, i); if ((node_cnt == 0) || (node_cnt == prev_cnt)) - continue; + continue; /* nothing new to test */ if ((node_cnt < min_nodes) || ((req_nodes > min_nodes) && (node_cnt < req_nodes))) - continue; - equal = bit_equal(basemap, bitmap); + continue; /* need more nodes */ error_code = select_g_job_test(job_ptr, bitmap, min_nodes, max_nodes, req_nodes, test_only); - if (!error_code || equal) + if (!error_code) break; prev_cnt = node_cnt; } -- GitLab