Skip to content
Snippets Groups Projects
Commit 1437bdda authored by Moe Jette's avatar Moe Jette
Browse files
parent 2a14a769
No related branches found
No related tags found
No related merge requests found
...@@ -48,6 +48,8 @@ documents those changes that are of interest to users and admins. ...@@ -48,6 +48,8 @@ documents those changes that are of interest to users and admins.
-- Added PMI timing information to srun debug mode to aid in tuning. -- Added PMI timing information to srun debug mode to aid in tuning.
Use "srun -vv ..." to see the information. Use "srun -vv ..." to see the information.
-- Added checkpoint/ompi (OpenMPI) plugin (still under development). -- Added checkpoint/ompi (OpenMPI) plugin (still under development).
-- Fix bug in the load-leveling logic added in v1.2.13 that can cause an
infinite loop and hang slurmctld when sharing nodes between jobs.
* Changes in SLURM 1.2.13 * Changes in SLURM 1.2.13
========================= =========================
......
...@@ -289,24 +289,28 @@ _pick_best_load(struct job_record *job_ptr, bitstr_t * bitmap, ...@@ -289,24 +289,28 @@ _pick_best_load(struct job_record *job_ptr, bitstr_t * bitmap,
uint32_t req_nodes, bool test_only) uint32_t req_nodes, bool test_only)
{ {
bitstr_t *basemap; bitstr_t *basemap;
int i, error_code, node_cnt, prev_cnt = 0, equal = 0; int i, error_code = EINVAL, node_cnt = 0, prev_cnt = 0, set_cnt;
basemap = bit_copy(bitmap); basemap = bit_copy(bitmap);
if (basemap == NULL) if (basemap == NULL)
fatal("bit_copy malloc failure"); fatal("bit_copy malloc failure");
for (i = 0; 1; i++) { set_cnt = bit_set_count(bitmap);
if ((set_cnt < min_nodes) ||
((req_nodes > min_nodes) && (set_cnt < req_nodes)))
return error_code; /* not usable */
for (i=0; node_cnt<set_cnt; i++) {
node_cnt = _job_count_bitmap(basemap, bitmap, i); node_cnt = _job_count_bitmap(basemap, bitmap, i);
if ((node_cnt == 0) || (node_cnt == prev_cnt)) if ((node_cnt == 0) || (node_cnt == prev_cnt))
continue; continue; /* nothing new to test */
if ((node_cnt < min_nodes) || if ((node_cnt < min_nodes) ||
((req_nodes > min_nodes) && (node_cnt < req_nodes))) ((req_nodes > min_nodes) && (node_cnt < req_nodes)))
continue; continue; /* need more nodes */
equal = bit_equal(basemap, bitmap);
error_code = select_g_job_test(job_ptr, bitmap, error_code = select_g_job_test(job_ptr, bitmap,
min_nodes, max_nodes, min_nodes, max_nodes,
req_nodes, test_only); req_nodes, test_only);
if (!error_code || equal) if (!error_code)
break; break;
prev_cnt = node_cnt; prev_cnt = node_cnt;
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment