Skip to content
Snippets Groups Projects
Commit 7d082305 authored by Moe Jette
Browse files

Fix bug in sched/wiki and sched/wiki2 plugins that prevented job resource
allocation from being reported properly when node names are configured out
of sort order with more than one numeric suffix (e.g. "tux10-1" is
configured after "tux5-1").
parent 74d3804b
No related branches found
No related tags found
No related merge requests found
...@@ -9,6 +9,10 @@ documents those changes that are of interest to users and admins. ...@@ -9,6 +9,10 @@ documents those changes that are of interest to users and admins.
-- Filter white-space out from node features. -- Filter white-space out from node features.
-- Fixed issue with duration not being honored when updating start time in -- Fixed issue with duration not being honored when updating start time in
reservations. reservations.
-- Fix bug in sched/wiki and sched/wiki2 plugins for reporting job resource
allocation properly when node names are configured out of sort order
with more than one numeric suffix (e.g. "tux10-1" is configured after
"tux5-1").
* Changes in SLURM 2.0.0-rc1 * Changes in SLURM 2.0.0-rc1
============================== ==============================
......
...@@ -163,33 +163,43 @@ extern char * slurm_job2moab_task_list(struct job_record *job_ptr) ...@@ -163,33 +163,43 @@ extern char * slurm_job2moab_task_list(struct job_record *job_ptr)
/* Return task list in Moab format 1: tux0:tux0:tux1:tux1:tux2 */ /* Return task list in Moab format 1: tux0:tux0:tux1:tux1:tux2 */
static char * _task_list(struct job_record *job_ptr) static char * _task_list(struct job_record *job_ptr)
{ {
int i, j; int i, j, node_inx = 0, task_cnt;
char *buf = NULL, *host; char *buf = NULL, *host;
hostlist_t hl = hostlist_create(job_ptr->nodes);
select_job_res_t select_ptr = job_ptr->select_job; select_job_res_t select_ptr = job_ptr->select_job;
if (hl == NULL) { xassert(select_ptr);
error("hostlist_create error for job %u, %s",
job_ptr->job_id, job_ptr->nodes);
return buf;
}
for (i=0; i<select_ptr->nhosts; i++) { for (i=0; i<select_ptr->nhosts; i++) {
host = hostlist_shift(hl); if (i == 0) {
if (host == NULL) { xassert(select_ptr->cpus && select_ptr->node_bitmap);
error("bad node_cnt for job %u (%s, %d)", node_inx = bit_ffs(select_ptr->node_bitmap);
job_ptr->job_id, job_ptr->nodes, } else {
job_ptr->node_cnt); for (node_inx++; node_inx<node_record_count;
break; node_inx++) {
if (bit_test(select_ptr->node_bitmap,node_inx))
break;
}
if (node_inx >= node_record_count) {
error("Improperly formed select_job for %u",
job_ptr->job_id);
break;
}
}
host = node_record_table_ptr[node_inx].name;
task_cnt = select_ptr->cpus[i];
if (job_ptr->details && job_ptr->details->cpus_per_task)
task_cnt /= job_ptr->details->cpus_per_task;
if (task_cnt < 1) {
error("Invalid task_cnt for job %u on node %s",
job_ptr->job_id, host);
task_cnt = 1;
} }
for (j=0; j<select_ptr->cpus[i]; j++) { for (j=0; j<task_cnt; j++) {
if (buf) if (buf)
xstrcat(buf, ":"); xstrcat(buf, ":");
xstrcat(buf, host); xstrcat(buf, host);
} }
free(host);
} }
hostlist_destroy(hl);
return buf; return buf;
} }
...@@ -251,29 +261,39 @@ static void _append_hl_buf(char **buf, hostlist_t *hl_tmp, int *reps) ...@@ -251,29 +261,39 @@ static void _append_hl_buf(char **buf, hostlist_t *hl_tmp, int *reps)
/* Return task list in Moab format 2: tux[0-1]*2:tux2 */ /* Return task list in Moab format 2: tux[0-1]*2:tux2 */
static char * _task_list_exp(struct job_record *job_ptr) static char * _task_list_exp(struct job_record *job_ptr)
{ {
int i, reps = -1; int i, node_inx = 0, reps = -1, task_cnt;
char *buf = NULL, *host; char *buf = NULL, *host;
hostlist_t hl = hostlist_create(job_ptr->nodes);
hostlist_t hl_tmp = (hostlist_t) NULL; hostlist_t hl_tmp = (hostlist_t) NULL;
select_job_res_t select_ptr = job_ptr->select_job; select_job_res_t select_ptr = job_ptr->select_job;
xassert(select_ptr && select_ptr->cpus); xassert(select_ptr);
if (hl == NULL) {
error("hostlist_create error for job %u, %s",
job_ptr->job_id, job_ptr->nodes);
return buf;
}
for (i=0; i<select_ptr->nhosts; i++) { for (i=0; i<select_ptr->nhosts; i++) {
host = hostlist_shift(hl); if (i == 0) {
if (host == NULL) { xassert(select_ptr->cpus && select_ptr->node_bitmap);
error("bad node_cnt for job %u (%s, %d)", node_inx = bit_ffs(select_ptr->node_bitmap);
job_ptr->job_id, job_ptr->nodes, } else {
job_ptr->node_cnt); for (node_inx++; node_inx<node_record_count;
break; node_inx++) {
if (bit_test(select_ptr->node_bitmap,node_inx))
break;
}
if (node_inx >= node_record_count) {
error("Improperly formed select_job for %u",
job_ptr->job_id);
break;
}
} }
host = node_record_table_ptr[node_inx].name;
if (reps == select_ptr->cpus[i]) { task_cnt = select_ptr->cpus[i];
if (job_ptr->details && job_ptr->details->cpus_per_task)
task_cnt /= job_ptr->details->cpus_per_task;
if (task_cnt < 1) {
error("Invalid task_cnt for job %u on node %s",
job_ptr->job_id, host);
task_cnt = 1;
}
if (reps == task_cnt) {
/* append to existing hostlist record */ /* append to existing hostlist record */
if (hostlist_push(hl_tmp, host) == 0) if (hostlist_push(hl_tmp, host) == 0)
error("hostlist_push failure"); error("hostlist_push failure");
...@@ -284,13 +304,11 @@ static char * _task_list_exp(struct job_record *job_ptr) ...@@ -284,13 +304,11 @@ static char * _task_list_exp(struct job_record *job_ptr)
/* start new hostlist record */ /* start new hostlist record */
hl_tmp = hostlist_create(host); hl_tmp = hostlist_create(host);
if (hl_tmp) if (hl_tmp)
reps = select_ptr->cpus[i]; reps = task_cnt;
else else
error("hostlist_create failure"); error("hostlist_create failure");
} }
free(host);
} }
hostlist_destroy(hl);
if (hl_tmp) if (hl_tmp)
_append_hl_buf(&buf, &hl_tmp, &reps); _append_hl_buf(&buf, &hl_tmp, &reps);
return buf; return buf;
......
...@@ -163,37 +163,43 @@ extern char * slurm_job2moab_task_list(struct job_record *job_ptr) ...@@ -163,37 +163,43 @@ extern char * slurm_job2moab_task_list(struct job_record *job_ptr)
/* Return task list in Moab format 1: tux0:tux0:tux1:tux1:tux2 */ /* Return task list in Moab format 1: tux0:tux0:tux1:tux1:tux2 */
static char * _task_list(struct job_record *job_ptr) static char * _task_list(struct job_record *job_ptr)
{ {
int i, j, task_cnt; int i, j, node_inx = 0, task_cnt;
char *buf = NULL, *host; char *buf = NULL, *host;
hostlist_t hl = hostlist_create(job_ptr->nodes);
select_job_res_t select_ptr = job_ptr->select_job; select_job_res_t select_ptr = job_ptr->select_job;
xassert(select_ptr && select_ptr->cpus); xassert(select_ptr);
if (hl == NULL) {
error("hostlist_create error for job %u, %s",
job_ptr->job_id, job_ptr->nodes);
return buf;
}
for (i=0; i<select_ptr->nhosts; i++) { for (i=0; i<select_ptr->nhosts; i++) {
host = hostlist_shift(hl); if (i == 0) {
if (host == NULL) { xassert(select_ptr->cpus && select_ptr->node_bitmap);
error("bad node_cnt for job %u (%s, %d)", node_inx = bit_ffs(select_ptr->node_bitmap);
job_ptr->job_id, job_ptr->nodes, } else {
job_ptr->node_cnt); for (node_inx++; node_inx<node_record_count;
break; node_inx++) {
if (bit_test(select_ptr->node_bitmap,node_inx))
break;
}
if (node_inx >= node_record_count) {
error("Improperly formed select_job for %u",
job_ptr->job_id);
break;
}
} }
host = node_record_table_ptr[node_inx].name;
task_cnt = select_ptr->cpus[i]; task_cnt = select_ptr->cpus[i];
if (job_ptr->details && job_ptr->details->cpus_per_task) if (job_ptr->details && job_ptr->details->cpus_per_task)
task_cnt /= job_ptr->details->cpus_per_task; task_cnt /= job_ptr->details->cpus_per_task;
if (task_cnt < 1) {
error("Invalid task_cnt for job %u on node %s",
job_ptr->job_id, host);
task_cnt = 1;
}
for (j=0; j<task_cnt; j++) { for (j=0; j<task_cnt; j++) {
if (buf) if (buf)
xstrcat(buf, ":"); xstrcat(buf, ":");
xstrcat(buf, host); xstrcat(buf, host);
} }
free(host);
} }
hostlist_destroy(hl);
return buf; return buf;
} }
...@@ -255,31 +261,38 @@ static void _append_hl_buf(char **buf, hostlist_t *hl_tmp, int *reps) ...@@ -255,31 +261,38 @@ static void _append_hl_buf(char **buf, hostlist_t *hl_tmp, int *reps)
/* Return task list in Moab format 2: tux[0-1]*2:tux2 */ /* Return task list in Moab format 2: tux[0-1]*2:tux2 */
static char * _task_list_exp(struct job_record *job_ptr) static char * _task_list_exp(struct job_record *job_ptr)
{ {
int i, reps = -1, task_cnt; int i, node_inx = 0, reps = -1, task_cnt;
char *buf = NULL, *host; char *buf = NULL, *host;
hostlist_t hl = hostlist_create(job_ptr->nodes);
hostlist_t hl_tmp = (hostlist_t) NULL; hostlist_t hl_tmp = (hostlist_t) NULL;
select_job_res_t select_ptr = job_ptr->select_job; select_job_res_t select_ptr = job_ptr->select_job;
xassert(select_ptr && select_ptr->cpus); xassert(select_ptr);
if (hl == NULL) {
error("hostlist_create error for job %u, %s",
job_ptr->job_id, job_ptr->nodes);
return buf;
}
for (i=0; i<select_ptr->nhosts; i++) { for (i=0; i<select_ptr->nhosts; i++) {
host = hostlist_shift(hl); if (i == 0) {
if (host == NULL) { xassert(select_ptr->cpus && select_ptr->node_bitmap);
error("bad node_cnt for job %u (%s, %d)", node_inx = bit_ffs(select_ptr->node_bitmap);
job_ptr->job_id, job_ptr->nodes, } else {
job_ptr->node_cnt); for (node_inx++; node_inx<node_record_count;
break; node_inx++) {
if (bit_test(select_ptr->node_bitmap,node_inx))
break;
}
if (node_inx >= node_record_count) {
error("Improperly formed select_job for %u",
job_ptr->job_id);
break;
}
} }
host = node_record_table_ptr[node_inx].name;
task_cnt = select_ptr->cpus[i]; task_cnt = select_ptr->cpus[i];
if (job_ptr->details && job_ptr->details->cpus_per_task) if (job_ptr->details && job_ptr->details->cpus_per_task)
task_cnt /= job_ptr->details->cpus_per_task; task_cnt /= job_ptr->details->cpus_per_task;
if (task_cnt < 1) {
error("Invalid task_cnt for job %u on node %s",
job_ptr->job_id, host);
task_cnt = 1;
}
if (reps == task_cnt) { if (reps == task_cnt) {
/* append to existing hostlist record */ /* append to existing hostlist record */
if (hostlist_push(hl_tmp, host) == 0) if (hostlist_push(hl_tmp, host) == 0)
...@@ -295,9 +308,7 @@ static char * _task_list_exp(struct job_record *job_ptr) ...@@ -295,9 +308,7 @@ static char * _task_list_exp(struct job_record *job_ptr)
else else
error("hostlist_create failure"); error("hostlist_create failure");
} }
free(host);
} }
hostlist_destroy(hl);
if (hl_tmp) if (hl_tmp)
_append_hl_buf(&buf, &hl_tmp, &reps); _append_hl_buf(&buf, &hl_tmp, &reps);
return buf; return buf;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment