From 7d082305fd04be2e68755d34e2c65f1168f23bf9 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Mon, 11 May 2009 17:09:02 +0000
Subject: [PATCH] Fix bug in sched/wiki and sched/wiki2 plugins for reporting
 job resource     allocation properly when node names are configured out of
 sort order     with more than one numeric suffix (e.g. "tux10-1" is
 configured after     "tux5-1").

---
 NEWS                               |  4 ++
 src/plugins/sched/wiki/hostlist.c  | 90 ++++++++++++++++++------------
 src/plugins/sched/wiki2/hostlist.c | 79 +++++++++++++++-----------
 3 files changed, 103 insertions(+), 70 deletions(-)

diff --git a/NEWS b/NEWS
index 2b29491b7d9..6d08ec36f02 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,10 @@ documents those changes that are of interest to users and admins.
  -- Filter white-space out from node features.
  -- Fixed issue with duration not being honored when updating start time in 
     reservations.
+ -- Fix bug in sched/wiki and sched/wiki2 plugins for reporting job resource
+    allocation properly when node names are configured out of sort order 
+    with more than one numeric suffix (e.g. "tux10-1" is configured after 
+    "tux5-1").
 
 * Changes in SLURM 2.0.0-rc1
 ==============================
diff --git a/src/plugins/sched/wiki/hostlist.c b/src/plugins/sched/wiki/hostlist.c
index 976c1e2ed67..50d4555dbe4 100644
--- a/src/plugins/sched/wiki/hostlist.c
+++ b/src/plugins/sched/wiki/hostlist.c
@@ -163,33 +163,43 @@ extern char * slurm_job2moab_task_list(struct job_record *job_ptr)
 /* Return task list in Moab format 1: tux0:tux0:tux1:tux1:tux2 */
 static char * _task_list(struct job_record *job_ptr)
 {
-	int i, j;
+	int i, j, node_inx = 0, task_cnt;
 	char *buf = NULL, *host;
-	hostlist_t hl = hostlist_create(job_ptr->nodes);
 	select_job_res_t select_ptr = job_ptr->select_job;
 
-	if (hl == NULL) {
-		error("hostlist_create error for job %u, %s",
-			job_ptr->job_id, job_ptr->nodes);
-		return buf;
-	}
-
+	xassert(select_ptr);
 	for (i=0; i<select_ptr->nhosts; i++) {
-		host = hostlist_shift(hl);
-		if (host == NULL) {
-			error("bad node_cnt for job %u (%s, %d)", 
-				job_ptr->job_id, job_ptr->nodes,
-				job_ptr->node_cnt);
-			break;
+		if (i == 0) {
+			xassert(select_ptr->cpus && select_ptr->node_bitmap);
+			node_inx = bit_ffs(select_ptr->node_bitmap);
+		} else {
+			for (node_inx++; node_inx<node_record_count; 
+			     node_inx++) {
+				if (bit_test(select_ptr->node_bitmap,node_inx))
+					break;
+			}
+			if (node_inx >= node_record_count) {
+				error("Improperly formed select_job for %u",
+				      job_ptr->job_id);
+				break;
+			}
+		}
+		host = node_record_table_ptr[node_inx].name;
+
+		task_cnt = select_ptr->cpus[i];
+		if (job_ptr->details && job_ptr->details->cpus_per_task)
+			task_cnt /= job_ptr->details->cpus_per_task;
+		if (task_cnt < 1) {
+			error("Invalid task_cnt for job %u on node %s",
+			      job_ptr->job_id, host);
+			task_cnt = 1;
 		}
-		for (j=0; j<select_ptr->cpus[i]; j++) {
+		for (j=0; j<task_cnt; j++) {
 			if (buf)
 				xstrcat(buf, ":");
 			xstrcat(buf, host);
 		}
-		free(host);
 	}
-	hostlist_destroy(hl);
 	return buf;
 }
 
@@ -251,29 +261,39 @@ static void _append_hl_buf(char **buf, hostlist_t *hl_tmp, int *reps)
 /* Return task list in Moab format 2: tux[0-1]*2:tux2 */
 static char * _task_list_exp(struct job_record *job_ptr)
 {
-	int i, reps = -1;
+	int i, node_inx = 0, reps = -1, task_cnt;
 	char *buf = NULL, *host;
-	hostlist_t hl = hostlist_create(job_ptr->nodes);
 	hostlist_t hl_tmp = (hostlist_t) NULL;
 	select_job_res_t select_ptr = job_ptr->select_job;
 
-	xassert(select_ptr && select_ptr->cpus);
-	if (hl == NULL) {
-		error("hostlist_create error for job %u, %s",
-			job_ptr->job_id, job_ptr->nodes);
-		return buf;
-	}
-
+	xassert(select_ptr);
 	for (i=0; i<select_ptr->nhosts; i++) {
-		host = hostlist_shift(hl);
-		if (host == NULL) {
-			error("bad node_cnt for job %u (%s, %d)", 
-				job_ptr->job_id, job_ptr->nodes,
-				job_ptr->node_cnt);
-			break;
+		if (i == 0) {
+			xassert(select_ptr->cpus && select_ptr->node_bitmap);
+			node_inx = bit_ffs(select_ptr->node_bitmap);
+		} else {
+			for (node_inx++; node_inx<node_record_count; 
+			     node_inx++) {
+				if (bit_test(select_ptr->node_bitmap,node_inx))
+					break;
+			}
+			if (node_inx >= node_record_count) {
+				error("Improperly formed select_job for %u",
+				      job_ptr->job_id);
+				break;
+			}
 		}
+		host = node_record_table_ptr[node_inx].name;
 
-		if (reps == select_ptr->cpus[i]) {
+		task_cnt = select_ptr->cpus[i];
+		if (job_ptr->details && job_ptr->details->cpus_per_task)
+			task_cnt /= job_ptr->details->cpus_per_task;
+		if (task_cnt < 1) {
+			error("Invalid task_cnt for job %u on node %s",
+			      job_ptr->job_id, host);
+			task_cnt = 1;
+		}
+		if (reps == task_cnt) {
 			/* append to existing hostlist record */
 			if (hostlist_push(hl_tmp, host) == 0)
 				error("hostlist_push failure");
@@ -284,13 +304,11 @@ static char * _task_list_exp(struct job_record *job_ptr)
 			/* start new hostlist record */
 			hl_tmp = hostlist_create(host);
 			if (hl_tmp)
-				reps = select_ptr->cpus[i];
+				reps = task_cnt;
 			else
 				error("hostlist_create failure");
 		}
-		free(host);
 	}
-	hostlist_destroy(hl);
 	if (hl_tmp)
 		_append_hl_buf(&buf, &hl_tmp, &reps);
 	return buf;
diff --git a/src/plugins/sched/wiki2/hostlist.c b/src/plugins/sched/wiki2/hostlist.c
index 9783942588f..b11cf7fe243 100644
--- a/src/plugins/sched/wiki2/hostlist.c
+++ b/src/plugins/sched/wiki2/hostlist.c
@@ -163,37 +163,43 @@ extern char * slurm_job2moab_task_list(struct job_record *job_ptr)
 /* Return task list in Moab format 1: tux0:tux0:tux1:tux1:tux2 */
 static char * _task_list(struct job_record *job_ptr)
 {
-	int i, j, task_cnt;
+	int i, j, node_inx = 0, task_cnt;
 	char *buf = NULL, *host;
-	hostlist_t hl = hostlist_create(job_ptr->nodes);
 	select_job_res_t select_ptr = job_ptr->select_job;
 
-	xassert(select_ptr && select_ptr->cpus);
-	if (hl == NULL) {
-		error("hostlist_create error for job %u, %s",
-			job_ptr->job_id, job_ptr->nodes);
-		return buf;
-	}
-
+	xassert(select_ptr);
 	for (i=0; i<select_ptr->nhosts; i++) {
-		host = hostlist_shift(hl);
-		if (host == NULL) {
-			error("bad node_cnt for job %u (%s, %d)", 
-				job_ptr->job_id, job_ptr->nodes,
-				job_ptr->node_cnt);
-			break;
+		if (i == 0) {
+			xassert(select_ptr->cpus && select_ptr->node_bitmap);
+			node_inx = bit_ffs(select_ptr->node_bitmap);
+		} else {
+			for (node_inx++; node_inx<node_record_count; 
+			     node_inx++) {
+				if (bit_test(select_ptr->node_bitmap,node_inx))
+					break;
+			}
+			if (node_inx >= node_record_count) {
+				error("Improperly formed select_job for %u",
+				      job_ptr->job_id);
+				break;
+			}
 		}
+		host = node_record_table_ptr[node_inx].name;
+
 		task_cnt = select_ptr->cpus[i];
 		if (job_ptr->details && job_ptr->details->cpus_per_task)
 			task_cnt /= job_ptr->details->cpus_per_task;
+		if (task_cnt < 1) {
+			error("Invalid task_cnt for job %u on node %s",
+			      job_ptr->job_id, host);
+			task_cnt = 1;
+		}
 		for (j=0; j<task_cnt; j++) {
 			if (buf)
 				xstrcat(buf, ":");
 			xstrcat(buf, host);
 		}
-		free(host);
 	}
-	hostlist_destroy(hl);
 	return buf;
 }
 
@@ -255,31 +261,38 @@ static void _append_hl_buf(char **buf, hostlist_t *hl_tmp, int *reps)
 /* Return task list in Moab format 2: tux[0-1]*2:tux2 */
 static char * _task_list_exp(struct job_record *job_ptr)
 {
-	int i, reps = -1, task_cnt;
+	int i, node_inx = 0, reps = -1, task_cnt;
 	char *buf = NULL, *host;
-	hostlist_t hl = hostlist_create(job_ptr->nodes);
 	hostlist_t hl_tmp = (hostlist_t) NULL;
 	select_job_res_t select_ptr = job_ptr->select_job;
 
-	xassert(select_ptr && select_ptr->cpus);
-	if (hl == NULL) {
-		error("hostlist_create error for job %u, %s",
-			job_ptr->job_id, job_ptr->nodes);
-		return buf;
-	}
-
+	xassert(select_ptr);
 	for (i=0; i<select_ptr->nhosts; i++) {
-		host = hostlist_shift(hl);
-		if (host == NULL) {
-			error("bad node_cnt for job %u (%s, %d)", 
-				job_ptr->job_id, job_ptr->nodes,
-				job_ptr->node_cnt);
-			break;
+		if (i == 0) {
+			xassert(select_ptr->cpus && select_ptr->node_bitmap);
+			node_inx = bit_ffs(select_ptr->node_bitmap);
+		} else {
+			for (node_inx++; node_inx<node_record_count; 
+			     node_inx++) {
+				if (bit_test(select_ptr->node_bitmap,node_inx))
+					break;
+			}
+			if (node_inx >= node_record_count) {
+				error("Improperly formed select_job for %u",
+				      job_ptr->job_id);
+				break;
+			}
 		}
+		host = node_record_table_ptr[node_inx].name;
 
 		task_cnt = select_ptr->cpus[i];
 		if (job_ptr->details && job_ptr->details->cpus_per_task)
 			task_cnt /= job_ptr->details->cpus_per_task;
+		if (task_cnt < 1) {
+			error("Invalid task_cnt for job %u on node %s",
+			      job_ptr->job_id, host);
+			task_cnt = 1;
+		}
 		if (reps == task_cnt) {
 			/* append to existing hostlist record */
 			if (hostlist_push(hl_tmp, host) == 0)
@@ -295,9 +308,7 @@ static char * _task_list_exp(struct job_record *job_ptr)
 			else
 				error("hostlist_create failure");
 		}
-		free(host);
 	}
-	hostlist_destroy(hl);
 	if (hl_tmp)
 		_append_hl_buf(&buf, &hl_tmp, &reps);
 	return buf;
-- 
GitLab