From ce370ca371a81c7bcdcebab0dbeae10474c30a91 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Fri, 14 May 2010 21:07:28 +0000
Subject: [PATCH] Added limited gres support to select/linear. It prevents
 overallocation of gres resources including a map of specific allocated
 resources.

Still need to add save/restore of that information,
propagate details to slurmd, consider topology, etc.
---
 src/common/gres.c                         |  65 +++++++----
 src/common/gres.h                         |  20 ++--
 src/plugins/gres/gpu/gres_gpu.c           | 125 +++++++++++++++++++---
 src/plugins/gres/nic/gres_nic.c           | 125 +++++++++++++++++++---
 src/plugins/select/linear/select_linear.c |  31 ++++--
 5 files changed, 300 insertions(+), 66 deletions(-)

diff --git a/src/common/gres.c b/src/common/gres.c
index a6c09f8225a..c0f4f394bc8 100644
--- a/src/common/gres.c
+++ b/src/common/gres.c
@@ -112,11 +112,14 @@ typedef struct slurm_gres_ops {
 	uint32_t	(*job_test)		( void *job_gres_data,
 						  void *node_gres_data,
 						  bool use_total_gres );
-	void		(*job_alloc)		( void *job_gres_data,
+	int		(*job_alloc)		( void *job_gres_data,
 						  void *node_gres_data,
+						  int node_cnt,
+						  int node_offset,
 						  uint32_t cpu_cnt );
-	void		(*job_dealloc)		( void *job_gres_data,
+	int		(*job_dealloc)		( void *job_gres_data,
 						  void *node_gres_data,
+						  int node_offset,
 						  uint32_t cpu_cnt );
 	void		(*job_state_log)	( void *gres_data,
 						  uint32_t job_id );
@@ -1259,20 +1262,27 @@ extern uint32_t gres_plugin_job_test(List job_gres_list, List node_gres_list,
 /*
  * Allocate resource to a job and update node and job gres information
  * IN job_gres_list - job's gres_list built by gres_plugin_job_gres_validate()
- * IN node_gres_list - node's gres_list built by gres_plugin_node_config_validate()
+ * IN node_gres_list - node's gres_list built by
+ *		gres_plugin_node_config_validate()
+ * IN node_cnt - total number of nodes originally allocated to the job
+ * IN node_offset - zero-origin index to the node of interest
  * IN cpu_cnt - number of CPUs allocated to this job on this node
+ * RET SLURM_SUCCESS or error code
  */
-extern void gres_plugin_job_alloc(List job_gres_list, List node_gres_list, 
-				  uint32_t cpu_cnt)
+extern int gres_plugin_job_alloc(List job_gres_list, List node_gres_list, 
+				 int node_cnt, int node_offset,
+				 uint32_t cpu_cnt)
 {
-	int i;
+	int i, rc, rc2;
 	ListIterator job_gres_iter,  node_gres_iter;
 	gres_state_t *job_gres_ptr, *node_gres_ptr;
 
-	if ((job_gres_list == NULL) || (node_gres_list == NULL))
-		return;
+	if (job_gres_list == NULL)
+		return SLURM_SUCCESS;
+	if (node_gres_list == NULL)
+		return SLURM_ERROR;
 
-	(void) gres_plugin_init();
+	rc = gres_plugin_init();
 
 	slurm_mutex_lock(&gres_context_lock);
 	job_gres_iter = list_iterator_create(job_gres_list);
@@ -1291,33 +1301,43 @@ extern void gres_plugin_job_alloc(List job_gres_list, List node_gres_list,
 			if (job_gres_ptr->plugin_id != 
 			    *(gres_context[i].ops.plugin_id))
 				continue;
-			(*(gres_context[i].ops.job_alloc))
+			rc2 = (*(gres_context[i].ops.job_alloc))
 					(job_gres_ptr->gres_data, 
-					 node_gres_ptr->gres_data, cpu_cnt);
+					 node_gres_ptr->gres_data, node_cnt,
+					 node_offset, cpu_cnt);
+			if (rc2 != SLURM_SUCCESS)
+				rc = rc2;
 			break;
 		}
 	}
 	list_iterator_destroy(job_gres_iter);
 	slurm_mutex_unlock(&gres_context_lock);
+
+	return rc;
 }
 
 /*
  * Deallocate resource from a job and update node and job gres information
  * IN job_gres_list - job's gres_list built by gres_plugin_job_gres_validate()
- * IN node_gres_list - node's gres_list built by gres_plugin_node_config_validate()
+ * IN node_gres_list - node's gres_list built by
+ *		gres_plugin_node_config_validate()
+ * IN node_offset - zero-origin index to the node of interest
  * IN cpu_cnt - number of CPUs allocated to this job on this node
+ * RET SLURM_SUCCESS or error code
  */
-extern void gres_plugin_job_dealloc(List job_gres_list, List node_gres_list, 
-				    uint32_t cpu_cnt)
+extern int gres_plugin_job_dealloc(List job_gres_list, List node_gres_list, 
+				   int node_offset, uint32_t cpu_cnt)
 {
-	int i;
+	int i, rc, rc2;
 	ListIterator job_gres_iter,  node_gres_iter;
 	gres_state_t *job_gres_ptr, *node_gres_ptr;
 
-	if ((job_gres_list == NULL) || (node_gres_list == NULL))
-		return;
+	if (job_gres_list == NULL)
+		return SLURM_SUCCESS;
+	if (node_gres_list == NULL)
+		return SLURM_ERROR;
 
-	(void) gres_plugin_init();
+	rc = gres_plugin_init();
 
 	slurm_mutex_lock(&gres_context_lock);
 	job_gres_iter = list_iterator_create(job_gres_list);
@@ -1336,14 +1356,19 @@ extern void gres_plugin_job_dealloc(List job_gres_list, List node_gres_list,
 			if (job_gres_ptr->plugin_id != 
 			    *(gres_context[i].ops.plugin_id))
 				continue;
-			(*(gres_context[i].ops.job_dealloc))
+			rc2 = (*(gres_context[i].ops.job_dealloc))
 					(job_gres_ptr->gres_data, 
-					 node_gres_ptr->gres_data, cpu_cnt);
+					 node_gres_ptr->gres_data, node_offset,
+					 cpu_cnt);
+			if (rc2 != SLURM_SUCCESS)
+				rc = rc2;
 			break;
 		}
 	}
 	list_iterator_destroy(job_gres_iter);
 	slurm_mutex_unlock(&gres_context_lock);
+
+	return rc;
 }
 
 /*
diff --git a/src/common/gres.h b/src/common/gres.h
index baaf96acaed..4581ee4e922 100644
--- a/src/common/gres.h
+++ b/src/common/gres.h
@@ -208,20 +208,28 @@ extern uint32_t gres_plugin_job_test(List job_gres_list, List node_gres_list,
 /*
  * Allocate resource to a job and update node and job gres information
  * IN job_gres_list - job's gres_list built by gres_plugin_job_gres_validate()
- * IN node_gres_list - node's gres_list built by gres_plugin_node_config_validate()
+ * IN node_gres_list - node's gres_list built by
+ *		gres_plugin_node_config_validate()
+ * IN node_cnt - total number of nodes originally allocated to the job
+ * IN node_offset - zero-origin index to the node of interest
  * IN cpu_cnt - number of CPUs allocated to this job on this node
+ * RET SLURM_SUCCESS or error code
  */
-extern void gres_plugin_job_alloc(List job_gres_list, List node_gres_list, 
-				  uint32_t cpu_cnt);
+extern int gres_plugin_job_alloc(List job_gres_list, List node_gres_list, 
+				 int node_cnt, int node_offset,
+				 uint32_t cpu_cnt);
 
 /*
  * Deallocate resource from a job and update node and job gres information
  * IN job_gres_list - job's gres_list built by gres_plugin_job_gres_validate()
- * IN node_gres_list - node's gres_list built by gres_plugin_node_config_validate()
+ * IN node_gres_list - node's gres_list built by
+ *		gres_plugin_node_config_validate()
+ * IN node_offset - zero-origin index to the node of interest
  * IN cpu_cnt - number of CPUs allocated to this job on this node
+ * RET SLURM_SUCCESS or error code
  */
-extern void gres_plugin_job_dealloc(List job_gres_list, List node_gres_list, 
-				    uint32_t cpu_cnt);
+extern int gres_plugin_job_dealloc(List job_gres_list, List node_gres_list, 
+				   int node_offset, uint32_t cpu_cnt);
 
 /*
  * Log a job's current gres state
diff --git a/src/plugins/gres/gpu/gres_gpu.c b/src/plugins/gres/gpu/gres_gpu.c
index 9b4c1317d34..55182e93213 100644
--- a/src/plugins/gres/gpu/gres_gpu.c
+++ b/src/plugins/gres/gpu/gres_gpu.c
@@ -136,6 +136,10 @@ typedef struct gpu_job_state {
 	/* If 0 then gpu_cnt_alloc is per node,
 	 * if 1 then gpu_cnt_alloc is per CPU */
 	uint8_t  gpu_cnt_mult;
+
+	/* Resources currently allocated to job on each node */
+	uint32_t node_cnt;
+	bitstr_t **gpu_bit_alloc;
 } gpu_job_state_t;
 
 /*
@@ -628,38 +632,129 @@ extern uint32_t job_test(void *job_gres_data, void *node_gres_data,
 	}
 }
 
-extern void job_alloc(void *job_gres_data, void *node_gres_data, int cpu_cnt)
+extern int job_alloc(void *job_gres_data, void *node_gres_data,
+		     int node_cnt, int node_offset, uint32_t cpu_cnt)
 {
+	int i;
 	uint32_t gres_cnt;
 	gpu_job_state_t  *job_gres_ptr  = (gpu_job_state_t *)  job_gres_data;
 	gpu_node_state_t *node_gres_ptr = (gpu_node_state_t *) node_gres_data;
 
+	/*
+	 * Validate data structures. Either job_gres_data->node_cnt and
+	 * job_gres_data->gpu_bit_alloc are both set or both zero/NULL.
+	 */
+	xassert(node_cnt);
+	xassert(node_offset >= 0);
+	xassert(job_gres_ptr);
+	xassert(node_gres_ptr);
+	xassert(node_gres_ptr->gpu_bit_alloc);
+	if (job_gres_ptr->node_cnt == 0) {
+		job_gres_ptr->node_cnt = node_cnt;
+		if (job_gres_ptr->gpu_bit_alloc) {
+			error("%s: node_cnt==0 and bit_alloc is set",
+			      plugin_name);
+			xfree(job_gres_ptr->gpu_bit_alloc);
+		}
+		job_gres_ptr->gpu_bit_alloc = 
+					xmalloc(sizeof(bitstr_t *) * node_cnt);
+	} else if (job_gres_ptr->node_cnt < node_cnt) {
+		error("%s: node_cnt increase from %u to %d",
+		      plugin_name, job_gres_ptr->node_cnt, node_cnt);
+		if (node_offset >= job_gres_ptr->node_cnt)
+			return SLURM_ERROR;
+	} else if (job_gres_ptr->node_cnt > node_cnt) {
+		error("%s: node_cnt decrease from %u to %d",
+		      plugin_name, job_gres_ptr->node_cnt, node_cnt);
+	}
+
+	/*
+	 * Check that sufficient resources exist on this node
+	 */
 	if (job_gres_ptr->gpu_cnt_mult == 0)
 		gres_cnt = job_gres_ptr->gpu_cnt_alloc;
 	else
 		gres_cnt = (job_gres_ptr->gpu_cnt_alloc * cpu_cnt);
+	i =  node_gres_ptr->gpu_cnt_alloc + gres_cnt;
+	i -= node_gres_ptr->gpu_cnt_avail;
+	if (i > 0) {
+		error("%s: overallocated resources by %d", plugin_name, i);
+		/* proceed with request, give job what's available */
+	}
 
-	node_gres_ptr->gpu_cnt_alloc += gres_cnt;
-	if (node_gres_ptr->gpu_cnt_alloc > node_gres_ptr->gpu_cnt_avail)
-		error("%s: overallocated resources", plugin_name);
+	/*
+	 * Select the specific resources to use for this job.
+	 * We'll need to add topology information in the future
+	 */
+	if (job_gres_ptr->gpu_bit_alloc[node_offset]) {
+		error("%s: job's bit_alloc is set for node %d",
+		      plugin_name, node_offset);
+		bit_free(job_gres_ptr->gpu_bit_alloc[node_offset]);
+	}
+	job_gres_ptr->gpu_bit_alloc[node_offset] = bit_alloc(node_gres_ptr->
+							     gpu_cnt_avail);
+	if (job_gres_ptr->gpu_bit_alloc[node_offset] == NULL)
+		fatal("bit_copy: malloc failure");
+	for (i=0; i<node_gres_ptr->gpu_cnt_avail && gres_cnt>0; i++) {
+		if (bit_test(node_gres_ptr->gpu_bit_alloc, i))
+			continue;
+		bit_set(node_gres_ptr->gpu_bit_alloc, i);
+		bit_set(job_gres_ptr->gpu_bit_alloc[node_offset], i);
+		node_gres_ptr->gpu_cnt_alloc++;
+		gres_cnt--;
+	}
+
+	return SLURM_SUCCESS;
 }
 
-extern void job_dealloc(void *job_gres_data, void *node_gres_data, int cpu_cnt)
+extern int job_dealloc(void *job_gres_data, void *node_gres_data,
+		       int node_offset, uint32_t cpu_cnt)
 {
-	uint32_t gres_cnt;
+	int i, len;
 	gpu_job_state_t  *job_gres_ptr  = (gpu_job_state_t *)  job_gres_data;
 	gpu_node_state_t *node_gres_ptr = (gpu_node_state_t *) node_gres_data;
 
-	if (job_gres_ptr->gpu_cnt_mult == 0)
-		gres_cnt = job_gres_ptr->gpu_cnt_alloc;
-	else
-		gres_cnt = (job_gres_ptr->gpu_cnt_alloc * cpu_cnt);
+	/*
+	 * Validate data structures. Either job_gres_data->node_cnt and
+	 * job_gres_data->gpu_bit_alloc are both set or both zero/NULL.
+	 */
+	xassert(node_offset >= 0);
+	xassert(job_gres_ptr);
+	xassert(node_gres_ptr);
+	xassert(node_gres_ptr->gpu_bit_alloc);
+	if (job_gres_ptr->node_cnt <= node_offset) {
+		error("%s: bad node_offset %d count is %u",
+		      plugin_name, node_offset, job_gres_ptr->node_cnt);
+		return SLURM_ERROR;
+	}
+	if (job_gres_ptr->gpu_bit_alloc == NULL) {
+		error("%s: job's bitmap is NULL", plugin_name);
+		return SLURM_ERROR;
+	}
+	if (job_gres_ptr->gpu_bit_alloc[node_offset] == NULL) {
+		error("%s: job's bitmap is empty", plugin_name);
+		return SLURM_ERROR;
+	}
 
-	if (gres_cnt > node_gres_ptr->gpu_cnt_alloc) {
-		error("%s: resource count underflow", plugin_name);
-		node_gres_ptr->gpu_cnt_alloc = 0;
-	} else
-		node_gres_ptr->gpu_cnt_alloc -= gres_cnt;
+	len = bit_size(job_gres_ptr->gpu_bit_alloc[node_offset]);
+	i   = bit_size(node_gres_ptr->gpu_bit_alloc);
+	if (i != len) {
+		error("%s: job and node bitmap sizes differ (%d != %d)",
+		      plugin_name, len, i);
+		len = MIN(len, i);
+		/* proceed with request, make best effort */
+	}
+	for (i=0; i<len; i++) {
+		if (!bit_test(job_gres_ptr->gpu_bit_alloc[node_offset], i))
+			continue;
+		bit_clear(node_gres_ptr->gpu_bit_alloc, i);
+		/* NOTE: Do not clear bit from
+		 * job_gres_ptr->gpu_bit_alloc[node_offset]
+		 * since this may only be an emulated deallocate */
+		node_gres_ptr->gpu_cnt_alloc--;
+	}
+
+	return SLURM_SUCCESS;
 }
 
 extern void job_state_log(void *gres_data, uint32_t job_id)
diff --git a/src/plugins/gres/nic/gres_nic.c b/src/plugins/gres/nic/gres_nic.c
index 40cbf97c7ea..148d19cee63 100644
--- a/src/plugins/gres/nic/gres_nic.c
+++ b/src/plugins/gres/nic/gres_nic.c
@@ -136,6 +136,10 @@ typedef struct nic_job_state {
 	/* If 0 then nic_cnt_alloc is per node,
 	 * if 1 then nic_cnt_alloc is per CPU */
 	uint8_t  nic_cnt_mult;
+
+	/* Resources currently allocated to job on each node */
+	uint32_t node_cnt;
+	bitstr_t **nic_bit_alloc;
 } nic_job_state_t;
 
 /*
@@ -628,38 +632,129 @@ extern uint32_t job_test(void *job_gres_data, void *node_gres_data,
 	}
 }
 
-extern void job_alloc(void *job_gres_data, void *node_gres_data, int cpu_cnt)
+extern int job_alloc(void *job_gres_data, void *node_gres_data,
+		     int node_cnt, int node_offset, uint32_t cpu_cnt)
 {
+	int i;
 	uint32_t gres_cnt;
 	nic_job_state_t  *job_gres_ptr  = (nic_job_state_t *)  job_gres_data;
 	nic_node_state_t *node_gres_ptr = (nic_node_state_t *) node_gres_data;
 
+	/*
+	 * Validate data structures. Either job_gres_data->node_cnt and
+	 * job_gres_data->nic_bit_alloc are both set or both zero/NULL.
+	 */
+	xassert(node_cnt);
+	xassert(node_offset >= 0);
+	xassert(job_gres_ptr);
+	xassert(node_gres_ptr);
+	xassert(node_gres_ptr->nic_bit_alloc);
+	if (job_gres_ptr->node_cnt == 0) {
+		job_gres_ptr->node_cnt = node_cnt;
+		if (job_gres_ptr->nic_bit_alloc) {
+			error("%s: node_cnt==0 and bit_alloc is set",
+			      plugin_name);
+			xfree(job_gres_ptr->nic_bit_alloc);
+		}
+		job_gres_ptr->nic_bit_alloc = 
+					xmalloc(sizeof(bitstr_t *) * node_cnt);
+	} else if (job_gres_ptr->node_cnt < node_cnt) {
+		error("%s: node_cnt increase from %u to %d",
+		      plugin_name, job_gres_ptr->node_cnt, node_cnt);
+		if (node_offset >= job_gres_ptr->node_cnt)
+			return SLURM_ERROR;
+	} else if (job_gres_ptr->node_cnt > node_cnt) {
+		error("%s: node_cnt decrease from %u to %d",
+		      plugin_name, job_gres_ptr->node_cnt, node_cnt);
+	}
+
+	/*
+	 * Check that sufficient resources exist on this node
+	 */
 	if (job_gres_ptr->nic_cnt_mult == 0)
 		gres_cnt = job_gres_ptr->nic_cnt_alloc;
 	else
 		gres_cnt = (job_gres_ptr->nic_cnt_alloc * cpu_cnt);
+	i =  node_gres_ptr->nic_cnt_alloc + gres_cnt;
+	i -= node_gres_ptr->nic_cnt_avail;
+	if (i > 0) {
+		error("%s: overallocated resources by %d", plugin_name, i);
+		/* proceed with request, give job what's available */
+	}
 
-	node_gres_ptr->nic_cnt_alloc += gres_cnt;
-	if (node_gres_ptr->nic_cnt_alloc > node_gres_ptr->nic_cnt_avail)
-		error("%s: overallocated resources", plugin_name);
+	/*
+	 * Select the specific resources to use for this job.
+	 * We'll need to add topology information in the future
+	 */
+	if (job_gres_ptr->nic_bit_alloc[node_offset]) {
+		error("%s: job's bit_alloc is set for node %d",
+		      plugin_name, node_offset);
+		bit_free(job_gres_ptr->nic_bit_alloc[node_offset]);
+	}
+	job_gres_ptr->nic_bit_alloc[node_offset] = bit_alloc(node_gres_ptr->
+							     nic_cnt_avail);
+	if (job_gres_ptr->nic_bit_alloc[node_offset] == NULL)
+		fatal("bit_copy: malloc failure");
+	for (i=0; i<node_gres_ptr->nic_cnt_avail && gres_cnt>0; i++) {
+		if (bit_test(node_gres_ptr->nic_bit_alloc, i))
+			continue;
+		bit_set(node_gres_ptr->nic_bit_alloc, i);
+		bit_set(job_gres_ptr->nic_bit_alloc[node_offset], i);
+		node_gres_ptr->nic_cnt_alloc++;
+		gres_cnt--;
+	}
+
+	return SLURM_SUCCESS;
 }
 
-extern void job_dealloc(void *job_gres_data, void *node_gres_data, int cpu_cnt)
+extern int job_dealloc(void *job_gres_data, void *node_gres_data,
+		       int node_offset, uint32_t cpu_cnt)
 {
-	uint32_t gres_cnt;
+	int i, len;
 	nic_job_state_t  *job_gres_ptr  = (nic_job_state_t *)  job_gres_data;
 	nic_node_state_t *node_gres_ptr = (nic_node_state_t *) node_gres_data;
 
-	if (job_gres_ptr->nic_cnt_mult == 0)
-		gres_cnt = job_gres_ptr->nic_cnt_alloc;
-	else
-		gres_cnt = (job_gres_ptr->nic_cnt_alloc * cpu_cnt);
+	/*
+	 * Validate data structures. Either job_gres_data->node_cnt and
+	 * job_gres_data->nic_bit_alloc are both set or both zero/NULL.
+	 */
+	xassert(node_offset >= 0);
+	xassert(job_gres_ptr);
+	xassert(node_gres_ptr);
+	xassert(node_gres_ptr->nic_bit_alloc);
+	if (job_gres_ptr->node_cnt <= node_offset) {
+		error("%s: bad node_offset %d count is %u",
+		      plugin_name, node_offset, job_gres_ptr->node_cnt);
+		return SLURM_ERROR;
+	}
+	if (job_gres_ptr->nic_bit_alloc == NULL) {
+		error("%s: job's bitmap is NULL", plugin_name);
+		return SLURM_ERROR;
+	}
+	if (job_gres_ptr->nic_bit_alloc[node_offset] == NULL) {
+		error("%s: job's bitmap is empty", plugin_name);
+		return SLURM_ERROR;
+	}
 
-	if (gres_cnt > node_gres_ptr->nic_cnt_alloc) {
-		error("%s: resource count underflow", plugin_name);
-		node_gres_ptr->nic_cnt_alloc = 0;
-	} else
-		node_gres_ptr->nic_cnt_alloc -= gres_cnt;
+	len = bit_size(job_gres_ptr->nic_bit_alloc[node_offset]);
+	i   = bit_size(node_gres_ptr->nic_bit_alloc);
+	if (i != len) {
+		error("%s: job and node bitmap sizes differ (%d != %d)",
+		      plugin_name, len, i);
+		len = MIN(len, i);
+		/* proceed with request, make best effort */
+	}
+	for (i=0; i<len; i++) {
+		if (!bit_test(job_gres_ptr->nic_bit_alloc[node_offset], i))
+			continue;
+		bit_clear(node_gres_ptr->nic_bit_alloc, i);
+		/* NOTE: Do not clear bit from
+		 * job_gres_ptr->nic_bit_alloc[node_offset]
+		 * since this may only be an emulated deallocate */
+		node_gres_ptr->nic_cnt_alloc--;
+	}
+
+	return SLURM_SUCCESS;
 }
 
 extern void job_state_log(void *gres_data, uint32_t job_id)
diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c
index 30024f78be8..a60afafafc0 100644
--- a/src/plugins/select/linear/select_linear.c
+++ b/src/plugins/select/linear/select_linear.c
@@ -1348,7 +1348,7 @@ static int _rm_job_from_nodes(struct cr_record *cr_ptr,
 			      struct job_record *job_ptr, char *pre_err,
 			      bool remove_all)
 {
-	int i, i_first, i_last, rc = SLURM_SUCCESS;
+	int i, i_first, i_last, node_offset, rc = SLURM_SUCCESS;
 	struct part_cr_record *part_cr_ptr;
 	job_resources_t *job_resrcs_ptr;
 	uint32_t job_memory, job_memory_cpu = 0, job_memory_node = 0;
@@ -1388,9 +1388,12 @@ static int _rm_job_from_nodes(struct cr_record *cr_ptr,
 	i_last  = bit_fls(job_resrcs_ptr->node_bitmap);
 	if (i_first == -1)	/* job has no nodes */
 		i_last = -2;
+	node_offset = -1;
 	for (i = i_first; i <= i_last; i++) {
-		if (!bit_test(job_resrcs_ptr->node_bitmap, i) ||
-		    !bit_test(job_ptr->node_bitmap, i))
+		if (!bit_test(job_resrcs_ptr->node_bitmap, i))
+			continue;
+		node_offset++;
+		if (!bit_test(job_ptr->node_bitmap, i))
 			continue;
 
 		node_ptr = node_record_table_ptr + i;
@@ -1414,7 +1417,8 @@ static int _rm_job_from_nodes(struct cr_record *cr_ptr,
 			gres_list = cr_ptr->nodes[i].gres_list;
 		else
 			gres_list = node_ptr->gres_list;
-		gres_plugin_job_dealloc(job_ptr->gres_list, gres_list, cpu_cnt);
+		gres_plugin_job_dealloc(job_ptr->gres_list, gres_list,
+					node_offset, cpu_cnt);
 		gres_plugin_node_state_log(gres_list, node_ptr->name);
 
 		if (exclusive) {
@@ -1526,7 +1530,8 @@ static int _rm_job_from_one_node(struct job_record *job_ptr,
 	}
 	first_bit = bit_ffs(job_resrcs_ptr->node_bitmap);
 	last_bit  = node_inx;
-	for (i = first_bit, node_offset = -1; i <= node_inx; i++) {
+	node_offset = -1;
+	for (i = first_bit; i <= node_inx; i++) {
 		if (!bit_test(job_resrcs_ptr->node_bitmap, i))
 			continue;
 		node_offset++;
@@ -1560,7 +1565,8 @@ static int _rm_job_from_one_node(struct job_record *job_ptr,
 		gres_list = cr_ptr->nodes[i].gres_list;
 	else
 		gres_list = node_ptr->gres_list;
-	gres_plugin_job_dealloc(job_ptr->gres_list, gres_list, cpu_cnt);
+	gres_plugin_job_dealloc(job_ptr->gres_list, gres_list, node_offset,
+				cpu_cnt);
 	gres_plugin_node_state_log(gres_list, node_ptr->name);
 
 	exclusive = (job_ptr->details->shared == 0);
@@ -1624,7 +1630,7 @@ static int _add_job_to_nodes(struct cr_record *cr_ptr,
 			     struct job_record *job_ptr, char *pre_err,
 			     int alloc_all)
 {
-	int i, i_first, i_last, rc = SLURM_SUCCESS;
+	int i, i_first, i_last, node_cnt, node_offset, rc = SLURM_SUCCESS;
 	bool exclusive;
 	struct part_cr_record *part_cr_ptr;
 	job_resources_t *job_resrcs_ptr;
@@ -1658,11 +1664,15 @@ static int _add_job_to_nodes(struct cr_record *cr_ptr,
 
 	i_first = bit_ffs(job_resrcs_ptr->node_bitmap);
 	i_last  = bit_fls(job_resrcs_ptr->node_bitmap);
+	node_cnt = bit_set_count(job_resrcs_ptr->node_bitmap);
 	if (i_first == -1)	/* job has no nodes */
 		i_last = -2;
+	node_offset = -1;
 	for (i = i_first; i <= i_last; i++) {
-		if (!bit_test(job_resrcs_ptr->node_bitmap, i) ||
-		    !bit_test(job_ptr->node_bitmap, i))
+		if (!bit_test(job_resrcs_ptr->node_bitmap, i))
+			continue;
+		node_offset++;
+		if (!bit_test(job_ptr->node_bitmap, i))
 			continue;
 
 		node_ptr = node_record_table_ptr + i;
@@ -1681,7 +1691,8 @@ static int _add_job_to_nodes(struct cr_record *cr_ptr,
 			gres_list = cr_ptr->nodes[i].gres_list;
 		else
 			gres_list = node_ptr->gres_list;
-		gres_plugin_job_alloc(job_ptr->gres_list, gres_list, cpu_cnt);
+		gres_plugin_job_alloc(job_ptr->gres_list, gres_list,
+				      node_cnt, node_offset, cpu_cnt);
 		gres_plugin_node_state_log(gres_list, node_ptr->name);
 
 		if (exclusive)
-- 
GitLab