From ce370ca371a81c7bcdcebab0dbeae10474c30a91 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Fri, 14 May 2010 21:07:28 +0000 Subject: [PATCH] Added limited gres support to select/linear. It prevents overallocation of gres resources including a map of specific allocated resources. Still need to add save/restore of that information, propagate details to slurmd, consider topology, etc. --- src/common/gres.c | 65 +++++++---- src/common/gres.h | 20 ++-- src/plugins/gres/gpu/gres_gpu.c | 125 +++++++++++++++++++--- src/plugins/gres/nic/gres_nic.c | 125 +++++++++++++++++++--- src/plugins/select/linear/select_linear.c | 31 ++++-- 5 files changed, 300 insertions(+), 66 deletions(-) diff --git a/src/common/gres.c b/src/common/gres.c index a6c09f8225a..c0f4f394bc8 100644 --- a/src/common/gres.c +++ b/src/common/gres.c @@ -112,11 +112,14 @@ typedef struct slurm_gres_ops { uint32_t (*job_test) ( void *job_gres_data, void *node_gres_data, bool use_total_gres ); - void (*job_alloc) ( void *job_gres_data, + int (*job_alloc) ( void *job_gres_data, void *node_gres_data, + int node_cnt, + int node_offset, uint32_t cpu_cnt ); - void (*job_dealloc) ( void *job_gres_data, + int (*job_dealloc) ( void *job_gres_data, void *node_gres_data, + int node_offset, uint32_t cpu_cnt ); void (*job_state_log) ( void *gres_data, uint32_t job_id ); @@ -1259,20 +1262,27 @@ extern uint32_t gres_plugin_job_test(List job_gres_list, List node_gres_list, /* * Allocate resource to a job and update node and job gres information * IN job_gres_list - job's gres_list built by gres_plugin_job_gres_validate() - * IN node_gres_list - node's gres_list built by gres_plugin_node_config_validate() + * IN node_gres_list - node's gres_list built by + * gres_plugin_node_config_validate() + * IN node_cnt - total number of nodes originally allocated to the job + * IN node_offset - zero-origin index to the node of interest * IN cpu_cnt - number of CPUs allocated to this job on this node + * RET SLURM_SUCCESS 
or error code */ -extern void gres_plugin_job_alloc(List job_gres_list, List node_gres_list, - uint32_t cpu_cnt) +extern int gres_plugin_job_alloc(List job_gres_list, List node_gres_list, + int node_cnt, int node_offset, + uint32_t cpu_cnt) { - int i; + int i, rc, rc2; ListIterator job_gres_iter, node_gres_iter; gres_state_t *job_gres_ptr, *node_gres_ptr; - if ((job_gres_list == NULL) || (node_gres_list == NULL)) - return; + if (job_gres_list == NULL) + return SLURM_SUCCESS; + if (node_gres_list == NULL) + return SLURM_ERROR; - (void) gres_plugin_init(); + rc = gres_plugin_init(); slurm_mutex_lock(&gres_context_lock); job_gres_iter = list_iterator_create(job_gres_list); @@ -1291,33 +1301,43 @@ extern void gres_plugin_job_alloc(List job_gres_list, List node_gres_list, if (job_gres_ptr->plugin_id != *(gres_context[i].ops.plugin_id)) continue; - (*(gres_context[i].ops.job_alloc)) + rc2 = (*(gres_context[i].ops.job_alloc)) (job_gres_ptr->gres_data, - node_gres_ptr->gres_data, cpu_cnt); + node_gres_ptr->gres_data, node_cnt, + node_offset, cpu_cnt); + if (rc2 != SLURM_SUCCESS) + rc = rc2; break; } } list_iterator_destroy(job_gres_iter); slurm_mutex_unlock(&gres_context_lock); + + return rc; } /* * Deallocate resource from a job and update node and job gres information * IN job_gres_list - job's gres_list built by gres_plugin_job_gres_validate() - * IN node_gres_list - node's gres_list built by gres_plugin_node_config_validate() + * IN node_gres_list - node's gres_list built by + * gres_plugin_node_config_validate() + * IN node_offset - zero-origin index to the node of interest * IN cpu_cnt - number of CPUs allocated to this job on this node + * RET SLURM_SUCCESS or error code */ -extern void gres_plugin_job_dealloc(List job_gres_list, List node_gres_list, - uint32_t cpu_cnt) +extern int gres_plugin_job_dealloc(List job_gres_list, List node_gres_list, + int node_offset, uint32_t cpu_cnt) { - int i; + int i, rc, rc2; ListIterator job_gres_iter, node_gres_iter; gres_state_t 
*job_gres_ptr, *node_gres_ptr; - if ((job_gres_list == NULL) || (node_gres_list == NULL)) - return; + if (job_gres_list == NULL) + return SLURM_SUCCESS; + if (node_gres_list == NULL) + return SLURM_ERROR; - (void) gres_plugin_init(); + rc = gres_plugin_init(); slurm_mutex_lock(&gres_context_lock); job_gres_iter = list_iterator_create(job_gres_list); @@ -1336,14 +1356,19 @@ extern void gres_plugin_job_dealloc(List job_gres_list, List node_gres_list, if (job_gres_ptr->plugin_id != *(gres_context[i].ops.plugin_id)) continue; - (*(gres_context[i].ops.job_dealloc)) + rc2 = (*(gres_context[i].ops.job_dealloc)) (job_gres_ptr->gres_data, - node_gres_ptr->gres_data, cpu_cnt); + node_gres_ptr->gres_data, node_offset, + cpu_cnt); + if (rc2 != SLURM_SUCCESS) + rc = rc2; break; } } list_iterator_destroy(job_gres_iter); slurm_mutex_unlock(&gres_context_lock); + + return rc; } /* diff --git a/src/common/gres.h b/src/common/gres.h index baaf96acaed..4581ee4e922 100644 --- a/src/common/gres.h +++ b/src/common/gres.h @@ -208,20 +208,28 @@ extern uint32_t gres_plugin_job_test(List job_gres_list, List node_gres_list, /* * Allocate resource to a job and update node and job gres information * IN job_gres_list - job's gres_list built by gres_plugin_job_gres_validate() - * IN node_gres_list - node's gres_list built by gres_plugin_node_config_validate() + * IN node_gres_list - node's gres_list built by + * gres_plugin_node_config_validate() + * IN node_cnt - total number of nodes originally allocated to the job + * IN node_offset - zero-origin index to the node of interest * IN cpu_cnt - number of CPUs allocated to this job on this node + * RET SLURM_SUCCESS or error code */ -extern void gres_plugin_job_alloc(List job_gres_list, List node_gres_list, - uint32_t cpu_cnt); +extern int gres_plugin_job_alloc(List job_gres_list, List node_gres_list, + int node_cnt, int node_offset, + uint32_t cpu_cnt); /* * Deallocate resource from a job and update node and job gres information * IN 
job_gres_list - job's gres_list built by gres_plugin_job_gres_validate() - * IN node_gres_list - node's gres_list built by gres_plugin_node_config_validate() + * IN node_gres_list - node's gres_list built by + * gres_plugin_node_config_validate() + * IN node_offset - zero-origin index to the node of interest * IN cpu_cnt - number of CPUs allocated to this job on this node + * RET SLURM_SUCCESS or error code */ -extern void gres_plugin_job_dealloc(List job_gres_list, List node_gres_list, - uint32_t cpu_cnt); +extern int gres_plugin_job_dealloc(List job_gres_list, List node_gres_list, + int node_offset, uint32_t cpu_cnt); /* * Log a job's current gres state diff --git a/src/plugins/gres/gpu/gres_gpu.c b/src/plugins/gres/gpu/gres_gpu.c index 9b4c1317d34..55182e93213 100644 --- a/src/plugins/gres/gpu/gres_gpu.c +++ b/src/plugins/gres/gpu/gres_gpu.c @@ -136,6 +136,10 @@ typedef struct gpu_job_state { /* If 0 then gpu_cnt_alloc is per node, * if 1 then gpu_cnt_alloc is per CPU */ uint8_t gpu_cnt_mult; + + /* Resources currently allocated to job on each node */ + uint32_t node_cnt; + bitstr_t **gpu_bit_alloc; } gpu_job_state_t; /* @@ -628,38 +632,129 @@ extern uint32_t job_test(void *job_gres_data, void *node_gres_data, } } -extern void job_alloc(void *job_gres_data, void *node_gres_data, int cpu_cnt) +extern int job_alloc(void *job_gres_data, void *node_gres_data, + int node_cnt, int node_offset, uint32_t cpu_cnt) { + int i; uint32_t gres_cnt; gpu_job_state_t *job_gres_ptr = (gpu_job_state_t *) job_gres_data; gpu_node_state_t *node_gres_ptr = (gpu_node_state_t *) node_gres_data; + /* + * Validate data structures. Either job_gres_data->node_cnt and + * job_gres_data->gpu_bit_alloc are both set or both zero/NULL. 
+ */ + xassert(node_cnt); + xassert(node_offset >= 0); + xassert(job_gres_ptr); + xassert(node_gres_ptr); + xassert(node_gres_ptr->gpu_bit_alloc); + if (job_gres_ptr->node_cnt == 0) { + job_gres_ptr->node_cnt = node_cnt; + if (job_gres_ptr->gpu_bit_alloc) { + error("%s: node_cnt==0 and bit_alloc is set", + plugin_name); + xfree(job_gres_ptr->gpu_bit_alloc); + } + job_gres_ptr->gpu_bit_alloc = + xmalloc(sizeof(bitstr_t *) * node_cnt); + } else if (job_gres_ptr->node_cnt < node_cnt) { + error("%s: node_cnt increase from %u to %d", + plugin_name, job_gres_ptr->node_cnt, node_cnt); + if (node_offset >= job_gres_ptr->node_cnt) + return SLURM_ERROR; + } else if (job_gres_ptr->node_cnt > node_cnt) { + error("%s: node_cnt decrease from %u to %d", + plugin_name, job_gres_ptr->node_cnt, node_cnt); + } + + /* + * Check that sufficient resources exist on this node + */ if (job_gres_ptr->gpu_cnt_mult == 0) gres_cnt = job_gres_ptr->gpu_cnt_alloc; else gres_cnt = (job_gres_ptr->gpu_cnt_alloc * cpu_cnt); + i = node_gres_ptr->gpu_cnt_alloc + gres_cnt; + i -= node_gres_ptr->gpu_cnt_avail; + if (i > 0) { + error("%s: overallocated resources by %d", plugin_name, i); + /* proceed with request, give job what's available */ + } - node_gres_ptr->gpu_cnt_alloc += gres_cnt; - if (node_gres_ptr->gpu_cnt_alloc > node_gres_ptr->gpu_cnt_avail) - error("%s: overallocated resources", plugin_name); + /* + * Select the specific resources to use for this job. 
+ * We'll need to add topology information in the future + */ + if (job_gres_ptr->gpu_bit_alloc[node_offset]) { + error("%s: job's bit_alloc is set for node %d", + plugin_name, node_offset); + bit_free(job_gres_ptr->gpu_bit_alloc[node_offset]); + } + job_gres_ptr->gpu_bit_alloc[node_offset] = bit_alloc(node_gres_ptr-> + gpu_cnt_avail); + if (job_gres_ptr->gpu_bit_alloc[node_offset] == NULL) + fatal("bit_copy: malloc failure"); + for (i=0; i<node_gres_ptr->gpu_cnt_avail && gres_cnt>0; i++) { + if (bit_test(node_gres_ptr->gpu_bit_alloc, i)) + continue; + bit_set(node_gres_ptr->gpu_bit_alloc, i); + bit_set(job_gres_ptr->gpu_bit_alloc[node_offset], i); + node_gres_ptr->gpu_cnt_alloc++; + gres_cnt--; + } + + return SLURM_SUCCESS; } -extern void job_dealloc(void *job_gres_data, void *node_gres_data, int cpu_cnt) +extern int job_dealloc(void *job_gres_data, void *node_gres_data, + int node_offset, uint32_t cpu_cnt) { - uint32_t gres_cnt; + int i, len; gpu_job_state_t *job_gres_ptr = (gpu_job_state_t *) job_gres_data; gpu_node_state_t *node_gres_ptr = (gpu_node_state_t *) node_gres_data; - if (job_gres_ptr->gpu_cnt_mult == 0) - gres_cnt = job_gres_ptr->gpu_cnt_alloc; - else - gres_cnt = (job_gres_ptr->gpu_cnt_alloc * cpu_cnt); + /* + * Validate data structures. Either job_gres_data->node_cnt and + * job_gres_data->gpu_bit_alloc are both set or both zero/NULL. 
+ */ + xassert(node_offset >= 0); + xassert(job_gres_ptr); + xassert(node_gres_ptr); + xassert(node_gres_ptr->gpu_bit_alloc); + if (job_gres_ptr->node_cnt <= node_offset) { + error("%s: bad node_offset %d count is %u", + plugin_name, node_offset, job_gres_ptr->node_cnt); + return SLURM_ERROR; + } + if (job_gres_ptr->gpu_bit_alloc == NULL) { + error("%s: job's bitmap is NULL", plugin_name); + return SLURM_ERROR; + } + if (job_gres_ptr->gpu_bit_alloc[node_offset] == NULL) { + error("%s: job's bitmap is empty", plugin_name); + return SLURM_ERROR; + } - if (gres_cnt > node_gres_ptr->gpu_cnt_alloc) { - error("%s: resource count underflow", plugin_name); - node_gres_ptr->gpu_cnt_alloc = 0; - } else - node_gres_ptr->gpu_cnt_alloc -= gres_cnt; + len = bit_size(job_gres_ptr->gpu_bit_alloc[node_offset]); + i = bit_size(node_gres_ptr->gpu_bit_alloc); + if (i != len) { + error("%s: job and node bitmap sizes differ (%d != %d)", + plugin_name, len, i); + len = MIN(len, i); + /* proceed with request, make best effort */ + } + for (i=0; i<len; i++) { + if (!bit_test(job_gres_ptr->gpu_bit_alloc[node_offset], i)) + continue; + bit_clear(node_gres_ptr->gpu_bit_alloc, i); + /* NOTE: Do not clear bit from + * job_gres_ptr->gpu_bit_alloc[node_offset] + * since this may only be an emulated deallocate */ + node_gres_ptr->gpu_cnt_alloc--; + } + + return SLURM_SUCCESS; } extern void job_state_log(void *gres_data, uint32_t job_id) diff --git a/src/plugins/gres/nic/gres_nic.c b/src/plugins/gres/nic/gres_nic.c index 40cbf97c7ea..148d19cee63 100644 --- a/src/plugins/gres/nic/gres_nic.c +++ b/src/plugins/gres/nic/gres_nic.c @@ -136,6 +136,10 @@ typedef struct nic_job_state { /* If 0 then nic_cnt_alloc is per node, * if 1 then nic_cnt_alloc is per CPU */ uint8_t nic_cnt_mult; + + /* Resources currently allocated to job on each node */ + uint32_t node_cnt; + bitstr_t **nic_bit_alloc; } nic_job_state_t; /* @@ -628,38 +632,129 @@ extern uint32_t job_test(void *job_gres_data, void *node_gres_data, } 
} -extern void job_alloc(void *job_gres_data, void *node_gres_data, int cpu_cnt) +extern int job_alloc(void *job_gres_data, void *node_gres_data, + int node_cnt, int node_offset, uint32_t cpu_cnt) { + int i; uint32_t gres_cnt; nic_job_state_t *job_gres_ptr = (nic_job_state_t *) job_gres_data; nic_node_state_t *node_gres_ptr = (nic_node_state_t *) node_gres_data; + /* + * Validate data structures. Either job_gres_data->node_cnt and + * job_gres_data->nic_bit_alloc are both set or both zero/NULL. + */ + xassert(node_cnt); + xassert(node_offset >= 0); + xassert(job_gres_ptr); + xassert(node_gres_ptr); + xassert(node_gres_ptr->nic_bit_alloc); + if (job_gres_ptr->node_cnt == 0) { + job_gres_ptr->node_cnt = node_cnt; + if (job_gres_ptr->nic_bit_alloc) { + error("%s: node_cnt==0 and bit_alloc is set", + plugin_name); + xfree(job_gres_ptr->nic_bit_alloc); + } + job_gres_ptr->nic_bit_alloc = + xmalloc(sizeof(bitstr_t *) * node_cnt); + } else if (job_gres_ptr->node_cnt < node_cnt) { + error("%s: node_cnt increase from %u to %d", + plugin_name, job_gres_ptr->node_cnt, node_cnt); + if (node_offset >= job_gres_ptr->node_cnt) + return SLURM_ERROR; + } else if (job_gres_ptr->node_cnt > node_cnt) { + error("%s: node_cnt decrease from %u to %d", + plugin_name, job_gres_ptr->node_cnt, node_cnt); + } + + /* + * Check that sufficient resources exist on this node + */ if (job_gres_ptr->nic_cnt_mult == 0) gres_cnt = job_gres_ptr->nic_cnt_alloc; else gres_cnt = (job_gres_ptr->nic_cnt_alloc * cpu_cnt); + i = node_gres_ptr->nic_cnt_alloc + gres_cnt; + i -= node_gres_ptr->nic_cnt_avail; + if (i > 0) { + error("%s: overallocated resources by %d", plugin_name, i); + /* proceed with request, give job what's available */ + } - node_gres_ptr->nic_cnt_alloc += gres_cnt; - if (node_gres_ptr->nic_cnt_alloc > node_gres_ptr->nic_cnt_avail) - error("%s: overallocated resources", plugin_name); + /* + * Select the specific resources to use for this job. 
+ * We'll need to add topology information in the future + */ + if (job_gres_ptr->nic_bit_alloc[node_offset]) { + error("%s: job's bit_alloc is set for node %d", + plugin_name, node_offset); + bit_free(job_gres_ptr->nic_bit_alloc[node_offset]); + } + job_gres_ptr->nic_bit_alloc[node_offset] = bit_alloc(node_gres_ptr-> + nic_cnt_avail); + if (job_gres_ptr->nic_bit_alloc[node_offset] == NULL) + fatal("bit_copy: malloc failure"); + for (i=0; i<node_gres_ptr->nic_cnt_avail && gres_cnt>0; i++) { + if (bit_test(node_gres_ptr->nic_bit_alloc, i)) + continue; + bit_set(node_gres_ptr->nic_bit_alloc, i); + bit_set(job_gres_ptr->nic_bit_alloc[node_offset], i); + node_gres_ptr->nic_cnt_alloc++; + gres_cnt--; + } + + return SLURM_SUCCESS; } -extern void job_dealloc(void *job_gres_data, void *node_gres_data, int cpu_cnt) +extern int job_dealloc(void *job_gres_data, void *node_gres_data, + int node_offset, uint32_t cpu_cnt) { - uint32_t gres_cnt; + int i, len; nic_job_state_t *job_gres_ptr = (nic_job_state_t *) job_gres_data; nic_node_state_t *node_gres_ptr = (nic_node_state_t *) node_gres_data; - if (job_gres_ptr->nic_cnt_mult == 0) - gres_cnt = job_gres_ptr->nic_cnt_alloc; - else - gres_cnt = (job_gres_ptr->nic_cnt_alloc * cpu_cnt); + /* + * Validate data structures. Either job_gres_data->node_cnt and + * job_gres_data->nic_bit_alloc are both set or both zero/NULL. 
+ */ + xassert(node_offset >= 0); + xassert(job_gres_ptr); + xassert(node_gres_ptr); + xassert(node_gres_ptr->nic_bit_alloc); + if (job_gres_ptr->node_cnt <= node_offset) { + error("%s: bad node_offset %d count is %u", + plugin_name, node_offset, job_gres_ptr->node_cnt); + return SLURM_ERROR; + } + if (job_gres_ptr->nic_bit_alloc == NULL) { + error("%s: job's bitmap is NULL", plugin_name); + return SLURM_ERROR; + } + if (job_gres_ptr->nic_bit_alloc[node_offset] == NULL) { + error("%s: job's bitmap is empty", plugin_name); + return SLURM_ERROR; + } - if (gres_cnt > node_gres_ptr->nic_cnt_alloc) { - error("%s: resource count underflow", plugin_name); - node_gres_ptr->nic_cnt_alloc = 0; - } else - node_gres_ptr->nic_cnt_alloc -= gres_cnt; + len = bit_size(job_gres_ptr->nic_bit_alloc[node_offset]); + i = bit_size(node_gres_ptr->nic_bit_alloc); + if (i != len) { + error("%s: job and node bitmap sizes differ (%d != %d)", + plugin_name, len, i); + len = MIN(len, i); + /* proceed with request, make best effort */ + } + for (i=0; i<len; i++) { + if (!bit_test(job_gres_ptr->nic_bit_alloc[node_offset], i)) + continue; + bit_clear(node_gres_ptr->nic_bit_alloc, i); + /* NOTE: Do not clear bit from + * job_gres_ptr->nic_bit_alloc[node_offset] + * since this may only be an emulated deallocate */ + node_gres_ptr->nic_cnt_alloc--; + } + + return SLURM_SUCCESS; } extern void job_state_log(void *gres_data, uint32_t job_id) diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c index 30024f78be8..a60afafafc0 100644 --- a/src/plugins/select/linear/select_linear.c +++ b/src/plugins/select/linear/select_linear.c @@ -1348,7 +1348,7 @@ static int _rm_job_from_nodes(struct cr_record *cr_ptr, struct job_record *job_ptr, char *pre_err, bool remove_all) { - int i, i_first, i_last, rc = SLURM_SUCCESS; + int i, i_first, i_last, node_offset, rc = SLURM_SUCCESS; struct part_cr_record *part_cr_ptr; job_resources_t *job_resrcs_ptr; uint32_t job_memory, 
job_memory_cpu = 0, job_memory_node = 0; @@ -1388,9 +1388,12 @@ static int _rm_job_from_nodes(struct cr_record *cr_ptr, i_last = bit_fls(job_resrcs_ptr->node_bitmap); if (i_first == -1) /* job has no nodes */ i_last = -2; + node_offset = -1; for (i = i_first; i <= i_last; i++) { - if (!bit_test(job_resrcs_ptr->node_bitmap, i) || - !bit_test(job_ptr->node_bitmap, i)) + if (!bit_test(job_resrcs_ptr->node_bitmap, i)) + continue; + node_offset++; + if (!bit_test(job_ptr->node_bitmap, i)) continue; node_ptr = node_record_table_ptr + i; @@ -1414,7 +1417,8 @@ static int _rm_job_from_nodes(struct cr_record *cr_ptr, gres_list = cr_ptr->nodes[i].gres_list; else gres_list = node_ptr->gres_list; - gres_plugin_job_dealloc(job_ptr->gres_list, gres_list, cpu_cnt); + gres_plugin_job_dealloc(job_ptr->gres_list, gres_list, + node_offset, cpu_cnt); gres_plugin_node_state_log(gres_list, node_ptr->name); if (exclusive) { @@ -1526,7 +1530,8 @@ static int _rm_job_from_one_node(struct job_record *job_ptr, } first_bit = bit_ffs(job_resrcs_ptr->node_bitmap); last_bit = node_inx; - for (i = first_bit, node_offset = -1; i <= node_inx; i++) { + node_offset = -1; + for (i = first_bit; i <= node_inx; i++) { if (!bit_test(job_resrcs_ptr->node_bitmap, i)) continue; node_offset++; @@ -1560,7 +1565,8 @@ static int _rm_job_from_one_node(struct job_record *job_ptr, gres_list = cr_ptr->nodes[i].gres_list; else gres_list = node_ptr->gres_list; - gres_plugin_job_dealloc(job_ptr->gres_list, gres_list, cpu_cnt); + gres_plugin_job_dealloc(job_ptr->gres_list, gres_list, node_offset, + cpu_cnt); gres_plugin_node_state_log(gres_list, node_ptr->name); exclusive = (job_ptr->details->shared == 0); @@ -1624,7 +1630,7 @@ static int _add_job_to_nodes(struct cr_record *cr_ptr, struct job_record *job_ptr, char *pre_err, int alloc_all) { - int i, i_first, i_last, rc = SLURM_SUCCESS; + int i, i_first, i_last, node_cnt, node_offset, rc = SLURM_SUCCESS; bool exclusive; struct part_cr_record *part_cr_ptr; job_resources_t 
*job_resrcs_ptr; @@ -1658,11 +1664,15 @@ static int _add_job_to_nodes(struct cr_record *cr_ptr, i_first = bit_ffs(job_resrcs_ptr->node_bitmap); i_last = bit_fls(job_resrcs_ptr->node_bitmap); + node_cnt = bit_set_count(job_resrcs_ptr->node_bitmap); if (i_first == -1) /* job has no nodes */ i_last = -2; + node_offset = -1; for (i = i_first; i <= i_last; i++) { - if (!bit_test(job_resrcs_ptr->node_bitmap, i) || - !bit_test(job_ptr->node_bitmap, i)) + if (!bit_test(job_resrcs_ptr->node_bitmap, i)) + continue; + node_offset++; + if (!bit_test(job_ptr->node_bitmap, i)) continue; node_ptr = node_record_table_ptr + i; @@ -1681,7 +1691,8 @@ static int _add_job_to_nodes(struct cr_record *cr_ptr, gres_list = cr_ptr->nodes[i].gres_list; else gres_list = node_ptr->gres_list; - gres_plugin_job_alloc(job_ptr->gres_list, gres_list, cpu_cnt); + gres_plugin_job_alloc(job_ptr->gres_list, gres_list, + node_cnt, node_offset, cpu_cnt); gres_plugin_node_state_log(gres_list, node_ptr->name); if (exclusive) -- GitLab