diff --git a/src/common/gres.c b/src/common/gres.c index a1155575cceb3996a4672f6d6405b0602e573142..fa321506b4f11560383449bf3dad4c051f9ee1ea 100644 --- a/src/common/gres.c +++ b/src/common/gres.c @@ -2681,8 +2681,8 @@ extern uint32_t _job_test(void *job_gres_data, void *node_gres_data, * IN cpu_end_bit - index into cpu_bitmap for this node's last CPU * IN job_id - job's ID (for logging) * IN node_name - name of the node (for logging) - * RET: NO_VAL - All CPUs on node are available - * otherwise - Specific CPU count + * RET: NO_VAL - All cores on node are available + * otherwise - Count of available cores */ extern uint32_t gres_plugin_job_test(List job_gres_list, List node_gres_list, bool use_total_gres, bitstr_t *cpu_bitmap, diff --git a/src/common/gres.h b/src/common/gres.h index 525998ef29ba149df8a576129561be42e83ebe3c..65d2470c8ac84e82c2b869d3a396417e75eacaec 100644 --- a/src/common/gres.h +++ b/src/common/gres.h @@ -374,8 +374,8 @@ extern int gres_plugin_job_state_unpack(List *gres_list, Buf buffer, * IN cpu_end_bit - index into cpu_bitmap for this node's last CPU * IN job_id - job's ID (for logging) * IN node_name - name of the node (for logging) - * RET: NO_VAL - All CPUs on node are available - * otherwise - Specific CPU count + * RET: NO_VAL - All cores on node are available + * otherwise - Count of available cores */ extern uint32_t gres_plugin_job_test(List job_gres_list, List node_gres_list, bool use_total_gres, bitstr_t *cpu_bitmap, diff --git a/src/plugins/select/cons_res/job_test.c b/src/plugins/select/cons_res/job_test.c index a31d3520aab655ed498005de4d4dfc543030f2eb..1ce0972a45875a8eaa5c2b7d00fb114283cf2229 100644 --- a/src/plugins/select/cons_res/job_test.c +++ b/src/plugins/select/cons_res/job_test.c @@ -588,7 +588,7 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map, bool test_only) { uint16_t cpus; - uint32_t avail_mem, req_mem, gres_cpus; + uint32_t avail_mem, req_mem, gres_cores, gres_cpus, cpus_per_core; int core_start_bit, core_end_bit, cpu_alloc_size; struct node_record *node_ptr = node_record_table_ptr + node_i; List gres_list; @@ -614,6 +614,8 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map, } core_start_bit = cr_get_coremap_offset(node_i); core_end_bit = cr_get_coremap_offset(node_i+1) - 1; + cpus_per_core = select_node_record[node_i].cpus / + (core_end_bit - core_start_bit + 1); node_ptr = select_node_record[node_i].node_ptr; if (cr_type & CR_MEMORY) { @@ -645,11 +647,14 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map, gres_list = node_usage[node_i].gres_list; else gres_list = node_ptr->gres_list; - gres_cpus = gres_plugin_job_test(job_ptr->gres_list, - gres_list, test_only, - core_map, core_start_bit, - core_end_bit, job_ptr->job_id, - node_ptr->name); + gres_cores = gres_plugin_job_test(job_ptr->gres_list, + gres_list, test_only, + core_map, core_start_bit, + core_end_bit, job_ptr->job_id, + node_ptr->name); + gres_cpus = gres_cores; + if (gres_cpus != NO_VAL) + gres_cpus *= cpus_per_core; if ((gres_cpus < job_ptr->details->ntasks_per_node) || ((job_ptr->details->cpus_per_task > 1) && (gres_cpus < job_ptr->details->cpus_per_task))) @@ -729,7 +734,8 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr, enum node_cr_state job_node_req) { struct node_record *node_ptr; - uint32_t i, free_mem, gres_cpus, min_mem, size; + uint32_t i, free_mem, gres_cpus, gres_cores, min_mem, size; + int core_start_bit, core_end_bit, cpus_per_core; List gres_list; if (job_ptr->details->pn_min_memory & MEM_PER_CPU) { @@ -748,7 +754,10 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr, if (!bit_test(bitmap, i)) continue; node_ptr = select_node_record[i].node_ptr; - + core_start_bit = cr_get_coremap_offset(i); + core_end_bit = cr_get_coremap_offset(i+1) - 1; + cpus_per_core = select_node_record[i].cpus / + (core_end_bit - core_start_bit + 1); /* node-level memory check */ if ((job_ptr->details->pn_min_memory) && (cr_type & CR_MEMORY)) { @@ -771,10 +780,13 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr, gres_list = node_usage[i].gres_list; else gres_list = node_ptr->gres_list; - gres_cpus = gres_plugin_job_test(job_ptr->gres_list, - gres_list, true, - NULL, 0, 0, job_ptr->job_id, - node_ptr->name); + gres_cores = gres_plugin_job_test(job_ptr->gres_list, + gres_list, true, + NULL, 0, 0, job_ptr->job_id, + node_ptr->name); + gres_cpus = gres_cores; + if (gres_cpus != NO_VAL) + gres_cpus *= cpus_per_core; if (gres_cpus == 0) { debug3("cons_res: _vns: node %s lacks gres", node_ptr->name); diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c index 116c923f76a8c7c3246046f80272ed57d2e28271..f353f68a5a555ff906df39c8a7214a3ae49178ca 100644 --- a/src/plugins/select/linear/select_linear.c +++ b/src/plugins/select/linear/select_linear.c @@ -635,7 +635,8 @@ static int _job_count_bitmap(struct cr_record *cr_ptr, struct node_record *node_ptr; uint32_t job_memory_cpu = 0, job_memory_node = 0; uint32_t alloc_mem = 0, job_mem = 0, avail_mem = 0; - uint32_t cpu_cnt, gres_cpus; + uint32_t cpu_cnt, gres_cpus, gres_cores; + int core_start_bit, core_end_bit, cpus_per_core; List gres_list; bool use_total_gres = true; @@ -675,11 +676,16 @@ static int _job_count_bitmap(struct cr_record *cr_ptr, gres_list = cr_ptr->nodes[i].gres_list; else gres_list = node_ptr->gres_list; - gres_cpus = gres_plugin_job_test(job_ptr->gres_list, - gres_list, use_total_gres, - NULL, 0, 0, job_ptr->job_id, - node_ptr->name); + core_start_bit = cr_get_coremap_offset(i); + core_end_bit = cr_get_coremap_offset(i+1) - 1; + cpus_per_core = cpu_cnt / (core_end_bit - core_start_bit + 1); + gres_cores = gres_plugin_job_test(job_ptr->gres_list, + gres_list, use_total_gres, + NULL, 0, 0, job_ptr->job_id, + node_ptr->name); + gres_cpus = gres_cores; if (gres_cpus != NO_VAL) { + gres_cpus *= cpus_per_core; if ((gres_cpus < cpu_cnt) || (gres_cpus < job_ptr->details->ntasks_per_node) || ((job_ptr->details->cpus_per_task > 1) && diff --git a/src/plugins/select/serial/job_test.c b/src/plugins/select/serial/job_test.c index 997360e19230abb764328d7bbdf1fcc13f156a75..78cf7e41e48b73065d4644663030ab7a83829fca 100644 --- a/src/plugins/select/serial/job_test.c +++ b/src/plugins/select/serial/job_test.c @@ -101,7 +101,7 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map, bool test_only) { uint16_t cpus; - uint32_t avail_mem, req_mem, gres_cpus; + uint32_t avail_mem, req_mem, gres_cpus, gres_cores, cpus_per_core; int core_start_bit, core_end_bit; struct node_record *node_ptr = node_record_table_ptr + node_i; List gres_list; @@ -117,7 +117,8 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map, core_start_bit = cr_get_coremap_offset(node_i); core_end_bit = cr_get_coremap_offset(node_i + 1) - 1; node_ptr = select_node_record[node_i].node_ptr; - + cpus_per_core = select_node_record[node_i].cpus / + (core_end_bit - core_start_bit + 1); if ((cr_type & CR_MEMORY) && cpus) { req_mem = job_ptr->details->pn_min_memory & ~MEM_PER_CPU; avail_mem = select_node_record[node_i].real_memory; @@ -131,11 +132,14 @@ uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map, gres_list = node_usage[node_i].gres_list; else gres_list = node_ptr->gres_list; - gres_cpus = gres_plugin_job_test(job_ptr->gres_list, - gres_list, test_only, - core_map, core_start_bit, - core_end_bit, job_ptr->job_id, - node_ptr->name); + gres_cores = gres_plugin_job_test(job_ptr->gres_list, + gres_list, test_only, + core_map, core_start_bit, + core_end_bit, job_ptr->job_id, + node_ptr->name); + gres_cpus = gres_cores; + if (gres_cpus != NO_VAL) + gres_cpus *= cpus_per_core; if ((gres_cpus < job_ptr->details->ntasks_per_node) || ((job_ptr->details->cpus_per_task > 1) && (gres_cpus < job_ptr->details->cpus_per_task))) @@ -215,8 +219,9 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr, enum node_cr_state job_node_req) { struct node_record *node_ptr; - uint32_t i, free_mem, gres_cpus, min_mem; + uint32_t i, free_mem, gres_cpus, gres_cores, min_mem; int i_first, i_last; + int core_start_bit, core_end_bit, cpus_per_core; List gres_list; if (job_ptr->details->pn_min_memory & MEM_PER_CPU) @@ -232,7 +237,10 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr, if (!bit_test(bitmap, i)) continue; node_ptr = select_node_record[i].node_ptr; - + core_start_bit = cr_get_coremap_offset(i); + core_end_bit = cr_get_coremap_offset(i+1) - 1; + cpus_per_core = select_node_record[i].cpus / + (core_end_bit - core_start_bit + 1); /* node-level memory check */ if ((job_ptr->details->pn_min_memory) && (cr_type & CR_MEMORY)) { @@ -251,10 +259,13 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr, gres_list = node_usage[i].gres_list; else gres_list = node_ptr->gres_list; - gres_cpus = gres_plugin_job_test(job_ptr->gres_list, - gres_list, true, - NULL, 0, 0, job_ptr->job_id, - node_ptr->name); + gres_cores = gres_plugin_job_test(job_ptr->gres_list, + gres_list, true, + NULL, 0, 0, job_ptr->job_id, + node_ptr->name); + gres_cpus = gres_cores; + if (gres_cpus != NO_VAL) + gres_cpus *= cpus_per_core; if (gres_cpus == 0) { debug3("select/serial: node %s lacks gres", node_ptr->name);