diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 745a5069a252bf572f0b89898340673a2a501e41..de8c6d6d59e3785a5a9829f3c3d86596178ba552 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -143,11 +143,11 @@ BEGIN_C_DECLS typedef struct switch_jobinfo switch_jobinfo_t; /* opaque data type */ #endif -/* Define select_job_res_t below +/* Define job_resources_t below * to avoid including extraneous slurm headers */ -#ifndef __select_job_res_t_defined -# define __select_job_res_t_defined /* Opaque data for select plugins */ - typedef struct select_job_res select_job_res_t; +#ifndef __job_resources_t_defined +# define __job_resources_t_defined /* Opaque data for select plugins */ + typedef struct job_resources job_resources_t; #endif /* Define select_jobinfo_t, select_nodeinfo_t below @@ -710,7 +710,7 @@ typedef struct job_info { char *resv_name; /* reservation name */ select_jobinfo_t *select_jobinfo; /* opaque data type, * process using slurm_get_select_jobinfo() */ - select_job_res_t *select_job_res; /* job resources from select plugin */ + job_resources_t *job_resources; /* opaque data type, job resources */ uint16_t shared; /* 1 if job can share nodes with other jobs */ time_t start_time; /* time execution begins, actual or expected */ char *state_desc; /* optional details for state_reason */ diff --git a/src/api/job_info.c b/src/api/job_info.c index 3004b58a41a5f3384e3918bd80339d4ba59f649f..4f302858444544d971387fa682d3038d56293f35 100644 --- a/src/api/job_info.c +++ b/src/api/job_info.c @@ -134,7 +134,7 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) char tmp_line[512]; char *ionodes = NULL; uint16_t exit_status = 0, term_sig = 0; - select_job_res_t *select_job_res = job_ptr->select_job_res; + job_resources_t *job_resources = job_ptr->job_resources; char *out = NULL; uint32_t min_nodes, max_nodes; @@ -281,14 +281,14 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) xstrcat(out, tmp_line); #endif - if (!select_job_res) + if 
(!job_resources) goto line7; #ifndef HAVE_BG - if (!select_job_res->core_bitmap) + if (!job_resources->core_bitmap) goto line7; - last = bit_fls(select_job_res->core_bitmap); + last = bit_fls(job_resources->core_bitmap); if (last == -1) goto line7; @@ -310,18 +310,18 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) /* tmp1[] stores the current cpu(s) allocated */ tmp2[0] = '\0'; /* stores last cpu(s) allocated */ - for (rel_node_inx=0; rel_node_inx < select_job_res->nhosts; + for (rel_node_inx=0; rel_node_inx < job_resources->nhosts; rel_node_inx++) { if (sock_reps >= - select_job_res->sock_core_rep_count[sock_inx]) { + job_resources->sock_core_rep_count[sock_inx]) { sock_inx++; sock_reps = 0; } sock_reps++; - bit_reps = select_job_res->sockets_per_node[sock_inx] * - select_job_res->cores_per_socket[sock_inx]; + bit_reps = job_resources->sockets_per_node[sock_inx] * + job_resources->cores_per_socket[sock_inx]; core_bitmap = bit_alloc(bit_reps); if (core_bitmap == NULL) { @@ -330,7 +330,7 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) } for (j=0; j < bit_reps; j++) { - if (bit_test(select_job_res->core_bitmap, bit_inx)) + if (bit_test(job_resources->core_bitmap, bit_inx)) bit_set(core_bitmap, j); bit_inx++; } @@ -346,10 +346,10 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) * identical allocation values. 
*/ if (strcmp(tmp1, tmp2) || - (last_mem_alloc_ptr != select_job_res->memory_allocated) || - (select_job_res->memory_allocated && + (last_mem_alloc_ptr != job_resources->memory_allocated) || + (job_resources->memory_allocated && (last_mem_alloc != - select_job_res->memory_allocated[rel_node_inx]))) { + job_resources->memory_allocated[rel_node_inx]))) { if (hostlist_count(hl_last)) { hostlist_ranged_string(hl_last, sizeof(last_hosts), last_hosts); @@ -367,9 +367,9 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) hl_last = hostlist_create(NULL); } strcpy(tmp2, tmp1); - last_mem_alloc_ptr = select_job_res->memory_allocated; + last_mem_alloc_ptr = job_resources->memory_allocated; if (last_mem_alloc_ptr) - last_mem_alloc = select_job_res-> + last_mem_alloc = job_resources-> memory_allocated[rel_node_inx]; else last_mem_alloc = NO_VAL; @@ -402,16 +402,16 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) hostlist_destroy(hl); hostlist_destroy(hl_last); #else - if ((select_job_res->cpu_array_cnt > 0) && - (select_job_res->cpu_array_value) && - (select_job_res->cpu_array_reps)) { + if ((job_resources->cpu_array_cnt > 0) && + (job_resources->cpu_array_value) && + (job_resources->cpu_array_reps)) { int length = 0; xstrcat(out, "CPUs="); length += 10; - for (i = 0; i < select_job_res->cpu_array_cnt; i++) { + for (i = 0; i < job_resources->cpu_array_cnt; i++) { if (length > 70) { /* skip to last CPU group entry */ - if (i < select_job_res->cpu_array_cnt - 1) { + if (i < job_resources->cpu_array_cnt - 1) { continue; } /* add elipsis before last entry */ @@ -420,16 +420,16 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) } snprintf(tmp_line, sizeof(tmp_line), "%d", - select_job_res->cpu_array_value[i]); + job_resources->cpu_array_value[i]); xstrcat(out, tmp_line); length += strlen(tmp_line); - if (select_job_res->cpu_array_reps[i] > 1) { + if (job_resources->cpu_array_reps[i] > 1) { snprintf(tmp_line, sizeof(tmp_line), "*%d", - 
select_job_res->cpu_array_reps[i]); + job_resources->cpu_array_reps[i]); xstrcat(out, tmp_line); length += strlen(tmp_line); } - if (i < select_job_res->cpu_array_cnt - 1) { + if (i < job_resources->cpu_array_cnt - 1) { xstrcat(out, ","); length++; } diff --git a/src/common/Makefile.am b/src/common/Makefile.am index 82942e3aa8483b2509479644ba6b73dac8eeb7f8..796cc4c80567b05ab069283be39134173fc9cf61 100644 --- a/src/common/Makefile.am +++ b/src/common/Makefile.am @@ -94,7 +94,7 @@ libcommon_la_SOURCES = \ hostlist.c hostlist.h \ slurm_step_layout.c slurm_step_layout.h \ checkpoint.c checkpoint.h \ - select_job_res.c select_job_res.h \ + job_resources.c job_resources.h \ parse_time.c parse_time.h \ job_options.c job_options.h \ global_defaults.c \ diff --git a/src/common/Makefile.in b/src/common/Makefile.in index 1a54bb9e21e7f8b448137d53ac8bf91bf60a01d7..59294a5536972df0b5fec9dde40bc5417259ec31 100644 --- a/src/common/Makefile.in +++ b/src/common/Makefile.in @@ -118,7 +118,7 @@ am__libcommon_la_SOURCES_DIST = assoc_mgr.c assoc_mgr.h \ getopt1.c unsetenv.c unsetenv.h slurm_selecttype_info.c \ slurm_resource_info.c slurm_resource_info.h hostlist.c \ hostlist.h slurm_step_layout.c slurm_step_layout.h \ - checkpoint.c checkpoint.h select_job_res.c select_job_res.h \ + checkpoint.c checkpoint.h job_resources.c job_resources.h \ parse_time.c parse_time.h job_options.c job_options.h \ global_defaults.c timers.c timers.h slurm_xlator.h stepd_api.c \ stepd_api.h write_labelled_message.c write_labelled_message.h \ @@ -139,7 +139,7 @@ am_libcommon_la_OBJECTS = assoc_mgr.lo basil_resv_conf.lo xmalloc.lo \ slurm_jobcomp.lo slurm_topology.lo switch.lo arg_desc.lo \ malloc.lo getopt.lo getopt1.lo $(am__objects_1) \ slurm_selecttype_info.lo slurm_resource_info.lo hostlist.lo \ - slurm_step_layout.lo checkpoint.lo select_job_res.lo \ + slurm_step_layout.lo checkpoint.lo job_resources.lo \ parse_time.lo job_options.lo global_defaults.lo timers.lo \ stepd_api.lo 
write_labelled_message.lo proc_args.lo \ slurm_strcasestr.lo node_conf.lo @@ -449,7 +449,7 @@ libcommon_la_SOURCES = \ hostlist.c hostlist.h \ slurm_step_layout.c slurm_step_layout.h \ checkpoint.c checkpoint.h \ - select_job_res.c select_job_res.h \ + job_resources.c job_resources.h \ parse_time.c parse_time.h \ job_options.c job_options.h \ global_defaults.c \ @@ -593,7 +593,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc_args.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/read_config.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/safeopen.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/select_job_res.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/job_resources.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurm_accounting_storage.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurm_auth.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurm_cred.Plo@am__quote@ diff --git a/src/common/job_resources.c b/src/common/job_resources.c new file mode 100644 index 0000000000000000000000000000000000000000..5020a633c887f75bf31c005c9fd10e7864a1821d --- /dev/null +++ b/src/common/job_resources.c @@ -0,0 +1,927 @@ +/*****************************************************************************\ + * job_resources.c - functions to manage data structure identifying specific + * CPUs allocated to a job, step or partition + ***************************************************************************** + * Copyright (C) 2008 Lawrence Livermore National Security. + * Written by Morris Jette <jette1@llnl.gov>. + * CODE-OCEC-09-009. All rights reserved. + * + * This file is part of SLURM, a resource management program. + * For details, see <https://computing.llnl.gov/linux/slurm/>. + * Please also read the included file: DISCLAIMER. 
+ * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+\*****************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <slurm/slurm_errno.h> + +#include "src/common/hostlist.h" +#include "src/common/log.h" +#include "src/common/job_resources.h" +#include "src/common/xmalloc.h" +#include "src/common/xassert.h" +#include "src/slurmctld/slurmctld.h" + + +/* Create an empty job_resources data structure. + * The caller owns the result; free_job_resources() in this file releases it. */ +extern job_resources_t *create_job_resources(void) +{ + job_resources_t *job_resources; + + job_resources = xmalloc(sizeof(struct job_resources)); + return job_resources; +} + +/* Set the socket and core counts associated with a set of selected + * nodes of a job_resources data structure based upon slurmctld state. + * (sets cores_per_socket, sockets_per_node, and sock_core_rep_count based + * upon the value of node_bitmap, also creates core_bitmap based upon + * the total number of cores in the allocation). Call this ONLY from + * slurmctld. Example of use: + * + * job_resources_t *job_resources_ptr = create_job_resources(); + * node_name2bitmap("dummy[2,5,12,16]", true, + * &(job_resources_ptr->node_bitmap)); + * rc = build_job_resources(job_resources_ptr, node_record_table_ptr, + * slurmctld_conf.fast_schedule); + * + * The three arrays are allocated with job_resources->nhosts entries, and one + * entry is used per run of consecutive selected nodes sharing the same + * socket/core counts, so nhosts must already be set before calling (the + * number of selected nodes is always enough). + * + * node_rec_table is cast to struct node_record * and indexed by bit position. + * A non-zero fast_schedule reads the counts from node_ptr->config_ptr, zero + * reads them from the node record itself. + * Returns SLURM_SUCCESS, or SLURM_ERROR if node_bitmap is NULL. + */ +extern int build_job_resources(job_resources_t *job_resources, + void *node_rec_table, uint16_t fast_schedule) +{ + int i, bitmap_len; + int core_cnt = 0, sock_inx = -1; + uint32_t cores, socks; + struct node_record *node_ptr, *node_record_table; + + if (job_resources->node_bitmap == NULL) { + error("build_job_resources: node_bitmap is NULL"); + return SLURM_ERROR; + } + + node_record_table = (struct node_record *) node_rec_table; + xfree(job_resources->sockets_per_node); + xfree(job_resources->cores_per_socket); + xfree(job_resources->sock_core_rep_count); + job_resources->sockets_per_node = xmalloc(sizeof(uint16_t) * + job_resources->nhosts); + job_resources->cores_per_socket = xmalloc(sizeof(uint16_t) * + job_resources->nhosts); +
job_resources->sock_core_rep_count = xmalloc(sizeof(uint32_t) * + job_resources->nhosts); + + bitmap_len = bit_size(job_resources->node_bitmap); + for (i=0; i<bitmap_len; i++) { + if (!bit_test(job_resources->node_bitmap, i)) + continue; /* node i is not selected */ + node_ptr = node_record_table + i; /* bit position indexes the node table */ + if (fast_schedule) { + socks = node_ptr->config_ptr->sockets; + cores = node_ptr->config_ptr->cores; + } else { + socks = node_ptr->sockets; + cores = node_ptr->cores; + } + if ((sock_inx < 0) || + (socks != job_resources->sockets_per_node[sock_inx]) || + (cores != job_resources->cores_per_socket[sock_inx])) { + sock_inx++; /* open a new run-length record */ + job_resources->sockets_per_node[sock_inx] = socks; + job_resources->cores_per_socket[sock_inx] = cores; + } + job_resources->sock_core_rep_count[sock_inx]++; /* one more node in the current record; presumably starts + * from 0 because xmalloc zero-fills -- TODO confirm */ + core_cnt += (cores * socks); + } +#ifndef HAVE_BG + job_resources->core_bitmap = bit_alloc(core_cnt); /* one bit per core of every selected node */ + job_resources->core_bitmap_used = bit_alloc(core_cnt); + if ((job_resources->core_bitmap == NULL) || + (job_resources->core_bitmap_used == NULL)) + fatal("bit_alloc malloc failure"); +#endif + return SLURM_SUCCESS; +} + +/* Rebuild cpu_array_cnt, cpu_array_value, and cpu_array_reps based upon the + * values of nhosts and cpus in an existing data structure + * (consecutive equal cpus[] entries are folded into one value/reps pair). + * Returns 0 without touching anything when nhosts is 0. + * Return total CPU count or -1 on error */ +extern int build_job_resources_cpu_array(job_resources_t *job_resources_ptr) +{ + int cpu_count = 0, i; + uint32_t last_cpu_cnt = 0; /* cpus[] value of the record being extended */ + + if (job_resources_ptr->nhosts == 0) + return cpu_count; /* no work to do */ + if (job_resources_ptr->cpus == NULL) { + error("build_job_resources_cpu_array: cpus==NULL"); + return -1; + } + + /* clear vestigial data and create new arrays of max size */ + job_resources_ptr->cpu_array_cnt = 0; + xfree(job_resources_ptr->cpu_array_reps); + job_resources_ptr->cpu_array_reps = + xmalloc(job_resources_ptr->nhosts * sizeof(uint32_t)); + xfree(job_resources_ptr->cpu_array_value); + job_resources_ptr->cpu_array_value = + xmalloc(job_resources_ptr->nhosts * sizeof(uint16_t)); + + for (i=0;
i<job_resources_ptr->nhosts; i++) { + /* Always open a record for the first node: last_cpu_cnt starts + * at 0, so a leading cpus[] value of 0 would otherwise reach the + * else branch and index cpu_array_reps[cpu_array_cnt-1] while + * cpu_array_cnt is still 0 */ + if ((job_resources_ptr->cpu_array_cnt == 0) || + (job_resources_ptr->cpus[i] != last_cpu_cnt)) { + last_cpu_cnt = job_resources_ptr->cpus[i]; + job_resources_ptr->cpu_array_value[ + job_resources_ptr->cpu_array_cnt] + = last_cpu_cnt; + job_resources_ptr->cpu_array_reps[ + job_resources_ptr->cpu_array_cnt] = 1; + job_resources_ptr->cpu_array_cnt++; + } else { + job_resources_ptr->cpu_array_reps[ + job_resources_ptr->cpu_array_cnt-1]++; + } + cpu_count += last_cpu_cnt; + } + return cpu_count; +} + +/* Rebuild cpus array based upon the values of nhosts, cpu_array_value and + * cpu_array_reps in an existing data structure + * (each cpu_array_value[i] is written cpu_array_reps[i] times into cpus[]). + * Returns 0 without touching anything when nhosts is 0. + * Return total CPU count or -1 on error (missing input arrays, or the + * repetition counts do not add up to exactly nhosts) */ +extern int build_job_resources_cpus_array(job_resources_t *job_resources_ptr) +{ + int cpu_count = 0, cpu_inx, i, j; + + if (job_resources_ptr->nhosts == 0) + return cpu_count; /* no work to do */ + if (job_resources_ptr->cpu_array_cnt == 0) { + error("build_job_resources_cpus_array: cpu_array_cnt==0"); + return -1; + } + if (job_resources_ptr->cpu_array_value == NULL) { + error("build_job_resources_cpus_array: cpu_array_value==NULL"); + return -1; + } + if (job_resources_ptr->cpu_array_reps == NULL) { + error("build_job_resources_cpus_array: cpu_array_reps==NULL"); + return -1; + } + + /* clear vestigial data and create new arrays of max size */ + xfree(job_resources_ptr->cpus); + job_resources_ptr->cpus = + xmalloc(job_resources_ptr->nhosts * sizeof(uint16_t)); + + cpu_inx = 0; + for (i=0; i<job_resources_ptr->cpu_array_cnt; i++) { + for (j=0; j<job_resources_ptr->cpu_array_reps[i]; j++) { + if (cpu_inx >= job_resources_ptr->nhosts) { + error("build_job_resources_cpus_array: " + "cpu_array is too long"); + return -1; + } + /* expand record i from cpu_array_value[]; cpus[] was + * reallocated just above and holds no input data */ + cpu_count += job_resources_ptr->cpu_array_value[i]; + job_resources_ptr->cpus[cpu_inx++] = + job_resources_ptr->cpu_array_value[i]; + } + } + if (cpu_inx < job_resources_ptr->nhosts) { + error("build_job_resources_cpus_array: " + "cpu_array is incomplete"); + return -1; + } + return cpu_count; +} + +/* Reset the
node_bitmap in a job_resources data structure + * This is needed after a restart/reconfiguration since nodes can + * be added or removed from the system resulting in changes in + * the bitmap size or bit positions. + * A NULL job_resources_ptr is ignored. A NULL new_node_bitmap releases the + * old bitmap and leaves node_bitmap NULL. */ +extern void reset_node_bitmap(job_resources_t *job_resources_ptr, + bitstr_t *new_node_bitmap) +{ + if (job_resources_ptr) { + if (job_resources_ptr->node_bitmap) { + bit_free(job_resources_ptr->node_bitmap); + /* do not keep a pointer to the bitmap just released: + * free_job_resources() would hand it to bit_free() + * a second time */ + job_resources_ptr->node_bitmap = NULL; + } + if (new_node_bitmap) { + job_resources_ptr->node_bitmap = + bit_copy(new_node_bitmap); + } + } +} + +/* Check that the socket and core counts recorded in job_resources still + * match the node table for every node set in node_bitmap. + * A non-zero fast_schedule compares against node_ptr->config_ptr, zero + * compares against the node record itself. + * Returns SLURM_SUCCESS when every selected node matches, else SLURM_ERROR + * (after logging) if node_bitmap or a socket/core array is NULL or a + * node's counts differ. */ +extern int valid_job_resources(job_resources_t *job_resources, + void *node_rec_table, + uint16_t fast_schedule) +{ + int i, bitmap_len; + int sock_inx = 0, sock_cnt = 0; + uint32_t cores, socks; + struct node_record *node_ptr, *node_record_table; + + if (job_resources->node_bitmap == NULL) { + error("valid_job_resources: node_bitmap is NULL"); + return SLURM_ERROR; + } + if ((job_resources->sockets_per_node == NULL) || + (job_resources->cores_per_socket == NULL) || + (job_resources->sock_core_rep_count == NULL)) { + error("valid_job_resources: socket/core array is NULL"); + return SLURM_ERROR; + } + + node_record_table = (struct node_record *) node_rec_table; + bitmap_len = bit_size(job_resources->node_bitmap); + for (i=0; i<bitmap_len; i++) { + if (!bit_test(job_resources->node_bitmap, i)) + continue; + node_ptr = node_record_table + i; + if (fast_schedule) { + socks = node_ptr->config_ptr->sockets; + cores = node_ptr->config_ptr->cores; + } else { + socks = node_ptr->sockets; + cores = node_ptr->cores; + } + /* move to the next run-length record once the current one + * has accounted for all of its nodes */ + if (sock_cnt >= job_resources->sock_core_rep_count[sock_inx]) { + sock_inx++; + sock_cnt = 0; + } + if ((socks != job_resources->sockets_per_node[sock_inx]) || + (cores != job_resources->cores_per_socket[sock_inx])) { + error("valid_job_resources: " + "%s sockets:%u,%u, cores %u,%u", + node_ptr->name, + socks, + job_resources->sockets_per_node[sock_inx], + cores, + job_resources->cores_per_socket[sock_inx]); + return SLURM_ERROR; + } +
sock_cnt++; + } + return SLURM_SUCCESS; +} +/* Return a newly allocated copy of job_resources_ptr: scalar fields are + * assigned, bitmaps are duplicated with bit_copy() and arrays with + * xmalloc() + memcpy(). Members that are NULL in the source are not + * touched in the copy (presumably left NULL because xmalloc zero-fills -- + * TODO confirm). */ +extern job_resources_t *copy_job_resources( + job_resources_t *job_resources_ptr) +{ + int i, sock_inx = 0; + job_resources_t *new_layout = xmalloc(sizeof(struct job_resources)); + + xassert(job_resources_ptr); + new_layout->nhosts = job_resources_ptr->nhosts; + new_layout->nprocs = job_resources_ptr->nprocs; + new_layout->node_req = job_resources_ptr->node_req; + if (job_resources_ptr->core_bitmap) { + new_layout->core_bitmap = bit_copy(job_resources_ptr-> + core_bitmap); + } + if (job_resources_ptr->core_bitmap_used) { + new_layout->core_bitmap_used = bit_copy(job_resources_ptr-> + core_bitmap_used); + } + if (job_resources_ptr->node_bitmap) { + new_layout->node_bitmap = bit_copy(job_resources_ptr-> + node_bitmap); + } + + /* the run-length CPU arrays hold cpu_array_cnt entries each */ + new_layout->cpu_array_cnt = job_resources_ptr->cpu_array_cnt; + if (job_resources_ptr->cpu_array_reps && + job_resources_ptr->cpu_array_cnt) { + new_layout->cpu_array_reps = + xmalloc(sizeof(uint32_t) * + job_resources_ptr->cpu_array_cnt); + memcpy(new_layout->cpu_array_reps, + job_resources_ptr->cpu_array_reps, + (sizeof(uint32_t) * job_resources_ptr->cpu_array_cnt)); + } + if (job_resources_ptr->cpu_array_value && + job_resources_ptr->cpu_array_cnt) { + new_layout->cpu_array_value = + xmalloc(sizeof(uint16_t) * + job_resources_ptr->cpu_array_cnt); + memcpy(new_layout->cpu_array_value, + job_resources_ptr->cpu_array_value, + (sizeof(uint16_t) * job_resources_ptr->cpu_array_cnt)); + } + + /* the per-node arrays hold nhosts entries each */ + if (job_resources_ptr->cpus) { + new_layout->cpus = xmalloc(sizeof(uint16_t) * + job_resources_ptr->nhosts); + memcpy(new_layout->cpus, job_resources_ptr->cpus, + (sizeof(uint16_t) * job_resources_ptr->nhosts)); + } + if (job_resources_ptr->cpus_used) { + new_layout->cpus_used = xmalloc(sizeof(uint16_t) * + job_resources_ptr->nhosts); + memcpy(new_layout->cpus_used, job_resources_ptr->cpus_used, + (sizeof(uint16_t) * job_resources_ptr->nhosts)); + } + + if (job_resources_ptr->memory_allocated) { +
new_layout->memory_allocated = xmalloc(sizeof(uint32_t) * + new_layout->nhosts); + memcpy(new_layout->memory_allocated, + job_resources_ptr->memory_allocated, + (sizeof(uint32_t) * job_resources_ptr->nhosts)); + } + if (job_resources_ptr->memory_used) { + new_layout->memory_used = xmalloc(sizeof(uint32_t) * + new_layout->nhosts); + memcpy(new_layout->memory_used, + job_resources_ptr->memory_used, + (sizeof(uint32_t) * job_resources_ptr->nhosts)); + } + + /* Copy sockets_per_node, cores_per_socket and sock_core_rep_count. + * The destination arrays get nhosts entries, but only the leading + * records whose repetition counts add up to nhosts are copied. + * NOTE(review): the three source arrays are read here without a + * NULL check, unlike the members copied above. */ + new_layout->sockets_per_node = xmalloc(sizeof(uint16_t) * + new_layout->nhosts); + new_layout->cores_per_socket = xmalloc(sizeof(uint16_t) * + new_layout->nhosts); + new_layout->sock_core_rep_count = xmalloc(sizeof(uint32_t) * + new_layout->nhosts); + for (i=0; i<new_layout->nhosts; i++) { + if (job_resources_ptr->sock_core_rep_count[i] == 0) { + error("copy_job_resources: sock_core_rep_count=0"); + break; /* records before i are still copied below */ + } + sock_inx += job_resources_ptr->sock_core_rep_count[i]; + if (sock_inx >= job_resources_ptr->nhosts) { + i++; /* turn the last record index into a count */ + break; + } + } + memcpy(new_layout->sockets_per_node, + job_resources_ptr->sockets_per_node, (sizeof(uint16_t) * i)); + memcpy(new_layout->cores_per_socket, + job_resources_ptr->cores_per_socket, (sizeof(uint16_t) * i)); + memcpy(new_layout->sock_core_rep_count, + job_resources_ptr->sock_core_rep_count, + (sizeof(uint32_t) * i)); + + return new_layout; +} +/* Release every bitmap and array owned by *job_resources_pptr, then the + * structure itself, and set *job_resources_pptr to NULL. + * Nothing happens when *job_resources_pptr is already NULL; + * job_resources_pptr itself is dereferenced unconditionally. */ +extern void free_job_resources(job_resources_t **job_resources_pptr) +{ + job_resources_t *job_resources_ptr = *job_resources_pptr; + + if (job_resources_ptr) { + if (job_resources_ptr->core_bitmap) + bit_free(job_resources_ptr->core_bitmap); + if (job_resources_ptr->core_bitmap_used) + bit_free(job_resources_ptr->core_bitmap_used); + xfree(job_resources_ptr->cores_per_socket); + xfree(job_resources_ptr->cpu_array_reps); + xfree(job_resources_ptr->cpu_array_value); + xfree(job_resources_ptr->cpus); + xfree(job_resources_ptr->cpus_used); +
xfree(job_resources_ptr->memory_allocated); + xfree(job_resources_ptr->memory_used); + if (job_resources_ptr->node_bitmap) + bit_free(job_resources_ptr->node_bitmap); + xfree(job_resources_ptr->sock_core_rep_count); + xfree(job_resources_ptr->sockets_per_node); + xfree(job_resources_ptr); + *job_resources_pptr = NULL; /* clear the caller's handle along with the struct */ + } +} + +/* Log the contents of a job_resources data structure using info(). + * job_id is only printed. After the summary line, logging stops with an + * error() if cpus, memory_allocated, any socket/core array or either core + * bitmap is NULL. */ +extern void log_job_resources(uint32_t job_id, + job_resources_t *job_resources_ptr) +{ + int bit_inx = 0, bit_reps, i; + int array_size, node_inx; + int sock_inx = 0, sock_reps = 0; + + if (job_resources_ptr == NULL) { + error("log_job_resources: job_resources_ptr is NULL"); + return; + } + + info("===================="); + info("job_id:%u nhosts:%u nprocs:%u node_req:%u", + job_id, job_resources_ptr->nhosts, job_resources_ptr->nprocs, + job_resources_ptr->node_req); + + if (job_resources_ptr->cpus == NULL) { + error("log_job_resources: cpus array is NULL"); + return; + } + if (job_resources_ptr->memory_allocated == NULL) { + error("log_job_resources: memory array is NULL"); + return; + } + if ((job_resources_ptr->cores_per_socket == NULL) || + (job_resources_ptr->sockets_per_node == NULL) || + (job_resources_ptr->sock_core_rep_count == NULL)) { + error("log_job_resources: socket/core array is NULL"); + return; + } + if (job_resources_ptr->core_bitmap == NULL) { + error("log_job_resources: core_bitmap is NULL"); + return; + } + if (job_resources_ptr->core_bitmap_used == NULL) { + error("log_job_resources: core_bitmap_used is NULL"); + return; + } + array_size = bit_size(job_resources_ptr->core_bitmap); /* bound for bit_inx below */ + + /* Can only log node_bitmap from slurmctld, so don't bother here */ + for (node_inx=0; node_inx<job_resources_ptr->nhosts; node_inx++) { + uint32_t cpus_used = 0, memory_allocated = 0, memory_used = 0; + info("Node[%d]:", node_inx); + + if (sock_reps >= + job_resources_ptr->sock_core_rep_count[sock_inx]) { + sock_inx++; /* next run-length socket/core record */ + sock_reps = 0; + } + sock_reps++; /* nodes consumed from the current record */ + + if
(job_resources_ptr->cpus_used) + cpus_used = job_resources_ptr->cpus_used[node_inx]; + if (job_resources_ptr->memory_used) + memory_used = job_resources_ptr->memory_used[node_inx]; + if (job_resources_ptr->memory_allocated) + memory_allocated = job_resources_ptr-> + memory_allocated[node_inx]; + + info(" Mem(MB):%u:%u Sockets:%u Cores:%u CPUs:%u:%u", + memory_allocated, memory_used, + job_resources_ptr->sockets_per_node[sock_inx], + job_resources_ptr->cores_per_socket[sock_inx], + job_resources_ptr->cpus[node_inx], + cpus_used); + + bit_reps = job_resources_ptr->sockets_per_node[sock_inx] * + job_resources_ptr->cores_per_socket[sock_inx]; /* cores on this node */ + for (i=0; i<bit_reps; i++) { + if (bit_inx >= array_size) { + error("log_job_resources: array size wrong"); + break; + } + if (bit_test(job_resources_ptr->core_bitmap, + bit_inx)) { + char *core_used = ""; + if (bit_test(job_resources_ptr-> + core_bitmap_used, bit_inx)) + core_used = " and in use"; + info(" Socket[%d] Core[%d] is allocated%s", + (i / job_resources_ptr-> + cores_per_socket[sock_inx]), + (i % job_resources_ptr-> + cores_per_socket[sock_inx]), + core_used); + } + bit_inx++; /* the bitmap offset keeps running across nodes */ + } + } + for (node_inx=0; node_inx<job_resources_ptr->cpu_array_cnt; + node_inx++) { + if (node_inx == 0) + info("--------------------"); + info("cpu_array_value[%d]:%u reps:%u", node_inx, + job_resources_ptr->cpu_array_value[node_inx], + job_resources_ptr->cpu_array_reps[node_inx]); + } + info("===================="); +} +/* Pack job_resources_ptr into buffer. A NULL pointer is packed as a single + * NO_VAL in the position where nhosts is otherwise written. Array members + * that are NULL are packed with an element count of 0. */ +extern void pack_job_resources(job_resources_t *job_resources_ptr, Buf buffer) +{ + if (job_resources_ptr == NULL) { + uint32_t empty = NO_VAL; + pack32(empty, buffer); + return; + } + + xassert(job_resources_ptr->nhosts); + + pack32(job_resources_ptr->nhosts, buffer); + pack32(job_resources_ptr->nprocs, buffer); + pack8(job_resources_ptr->node_req, buffer); + + if (job_resources_ptr->cpu_array_reps) + pack32_array(job_resources_ptr->cpu_array_reps, + job_resources_ptr->cpu_array_cnt, buffer); + else +
pack32_array(job_resources_ptr->cpu_array_reps, 0, buffer); + + if (job_resources_ptr->cpu_array_value) + pack16_array(job_resources_ptr->cpu_array_value, + job_resources_ptr->cpu_array_cnt, buffer); + else + pack16_array(job_resources_ptr->cpu_array_value, 0, buffer); + + if (job_resources_ptr->cpus) + pack16_array(job_resources_ptr->cpus, + job_resources_ptr->nhosts, buffer); + else + pack16_array(job_resources_ptr->cpus, 0, buffer); + + if (job_resources_ptr->cpus_used) + pack16_array(job_resources_ptr->cpus_used, + job_resources_ptr->nhosts, buffer); + else + pack16_array(job_resources_ptr->cpus_used, 0, buffer); + + if (job_resources_ptr->memory_allocated) + pack32_array(job_resources_ptr->memory_allocated, + job_resources_ptr->nhosts, buffer); + else + pack32_array(job_resources_ptr->memory_allocated, 0, buffer); + + if (job_resources_ptr->memory_used) + pack32_array(job_resources_ptr->memory_used, + job_resources_ptr->nhosts, buffer); + else + pack32_array(job_resources_ptr->memory_used, 0, buffer); + +#ifndef HAVE_BG +{ + int i; + uint32_t core_cnt = 0, sock_recs = 0; + xassert(job_resources_ptr->cores_per_socket); + xassert(job_resources_ptr->sock_core_rep_count); + xassert(job_resources_ptr->sockets_per_node); + + /* find how many run-length records are needed to cover nhosts nodes */ + for (i=0; i<job_resources_ptr->nhosts; i++) { + core_cnt += job_resources_ptr->sockets_per_node[i] * + job_resources_ptr->cores_per_socket[i] * + job_resources_ptr->sock_core_rep_count[i]; + sock_recs += job_resources_ptr->sock_core_rep_count[i]; + if (sock_recs >= job_resources_ptr->nhosts) + break; + } + /* After a break, i is the index of the last record and becomes a + * count. If the loop ran to completion (the repetition counts never + * reached nhosts) i already equals nhosts; bumping it again would + * pack one element more than the loop itself examined. */ + if (i < job_resources_ptr->nhosts) + i++; + pack16_array(job_resources_ptr->sockets_per_node, + (uint32_t) i, buffer); + pack16_array(job_resources_ptr->cores_per_socket, + (uint32_t) i, buffer); + pack32_array(job_resources_ptr->sock_core_rep_count, + (uint32_t) i, buffer); + + xassert(job_resources_ptr->core_bitmap); + xassert(job_resources_ptr->core_bitmap_used); + pack_bit_str(job_resources_ptr->core_bitmap, buffer); + pack_bit_str(job_resources_ptr->core_bitmap_used,
buffer); + /* Do not pack the node_bitmap, but rebuild it in reset_node_bitmap() + * based upon job_ptr->nodes and the current node table */ +} +#endif +} + +/* Unpack a job_resources structure written by pack_job_resources(). + * On success *job_resources_pptr receives the new structure (NULL when the + * NO_VAL marker for a NULL structure was packed) and SLURM_SUCCESS is + * returned. On failure *job_resources_pptr is set to NULL and SLURM_ERROR + * is returned. */ +extern int unpack_job_resources(job_resources_t **job_resources_pptr, + Buf buffer) +{ + char *bit_fmt = NULL; + uint32_t empty, tmp32; + /* Must be NULL before the first safe_unpack call: presumably those + * macros jump to unpack_error on a short buffer (TODO confirm in + * pack.h), where this pointer is handed to free_job_resources() */ + job_resources_t *job_resources = NULL; + + xassert(job_resources_pptr); + safe_unpack32(&empty, buffer); + if (empty == NO_VAL) { + *job_resources_pptr = NULL; + return SLURM_SUCCESS; + } + + job_resources = xmalloc(sizeof(struct job_resources)); + job_resources->nhosts = empty; + safe_unpack32(&job_resources->nprocs, buffer); + safe_unpack8(&job_resources->node_req, buffer); + + /* this count is unpacked straight into cpu_array_cnt, so test that + * field; tmp32 has not been written yet at this point */ + safe_unpack32_array(&job_resources->cpu_array_reps, + &job_resources->cpu_array_cnt, buffer); + if (job_resources->cpu_array_cnt == 0) + xfree(job_resources->cpu_array_reps); + + safe_unpack16_array(&job_resources->cpu_array_value, + &tmp32, buffer); + if (tmp32 == 0) + xfree(job_resources->cpu_array_value); + else if(!job_resources->cpu_array_cnt) + job_resources->cpu_array_cnt = tmp32; + + safe_unpack16_array(&job_resources->cpus, &tmp32, buffer); + if (tmp32 == 0) + xfree(job_resources->cpus); + safe_unpack16_array(&job_resources->cpus_used, &tmp32, buffer); + if (tmp32 == 0) + xfree(job_resources->cpus_used); + + safe_unpack32_array(&job_resources->memory_allocated, + &tmp32, buffer); + if (tmp32 == 0) + xfree(job_resources->memory_allocated); + else if (tmp32 != job_resources->nhosts) + goto unpack_error; + safe_unpack32_array(&job_resources->memory_used, &tmp32, buffer); + if (tmp32 == 0) + xfree(job_resources->memory_used); + +#ifndef HAVE_BG + safe_unpack16_array(&job_resources->sockets_per_node, &tmp32, buffer); + if (tmp32 == 0) + xfree(job_resources->sockets_per_node); + safe_unpack16_array(&job_resources->cores_per_socket, &tmp32, buffer); + if (tmp32 == 0) + xfree(job_resources->cores_per_socket); + safe_unpack32_array(&job_resources->sock_core_rep_count, + &tmp32, buffer); + if (tmp32 == 0) +
xfree(job_resources->sock_core_rep_count); + + unpack_bit_str(&job_resources->core_bitmap, buffer); + unpack_bit_str(&job_resources->core_bitmap_used, buffer); + /* node_bitmap is not packed, but rebuilt in reset_node_bitmap() + * based upon job_ptr->nodes and the current node table */ +#endif + + *job_resources_pptr = job_resources; + return SLURM_SUCCESS; + + unpack_error: + free_job_resources(&job_resources); + xfree(bit_fmt); + *job_resources_pptr = NULL; + return SLURM_ERROR; +} + +/* Translate (node_id, socket_id, core_id) into a bit offset within + * core_bitmap by walking the run-length socket/core records. + * Returns the offset, or -1 (after logging) if socket_id or core_id exceed + * the node's counts or the offset does not lie inside core_bitmap. */ +extern int get_job_resources_offset(job_resources_t *job_resources_ptr, + uint32_t node_id, uint16_t socket_id, + uint16_t core_id) +{ + int i, bit_inx = 0; + + xassert(job_resources_ptr); + + for (i=0; i<job_resources_ptr->nhosts; i++) { + if (job_resources_ptr->sock_core_rep_count[i] <= node_id) { + /* node lies beyond record i: skip all of its cores */ + bit_inx += job_resources_ptr->sockets_per_node[i] * + job_resources_ptr->cores_per_socket[i] * + job_resources_ptr->sock_core_rep_count[i]; + node_id -= job_resources_ptr->sock_core_rep_count[i]; + } else if (socket_id >= job_resources_ptr-> + sockets_per_node[i]) { + error("get_job_resources_offset: socket_id >= socket_cnt " + "(%u >= %u)", socket_id, + job_resources_ptr->sockets_per_node[i]); + return -1; + } else if (core_id >= job_resources_ptr->cores_per_socket[i]) { + error("get_job_resources_offset: core_id >= core_cnt " + "(%u >= %u)", core_id, + job_resources_ptr->cores_per_socket[i]); + return -1; + } else { + /* node_id is now relative to record i */ + bit_inx += job_resources_ptr->sockets_per_node[i] * + job_resources_ptr->cores_per_socket[i] * + node_id; + bit_inx += job_resources_ptr->cores_per_socket[i] * + socket_id; + bit_inx += core_id; + break; + } + } + i = bit_size(job_resources_ptr->core_bitmap); + if (bit_inx >= i) { + error("get_job_resources_offset: offset >= bitmap size " + "(%d >= %d)", bit_inx, i); + return -1; + } + + return bit_inx; +} + +/* Return the core_bitmap bit for the given coordinates (the result of + * bit_test()), or SLURM_ERROR if get_job_resources_offset() rejects them. */ +extern int get_job_resources_bit(job_resources_t *job_resources_ptr, + uint32_t node_id, uint16_t socket_id, + uint16_t core_id) +{ + int bit_inx =
get_job_resources_offset(job_resources_ptr, node_id, + socket_id, core_id); + if (bit_inx < 0) + return SLURM_ERROR; + + return bit_test(job_resources_ptr->core_bitmap, bit_inx); +} + +extern int set_job_resources_bit(job_resources_t *job_resources_ptr, + uint32_t node_id, uint16_t socket_id, + uint16_t core_id) +{ + int bit_inx = get_job_resources_offset(job_resources_ptr, node_id, + socket_id, core_id); + if (bit_inx < 0) + return SLURM_ERROR; + + bit_set(job_resources_ptr->core_bitmap, bit_inx); + return SLURM_SUCCESS; +} + +extern int get_job_resources_node(job_resources_t *job_resources_ptr, + uint32_t node_id) +{ + int i, bit_inx = 0, core_cnt = 0; + + xassert(job_resources_ptr); + + for (i=0; i<job_resources_ptr->nhosts; i++) { + if (job_resources_ptr->sock_core_rep_count[i] <= node_id) { + bit_inx += job_resources_ptr->sockets_per_node[i] * + job_resources_ptr->cores_per_socket[i] * + job_resources_ptr->sock_core_rep_count[i]; + node_id -= job_resources_ptr->sock_core_rep_count[i]; + } else { + bit_inx += job_resources_ptr->sockets_per_node[i] * + job_resources_ptr->cores_per_socket[i] * + node_id; + core_cnt = job_resources_ptr->sockets_per_node[i] * + job_resources_ptr->cores_per_socket[i]; + break; + } + } + if (core_cnt < 1) { + error("get_job_resources_node: core_cnt=0"); + return 0; + } + i = bit_size(job_resources_ptr->core_bitmap); + if ((bit_inx + core_cnt) > i) { + error("get_job_resources_node: offset > bitmap size " + "(%d >= %d)", (bit_inx + core_cnt), i); + return 0; + } + + for (i=0; i<core_cnt; i++) { + if (bit_test(job_resources_ptr->core_bitmap, bit_inx++)) + return 1; + } + return 0; +} + +extern int set_job_resources_node(job_resources_t *job_resources_ptr, + uint32_t node_id) +{ + int i, bit_inx = 0, core_cnt = 0; + + xassert(job_resources_ptr); + + for (i=0; i<job_resources_ptr->nhosts; i++) { + if (job_resources_ptr->sock_core_rep_count[i] <= node_id) { + bit_inx += job_resources_ptr->sockets_per_node[i] * + 
job_resources_ptr->cores_per_socket[i] * + job_resources_ptr->sock_core_rep_count[i]; + node_id -= job_resources_ptr->sock_core_rep_count[i]; + } else { + bit_inx += job_resources_ptr->sockets_per_node[i] * + job_resources_ptr->cores_per_socket[i] * + node_id; + core_cnt = job_resources_ptr->sockets_per_node[i] * + job_resources_ptr->cores_per_socket[i]; + break; + } + } + if (core_cnt < 1) { + error("set_job_resources_node: core_cnt=0"); + return SLURM_ERROR; + } + + i = bit_size(job_resources_ptr->core_bitmap); + if ((bit_inx + core_cnt) > i) { + error("set_job_resources_node: offset > bitmap size " + "(%d >= %d)", (bit_inx + core_cnt), i); + return SLURM_ERROR; + } + + for (i=0; i<core_cnt; i++) + bit_set(job_resources_ptr->core_bitmap, bit_inx++); + + return SLURM_SUCCESS; +} + +extern int get_job_resources_cnt(job_resources_t *job_resources_ptr, + uint32_t node_id,uint16_t *socket_cnt, + uint16_t *cores_per_socket_cnt) +{ + int i, node_inx = -1; + + xassert(socket_cnt); + xassert(cores_per_socket_cnt); + xassert(job_resources_ptr->cores_per_socket); + xassert(job_resources_ptr->sock_core_rep_count); + xassert(job_resources_ptr->sockets_per_node); + + for (i=0; i<job_resources_ptr->nhosts; i++) { + node_inx += job_resources_ptr->sock_core_rep_count[i]; + if (node_id <= node_inx) { + *cores_per_socket_cnt = job_resources_ptr-> + cores_per_socket[i]; + *socket_cnt = job_resources_ptr->sockets_per_node[i]; + return SLURM_SUCCESS; + } + } + + error("get_job_resources_cnt: invalid node_id: %u", node_id); + *cores_per_socket_cnt = 0; + *socket_cnt = 0; + return SLURM_ERROR; +} + +/* Return 1 if the given job can fit into the given full-length core_bitmap, + * else return 0. 
+ */
+extern int job_fits_into_cores(job_resources_t *job_resources_ptr,
+			       bitstr_t *full_bitmap,
+			       const uint16_t *bits_per_node,
+			       const uint32_t *bit_rep_count)
+{
+	uint32_t i, n, count = 1, last_bit = 0;
+	uint32_t c = 0, j = 0, k = 0;
+
+	if (!full_bitmap)
+		return 1;	/* nothing to conflict with */
+
+	for (i = 0, n = 0; i < job_resources_ptr->nhosts; n++) {	/* n: node_bitmap index, i: job nodes handled */
+		last_bit += bits_per_node[k];	/* one past node n's last bit in full_bitmap */
+		if (++count > bit_rep_count[k]) {
+			k++;	/* next bits_per_node/bit_rep_count record */
+			count = 1;
+		}
+		if (bit_test(job_resources_ptr->node_bitmap, n) == 0) {
+			c = last_bit;	/* node not in job: skip its bits */
+			continue;
+		}
+		for (; c < last_bit; c++, j++) {	/* c: full_bitmap bit, j: job core_bitmap bit */
+			if (bit_test(full_bitmap, c) &&
+			    bit_test(job_resources_ptr->core_bitmap, j))
+				return 0;	/* core wanted by job is already set */
+		}
+		i++;
+	}
+	return 1;
+}
+
+/* add the given job to the given full_core_bitmap
+ * (a NULL *full_core_bitmap is allocated here, sized by summing
+ * cores_per_node[i] * core_rep_count[i] up to the first zero core_rep_count;
+ * a job without a core_bitmap is ignored) */
+extern void add_job_to_cores(job_resources_t *job_resources_ptr,
+			     bitstr_t **full_core_bitmap,
+			     const uint16_t *cores_per_node,
+			     const uint32_t *core_rep_count)
+{
+	uint32_t i, n, count = 1, last_bit = 0;
+	uint32_t c = 0, j = 0, k = 0;
+
+	if (!job_resources_ptr->core_bitmap)
+		return;
+
+	/* add the job to the row_bitmap */
+	if (*full_core_bitmap == NULL) {
+		uint32_t size = 0;
+		for (i = 0; core_rep_count[i]; i++) {	/* stops at the first zero entry */
+			size += cores_per_node[i] * core_rep_count[i];
+		}
+		*full_core_bitmap = bit_alloc(size);
+		if (!*full_core_bitmap)
+			fatal("add_job_to_cores: bitmap memory error");
+	}
+
+	for (i = 0, n = 0; i < job_resources_ptr->nhosts; n++) {	/* n: node_bitmap index, i: job nodes handled */
+		last_bit += cores_per_node[k];	/* one past node n's last bit in the full bitmap */
+		if (++count > core_rep_count[k]) {
+			k++;	/* next cores_per_node/core_rep_count record */
+			count = 1;
+		}
+		if (bit_test(job_resources_ptr->node_bitmap, n) == 0) {
+			c = last_bit;	/* node not in job: skip its bits */
+			continue;
+		}
+		for (; c < last_bit; c++, j++) {	/* c: full bitmap bit, j: job core_bitmap bit */
+			if (bit_test(job_resources_ptr->core_bitmap, j))
+				bit_set(*full_core_bitmap, c);
+		}
+		i++;
+	}
+}
diff --git a/src/common/select_job_res.h b/src/common/job_resources.h
similarity index 62%
rename from src/common/select_job_res.h
rename to src/common/job_resources.h
index 2e1da1e67bd5b85226fe8981d07c9dca2fac2e17..59d28c903343e10a03c80c5239cdb85e57a711f3 100644
---
a/src/common/select_job_res.h +++ b/src/common/job_resources.h @@ -1,5 +1,5 @@ /*****************************************************************************\ - * select_job_res.h - functions to manage data structure identifying specific + * job_resources.h - functions to manage data structure identifying specific * CPUs allocated to a job, step or partition ***************************************************************************** * Copyright (C) 2008 Lawrence Livermore National Security. @@ -36,8 +36,8 @@ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. \*****************************************************************************/ -#ifndef _SELECT_JOB_RES_H -#define _SELECT_JOB_RES_H +#ifndef _JOB_RESOURCES_H +#define _JOB_RESOURCES_H #if HAVE_CONFIG_H # include "config.h" @@ -54,13 +54,13 @@ #include "src/common/pack.h" #include "src/slurmctld/slurmctld.h" -/* struct select_job_res defines exactly which resources are allocated +/* struct job_resources defines exactly which resources are allocated * to a job, step, partition, etc. 
* * core_bitmap - Bitmap of allocated cores for all nodes and sockets * core_bitmap_used - Bitmap of cores allocated to job steps - * cores_per_socket - Count of cores per socket on this node, build by - * build_select_job_res() and insures consistent + * cores_per_socket - Count of cores per socket on this node, built by + * build_job_resources() and ensures consistent * interpretation of core_bitmap * cpus - Count of desired/allocated CPUs per node for job/step * cpus_used - For a job, count of CPUs per node used by job steps @@ -78,19 +78,19 @@ * node_req - NODE_CR_RESERVED|NODE_CR_ONE_ROW|NODE_CR_AVAILABLE * nprocs - Number of processors in the allocation * sock_core_rep_count - How many consecutive nodes that sockets_per_node - * and cores_per_socket apply to, build by - * build_select_job_res() and insures consistent + * and cores_per_socket apply to, built by + * build_job_resources() and ensures consistent * interpretation of core_bitmap - * sockets_per_node - Count of sockets on this node, build by - * build_select_job_res() and insures consistent + * sockets_per_node - Count of sockets on this node, built by + * build_job_resources() and ensures consistent * interpretation of core_bitmap * * NOTES: * cpu_array_* contains the same information as "cpus", but in a more compact * format. For example if cpus = {4, 4, 2, 2, 2, 2, 2, 2} then cpu_array_cnt=2 - * cpu_array_value = {4, 2} and cpu_array_reps = {2, 6}. We do not need to - * save/restore these values, but generate them by calling - * build_select_job_res_cpu_array() + * cpu_array_value = {4, 2} and cpu_array_reps = {2, 6}.
We do not need to + * save/restore these values, but generate them by calling + * build_job_resources_cpu_array() * * Sample layout of core_bitmap: * | Node_0 | Node_1 | @@ -98,7 +98,7 @@ * | Core_0 | Core_1 | Core_0 | Core_1 | Core_0 | Core_1 | Core_0 | Core_1 | * | Bit_0 | Bit_1 | Bit_2 | Bit_3 | Bit_4 | Bit_5 | Bit_6 | Bit_7 | */ -struct select_job_res { +struct job_resources { bitstr_t * core_bitmap; bitstr_t * core_bitmap_used; uint32_t cpu_array_cnt; @@ -117,118 +117,112 @@ struct select_job_res { uint16_t * sockets_per_node; }; -/* Create an empty select_job_res data structure, just a call to xmalloc() */ -extern select_job_res_t *create_select_job_res(void); +/* Create an empty job_resources data structure, just a call to xmalloc() */ +extern job_resources_t *create_job_resources(void); /* Set the socket and core counts associated with a set of selected - * nodes of a select_job_res data structure based upon slurmctld state. + * nodes of a job_resources data structure based upon slurmctld state. * (sets cores_per_socket, sockets_per_node, and sock_core_rep_count based * upon the value of node_bitmap, also creates core_bitmap based upon - * the total number of cores in the allocation). Call this ONLY from + * the total number of cores in the allocation). Call this ONLY from * slurmctld. 
Example of use: * - * select_job_res_t *select_job_res_ptr = create_select_job_res(); - * node_name2bitmap("dummy[2,5,12,16]", true, &(select_res_ptr->node_bitmap)); - * rc = build_select_job_res(select_job_res_ptr, node_record_table_ptr, + * job_resources_t *job_resources_ptr = create_job_resources(); + * node_name2bitmap("dummy[2,5,12,16]", true, &(job_resources_ptr->node_bitmap)); + * rc = build_job_resources(job_resources_ptr, node_record_table_ptr, * slurmctld_conf.fast_schedule); */ -extern int build_select_job_res(select_job_res_t *select_job_res_ptr, - void *node_rec_table, - uint16_t fast_schedule); +extern int build_job_resources(job_resources_t *job_resources_ptr, + void *node_rec_table, uint16_t fast_schedule); /* Rebuild cpu_array_cnt, cpu_array_value, and cpu_array_reps based upon the * values of cpus in an existing data structure * Return total CPU count or -1 on error */ -extern int build_select_job_res_cpu_array(select_job_res_t *select_job_res_ptr); +extern int build_job_resources_cpu_array(job_resources_t *job_resources_ptr); /* Rebuild cpus array based upon the values of nhosts, cpu_array_value and * cpu_array_reps in an existing data structure * Return total CPU count or -1 on error */ -extern int build_select_job_res_cpus_array( - select_job_res_t *select_job_res_ptr); +extern int build_job_resources_cpus_array(job_resources_t *job_resources_ptr); -/* Validate a select_job_res data structure originally built using - * build_select_job_res() is still valid based upon slurmctld state. +/* Validate a job_resources data structure originally built using + * build_job_resources() is still valid based upon slurmctld state. * NOTE: Reset the node_bitmap field before calling this function. - * If the sockets_per_node or cores_per_socket for any node in the allocation - * changes, then return SLURM_ERROR. Otherwise return SLURM_SUCCESS. Any + * If the sockets_per_node or cores_per_socket for any node in the allocation + * changes, then return SLURM_ERROR.
Otherwise return SLURM_SUCCESS. Any * change in a node's socket or core count require that any job running on * that node be killed. Example of use: * - * rc = valid_select_job_res(select_job_res_ptr, node_record_table_ptr, + * rc = valid_job_resources(job_resources_ptr, node_record_table_ptr, * slurmctld_conf.fast_schedule); */ -extern int valid_select_job_res(select_job_res_t *select_job_res_ptr, - void *node_rec_table, - uint16_t fast_schedule); +extern int valid_job_resources(job_resources_t *job_resources_ptr, + void *node_rec_table, uint16_t fast_schedule); -/* Make a copy of a select_job_res data structure, - * free using free_select_job_res() */ -extern select_job_res_t *copy_select_job_res( - select_job_res_t *select_job_res_ptr); +/* Make a copy of a job_resources data structure, + * free using free_job_resources() */ +extern job_resources_t *copy_job_resources(job_resources_t *job_resources_ptr); -/* Free select_job_res data structure created using copy_select_job_res() or - * unpack_select_job_res() */ -extern void free_select_job_res(select_job_res_t **select_job_res_pptr); +/* Free job_resources data structure created using copy_job_resources() or + * unpack_job_resources() */ +extern void free_job_resources(job_resources_t **job_resources_pptr); -/* Log the contents of a select_job_res data structure using info() */ -extern void log_select_job_res(uint32_t job_id, - select_job_res_t *select_job_res_ptr); +/* Log the contents of a job_resources data structure using info() */ +extern void log_job_resources(uint32_t job_id, + job_resources_t *job_resources_ptr); -/* Un/pack full select_job_res data structure */ -extern void pack_select_job_res(select_job_res_t *select_job_res_ptr, +/* Un/pack full job_resources data structure */ +extern void pack_job_resources(job_resources_t *job_resources_ptr, Buf buffer); +extern int unpack_job_resources(job_resources_t **job_resources_pptr, Buf buffer); -extern int unpack_select_job_res(select_job_res_t 
**select_job_res_pptr, - Buf buffer); -/* Reset the node_bitmap in a select_job_res data structure - * This is needed after a restart/reconfiguration since nodes can - * be added or removed from the system resulting in changing in +/* Reset the node_bitmap in a job_resources data structure + * This is needed after a restart/reconfiguration since nodes can + * be added or removed from the system resulting in changing in * the bitmap size or bit positions */ -extern void reset_node_bitmap(select_job_res_t *select_job_res_ptr, +extern void reset_node_bitmap(job_resources_t *job_resources_ptr, bitstr_t *new_node_bitmap); /* For a given node_id, socket_id and core_id, get it's offset within * the core bitmap */ -extern int get_select_job_res_offset(select_job_res_t *select_job_res_ptr, - uint32_t node_id, uint16_t socket_id, - uint16_t core_id); +extern int get_job_resources_offset(job_resources_t *job_resources_ptr, + uint32_t node_id, uint16_t socket_id, + uint16_t core_id); /* Get/set bit value at specified location. 
* node_id, socket_id and core_id are all zero origin */ -extern int get_select_job_res_bit(select_job_res_t *select_job_res_ptr, - uint32_t node_id, - uint16_t socket_id, uint16_t core_id); -extern int set_select_job_res_bit(select_job_res_t *select_job_res_ptr, - uint32_t node_id, - uint16_t socket_id, uint16_t core_id); +extern int get_job_resources_bit(job_resources_t *job_resources_ptr, + uint32_t node_id, uint16_t socket_id, + uint16_t core_id); +extern int set_job_resources_bit(job_resources_t *job_resources_ptr, + uint32_t node_id, uint16_t socket_id, + uint16_t core_id); /* Get/set bit value at specified location for whole node allocations * get is for any socket/core on the specified node * set is for all sockets/cores on the specified node - * fully comptabable with set/get_select_job_res_bit() + * fully compatible with set/get_job_resources_bit() * node_id is all zero origin */ -extern int get_select_job_res_node(select_job_res_t *select_job_res_ptr, - uint32_t node_id); -extern int set_select_job_res_node(select_job_res_t *select_job_res_ptr, - uint32_t node_id); +extern int get_job_resources_node(job_resources_t *job_resources_ptr, + uint32_t node_id); +extern int set_job_resources_node(job_resources_t *job_resources_ptr, + uint32_t node_id); /* Get socket and core count for a specific node_id (zero origin) */ -extern int get_select_job_res_cnt(select_job_res_t *select_job_res_ptr, - uint32_t node_id, - uint16_t *socket_cnt, - uint16_t *cores_per_socket_cnt); +extern int get_job_resources_cnt(job_resources_t *job_resources_ptr, + uint32_t node_id, uint16_t *socket_cnt, + uint16_t *cores_per_socket_cnt); /* check if given job can fit into the given full-length core_bitmap */ -extern int can_select_job_cores_fit(select_job_res_t *select_ptr, - bitstr_t *full_bitmap, - const uint16_t *bits_per_node, - const uint32_t *bit_rep_count); +extern int job_fits_into_cores(job_resources_t *job_resources_ptr, + bitstr_t *full_bitmap, + const uint16_t
*bits_per_node, + const uint32_t *bit_rep_count); /* add the given job to the given full_core_bitmap */ -extern void add_select_job_to_row(select_job_res_t *select_ptr, - bitstr_t **full_core_bitmap, - const uint16_t *cores_per_node, - const uint32_t *core_rep_count); +extern void add_job_to_cores(job_resources_t *job_resources_ptr, + bitstr_t **full_core_bitmap, + const uint16_t *cores_per_node, + const uint32_t *core_rep_count); -#endif /* !_SELECT_JOB_RES_H */ +#endif /* !_JOB_RESOURCES_H */ diff --git a/src/common/select_job_res.c b/src/common/select_job_res.c deleted file mode 100644 index aebae71671cf879ef6027fac1b3c700dbd370cca..0000000000000000000000000000000000000000 --- a/src/common/select_job_res.c +++ /dev/null @@ -1,930 +0,0 @@ -/*****************************************************************************\ - * select_job_res.c - functions to manage data structure identifying specific - * CPUs allocated to a job, step or partition - ***************************************************************************** - * Copyright (C) 2008 Lawrence Livermore National Security. - * Written by Morris Jette <jette1@llnl.gov>. - * CODE-OCEC-09-009. All rights reserved. - * - * This file is part of SLURM, a resource management program. - * For details, see <https://computing.llnl.gov/linux/slurm/>. - * Please also read the included file: DISCLAIMER. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * In addition, as a special exception, the copyright holders give permission - * to link the code of portions of this program with the OpenSSL library under - * certain conditions as described in each individual source file, and - * distribute linked combinations including the two. 
You must obey the GNU - * General Public License in all respects for all of the code used other than - * OpenSSL. If you modify file(s) with this exception, you may extend this - * exception to your version of the file(s), but you are not obligated to do - * so. If you do not wish to do so, delete this exception statement from your - * version. If you delete this exception statement from all source files in - * the program, then also delete it here. - * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with SLURM; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. -\*****************************************************************************/ - -#include <stdlib.h> -#include <string.h> -#include <slurm/slurm_errno.h> - -#include "src/common/hostlist.h" -#include "src/common/log.h" -#include "src/common/select_job_res.h" -#include "src/common/xmalloc.h" -#include "src/common/xassert.h" -#include "src/slurmctld/slurmctld.h" - - -/* Create an empty select_job_res data structure */ -extern select_job_res_t *create_select_job_res(void) -{ - select_job_res_t *select_job_res; - - select_job_res = xmalloc(sizeof(struct select_job_res)); - return select_job_res; -} - -/* Set the socket and core counts associated with a set of selected - * nodes of a select_job_res data structure based upon slurmctld state. - * (sets cores_per_socket, sockets_per_node, and sock_core_rep_count based - * upon the value of node_bitmap, also creates core_bitmap based upon - * the total number of cores in the allocation). Call this ONLY from - * slurmctld. 
Example of use: - * - * select_job_res_t *select_job_res_ptr = create_select_job_res(); - * node_name2bitmap("dummy[2,5,12,16]", true, &(select_res_ptr->node_bitmap)); - * rc = build_select_job_res(select_job_res_ptr, node_record_table_ptr, - * slurmctld_conf.fast_schedule); - */ -extern int build_select_job_res(select_job_res_t *select_job_res, - void *node_rec_table, - uint16_t fast_schedule) -{ - int i, bitmap_len; - int core_cnt = 0, sock_inx = -1; - uint32_t cores, socks; - struct node_record *node_ptr, *node_record_table; - - if (select_job_res->node_bitmap == NULL) { - error("build_select_job_res: node_bitmap is NULL"); - return SLURM_ERROR; - } - - node_record_table = (struct node_record *) node_rec_table; - xfree(select_job_res->sockets_per_node); - xfree(select_job_res->cores_per_socket); - xfree(select_job_res->sock_core_rep_count); - select_job_res->sockets_per_node = xmalloc(sizeof(uint16_t) * - select_job_res->nhosts); - select_job_res->cores_per_socket = xmalloc(sizeof(uint16_t) * - select_job_res->nhosts); - select_job_res->sock_core_rep_count = xmalloc(sizeof(uint32_t) * - select_job_res->nhosts); - - bitmap_len = bit_size(select_job_res->node_bitmap); - for (i=0; i<bitmap_len; i++) { - if (!bit_test(select_job_res->node_bitmap, i)) - continue; - node_ptr = node_record_table + i; - if (fast_schedule) { - socks = node_ptr->config_ptr->sockets; - cores = node_ptr->config_ptr->cores; - } else { - socks = node_ptr->sockets; - cores = node_ptr->cores; - } - if ((sock_inx < 0) || - (socks != select_job_res->sockets_per_node[sock_inx]) || - (cores != select_job_res->cores_per_socket[sock_inx])) { - sock_inx++; - select_job_res->sockets_per_node[sock_inx] = socks; - select_job_res->cores_per_socket[sock_inx] = cores; - } - select_job_res->sock_core_rep_count[sock_inx]++; - core_cnt += (cores * socks); - } -#ifndef HAVE_BG - select_job_res->core_bitmap = bit_alloc(core_cnt); - select_job_res->core_bitmap_used = bit_alloc(core_cnt); - if 
((select_job_res->core_bitmap == NULL) || - (select_job_res->core_bitmap_used == NULL)) - fatal("bit_alloc malloc failure"); -#endif - return SLURM_SUCCESS; -} - -/* Rebuild cpu_array_cnt, cpu_array_value, and cpu_array_reps based upon the - * values of nhosts and cpus in an existing data structure - * Return total CPU count or -1 on error */ -extern int build_select_job_res_cpu_array(select_job_res_t *select_job_res_ptr) -{ - int cpu_count = 0, i; - uint32_t last_cpu_cnt = 0; - - if (select_job_res_ptr->nhosts == 0) - return cpu_count; /* no work to do */ - if (select_job_res_ptr->cpus == NULL) { - error("build_select_job_res_cpu_array: cpus==NULL"); - return -1; - } - - /* clear vestigial data and create new arrays of max size */ - select_job_res_ptr->cpu_array_cnt = 0; - xfree(select_job_res_ptr->cpu_array_reps); - select_job_res_ptr->cpu_array_reps = - xmalloc(select_job_res_ptr->nhosts * sizeof(uint32_t)); - xfree(select_job_res_ptr->cpu_array_value); - select_job_res_ptr->cpu_array_value = - xmalloc(select_job_res_ptr->nhosts * sizeof(uint16_t)); - - for (i=0; i<select_job_res_ptr->nhosts; i++) { - if (select_job_res_ptr->cpus[i] != last_cpu_cnt) { - last_cpu_cnt = select_job_res_ptr->cpus[i]; - select_job_res_ptr->cpu_array_value[ - select_job_res_ptr->cpu_array_cnt] - = last_cpu_cnt; - select_job_res_ptr->cpu_array_reps[ - select_job_res_ptr->cpu_array_cnt] = 1; - select_job_res_ptr->cpu_array_cnt++; - } else { - select_job_res_ptr->cpu_array_reps[ - select_job_res_ptr->cpu_array_cnt-1]++; - } - cpu_count += last_cpu_cnt; - } - return cpu_count; -} - -/* Rebuild cpus array based upon the values of nhosts, cpu_array_value and - * cpu_array_reps in an existing data structure - * Return total CPU count or -1 on error */ -extern int build_select_job_res_cpus_array(select_job_res_t *select_job_res_ptr) -{ - int cpu_count = 0, cpu_inx, i, j; - - if (select_job_res_ptr->nhosts == 0) - return cpu_count; /* no work to do */ - if (select_job_res_ptr->cpu_array_cnt == 
0) { - error("build_select_job_res_cpus_array: cpu_array_cnt==0"); - return -1; - } - if (select_job_res_ptr->cpu_array_value == NULL) { - error("build_select_job_res_cpus_array: cpu_array_value==NULL"); - return -1; - } - if (select_job_res_ptr->cpu_array_reps == NULL) { - error("build_select_job_res_cpus_array: cpu_array_reps==NULL"); - return -1; - } - - /* clear vestigial data and create new arrays of max size */ - xfree(select_job_res_ptr->cpus); - select_job_res_ptr->cpus = - xmalloc(select_job_res_ptr->nhosts * sizeof(uint16_t)); - - cpu_inx = 0; - for (i=0; i<select_job_res_ptr->cpu_array_cnt; i++) { - for (j=0; j<select_job_res_ptr->cpu_array_reps[i]; j++) { - if (cpu_inx >= select_job_res_ptr->nhosts) { - error("build_select_job_res_cpus_array: " - "cpu_array is too long"); - return -1; - } - cpu_count += select_job_res_ptr->cpus[i]; - select_job_res_ptr->cpus[cpu_inx++] = - select_job_res_ptr->cpus[i]; - } - } - if (cpu_inx < select_job_res_ptr->nhosts) { - error("build_select_job_res_cpus_array: " - "cpu_array is incomplete"); - return -1; - } - return cpu_count; -} - -/* Reset the node_bitmap in a select_job_res data structure - * This is needed after a restart/reconfiguration since nodes can - * be added or removed from the system resulting in changing in - * the bitmap size or bit positions */ -extern void reset_node_bitmap(select_job_res_t *select_job_res_ptr, - bitstr_t *new_node_bitmap) -{ - if (select_job_res_ptr) { - if (select_job_res_ptr->node_bitmap) - bit_free(select_job_res_ptr->node_bitmap); - if (new_node_bitmap) { - select_job_res_ptr->node_bitmap = - bit_copy(new_node_bitmap); - } - } -} - -extern int valid_select_job_res(select_job_res_t *select_job_res, - void *node_rec_table, - uint16_t fast_schedule) -{ - int i, bitmap_len; - int sock_inx = 0, sock_cnt = 0; - uint32_t cores, socks; - struct node_record *node_ptr, *node_record_table; - - if (select_job_res->node_bitmap == NULL) { - error("valid_select_job_res: node_bitmap is NULL"); 
- return SLURM_ERROR; - } - if ((select_job_res->sockets_per_node == NULL) || - (select_job_res->cores_per_socket == NULL) || - (select_job_res->sock_core_rep_count == NULL)) { - error("valid_select_job_res: socket/core array is NULL"); - return SLURM_ERROR; - } - - node_record_table = (struct node_record *) node_rec_table; - bitmap_len = bit_size(select_job_res->node_bitmap); - for (i=0; i<bitmap_len; i++) { - if (!bit_test(select_job_res->node_bitmap, i)) - continue; - node_ptr = node_record_table + i; - if (fast_schedule) { - socks = node_ptr->config_ptr->sockets; - cores = node_ptr->config_ptr->cores; - } else { - socks = node_ptr->sockets; - cores = node_ptr->cores; - } - if (sock_cnt >= select_job_res->sock_core_rep_count[sock_inx]) { - sock_inx++; - sock_cnt = 0; - } - if ((socks != select_job_res->sockets_per_node[sock_inx]) || - (cores != select_job_res->cores_per_socket[sock_inx])) { - error("valid_select_job_res: " - "%s sockets:%u,%u, cores %u,%u", - node_ptr->name, - socks, - select_job_res->sockets_per_node[sock_inx], - cores, - select_job_res->cores_per_socket[sock_inx]); - return SLURM_ERROR; - } - sock_cnt++; - } - return SLURM_SUCCESS; -} - -extern select_job_res_t *copy_select_job_res( - select_job_res_t *select_job_res_ptr) -{ - int i, sock_inx = 0; - select_job_res_t *new_layout = xmalloc(sizeof(struct select_job_res)); - - xassert(select_job_res_ptr); - new_layout->nhosts = select_job_res_ptr->nhosts; - new_layout->nprocs = select_job_res_ptr->nprocs; - new_layout->node_req = select_job_res_ptr->node_req; - if (select_job_res_ptr->core_bitmap) { - new_layout->core_bitmap = bit_copy(select_job_res_ptr-> - core_bitmap); - } - if (select_job_res_ptr->core_bitmap_used) { - new_layout->core_bitmap_used = bit_copy(select_job_res_ptr-> - core_bitmap_used); - } - if (select_job_res_ptr->node_bitmap) { - new_layout->node_bitmap = bit_copy(select_job_res_ptr-> - node_bitmap); - } - - new_layout->cpu_array_cnt = select_job_res_ptr->cpu_array_cnt; - if 
(select_job_res_ptr->cpu_array_reps && - select_job_res_ptr->cpu_array_cnt) { - new_layout->cpu_array_reps = - xmalloc(sizeof(uint32_t) * - select_job_res_ptr->cpu_array_cnt); - memcpy(new_layout->cpu_array_reps, - select_job_res_ptr->cpu_array_reps, - (sizeof(uint32_t) * select_job_res_ptr->cpu_array_cnt)); - } - if (select_job_res_ptr->cpu_array_value && - select_job_res_ptr->cpu_array_cnt) { - new_layout->cpu_array_value = - xmalloc(sizeof(uint16_t) * - select_job_res_ptr->cpu_array_cnt); - memcpy(new_layout->cpu_array_value, - select_job_res_ptr->cpu_array_value, - (sizeof(uint16_t) * select_job_res_ptr->cpu_array_cnt)); - } - - if (select_job_res_ptr->cpus) { - new_layout->cpus = xmalloc(sizeof(uint16_t) * - select_job_res_ptr->nhosts); - memcpy(new_layout->cpus, select_job_res_ptr->cpus, - (sizeof(uint16_t) * select_job_res_ptr->nhosts)); - } - if (select_job_res_ptr->cpus_used) { - new_layout->cpus_used = xmalloc(sizeof(uint16_t) * - select_job_res_ptr->nhosts); - memcpy(new_layout->cpus_used, select_job_res_ptr->cpus_used, - (sizeof(uint16_t) * select_job_res_ptr->nhosts)); - } - - if (select_job_res_ptr->memory_allocated) { - new_layout->memory_allocated = xmalloc(sizeof(uint32_t) * - new_layout->nhosts); - memcpy(new_layout->memory_allocated, - select_job_res_ptr->memory_allocated, - (sizeof(uint32_t) * select_job_res_ptr->nhosts)); - } - if (select_job_res_ptr->memory_used) { - new_layout->memory_used = xmalloc(sizeof(uint32_t) * - new_layout->nhosts); - memcpy(new_layout->memory_used, - select_job_res_ptr->memory_used, - (sizeof(uint32_t) * select_job_res_ptr->nhosts)); - } - - /* Copy sockets_per_node, cores_per_socket and core_sock_rep_count */ - new_layout->sockets_per_node = xmalloc(sizeof(uint16_t) * - new_layout->nhosts); - new_layout->cores_per_socket = xmalloc(sizeof(uint16_t) * - new_layout->nhosts); - new_layout->sock_core_rep_count = xmalloc(sizeof(uint32_t) * - new_layout->nhosts); - for (i=0; i<new_layout->nhosts; i++) { - if 
(select_job_res_ptr->sock_core_rep_count[i] == 0) { - error("copy_select_job_res: sock_core_rep_count=0"); - break; - } - sock_inx += select_job_res_ptr->sock_core_rep_count[i]; - if (sock_inx >= select_job_res_ptr->nhosts) { - i++; - break; - } - } - memcpy(new_layout->sockets_per_node, - select_job_res_ptr->sockets_per_node, (sizeof(uint16_t) * i)); - memcpy(new_layout->cores_per_socket, - select_job_res_ptr->cores_per_socket, (sizeof(uint16_t) * i)); - memcpy(new_layout->sock_core_rep_count, - select_job_res_ptr->sock_core_rep_count, - (sizeof(uint32_t) * i)); - - return new_layout; -} - -extern void free_select_job_res(select_job_res_t **select_job_res_pptr) -{ - select_job_res_t *select_job_res_ptr = *select_job_res_pptr; - - if (select_job_res_ptr) { - if (select_job_res_ptr->core_bitmap) - bit_free(select_job_res_ptr->core_bitmap); - if (select_job_res_ptr->core_bitmap_used) - bit_free(select_job_res_ptr->core_bitmap_used); - xfree(select_job_res_ptr->cores_per_socket); - xfree(select_job_res_ptr->cpu_array_reps); - xfree(select_job_res_ptr->cpu_array_value); - xfree(select_job_res_ptr->cpus); - xfree(select_job_res_ptr->cpus_used); - xfree(select_job_res_ptr->memory_allocated); - xfree(select_job_res_ptr->memory_used); - if (select_job_res_ptr->node_bitmap) - bit_free(select_job_res_ptr->node_bitmap); - xfree(select_job_res_ptr->sock_core_rep_count); - xfree(select_job_res_ptr->sockets_per_node); - xfree(select_job_res_ptr); - *select_job_res_pptr = NULL; - } -} - -/* Log the contents of a select_job_res data structure using info() */ -extern void log_select_job_res(uint32_t job_id, - select_job_res_t *select_job_res_ptr) -{ - int bit_inx = 0, bit_reps, i; - int array_size, node_inx; - int sock_inx = 0, sock_reps = 0; - - if (select_job_res_ptr == NULL) { - error("log_select_job_res: select_job_res_ptr is NULL"); - return; - } - - info("===================="); - info("job_id:%u nhosts:%u nprocs:%u node_req:%u", - job_id, select_job_res_ptr->nhosts, 
select_job_res_ptr->nprocs, - select_job_res_ptr->node_req); - - if (select_job_res_ptr->cpus == NULL) { - error("log_select_job_res: cpus array is NULL"); - return; - } - if (select_job_res_ptr->memory_allocated == NULL) { - error("log_select_job_res: memory array is NULL"); - return; - } - if ((select_job_res_ptr->cores_per_socket == NULL) || - (select_job_res_ptr->sockets_per_node == NULL) || - (select_job_res_ptr->sock_core_rep_count == NULL)) { - error("log_select_job_res: socket/core array is NULL"); - return; - } - if (select_job_res_ptr->core_bitmap == NULL) { - error("log_select_job_res: core_bitmap is NULL"); - return; - } - if (select_job_res_ptr->core_bitmap_used == NULL) { - error("log_select_job_res: core_bitmap_used is NULL"); - return; - } - array_size = bit_size(select_job_res_ptr->core_bitmap); - - /* Can only log node_bitmap from slurmctld, so don't bother here */ - for (node_inx=0; node_inx<select_job_res_ptr->nhosts; node_inx++) { - uint32_t cpus_used = 0, memory_allocated = 0, memory_used = 0; - info("Node[%d]:", node_inx); - - if (sock_reps >= - select_job_res_ptr->sock_core_rep_count[sock_inx]) { - sock_inx++; - sock_reps = 0; - } - sock_reps++; - - if (select_job_res_ptr->cpus_used) - cpus_used = select_job_res_ptr->cpus_used[node_inx]; - if (select_job_res_ptr->memory_used) - memory_used = select_job_res_ptr->memory_used[node_inx]; - if (select_job_res_ptr->memory_allocated) - memory_allocated = select_job_res_ptr-> - memory_allocated[node_inx]; - - info(" Mem(MB):%u:%u Sockets:%u Cores:%u CPUs:%u:%u", - memory_allocated, memory_used, - select_job_res_ptr->sockets_per_node[sock_inx], - select_job_res_ptr->cores_per_socket[sock_inx], - select_job_res_ptr->cpus[node_inx], - cpus_used); - - bit_reps = select_job_res_ptr->sockets_per_node[sock_inx] * - select_job_res_ptr->cores_per_socket[sock_inx]; - for (i=0; i<bit_reps; i++) { - if (bit_inx >= array_size) { - error("log_select_job_res: array size wrong"); - break; - } - if 
(bit_test(select_job_res_ptr->core_bitmap, - bit_inx)) { - char *core_used = ""; - if (bit_test(select_job_res_ptr-> - core_bitmap_used, bit_inx)) - core_used = " and in use"; - info(" Socket[%d] Core[%d] is allocated%s", - (i / select_job_res_ptr-> - cores_per_socket[sock_inx]), - (i % select_job_res_ptr-> - cores_per_socket[sock_inx]), - core_used); - } - bit_inx++; - } - } - for (node_inx=0; node_inx<select_job_res_ptr->cpu_array_cnt; - node_inx++) { - if (node_inx == 0) - info("--------------------"); - info("cpu_array_value[%d]:%u reps:%u", node_inx, - select_job_res_ptr->cpu_array_value[node_inx], - select_job_res_ptr->cpu_array_reps[node_inx]); - } - info("===================="); -} - -extern void pack_select_job_res(select_job_res_t *select_job_res_ptr, - Buf buffer) -{ - if (select_job_res_ptr == NULL) { - uint32_t empty = NO_VAL; - pack32(empty, buffer); - return; - } - - xassert(select_job_res_ptr->nhosts); - - pack32(select_job_res_ptr->nhosts, buffer); - pack32(select_job_res_ptr->nprocs, buffer); - pack8(select_job_res_ptr->node_req, buffer); - - if (select_job_res_ptr->cpu_array_reps) - pack32_array(select_job_res_ptr->cpu_array_reps, - select_job_res_ptr->cpu_array_cnt, buffer); - else - pack32_array(select_job_res_ptr->cpu_array_reps, 0, buffer); - - if (select_job_res_ptr->cpu_array_value) - pack16_array(select_job_res_ptr->cpu_array_value, - select_job_res_ptr->cpu_array_cnt, buffer); - else - pack16_array(select_job_res_ptr->cpu_array_value, 0, buffer); - - if (select_job_res_ptr->cpus) - pack16_array(select_job_res_ptr->cpus, - select_job_res_ptr->nhosts, buffer); - else - pack16_array(select_job_res_ptr->cpus, 0, buffer); - - if (select_job_res_ptr->cpus_used) - pack16_array(select_job_res_ptr->cpus_used, - select_job_res_ptr->nhosts, buffer); - else - pack16_array(select_job_res_ptr->cpus_used, 0, buffer); - - if (select_job_res_ptr->memory_allocated) - pack32_array(select_job_res_ptr->memory_allocated, - select_job_res_ptr->nhosts, buffer); 
- else - pack32_array(select_job_res_ptr->memory_allocated, 0, buffer); - - if (select_job_res_ptr->memory_used) - pack32_array(select_job_res_ptr->memory_used, - select_job_res_ptr->nhosts, buffer); - else - pack32_array(select_job_res_ptr->memory_used, 0, buffer); - -#ifndef HAVE_BG -{ - int i; - uint32_t core_cnt = 0, sock_recs = 0; - xassert(select_job_res_ptr->cores_per_socket); - xassert(select_job_res_ptr->sock_core_rep_count); - xassert(select_job_res_ptr->sockets_per_node); - - for (i=0; i<select_job_res_ptr->nhosts; i++) { - core_cnt += select_job_res_ptr->sockets_per_node[i] * - select_job_res_ptr->cores_per_socket[i] * - select_job_res_ptr->sock_core_rep_count[i]; - sock_recs += select_job_res_ptr->sock_core_rep_count[i]; - if (sock_recs >= select_job_res_ptr->nhosts) - break; - } - i++; - pack16_array(select_job_res_ptr->sockets_per_node, - (uint32_t) i, buffer); - pack16_array(select_job_res_ptr->cores_per_socket, - (uint32_t) i, buffer); - pack32_array(select_job_res_ptr->sock_core_rep_count, - (uint32_t) i, buffer); - - xassert(select_job_res_ptr->core_bitmap); - xassert(select_job_res_ptr->core_bitmap_used); - pack_bit_str(select_job_res_ptr->core_bitmap, buffer); - pack_bit_str(select_job_res_ptr->core_bitmap_used, buffer); - /* Do not pack the node_bitmap, but rebuild it in reset_node_bitmap() - * based upon job_ptr->nodes and the current node table */ -} -#endif -} - -extern int unpack_select_job_res(select_job_res_t **select_job_res_pptr, - Buf buffer) -{ - char *bit_fmt = NULL; - uint32_t empty, tmp32; - select_job_res_t *select_job_res; - - xassert(select_job_res_pptr); - safe_unpack32(&empty, buffer); - if (empty == NO_VAL) { - *select_job_res_pptr = NULL; - return SLURM_SUCCESS; - } - - select_job_res = xmalloc(sizeof(struct select_job_res)); - select_job_res->nhosts = empty; - safe_unpack32(&select_job_res->nprocs, buffer); - safe_unpack8(&select_job_res->node_req, buffer); - - safe_unpack32_array(&select_job_res->cpu_array_reps, - 
&select_job_res->cpu_array_cnt, buffer); - if (tmp32 == 0) - xfree(select_job_res->cpu_array_reps); - - safe_unpack16_array(&select_job_res->cpu_array_value, - &tmp32, buffer); - if (tmp32 == 0) - xfree(select_job_res->cpu_array_value); - else if(!select_job_res->cpu_array_cnt) - select_job_res->cpu_array_cnt = tmp32; - - safe_unpack16_array(&select_job_res->cpus, &tmp32, buffer); - if (tmp32 == 0) - xfree(select_job_res->cpus); - safe_unpack16_array(&select_job_res->cpus_used, &tmp32, buffer); - if (tmp32 == 0) - xfree(select_job_res->cpus_used); - - safe_unpack32_array(&select_job_res->memory_allocated, - &tmp32, buffer); - if (tmp32 == 0) - xfree(select_job_res->memory_allocated); - else if (tmp32 != select_job_res->nhosts) - goto unpack_error; - safe_unpack32_array(&select_job_res->memory_used, &tmp32, buffer); - if (tmp32 == 0) - xfree(select_job_res->memory_used); - -#ifndef HAVE_BG - safe_unpack16_array(&select_job_res->sockets_per_node, &tmp32, buffer); - if (tmp32 == 0) - xfree(select_job_res->sockets_per_node); - safe_unpack16_array(&select_job_res->cores_per_socket, &tmp32, buffer); - if (tmp32 == 0) - xfree(select_job_res->cores_per_socket); - safe_unpack32_array(&select_job_res->sock_core_rep_count, - &tmp32, buffer); - if (tmp32 == 0) - xfree(select_job_res->sock_core_rep_count); - - unpack_bit_str(&select_job_res->core_bitmap, buffer); - unpack_bit_str(&select_job_res->core_bitmap_used, buffer); - /* node_bitmap is not packed, but rebuilt in reset_node_bitmap() - * based upon job_ptr->nodes and the current node table */ -#endif - - *select_job_res_pptr = select_job_res; - return SLURM_SUCCESS; - - unpack_error: - free_select_job_res(&select_job_res); - xfree(bit_fmt); - *select_job_res_pptr = NULL; - return SLURM_ERROR; -} - -extern int get_select_job_res_offset(select_job_res_t *select_job_res_ptr, - uint32_t node_id, uint16_t socket_id, - uint16_t core_id) -{ - int i, bit_inx = 0; - - xassert(select_job_res_ptr); - - for (i=0; 
i<select_job_res_ptr->nhosts; i++) { - if (select_job_res_ptr->sock_core_rep_count[i] <= node_id) { - bit_inx += select_job_res_ptr->sockets_per_node[i] * - select_job_res_ptr->cores_per_socket[i] * - select_job_res_ptr->sock_core_rep_count[i]; - node_id -= select_job_res_ptr->sock_core_rep_count[i]; - } else if (socket_id >= select_job_res_ptr-> - sockets_per_node[i]) { - error("get_select_job_res_bit: socket_id >= socket_cnt " - "(%u >= %u)", socket_id, - select_job_res_ptr->sockets_per_node[i]); - return -1; - } else if (core_id >= select_job_res_ptr->cores_per_socket[i]) { - error("get_select_job_res_bit: core_id >= core_cnt " - "(%u >= %u)", core_id, - select_job_res_ptr->cores_per_socket[i]); - return -1; - } else { - bit_inx += select_job_res_ptr->sockets_per_node[i] * - select_job_res_ptr->cores_per_socket[i] * - node_id; - bit_inx += select_job_res_ptr->cores_per_socket[i] * - socket_id; - bit_inx += core_id; - break; - } - } - i = bit_size(select_job_res_ptr->core_bitmap); - if (bit_inx >= i) { - error("get_select_job_res_bit: offset >= bitmap size " - "(%d >= %d)", bit_inx, i); - return -1; - } - - return bit_inx; -} - -extern int get_select_job_res_bit(select_job_res_t *select_job_res_ptr, - uint32_t node_id, uint16_t socket_id, - uint16_t core_id) -{ - int bit_inx = get_select_job_res_offset(select_job_res_ptr, node_id, - socket_id, core_id); - if (bit_inx < 0) - return SLURM_ERROR; - - return bit_test(select_job_res_ptr->core_bitmap, bit_inx); -} - -extern int set_select_job_res_bit(select_job_res_t *select_job_res_ptr, - uint32_t node_id, uint16_t socket_id, - uint16_t core_id) -{ - int bit_inx = get_select_job_res_offset(select_job_res_ptr, node_id, - socket_id, core_id); - if (bit_inx < 0) - return SLURM_ERROR; - - bit_set(select_job_res_ptr->core_bitmap, bit_inx); - return SLURM_SUCCESS; -} - -extern int get_select_job_res_node(select_job_res_t *select_job_res_ptr, - uint32_t node_id) -{ - int i, bit_inx = 0, core_cnt = 0; - - 
xassert(select_job_res_ptr); - - for (i=0; i<select_job_res_ptr->nhosts; i++) { - if (select_job_res_ptr->sock_core_rep_count[i] <= node_id) { - bit_inx += select_job_res_ptr->sockets_per_node[i] * - select_job_res_ptr->cores_per_socket[i] * - select_job_res_ptr->sock_core_rep_count[i]; - node_id -= select_job_res_ptr->sock_core_rep_count[i]; - } else { - bit_inx += select_job_res_ptr->sockets_per_node[i] * - select_job_res_ptr->cores_per_socket[i] * - node_id; - core_cnt = select_job_res_ptr->sockets_per_node[i] * - select_job_res_ptr->cores_per_socket[i]; - break; - } - } - if (core_cnt < 1) { - error("get_select_job_res_node: core_cnt=0"); - return 0; - } - i = bit_size(select_job_res_ptr->core_bitmap); - if ((bit_inx + core_cnt) > i) { - error("get_select_job_res_node: offset > bitmap size " - "(%d >= %d)", (bit_inx + core_cnt), i); - return 0; - } - - for (i=0; i<core_cnt; i++) { - if (bit_test(select_job_res_ptr->core_bitmap, bit_inx++)) - return 1; - } - return 0; -} - -extern int set_select_job_res_node(select_job_res_t *select_job_res_ptr, - uint32_t node_id) -{ - int i, bit_inx = 0, core_cnt = 0; - - xassert(select_job_res_ptr); - - for (i=0; i<select_job_res_ptr->nhosts; i++) { - if (select_job_res_ptr->sock_core_rep_count[i] <= node_id) { - bit_inx += select_job_res_ptr->sockets_per_node[i] * - select_job_res_ptr->cores_per_socket[i] * - select_job_res_ptr->sock_core_rep_count[i]; - node_id -= select_job_res_ptr->sock_core_rep_count[i]; - } else { - bit_inx += select_job_res_ptr->sockets_per_node[i] * - select_job_res_ptr->cores_per_socket[i] * - node_id; - core_cnt = select_job_res_ptr->sockets_per_node[i] * - select_job_res_ptr->cores_per_socket[i]; - break; - } - } - if (core_cnt < 1) { - error("set_select_job_res_node: core_cnt=0"); - return SLURM_ERROR; - } - - i = bit_size(select_job_res_ptr->core_bitmap); - if ((bit_inx + core_cnt) > i) { - error("set_select_job_res_node: offset > bitmap size " - "(%d >= %d)", (bit_inx + core_cnt), i); - return 
SLURM_ERROR; - } - - for (i=0; i<core_cnt; i++) - bit_set(select_job_res_ptr->core_bitmap, bit_inx++); - - return SLURM_SUCCESS; -} - -extern int get_select_job_res_cnt(select_job_res_t *select_job_res_ptr, - uint32_t node_id, - uint16_t *socket_cnt, - uint16_t *cores_per_socket_cnt) -{ - int i, node_inx = -1; - - xassert(socket_cnt); - xassert(cores_per_socket_cnt); - xassert(select_job_res_ptr->cores_per_socket); - xassert(select_job_res_ptr->sock_core_rep_count); - xassert(select_job_res_ptr->sockets_per_node); - - for (i=0; i<select_job_res_ptr->nhosts; i++) { - node_inx += select_job_res_ptr->sock_core_rep_count[i]; - if (node_id <= node_inx) { - *cores_per_socket_cnt = select_job_res_ptr-> - cores_per_socket[i]; - *socket_cnt = select_job_res_ptr->sockets_per_node[i]; - return SLURM_SUCCESS; - } - } - - error("get_select_job_res_cnt: invalid node_id: %u", node_id); - *cores_per_socket_cnt = 0; - *socket_cnt = 0; - return SLURM_ERROR; -} - -/* Return 1 if the given job can fit into the given full-length core_bitmap, - * else return 0. 
- */ -extern int can_select_job_cores_fit(select_job_res_t *select_ptr, - bitstr_t *full_bitmap, - const uint16_t *bits_per_node, - const uint32_t *bit_rep_count) -{ - uint32_t i, n, count = 1, last_bit = 0; - uint32_t c = 0, j = 0, k = 0; - - if (!full_bitmap) - return 1; - - for (i = 0, n = 0; i < select_ptr->nhosts; n++) { - last_bit += bits_per_node[k]; - if (++count > bit_rep_count[k]) { - k++; - count = 1; - } - if (bit_test(select_ptr->node_bitmap, n) == 0) { - c = last_bit; - continue; - } - for (; c < last_bit; c++, j++) { - if (bit_test(full_bitmap, c) && - bit_test(select_ptr->core_bitmap, j)) - return 0; - } - i++; - } - return 1; -} - -/* add the given job to the given full_core_bitmap */ -extern void add_select_job_to_row(select_job_res_t *select_ptr, - bitstr_t **full_core_bitmap, - const uint16_t *cores_per_node, - const uint32_t *core_rep_count) -{ - uint32_t i, n, count = 1, last_bit = 0; - uint32_t c = 0, j = 0, k = 0; - - if (!select_ptr->core_bitmap) - return; - - /* add the job to the row_bitmap */ - if (*full_core_bitmap == NULL) { - uint32_t size = 0; - for (i = 0; core_rep_count[i]; i++) { - size += cores_per_node[i] * core_rep_count[i]; - } - *full_core_bitmap = bit_alloc(size); - if (!*full_core_bitmap) - fatal("add_select_job_to_row: bitmap memory error"); - } - - for (i = 0, n = 0; i < select_ptr->nhosts; n++) { - last_bit += cores_per_node[k]; - if (++count > core_rep_count[k]) { - k++; - count = 1; - } - if (bit_test(select_ptr->node_bitmap, n) == 0) { - c = last_bit; - continue; - } - for (; c < last_bit; c++, j++) { - if (bit_test(select_ptr->core_bitmap, j)) - bit_set(*full_core_bitmap, c); - } - i++; - } -} diff --git a/src/common/slurm_cred.c b/src/common/slurm_cred.c index 5921d7b71126553ed0297ec63e4642645b178e32..260863d58b8226846102c608ee0643188a81d116 100644 --- a/src/common/slurm_cred.c +++ b/src/common/slurm_cred.c @@ -59,7 +59,7 @@ #include "src/common/macros.h" #include "src/common/plugin.h" #include 
"src/common/plugrack.h" -#include "src/common/select_job_res.h" +#include "src/common/job_resources.h" #include "src/common/slurm_cred.h" #include "src/common/slurm_protocol_api.h" #include "src/common/xassert.h" diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index 0bd0457193fcb483fe8371d177de40972aade47e..2c85a5e0a366186a415acf2f7c8e07bc7a3d3647 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -417,7 +417,7 @@ void slurm_free_job_info_members(job_info_t * job) xfree(job->req_nodes); xfree(job->resv_name); select_g_select_jobinfo_free(job->select_jobinfo); - free_select_job_res(&job->select_job_res); + free_job_resources(&job->job_resources); xfree(job->state_desc); xfree(job->wckey); xfree(job->work_dir); diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index bcdcbb65e460ca1bc089d30bdbf87d8c5b0b5362..955ed133d9670a255576cb99f769fe7473981e6d 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -2815,7 +2815,7 @@ _unpack_job_info_members(job_info_t * job, Buf buffer) safe_unpackstr_xmalloc(&job->resv_name, &uint32_tmp, buffer); safe_unpack32(&job->exit_code, buffer); - unpack_select_job_res(&job->select_job_res, buffer); + unpack_job_resources(&job->job_resources, buffer); safe_unpackstr_xmalloc(&job->name, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job->wckey, &uint32_tmp, buffer); diff --git a/src/plugins/sched/wiki/get_jobs.c b/src/plugins/sched/wiki/get_jobs.c index c9b1edb983b6a8e8c9818ec1ec4440a419858470..5546d6fb0b36da186e3de5592a58e84e74f399fa 100644 --- a/src/plugins/sched/wiki/get_jobs.c +++ b/src/plugins/sched/wiki/get_jobs.c @@ -577,14 +577,14 @@ static char * _task_list(struct job_record *job_ptr) int i, j, task_cnt; char *buf = NULL, *host; hostlist_t hl = hostlist_create(job_ptr->nodes); - select_job_res_t *select_ptr = job_ptr->select_job; + job_resources_t *job_resrcs_ptr = job_ptr->job_resrcs; - 
xassert(select_ptr && select_ptr->cpus); + xassert(job_resrcs_ptr && job_resrcs_ptr->cpus); buf = xstrdup(""); if (hl == NULL) return buf; - for (i=0; i<select_ptr->nhosts; i++) { + for (i=0; i<job_resrcs_ptr->nhosts; i++) { host = hostlist_shift(hl); if (host == NULL) { error("bad node_cnt for job %u (%s, %d)", @@ -592,7 +592,7 @@ static char * _task_list(struct job_record *job_ptr) job_ptr->node_cnt); break; } - task_cnt = select_ptr->cpus[i]; + task_cnt = job_resrcs_ptr->cpus[i]; if (job_ptr->details && job_ptr->details->cpus_per_task) task_cnt /= job_ptr->details->cpus_per_task; for (j=0; j<task_cnt; j++) { diff --git a/src/plugins/sched/wiki/hostlist.c b/src/plugins/sched/wiki/hostlist.c index d433d38c95c795500cc9e3fb3b23aa4fb0316f4d..efb365fff00288c8a96356d0111c8d64cab2b4c8 100644 --- a/src/plugins/sched/wiki/hostlist.c +++ b/src/plugins/sched/wiki/hostlist.c @@ -165,28 +165,30 @@ static char * _task_list(struct job_record *job_ptr) { int i, j, node_inx = 0, task_cnt; char *buf = NULL, *host; - select_job_res_t *select_ptr = job_ptr->select_job; + job_resources_t *job_resrcs_ptr = job_ptr->job_resrcs; - xassert(select_ptr); - for (i=0; i<select_ptr->nhosts; i++) { + xassert(job_resrcs_ptr); + for (i=0; i<job_resrcs_ptr->nhosts; i++) { if (i == 0) { - xassert(select_ptr->cpus && select_ptr->node_bitmap); - node_inx = bit_ffs(select_ptr->node_bitmap); + xassert(job_resrcs_ptr->cpus && + job_resrcs_ptr->node_bitmap); + node_inx = bit_ffs(job_resrcs_ptr->node_bitmap); } else { for (node_inx++; node_inx<node_record_count; node_inx++) { - if (bit_test(select_ptr->node_bitmap,node_inx)) + if (bit_test(job_resrcs_ptr->node_bitmap, + node_inx)) break; } if (node_inx >= node_record_count) { - error("Improperly formed select_job for %u", + error("Improperly formed job_resrcs for %u", job_ptr->job_id); break; } } host = node_record_table_ptr[node_inx].name; - task_cnt = select_ptr->cpus[i]; + task_cnt = job_resrcs_ptr->cpus[i]; if (job_ptr->details && 
job_ptr->details->cpus_per_task) task_cnt /= job_ptr->details->cpus_per_task; if (task_cnt < 1) { @@ -264,28 +266,30 @@ static char * _task_list_exp(struct job_record *job_ptr) int i, node_inx = 0, reps = -1, task_cnt; char *buf = NULL, *host; hostlist_t hl_tmp = (hostlist_t) NULL; - select_job_res_t *select_ptr = job_ptr->select_job; + job_resources_t *job_resrcs_ptr = job_ptr->job_resrcs; - xassert(select_ptr); - for (i=0; i<select_ptr->nhosts; i++) { + xassert(job_resrcs_ptr); + for (i=0; i<job_resrcs_ptr->nhosts; i++) { if (i == 0) { - xassert(select_ptr->cpus && select_ptr->node_bitmap); - node_inx = bit_ffs(select_ptr->node_bitmap); + xassert(job_resrcs_ptr->cpus && + job_resrcs_ptr->node_bitmap); + node_inx = bit_ffs(job_resrcs_ptr->node_bitmap); } else { - for (node_inx++; node_inx<node_record_count; + for (node_inx++; node_inx<node_record_count; node_inx++) { - if (bit_test(select_ptr->node_bitmap,node_inx)) + if (bit_test(job_resrcs_ptr->node_bitmap, + node_inx)) break; } if (node_inx >= node_record_count) { - error("Improperly formed select_job for %u", + error("Improperly formed job_resrcs for %u", job_ptr->job_id); break; } } host = node_record_table_ptr[node_inx].name; - task_cnt = select_ptr->cpus[i]; + task_cnt = job_resrcs_ptr->cpus[i]; if (job_ptr->details && job_ptr->details->cpus_per_task) task_cnt /= job_ptr->details->cpus_per_task; if (task_cnt < 1) { diff --git a/src/plugins/sched/wiki2/hostlist.c b/src/plugins/sched/wiki2/hostlist.c index da0e4654e7edd820c772e4c8bfe52fb21c6a3321..8818ff211a92fb39f1f62d9ab967d49edb4b31b4 100644 --- a/src/plugins/sched/wiki2/hostlist.c +++ b/src/plugins/sched/wiki2/hostlist.c @@ -105,7 +105,7 @@ extern char * moab2slurm_task_list(char *moab_tasklist, int *task_cnt) (*task_cnt)++; } else if (slurm_tasklist[i] == ',') (*task_cnt)++; - } + } return slurm_tasklist; } @@ -165,28 +165,30 @@ static char * _task_list(struct job_record *job_ptr) { int i, j, node_inx = 0, task_cnt; char *buf = NULL, *host; - 
select_job_res_t *select_ptr = job_ptr->select_job; + job_resources_t *job_resrcs_ptr = job_ptr->job_resrcs; - xassert(select_ptr); - for (i=0; i<select_ptr->nhosts; i++) { + xassert(job_resrcs_ptr); + for (i=0; i<job_resrcs_ptr->nhosts; i++) { if (i == 0) { - xassert(select_ptr->cpus && select_ptr->node_bitmap); - node_inx = bit_ffs(select_ptr->node_bitmap); + xassert(job_resrcs_ptr->cpus && + job_resrcs_ptr->node_bitmap); + node_inx = bit_ffs(job_resrcs_ptr->node_bitmap); } else { - for (node_inx++; node_inx<node_record_count; + for (node_inx++; node_inx<node_record_count; node_inx++) { - if (bit_test(select_ptr->node_bitmap,node_inx)) + if (bit_test(job_resrcs_ptr->node_bitmap, + node_inx)) break; } if (node_inx >= node_record_count) { - error("Improperly formed select_job for %u", + error("Improperly formed job_resrcs for %u", job_ptr->job_id); break; } } host = node_record_table_ptr[node_inx].name; - task_cnt = select_ptr->cpus[i]; + task_cnt = job_resrcs_ptr->cpus[i]; if (job_ptr->details && job_ptr->details->cpus_per_task) task_cnt /= job_ptr->details->cpus_per_task; if (task_cnt < 1) { @@ -264,28 +266,30 @@ static char * _task_list_exp(struct job_record *job_ptr) int i, node_inx = 0, reps = -1, task_cnt; char *buf = NULL, *host; hostlist_t hl_tmp = (hostlist_t) NULL; - select_job_res_t *select_ptr = job_ptr->select_job; + job_resources_t *job_resrcs_ptr = job_ptr->job_resrcs; - xassert(select_ptr); - for (i=0; i<select_ptr->nhosts; i++) { + xassert(job_resrcs_ptr); + for (i=0; i<job_resrcs_ptr->nhosts; i++) { if (i == 0) { - xassert(select_ptr->cpus && select_ptr->node_bitmap); - node_inx = bit_ffs(select_ptr->node_bitmap); + xassert(job_resrcs_ptr->cpus && + job_resrcs_ptr->node_bitmap); + node_inx = bit_ffs(job_resrcs_ptr->node_bitmap); } else { - for (node_inx++; node_inx<node_record_count; + for (node_inx++; node_inx<node_record_count; node_inx++) { - if (bit_test(select_ptr->node_bitmap,node_inx)) + if (bit_test(job_resrcs_ptr->node_bitmap, + 
node_inx)) break; } if (node_inx >= node_record_count) { - error("Improperly formed select_job for %u", + error("Improperly formed job_resrcs for %u", job_ptr->job_id); break; } } host = node_record_table_ptr[node_inx].name; - task_cnt = select_ptr->cpus[i]; + task_cnt = job_resrcs_ptr->cpus[i]; if (job_ptr->details && job_ptr->details->cpus_per_task) task_cnt /= job_ptr->details->cpus_per_task; if (task_cnt < 1) { diff --git a/src/plugins/select/bluegene/plugin/bg_job_place.c b/src/plugins/select/bluegene/plugin/bg_job_place.c index 087f4e4d2845272a979f4354b31472e7f7407020..04968d9cd9b92dbd41a08de65acea1074acfe3c2 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_place.c +++ b/src/plugins/select/bluegene/plugin/bg_job_place.c @@ -1233,26 +1233,26 @@ static int _sync_block_lists(List full_list, List incomp_list) /* int i, j, k; */ /* int first_bit, last_bit; */ /* uint32_t node_cpus, total_cpus = 0, node_cnt; */ -/* select_job_res_t *select_ptr; */ +/* job_resources_t *job_resrcs_ptr; */ /* if (job_ptr->select_job) { */ /* error("select_p_job_test: already have select_job"); */ -/* free_select_job_res(&job_ptr->select_job); */ +/* free_job_resources(&job_ptr->select_job); */ /* } */ /* node_cnt = bit_set_count(bitmap); */ -/* job_ptr->select_job = select_ptr = create_select_job_res(); */ -/* select_ptr->cpu_array_reps = xmalloc(sizeof(uint32_t) * node_cnt); */ -/* select_ptr->cpu_array_value = xmalloc(sizeof(uint16_t) * node_cnt); */ -/* select_ptr->cpus = xmalloc(sizeof(uint16_t) * node_cnt); */ -/* select_ptr->cpus_used = xmalloc(sizeof(uint16_t) * node_cnt); */ -/* select_ptr->nhosts = node_cnt; */ -/* select_ptr->node_bitmap = bit_copy(bitmap); */ -/* if (select_ptr->node_bitmap == NULL) */ +/* job_ptr->select_job = job_resrcs_ptr = create_job_resources(); */ +/* job_resrcs_ptr->cpu_array_reps = xmalloc(sizeof(uint32_t) * node_cnt); */ +/* job_resrcs_ptr->cpu_array_value = xmalloc(sizeof(uint16_t) * node_cnt); */ +/* job_resrcs_ptr->cpus = 
xmalloc(sizeof(uint16_t) * node_cnt); */ +/* job_resrcs_ptr->cpus_used = xmalloc(sizeof(uint16_t) * node_cnt); */ +/* job_resrcs_ptr->nhosts = node_cnt; */ +/* job_resrcs_ptr->node_bitmap = bit_copy(bitmap); */ +/* if (job_resrcs_ptr->node_bitmap == NULL) */ /* fatal("bit_copy malloc failure"); */ -/* select_ptr->nprocs = job_ptr->num_procs; */ -/* if (build_select_job_res(select_ptr, (void *)node_record_table_ptr, 1)) */ -/* error("select_p_job_test: build_select_job_res: %m"); */ +/* job_resrcs_ptr->nprocs = job_ptr->num_procs; */ +/* if (build_job_resources(job_resrcs_ptr, (void *)node_record_table_ptr, 1)) */ +/* error("select_p_job_test: build_job_resources: %m"); */ /* if (job_ptr->num_procs <= bg_conf->cpus_per_bp) */ /* node_cpus = job_ptr->num_procs; */ @@ -1265,29 +1265,29 @@ static int _sync_block_lists(List full_list, List incomp_list) /* if (!bit_test(bitmap, i)) */ /* continue; */ -/* select_ptr->cpus[j] = node_cpus; */ +/* job_resrcs_ptr->cpus[j] = node_cpus; */ /* if ((k == -1) || */ -/* (select_ptr->cpu_array_value[k] != node_cpus)) { */ -/* select_ptr->cpu_array_cnt++; */ -/* select_ptr->cpu_array_reps[++k] = 1; */ -/* select_ptr->cpu_array_value[k] = node_cpus; */ +/* (job_resrcs_ptr->cpu_array_value[k] != node_cpus)) { */ +/* job_resrcs_ptr->cpu_array_cnt++; */ +/* job_resrcs_ptr->cpu_array_reps[++k] = 1; */ +/* job_resrcs_ptr->cpu_array_value[k] = node_cpus; */ /* } else */ -/* select_ptr->cpu_array_reps[k]++; */ +/* job_resrcs_ptr->cpu_array_reps[k]++; */ /* total_cpus += node_cpus; */ /* #if 0 */ /* /\* This function could be used to control allocation of */ /* * specific c-nodes for multiple job steps per job allocation. */ /* * Such functionality is not currently support on BlueGene */ /* * systems. 
*/ -/* * Also see #ifdef HAVE_BG logic in common/select_job_res.c *\/ */ -/* if (set_select_job_res_node(select_ptr, j)) */ -/* error("select_p_job_test: set_select_job_res_node: %m"); */ +/* * Also see #ifdef HAVE_BG logic in common/job_resources.c *\/ */ +/* if (set_job_resources_node(job_resrcs_ptr, j)) */ +/* error("select_p_job_test: set_job_resources_node: %m"); */ /* #endif */ /* j++; */ /* } */ -/* if (select_ptr->nprocs != total_cpus) { */ +/* if (job_resrcs_ptr->nprocs != total_cpus) { */ /* error("select_p_job_test: nprocs mismatch %u != %u", */ -/* select_ptr->nprocs, total_cpus); */ +/* job_resrcs_ptr->nprocs, total_cpus); */ /* } */ /* } */ @@ -1296,37 +1296,37 @@ static void _build_select_struct(struct job_record *job_ptr, { int i; uint32_t total_cpus = 0; - select_job_res_t *select_ptr; + job_resources_t *job_resrcs_ptr; xassert(job_ptr); if (job_ptr->select_job) { error("select_p_job_test: already have select_job"); - free_select_job_res(&job_ptr->select_job); + free_job_resources(&job_ptr->select_job); } - job_ptr->select_job = select_ptr = create_select_job_res(); - select_ptr->cpu_array_reps = xmalloc(sizeof(uint32_t)); - select_ptr->cpu_array_value = xmalloc(sizeof(uint16_t)); - select_ptr->cpus = xmalloc(sizeof(uint16_t) * node_cnt); - select_ptr->cpus_used = xmalloc(sizeof(uint16_t) * node_cnt); - select_ptr->nhosts = node_cnt; - select_ptr->nprocs = job_ptr->num_procs; - select_ptr->node_bitmap = bit_copy(bitmap); - if (select_ptr->node_bitmap == NULL) + job_ptr->select_job = job_resrcs_ptr = create_job_resources(); + job_resrcs_ptr->cpu_array_reps = xmalloc(sizeof(uint32_t)); + job_resrcs_ptr->cpu_array_value = xmalloc(sizeof(uint16_t)); + job_resrcs_ptr->cpus = xmalloc(sizeof(uint16_t) * node_cnt); + job_resrcs_ptr->cpus_used = xmalloc(sizeof(uint16_t) * node_cnt); + job_resrcs_ptr->nhosts = node_cnt; + job_resrcs_ptr->nprocs = job_ptr->num_procs; + job_resrcs_ptr->node_bitmap = bit_copy(bitmap); + if (job_resrcs_ptr->node_bitmap == NULL) 
fatal("bit_copy malloc failure"); - select_ptr->cpu_array_cnt = 1; - select_ptr->cpu_array_value[0] = bg_conf->cpu_ratio; - select_ptr->cpu_array_reps[0] = node_cnt; + job_resrcs_ptr->cpu_array_cnt = 1; + job_resrcs_ptr->cpu_array_value[0] = bg_conf->cpu_ratio; + job_resrcs_ptr->cpu_array_reps[0] = node_cnt; total_cpus = bg_conf->cpu_ratio * node_cnt; for (i=0; i<node_cnt; i++) - select_ptr->cpus[i] = bg_conf->cpu_ratio; + job_resrcs_ptr->cpus[i] = bg_conf->cpu_ratio; - if (select_ptr->nprocs != total_cpus) { + if (job_resrcs_ptr->nprocs != total_cpus) { error("select_p_job_test: nprocs mismatch %u != %u", - select_ptr->nprocs, total_cpus); + job_resrcs_ptr->nprocs, total_cpus); } } diff --git a/src/plugins/select/cons_res/dist_tasks.c b/src/plugins/select/cons_res/dist_tasks.c index b3b08db2f2ae6a818a9ce10f9090fbab4af19347..5c44654d7f8fb8c911733cf2ed9e5525bf2b2de2 100644 --- a/src/plugins/select/cons_res/dist_tasks.c +++ b/src/plugins/select/cons_res/dist_tasks.c @@ -73,7 +73,7 @@ static int _compute_c_b_task_dist(struct job_record *job_ptr) bool over_subscribe = false; uint32_t n, i, tid, maxtasks; uint16_t *avail_cpus; - select_job_res_t *job_res = job_ptr->select_job; + job_resources_t *job_res = job_ptr->job_resrcs; if (!job_res || !job_res->cpus) { error("cons_res: _compute_c_b_task_dist given NULL job_ptr"); return SLURM_ERROR; @@ -117,7 +117,7 @@ static int _compute_plane_dist(struct job_record *job_ptr) bool over_subscribe = false; uint32_t n, i, p, tid, maxtasks; uint16_t *avail_cpus, plane_size = 1; - select_job_res_t *job_res = job_ptr->select_job; + job_resources_t *job_res = job_ptr->job_resrcs; if (!job_res || !job_res->cpus) { error("cons_res: _compute_plane_dist given NULL job_res"); return SLURM_ERROR; @@ -175,7 +175,7 @@ static void _block_sync_core_bitmap(struct job_record *job_ptr, { uint32_t c, i, n, size, csize, core_cnt; uint16_t cpus, num_bits, vpus = 1; - select_job_res_t *job_res = job_ptr->select_job; + job_resources_t *job_res = 
job_ptr->job_resrcs; bool alloc_cores = false, alloc_sockets = false; if (!job_res) @@ -253,7 +253,7 @@ static void _cyclic_sync_core_bitmap(struct job_record *job_ptr, { uint32_t c, i, j, s, n, *sock_start, *sock_end, size, csize, core_cnt; uint16_t cps = 0, cpus, vpus, sockets, sock_size; - select_job_res_t *job_res = job_ptr->select_job; + job_resources_t *job_res = job_ptr->job_resrcs; bitstr_t *core_map; bool *sock_used, alloc_cores = false, alloc_sockets = false; @@ -398,12 +398,12 @@ extern int cr_dist(struct job_record *job_ptr, { int error_code, cr_cpu = 1; - if (job_ptr->select_job->node_req == NODE_CR_RESERVED) { + if (job_ptr->job_resrcs->node_req == NODE_CR_RESERVED) { /* the job has been allocated an EXCLUSIVE set of nodes, * so it gets all of the bits in the core_bitmap and * all of the available CPUs in the cpus array */ - int size = bit_size(job_ptr->select_job->core_bitmap); - bit_nset(job_ptr->select_job->core_bitmap, 0, size-1); + int size = bit_size(job_ptr->job_resrcs->core_bitmap); + bit_nset(job_ptr->job_resrcs->core_bitmap, 0, size-1); return SLURM_SUCCESS; } diff --git a/src/plugins/select/cons_res/job_test.c b/src/plugins/select/cons_res/job_test.c index 95f1da24a993f2d872de2d19c9089a784e54669d..5564eff6a1928516faff67aa6c3526fa9e2957ac 100644 --- a/src/plugins/select/cons_res/job_test.c +++ b/src/plugins/select/cons_res/job_test.c @@ -1633,14 +1633,14 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, bool test_only; uint32_t c, i, n, csize, total_cpus, save_mem = 0; int32_t build_cnt; - select_job_res_t *job_res; + job_resources_t *job_res; struct part_res_record *p_ptr, *jp_ptr; uint16_t *cpu_count; layout_ptr = job_ptr->details->req_node_layout; reqmap = job_ptr->details->req_node_bitmap; - free_select_job_res(&job_ptr->select_job); + free_job_resources(&job_ptr->job_resrcs); if (mode == SELECT_MODE_TEST_ONLY) test_only = true; @@ -1936,7 +1936,7 @@ alloc_job: * - cpu_count is the number of cpus per allocated node 
* * Next steps are to cleanup the worker variables, - * create the select_job_res struct, + * create the job_resources struct, * distribute the job on the bits, and exit */ FREE_NULL_BITMAP(orig_map); @@ -1971,7 +1971,7 @@ alloc_job: debug3("cons_res: cr_job_test: distributing job %u", job_ptr->job_id); /** create the struct_job_res **/ - job_res = create_select_job_res(); + job_res = create_job_resources(); job_res->node_bitmap = bit_copy(bitmap); if (job_res->node_bitmap == NULL) fatal("bit_copy malloc failure"); @@ -1990,10 +1990,10 @@ alloc_job: sizeof(uint32_t)); /* store the hardware data for the selected nodes */ - error_code = build_select_job_res(job_res, node_record_table_ptr, + error_code = build_job_resources(job_res, node_record_table_ptr, select_fast_schedule); if (error_code != SLURM_SUCCESS) { - free_select_job_res(&job_res); + free_job_resources(&job_res); FREE_NULL_BITMAP(free_cores); return error_code; } @@ -2046,15 +2046,15 @@ alloc_job: FREE_NULL_BITMAP(free_cores); /* distribute the tasks and clear any unused cores */ - job_ptr->select_job = job_res; + job_ptr->job_resrcs = job_res; error_code = cr_dist(job_ptr, cr_type); if (error_code != SLURM_SUCCESS) { - free_select_job_res(&job_ptr->select_job); + free_job_resources(&job_ptr->job_resrcs); return error_code; } /* translate job_res->cpus array into format with rep count */ - build_cnt = build_select_job_res_cpu_array(job_res); + build_cnt = build_job_resources_cpu_array(job_res); if (build_cnt >= 0) job_ptr->total_procs = build_cnt; else diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c index e8da4b78414bcbf12af5a8a7afba8e52c91636d1..7a4c7b801e738086c5b5c764f617cae0aa8d30e7 100644 --- a/src/plugins/select/cons_res/select_cons_res.c +++ b/src/plugins/select/cons_res/select_cons_res.c @@ -200,14 +200,14 @@ static int _will_run_test(struct job_record *job_ptr, bitstr_t *bitmap, #if (CR_DEBUG) -static void _dump_job_res(select_job_res_t 
job) { +static void _dump_job_res(job_resources_t job) { char str[64]; if (job->core_bitmap) bit_fmt(str, sizeof(str), job->core_bitmap); else sprintf(str, "[no core_bitmap]"); - info("DEBUG: Dump select_job_res: nhosts %u cb %s", job->nhosts, str); + info("DEBUG: Dump job_resources: nhosts %u cb %s", job->nhosts, str); } static void _dump_nodes() @@ -530,7 +530,7 @@ static void _destroy_node_data(struct node_use_record *node_usage, } -static void _add_job_to_row(struct select_job_res *job, +static void _add_job_to_row(struct job_resources *job, struct part_row_data *r_ptr) { /* add the job to the row_bitmap */ @@ -539,26 +539,26 @@ static void _add_job_to_row(struct select_job_res *job, uint32_t size = bit_size(r_ptr->row_bitmap); bit_nclear(r_ptr->row_bitmap, 0, size-1); } - add_select_job_to_row(job, &(r_ptr->row_bitmap), cr_node_num_cores, + add_job_to_cores(job, &(r_ptr->row_bitmap), cr_node_num_cores, cr_num_core_count); /* add the job to the job_list */ if (r_ptr->num_jobs >= r_ptr->job_list_size) { r_ptr->job_list_size += 8; xrealloc(r_ptr->job_list, r_ptr->job_list_size * - sizeof(struct select_job_res *)); + sizeof(struct job_resources *)); } r_ptr->job_list[r_ptr->num_jobs++] = job; } /* test for conflicting core_bitmap bits */ -static int _can_job_fit_in_row(struct select_job_res *job, +static int _can_job_fit_in_row(struct job_resources *job, struct part_row_data *r_ptr) { if (r_ptr->num_jobs == 0 || !r_ptr->row_bitmap) return 1; - return can_select_job_cores_fit(job, r_ptr->row_bitmap, + return job_fits_into_cores(job, r_ptr->row_bitmap, cr_node_num_cores, cr_num_core_count); } @@ -626,7 +626,7 @@ static void _build_row_bitmaps(struct part_res_record *p_ptr) uint32_t i, j, num_jobs, size; int x, *jstart; struct part_row_data *this_row, *orig_row; - struct select_job_res **tmpjobs, *job; + struct job_resources **tmpjobs, *job; if (!p_ptr->row) return; @@ -643,7 +643,7 @@ static void _build_row_bitmaps(struct part_res_record *p_ptr) /* rebuild the row 
bitmap */ num_jobs = this_row->num_jobs; - tmpjobs = xmalloc(num_jobs * sizeof(struct select_job_res *)); + tmpjobs = xmalloc(num_jobs * sizeof(struct job_resources *)); for (i = 0; i < num_jobs; i++) { tmpjobs[i] = this_row->job_list[i]; this_row->job_list[i] = NULL; @@ -690,7 +690,7 @@ static void _build_row_bitmaps(struct part_res_record *p_ptr) size = bit_size(p_ptr->row[0].row_bitmap); /* create a master job list and clear out ALL row data */ - tmpjobs = xmalloc(num_jobs * sizeof(struct select_job_res *)); + tmpjobs = xmalloc(num_jobs * sizeof(struct job_resources *)); jstart = xmalloc(num_jobs * sizeof(int)); x = 0; for (i = 0; i < p_ptr->num_rows; i++) { @@ -789,7 +789,7 @@ static void _build_row_bitmaps(struct part_res_record *p_ptr) if (p_ptr->row[i].num_jobs == 0) continue; for (j = 0; j < p_ptr->row[i].num_jobs; j++) { - add_select_job_to_row( + add_job_to_cores( p_ptr->row[i].job_list[j], &(p_ptr->row[i].row_bitmap), cr_node_num_cores, @@ -841,7 +841,7 @@ static void _build_row_bitmaps(struct part_res_record *p_ptr) /* allocate resources to the given job - * - add 'struct select_job_res' resources to 'struct part_res_record' + * - add 'struct job_resources' resources to 'struct part_res_record' * - add job's memory requirements to 'struct node_res_record' * * if action = 0 then add cores and memory @@ -850,7 +850,7 @@ static void _build_row_bitmaps(struct part_res_record *p_ptr) */ static int _add_job_to_res(struct job_record *job_ptr, int action) { - struct select_job_res *job = job_ptr->select_job; + struct job_resources *job = job_ptr->job_resrcs; struct part_res_record *p_ptr; int i, n; @@ -935,7 +935,7 @@ static int _add_job_to_res(struct job_record *job_ptr, int action) /* deallocate resources to the given job - * - subtract 'struct select_job_res' resources from 'struct part_res_record' + * - subtract 'struct job_resources' resources from 'struct part_res_record' * - subtract job's memory requirements from 'struct node_res_record' * * if action = 
0 then subtract cores and memory @@ -947,7 +947,7 @@ static int _rm_job_from_res(struct part_res_record *part_record_ptr, struct node_use_record *node_usage, struct job_record *job_ptr, int action) { - struct select_job_res *job = job_ptr->select_job; + struct job_resources *job = job_ptr->job_resrcs; int i, n; if (!job || !job->core_bitmap) { @@ -1679,14 +1679,14 @@ extern int select_p_job_test(struct job_record *job_ptr, bitstr_t * bitmap, fatal("select_p_job_test: Mode %d is invalid", mode); #if (CR_DEBUG) - if (job_ptr->select_job) - log_select_job_res(job_ptr->job_id, job_ptr->select_job); + if (job_ptr->job_resrcs) + log_job_resources(job_ptr->job_id, job_ptr->job_resrcs); else - info("no select_job_res info for job %u", + info("no job_resources info for job %u", job_ptr->job_id); #else - if (debug_cpu_bind && job_ptr->select_job) - log_select_job_res(job_ptr->job_id, job_ptr->select_job); + if (debug_cpu_bind && job_ptr->job_resrcs) + log_job_resources(job_ptr->job_id, job_ptr->job_resrcs); #endif return rc; diff --git a/src/plugins/select/cons_res/select_cons_res.h b/src/plugins/select/cons_res/select_cons_res.h index 01da757ae41bbbf28c426d316e9261c291c16bc8..9b65eea77ec761893ded456bb6a12a4bc39a7aa5 100644 --- a/src/plugins/select/cons_res/select_cons_res.h +++ b/src/plugins/select/cons_res/select_cons_res.h @@ -85,7 +85,7 @@ enum node_cr_state { struct part_row_data { bitstr_t *row_bitmap; /* contains all jobs for this row */ uint32_t num_jobs; /* Number of jobs in this row */ - struct select_job_res **job_list;/* List of jobs in this row */ + struct job_resources **job_list;/* List of jobs in this row */ uint32_t job_list_size; /* Size of job_list array */ }; diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c index ae1ef76cb9b6a4690fbfb414dbff040ba72490f6..765a58fa042786f115156c4106a3c858d27cfa69 100644 --- a/src/plugins/select/linear/select_linear.c +++ b/src/plugins/select/linear/select_linear.c @@ -61,7 
+61,7 @@ #include "src/common/log.h" #include "src/common/node_select.h" #include "src/common/parse_time.h" -#include "src/common/select_job_res.h" +#include "src/common/job_resources.h" #include "src/common/slurm_protocol_api.h" #include "src/common/slurm_resource_info.h" #include "src/common/xassert.h" @@ -459,7 +459,7 @@ static uint16_t _get_total_cpus(int index) return node_ptr->cpus; } -/* Build the full select_job_res_t *structure for a job based upon the nodes +/* Build the full job_resources_t *structure for a job based upon the nodes * allocated to it (the bitmap) and the job's memory requirement */ static void _build_select_struct(struct job_record *job_ptr, bitstr_t *bitmap) { @@ -469,7 +469,7 @@ static void _build_select_struct(struct job_record *job_ptr, bitstr_t *bitmap) struct node_record *node_ptr; uint32_t job_memory_cpu = 0, job_memory_node = 0; bool memory_info = false; - select_job_res_t *select_ptr; + job_resources_t *job_resrcs_ptr; if (job_ptr->details->job_min_memory && (cr_type == CR_MEMORY)) { if (job_ptr->details->job_min_memory & MEM_PER_CPU) { @@ -482,25 +482,25 @@ static void _build_select_struct(struct job_record *job_ptr, bitstr_t *bitmap) } } - if (job_ptr->select_job) /* Old struct due to job requeue */ - free_select_job_res(&job_ptr->select_job); + if (job_ptr->job_resrcs) /* Old struct due to job requeue */ + free_job_resources(&job_ptr->job_resrcs); node_cnt = bit_set_count(bitmap); - job_ptr->select_job = select_ptr = create_select_job_res(); - select_ptr->cpu_array_reps = xmalloc(sizeof(uint32_t) * node_cnt); - select_ptr->cpu_array_value = xmalloc(sizeof(uint16_t) * node_cnt); - select_ptr->cpus = xmalloc(sizeof(uint16_t) * node_cnt); - select_ptr->cpus_used = xmalloc(sizeof(uint16_t) * node_cnt); - select_ptr->memory_allocated = xmalloc(sizeof(uint32_t) * node_cnt); - select_ptr->memory_used = xmalloc(sizeof(uint32_t) * node_cnt); - select_ptr->nhosts = node_cnt; - select_ptr->node_bitmap = bit_copy(bitmap); - if 
(select_ptr->node_bitmap == NULL) + job_ptr->job_resrcs = job_resrcs_ptr = create_job_resources(); + job_resrcs_ptr->cpu_array_reps = xmalloc(sizeof(uint32_t) * node_cnt); + job_resrcs_ptr->cpu_array_value = xmalloc(sizeof(uint16_t) * node_cnt); + job_resrcs_ptr->cpus = xmalloc(sizeof(uint16_t) * node_cnt); + job_resrcs_ptr->cpus_used = xmalloc(sizeof(uint16_t) * node_cnt); + job_resrcs_ptr->memory_allocated = xmalloc(sizeof(uint32_t) * node_cnt); + job_resrcs_ptr->memory_used = xmalloc(sizeof(uint32_t) * node_cnt); + job_resrcs_ptr->nhosts = node_cnt; + job_resrcs_ptr->node_bitmap = bit_copy(bitmap); + if (job_resrcs_ptr->node_bitmap == NULL) fatal("bit_copy malloc failure"); - select_ptr->nprocs = job_ptr->total_procs; - if (build_select_job_res(select_ptr, (void *)select_node_ptr, + job_resrcs_ptr->nprocs = job_ptr->total_procs; + if (build_job_resources(job_resrcs_ptr, (void *)select_node_ptr, select_fast_schedule)) - error("_build_select_struct: build_select_job_res: %m"); + error("_build_select_struct: build_job_resources: %m"); first_bit = bit_ffs(bitmap); last_bit = bit_fls(bitmap); @@ -512,34 +512,34 @@ static void _build_select_struct(struct job_record *job_ptr, bitstr_t *bitmap) node_cpus = node_ptr->config_ptr->cpus; else node_cpus = node_ptr->cpus; - select_ptr->cpus[j] = node_cpus; + job_resrcs_ptr->cpus[j] = node_cpus; if ((k == -1) || - (select_ptr->cpu_array_value[k] != node_cpus)) { - select_ptr->cpu_array_cnt++; - select_ptr->cpu_array_reps[++k] = 1; - select_ptr->cpu_array_value[k] = node_cpus; + (job_resrcs_ptr->cpu_array_value[k] != node_cpus)) { + job_resrcs_ptr->cpu_array_cnt++; + job_resrcs_ptr->cpu_array_reps[++k] = 1; + job_resrcs_ptr->cpu_array_value[k] = node_cpus; } else - select_ptr->cpu_array_reps[k]++; + job_resrcs_ptr->cpu_array_reps[k]++; total_cpus += node_cpus; if (!memory_info) ; else if (job_memory_node) - select_ptr->memory_allocated[j] = job_memory_node; + job_resrcs_ptr->memory_allocated[j] = job_memory_node; else if 
(job_memory_cpu) { - select_ptr->memory_allocated[j] = + job_resrcs_ptr->memory_allocated[j] = job_memory_cpu * node_cpus; } - if (set_select_job_res_node(select_ptr, j)) { - error("_build_select_struct: set_select_job_res_node: " + if (set_job_resources_node(job_resrcs_ptr, j)) { + error("_build_select_struct: set_job_resources_node: " "%m"); } j++; } - if (select_ptr->nprocs != total_cpus) { + if (job_resrcs_ptr->nprocs != total_cpus) { error("_build_select_struct: nprocs mismatch %u != %u", - select_ptr->nprocs, total_cpus); + job_resrcs_ptr->nprocs, total_cpus); } } @@ -1234,7 +1234,7 @@ static int _rm_job_from_nodes(struct node_cr_record *node_cr_ptr, { int i, i_first, i_last, rc = SLURM_SUCCESS; struct part_cr_record *part_cr_ptr; - select_job_res_t *select_ptr; + job_resources_t *job_resrcs_ptr; uint32_t job_memory, job_memory_cpu = 0, job_memory_node = 0; bool exclusive; @@ -1253,20 +1253,20 @@ static int _rm_job_from_nodes(struct node_cr_record *node_cr_ptr, job_memory_node = job_ptr->details->job_min_memory; } - if ((select_ptr = job_ptr->select_job) == NULL) { - error("job %u lacks a select_job_res struct", + if ((job_resrcs_ptr = job_ptr->job_resrcs) == NULL) { + error("job %u lacks a job_resources struct", job_ptr->job_id); return SLURM_ERROR; } - i_first = bit_ffs(select_ptr->node_bitmap); - i_last = bit_fls(select_ptr->node_bitmap); + i_first = bit_ffs(job_resrcs_ptr->node_bitmap); + i_last = bit_fls(job_resrcs_ptr->node_bitmap); if (i_first < 0) { error("job %u allocated nodes which have been removed " "from slurm.conf", job_ptr->job_id); return SLURM_ERROR; } for (i = i_first; i <= i_last; i++) { - if (bit_test(select_ptr->node_bitmap, i) == 0) + if (bit_test(job_resrcs_ptr->node_bitmap, i) == 0) continue; if (job_memory_cpu == 0) job_memory = job_memory_node; @@ -1359,7 +1359,7 @@ static int _add_job_to_nodes(struct node_cr_record *node_cr_ptr, int i, i_first, i_last, rc = SLURM_SUCCESS; bool exclusive; struct part_cr_record *part_cr_ptr; - 
select_job_res_t *select_ptr; + job_resources_t *job_resrcs_ptr; uint32_t job_memory_cpu = 0, job_memory_node = 0; if (node_cr_ptr == NULL) { @@ -1377,15 +1377,15 @@ static int _add_job_to_nodes(struct node_cr_record *node_cr_ptr, } exclusive = (job_ptr->details->shared == 0); - if ((select_ptr = job_ptr->select_job) == NULL) { - error("job %u lacks a select_job_res struct", + if ((job_resrcs_ptr = job_ptr->job_resrcs) == NULL) { + error("job %u lacks a job_resources struct", job_ptr->job_id); return SLURM_ERROR; } - i_first = bit_ffs(select_ptr->node_bitmap); - i_last = bit_fls(select_ptr->node_bitmap); + i_first = bit_ffs(job_resrcs_ptr->node_bitmap); + i_last = bit_fls(job_resrcs_ptr->node_bitmap); for (i=i_first; ((i<=i_last) && (i_first>=0)); i++) { - if (bit_test(select_ptr->node_bitmap, i) == 0) + if (bit_test(job_resrcs_ptr->node_bitmap, i) == 0) continue; if (job_memory_cpu == 0) node_cr_ptr[i].alloc_memory += job_memory_node; @@ -1521,7 +1521,7 @@ static void _init_node_cr(void) { struct part_record *part_ptr; struct part_cr_record *part_cr_ptr; - select_job_res_t *select_ptr; + job_resources_t *job_resrcs_ptr; ListIterator part_iterator; struct job_record *job_ptr; ListIterator job_iterator; @@ -1556,8 +1556,8 @@ static void _init_node_cr(void) if (!IS_JOB_RUNNING(job_ptr) && !IS_JOB_SUSPENDED(job_ptr) && !IS_JOB_COMPLETING(job_ptr)) continue; - if ((select_ptr = job_ptr->select_job) == NULL) { - error("job %u lacks a select_job_res struct", + if ((job_resrcs_ptr = job_ptr->job_resrcs) == NULL) { + error("job %u lacks a job_resources struct", job_ptr->job_id); continue; } @@ -1577,14 +1577,14 @@ static void _init_node_cr(void) } exclusive = (job_ptr->details->shared == 0); - /* Use select_ptr->node_bitmap rather than job_ptr->node_bitmap + /* Use job_resrcs_ptr->node_bitmap rather than job_ptr->node_bitmap * which can have DOWN nodes cleared from the bitmap */ - if (select_ptr->node_bitmap == NULL) + if (job_resrcs_ptr->node_bitmap == NULL) continue; - 
i_first = bit_ffs(select_ptr->node_bitmap); - i_last = bit_fls(select_ptr->node_bitmap); + i_first = bit_ffs(job_resrcs_ptr->node_bitmap); + i_last = bit_fls(job_resrcs_ptr->node_bitmap); for (i=i_first; ((i<=i_last) && (i_first>=0)); i++) { - if (!bit_test(select_ptr->node_bitmap, i)) + if (!bit_test(job_resrcs_ptr->node_bitmap, i)) continue; if (exclusive) node_cr_ptr[i].exclusive_cnt++; diff --git a/src/slurmctld/gang.c b/src/slurmctld/gang.c index 78efcea4421112faff101af24f0d6f31183c66db..6251c0b36ab26fc7b1dc5ca2343bc7fbc8a6ff01 100644 --- a/src/slurmctld/gang.c +++ b/src/slurmctld/gang.c @@ -104,10 +104,10 @@ struct gs_part { * * SUMMARY OF DATA MANAGEMENT * - * For GS_NODE: job_ptr->select_job->node_bitmap only - * For GS_CPU: job_ptr->select_job->{node_bitmap, cpus} - * For GS_SOCKET: job_ptr->select_job->{node,core}_bitmap - * For GS_CORE: job_ptr->select_job->{node,core}_bitmap + * For GS_NODE: job_ptr->job_resrcs->node_bitmap only + * For GS_CPU: job_ptr->job_resrcs->{node_bitmap, cpus} + * For GS_SOCKET: job_ptr->job_resrcs->{node,core}_bitmap + * For GS_CORE: job_ptr->job_resrcs->{node,core}_bitmap * * EVALUATION ALGORITHM * @@ -468,7 +468,7 @@ static int _can_cpus_fit(struct job_record *job_ptr, struct gs_part *p_ptr) { int i, j, size; uint16_t *p_cpus, *j_cpus; - select_job_res_t *job_res = job_ptr->select_job; + job_resources_t *job_res = job_ptr->job_resrcs; if (gr_type != GS_CPU) return 0; @@ -495,7 +495,7 @@ static int _can_cpus_fit(struct job_record *job_ptr, struct gs_part *p_ptr) static int _job_fits_in_active_row(struct job_record *job_ptr, struct gs_part *p_ptr) { - select_job_res_t *job_res = job_ptr->select_job; + job_resources_t *job_res = job_ptr->job_resrcs; int count; bitstr_t *job_map; @@ -503,7 +503,7 @@ static int _job_fits_in_active_row(struct job_record *job_ptr, return 1; if ((gr_type == GS_CORE) || (gr_type == GS_SOCKET)) { - return can_select_job_cores_fit(job_res, p_ptr->active_resmap, + return job_fits_into_cores(job_res, 
p_ptr->active_resmap, gs_bits_per_node, gs_bit_rep_count); } @@ -575,7 +575,7 @@ static void _fill_sockets(bitstr_t *job_nodemap, struct gs_part *p_ptr) static void _add_job_to_active(struct job_record *job_ptr, struct gs_part *p_ptr) { - select_job_res_t *job_res = job_ptr->select_job; + job_resources_t *job_res = job_ptr->job_resrcs; /* add job to active_resmap */ if (gr_type == GS_CORE || gr_type == GS_SOCKET) { @@ -583,7 +583,7 @@ static void _add_job_to_active(struct job_record *job_ptr, uint32_t size = bit_size(p_ptr->active_resmap); bit_nclear(p_ptr->active_resmap, 0, size-1); } - add_select_job_to_row(job_res, &(p_ptr->active_resmap), + add_job_to_cores(job_res, &(p_ptr->active_resmap), gs_bits_per_node, gs_bit_rep_count); if (gr_type == GS_SOCKET) _fill_sockets(job_res->node_bitmap, p_ptr); @@ -975,9 +975,9 @@ static uint16_t _add_job_to_part(struct gs_part *p_ptr, xassert(p_ptr); xassert(job_ptr->job_id > 0); - xassert(job_ptr->select_job); - xassert(job_ptr->select_job->node_bitmap); - xassert(job_ptr->select_job->core_bitmap); + xassert(job_ptr->job_resrcs); + xassert(job_ptr->job_resrcs->node_bitmap); + xassert(job_ptr->job_resrcs->core_bitmap); debug3("gang: _add_job_to_part: adding job %u to %s", job_ptr->job_id, p_ptr->part_name); diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 1246c54f5467c580cc909d92c28304a2e9c8aa13..0ed3a7d21cd0c2eb9c298095bdc212a2baf21d1b 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -742,7 +742,7 @@ static void _dump_job_state(struct job_record *dump_job_ptr, Buf buffer) packstr(dump_job_ptr->resv_name, buffer); select_g_select_jobinfo_pack(dump_job_ptr->select_jobinfo, buffer); - pack_select_job_res(dump_job_ptr->select_job, buffer); + pack_job_resources(dump_job_ptr->job_resrcs, buffer); pack16(dump_job_ptr->ckpt_interval, buffer); checkpoint_pack_jobinfo(dump_job_ptr->check_job, buffer); @@ -792,7 +792,7 @@ static int _load_job_state(Buf buffer) struct part_record *part_ptr; int 
error_code, i, qos_error; select_jobinfo_t *select_jobinfo = NULL; - select_job_res_t *select_job = NULL; + job_resources_t *job_resources = NULL; check_jobinfo_t check_job = NULL; acct_association_rec_t assoc_rec; acct_qos_rec_t qos_rec; @@ -855,7 +855,7 @@ static int _load_job_state(Buf buffer) if (select_g_select_jobinfo_unpack(&select_jobinfo, buffer)) goto unpack_error; - if (unpack_select_job_res(&select_job, buffer)) + if (unpack_job_resources(&job_resources, buffer)) goto unpack_error; safe_unpack16(&ckpt_interval, buffer); @@ -991,7 +991,7 @@ static int _load_job_state(Buf buffer) resv_name = NULL; /* reused, nothing left to free */ job_ptr->resv_flags = resv_flags; job_ptr->select_jobinfo = select_jobinfo; - job_ptr->select_job = select_job; + job_ptr->job_resrcs = job_resources; job_ptr->spank_job_env = spank_job_env; job_ptr->spank_job_env_size = spank_job_env_size; job_ptr->ckpt_interval = ckpt_interval; @@ -1606,11 +1606,11 @@ extern void excise_node_from_job(struct job_record *job_ptr, { int i, orig_pos = -1, new_pos = -1; bitstr_t *orig_bitmap = bit_copy(job_ptr->node_bitmap); - select_job_res_t *select_ptr = job_ptr->select_job; + job_resources_t *job_resrcs_ptr = job_ptr->job_resrcs; - xassert(select_ptr); - xassert(select_ptr->cpus); - xassert(select_ptr->cpus_used); + xassert(job_resrcs_ptr); + xassert(job_resrcs_ptr->cpus); + xassert(job_resrcs_ptr->cpus_used); make_node_idle(node_ptr, job_ptr); /* updates bitmap */ xfree(job_ptr->nodes); @@ -1626,7 +1626,7 @@ extern void excise_node_from_job(struct job_record *job_ptr, continue; memcpy(&job_ptr->node_addr[new_pos], &job_ptr->node_addr[orig_pos], sizeof(slurm_addr)); - /* NOTE: The job's allocation in the job_ptr->select_job + /* NOTE: The job's allocation in the job_ptr->job_resrcs * data structure is unchanged even after a node allocated * to the job goes DOWN. 
*/ } @@ -3985,7 +3985,7 @@ static void _list_delete_job(void *job_entry) xfree(job_ptr->partition); xfree(job_ptr->resp_host); xfree(job_ptr->resv_name); - free_select_job_res(&job_ptr->select_job); + free_job_resources(&job_ptr->job_resrcs); select_g_select_jobinfo_free(job_ptr->select_jobinfo); for (i=0; i<job_ptr->spank_job_env_size; i++) xfree(job_ptr->spank_job_env[i]); @@ -4237,7 +4237,7 @@ void pack_job(struct job_record *dump_job_ptr, uint16_t show_flags, Buf buffer) pack32(dump_job_ptr->exit_code, buffer); if (show_flags & SHOW_DETAIL) { - pack_select_job_res(dump_job_ptr->select_job, buffer); + pack_job_resources(dump_job_ptr->job_resrcs, buffer); } else { uint32_t empty = NO_VAL; pack32(empty, buffer); @@ -4443,11 +4443,11 @@ void reset_job_bitmaps(void) job_ptr->nodes, job_ptr->job_id); job_fail = true; } - reset_node_bitmap(job_ptr->select_job, + reset_node_bitmap(job_ptr->job_resrcs, job_ptr->node_bitmap); if (!job_fail && !IS_JOB_FINISHED(job_ptr) && - job_ptr->select_job && (cr_flag || gang_flag) && - valid_select_job_res(job_ptr->select_job, + job_ptr->job_resrcs && (cr_flag || gang_flag) && + valid_job_resources(job_ptr->job_resrcs, node_record_table_ptr, slurmctld_conf.fast_schedule)) { error("Aborting JobID %u due to change in socket/core " diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index f1c491e9db356d67a29e5fcf76c5ccc66b26335d..f3e7d9e99d71fac2b0c6d0419bead081c51ab14e 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -574,19 +574,18 @@ extern void launch_job(struct job_record *job_ptr) launch_msg_ptr->script = get_job_script(job_ptr); launch_msg_ptr->environment = get_job_env(job_ptr, &launch_msg_ptr->envc); - launch_msg_ptr->job_mem = job_ptr->details->job_min_memory; - - launch_msg_ptr->num_cpu_groups = job_ptr->select_job->cpu_array_cnt; + launch_msg_ptr->job_mem = job_ptr->details->job_min_memory; + launch_msg_ptr->num_cpu_groups = job_ptr->job_resrcs->cpu_array_cnt; 
launch_msg_ptr->cpus_per_node = xmalloc(sizeof(uint16_t) * - job_ptr->select_job->cpu_array_cnt); + job_ptr->job_resrcs->cpu_array_cnt); memcpy(launch_msg_ptr->cpus_per_node, - job_ptr->select_job->cpu_array_value, - (sizeof(uint16_t) * job_ptr->select_job->cpu_array_cnt)); + job_ptr->job_resrcs->cpu_array_value, + (sizeof(uint16_t) * job_ptr->job_resrcs->cpu_array_cnt)); launch_msg_ptr->cpu_count_reps = xmalloc(sizeof(uint32_t) * - job_ptr->select_job->cpu_array_cnt); + job_ptr->job_resrcs->cpu_array_cnt); memcpy(launch_msg_ptr->cpu_count_reps, - job_ptr->select_job->cpu_array_reps, - (sizeof(uint32_t) * job_ptr->select_job->cpu_array_cnt)); + job_ptr->job_resrcs->cpu_array_reps, + (sizeof(uint32_t) * job_ptr->job_resrcs->cpu_array_cnt)); launch_msg_ptr->select_jobinfo = select_g_select_jobinfo_copy( job_ptr->select_jobinfo); @@ -613,7 +612,7 @@ extern int make_batch_job_cred(batch_job_launch_msg_t *launch_msg_ptr, struct job_record *job_ptr) { slurm_cred_arg_t cred_arg; - select_job_res_t *select_ptr; + job_resources_t *job_resrcs_ptr; cred_arg.jobid = launch_msg_ptr->job_id; cred_arg.stepid = launch_msg_ptr->step_id; @@ -626,22 +625,22 @@ extern int make_batch_job_cred(batch_job_launch_msg_t *launch_msg_ptr, if (job_ptr->details == NULL) cred_arg.job_mem = 0; else if (job_ptr->details->job_min_memory & MEM_PER_CPU) { - xassert(job_ptr->select_job); - xassert(job_ptr->select_job->cpus); + xassert(job_ptr->job_resrcs); + xassert(job_ptr->job_resrcs->cpus); cred_arg.job_mem = job_ptr->details->job_min_memory; cred_arg.job_mem &= (~MEM_PER_CPU); - cred_arg.job_mem *= job_ptr->select_job->cpus[0]; + cred_arg.job_mem *= job_ptr->job_resrcs->cpus[0]; } else cred_arg.job_mem = job_ptr->details->job_min_memory; /* Identify the cores allocated to this job. 
*/ - xassert(job_ptr->select_job); - select_ptr = job_ptr->select_job; - cred_arg.core_bitmap = select_ptr->core_bitmap; - cred_arg.cores_per_socket = select_ptr->cores_per_socket; - cred_arg.sockets_per_node = select_ptr->sockets_per_node; - cred_arg.sock_core_rep_count = select_ptr->sock_core_rep_count; - cred_arg.job_nhosts = select_ptr->nhosts; + xassert(job_ptr->job_resrcs); + job_resrcs_ptr = job_ptr->job_resrcs; + cred_arg.core_bitmap = job_resrcs_ptr->core_bitmap; + cred_arg.cores_per_socket = job_resrcs_ptr->cores_per_socket; + cred_arg.sockets_per_node = job_resrcs_ptr->sockets_per_node; + cred_arg.sock_core_rep_count = job_resrcs_ptr->sock_core_rep_count; + cred_arg.job_nhosts = job_resrcs_ptr->nhosts; cred_arg.job_hostlist = job_ptr->nodes; launch_msg_ptr->cred = slurm_cred_create(slurmctld_config.cred_ctx, diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 1a173ea8a19cc840c0153dc7f9bef5cd2d5e1df1..4309895daaaf2f69066adbb11b4f4fc0b7986a61 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -613,9 +613,9 @@ static int _make_step_cred(struct step_record *step_ptr, { slurm_cred_arg_t cred_arg; struct job_record* job_ptr = step_ptr->job_ptr; - select_job_res_t *select_ptr = job_ptr->select_job; + job_resources_t *job_resrcs_ptr = job_ptr->job_resrcs; - xassert(select_ptr && select_ptr->cpus); + xassert(job_resrcs_ptr && job_resrcs_ptr->cpus); cred_arg.jobid = job_ptr->job_id; cred_arg.stepid = step_ptr->step_id; cred_arg.uid = job_ptr->user_id; @@ -636,10 +636,10 @@ static int _make_step_cred(struct step_record *step_ptr, * The slurmd must identify the appropriate cores to be used * by each step. 
*/ cred_arg.core_bitmap = step_ptr->core_bitmap_job; - cred_arg.cores_per_socket = select_ptr->cores_per_socket; - cred_arg.sockets_per_node = select_ptr->sockets_per_node; - cred_arg.sock_core_rep_count = select_ptr->sock_core_rep_count; - cred_arg.job_nhosts = select_ptr->nhosts; + cred_arg.cores_per_socket = job_resrcs_ptr->cores_per_socket; + cred_arg.sockets_per_node = job_resrcs_ptr->sockets_per_node; + cred_arg.sock_core_rep_count = job_resrcs_ptr->sock_core_rep_count; + cred_arg.job_nhosts = job_resrcs_ptr->nhosts; cred_arg.job_hostlist = job_ptr->nodes; *slurm_cred = slurm_cred_create(slurmctld_config.cred_ctx, &cred_arg); @@ -717,22 +717,22 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) job_ptr->job_id, job_ptr->nodes, TIME_STR); /* send job_ID and node_name_ptr */ - if (job_ptr->select_job && job_ptr->select_job->cpu_array_cnt) { - alloc_msg.num_cpu_groups = job_ptr->select_job-> + if (job_ptr->job_resrcs && job_ptr->job_resrcs->cpu_array_cnt) { + alloc_msg.num_cpu_groups = job_ptr->job_resrcs-> cpu_array_cnt; alloc_msg.cpu_count_reps = xmalloc(sizeof(uint32_t) * - job_ptr->select_job-> + job_ptr->job_resrcs-> cpu_array_cnt); memcpy(alloc_msg.cpu_count_reps, - job_ptr->select_job->cpu_array_reps, - (sizeof(uint32_t) * job_ptr->select_job-> + job_ptr->job_resrcs->cpu_array_reps, + (sizeof(uint32_t) * job_ptr->job_resrcs-> cpu_array_cnt)); alloc_msg.cpus_per_node = xmalloc(sizeof(uint16_t) * - job_ptr->select_job-> + job_ptr->job_resrcs-> cpu_array_cnt); memcpy(alloc_msg.cpus_per_node, - job_ptr->select_job->cpu_array_value, - (sizeof(uint16_t) * job_ptr->select_job-> + job_ptr->job_resrcs->cpu_array_value, + (sizeof(uint16_t) * job_ptr->job_resrcs-> cpu_array_cnt)); } else { alloc_msg.num_cpu_groups = 0; @@ -1646,7 +1646,7 @@ static void _slurm_rpc_job_alloc_info(slurm_msg_t * msg) END_TIMER2("_slurm_rpc_job_alloc_info"); /* return result */ - if (error_code || (job_ptr == NULL) || (job_ptr->select_job == NULL)) { + if (error_code || 
(job_ptr == NULL) || (job_ptr->job_resrcs == NULL)) { if (do_unlock) unlock_slurmctld(job_read_lock); debug2("_slurm_rpc_job_alloc_info: JobId=%u, uid=%u: %s", @@ -1658,20 +1658,20 @@ static void _slurm_rpc_job_alloc_info(slurm_msg_t * msg) job_info_msg->job_id, job_ptr->nodes, TIME_STR); /* send job_ID and node_name_ptr */ - job_info_resp_msg.num_cpu_groups = job_ptr->select_job-> + job_info_resp_msg.num_cpu_groups = job_ptr->job_resrcs-> cpu_array_cnt; job_info_resp_msg.cpu_count_reps = xmalloc(sizeof(uint32_t) * - job_ptr->select_job->cpu_array_cnt); + job_ptr->job_resrcs->cpu_array_cnt); memcpy(job_info_resp_msg.cpu_count_reps, - job_ptr->select_job->cpu_array_reps, - (sizeof(uint32_t) * job_ptr->select_job->cpu_array_cnt)); + job_ptr->job_resrcs->cpu_array_reps, + (sizeof(uint32_t) * job_ptr->job_resrcs->cpu_array_cnt)); job_info_resp_msg.cpus_per_node = xmalloc(sizeof(uint16_t) * - job_ptr->select_job->cpu_array_cnt); + job_ptr->job_resrcs->cpu_array_cnt); memcpy(job_info_resp_msg.cpus_per_node, - job_ptr->select_job->cpu_array_value, - (sizeof(uint16_t) * job_ptr->select_job->cpu_array_cnt)); + job_ptr->job_resrcs->cpu_array_value, + (sizeof(uint16_t) * job_ptr->job_resrcs->cpu_array_cnt)); job_info_resp_msg.error_code = error_code; job_info_resp_msg.job_id = job_info_msg->job_id; job_info_resp_msg.node_addr = xmalloc(sizeof(slurm_addr) * @@ -1725,7 +1725,7 @@ static void _slurm_rpc_job_alloc_info_lite(slurm_msg_t * msg) END_TIMER2("_slurm_rpc_job_alloc_info_lite"); /* return result */ - if (error_code || (job_ptr == NULL) || (job_ptr->select_job == NULL)) { + if (error_code || (job_ptr == NULL) || (job_ptr->job_resrcs == NULL)) { if (do_unlock) unlock_slurmctld(job_read_lock); info("_slurm_rpc_job_alloc_info_lite: JobId=%u, uid=%u: %s", @@ -1736,20 +1736,20 @@ static void _slurm_rpc_job_alloc_info_lite(slurm_msg_t * msg) job_info_msg->job_id, job_ptr->nodes, TIME_STR); /* send job_ID and node_name_ptr */ - job_info_resp_msg.num_cpu_groups = 
job_ptr->select_job-> + job_info_resp_msg.num_cpu_groups = job_ptr->job_resrcs-> cpu_array_cnt; job_info_resp_msg.cpu_count_reps = xmalloc(sizeof(uint32_t) * - job_ptr->select_job->cpu_array_cnt); + job_ptr->job_resrcs->cpu_array_cnt); memcpy(job_info_resp_msg.cpu_count_reps, - job_ptr->select_job->cpu_array_reps, - (sizeof(uint32_t) * job_ptr->select_job->cpu_array_cnt)); + job_ptr->job_resrcs->cpu_array_reps, + (sizeof(uint32_t) * job_ptr->job_resrcs->cpu_array_cnt)); job_info_resp_msg.cpus_per_node = xmalloc(sizeof(uint16_t) * - job_ptr->select_job->cpu_array_cnt); + job_ptr->job_resrcs->cpu_array_cnt); memcpy(job_info_resp_msg.cpus_per_node, - job_ptr->select_job->cpu_array_value, - (sizeof(uint16_t) * job_ptr->select_job->cpu_array_cnt)); + job_ptr->job_resrcs->cpu_array_value, + (sizeof(uint16_t) * job_ptr->job_resrcs->cpu_array_cnt)); job_info_resp_msg.error_code = error_code; job_info_resp_msg.job_id = job_info_msg->job_id; job_info_resp_msg.node_cnt = job_ptr->node_cnt; @@ -3277,17 +3277,17 @@ int _launch_batch_step(job_desc_msg_t *job_desc_msg, uid_t uid, if (launch_msg_ptr->nprocs < 0) launch_msg_ptr->nprocs = job_ptr->num_procs; - launch_msg_ptr->num_cpu_groups = job_ptr->select_job->cpu_array_cnt; + launch_msg_ptr->num_cpu_groups = job_ptr->job_resrcs->cpu_array_cnt; launch_msg_ptr->cpus_per_node = xmalloc(sizeof(uint16_t) * - job_ptr->select_job->cpu_array_cnt); + job_ptr->job_resrcs->cpu_array_cnt); memcpy(launch_msg_ptr->cpus_per_node, - job_ptr->select_job->cpu_array_value, - (sizeof(uint16_t) * job_ptr->select_job->cpu_array_cnt)); + job_ptr->job_resrcs->cpu_array_value, + (sizeof(uint16_t) * job_ptr->job_resrcs->cpu_array_cnt)); launch_msg_ptr->cpu_count_reps = xmalloc(sizeof(uint32_t) * - job_ptr->select_job->cpu_array_cnt); + job_ptr->job_resrcs->cpu_array_cnt); memcpy(launch_msg_ptr->cpu_count_reps, - job_ptr->select_job->cpu_array_reps, - (sizeof(uint32_t) * job_ptr->select_job->cpu_array_cnt)); + job_ptr->job_resrcs->cpu_array_reps, + 
(sizeof(uint32_t) * job_ptr->job_resrcs->cpu_array_cnt)); launch_msg_ptr->select_jobinfo = select_g_select_jobinfo_copy( job_ptr->select_jobinfo); diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 4b7db84f3dbbe56b517eb9275f011472fdb0159d..71d6a7fe705353dde3ce779aef1aea78d97c7d2a 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -73,7 +73,7 @@ #include "src/common/node_conf.h" #include "src/common/pack.h" #include "src/common/read_config.h" /* location of slurmctld_conf */ -#include "src/common/select_job_res.h" +#include "src/common/job_resources.h" #include "src/common/slurm_cred.h" #include "src/common/slurm_protocol_api.h" #include "src/common/slurm_protocol_defs.h" @@ -448,7 +448,7 @@ struct job_record { uint32_t requid; /* requester user ID */ char *resp_host; /* host for srun communications */ select_jobinfo_t *select_jobinfo;/* opaque data, BlueGene */ - select_job_res_t *select_job; /* details of allocated cores */ + job_resources_t *job_resrcs; /* details of allocated cores */ char **spank_job_env; /* environment variables for job prolog * and epilog scripts as set by SPANK * plugins */ @@ -493,7 +493,7 @@ struct step_record { time_t ckpt_time; /* time of last checkpoint */ bitstr_t *core_bitmap_job; /* bitmap of cores allocated to this * step relative to job's nodes, - * see src/common/select_job_res.h */ + * see src/common/job_resources.h */ uint32_t cpu_count; /* count of step's CPUs */ uint16_t cpus_per_task; /* cpus per task initiated */ uint16_t cyclic_alloc; /* set for cyclic task allocation diff --git a/src/slurmctld/srun_comm.c b/src/slurmctld/srun_comm.c index 8983c2b9e01da7b200332ff763f5a4bdbcdfacf8..ee94432b8a8d7a65c31f0abb0a96f1769c427a24 100644 --- a/src/slurmctld/srun_comm.c +++ b/src/slurmctld/srun_comm.c @@ -81,11 +81,11 @@ extern void srun_allocate (uint32_t job_id) xassert(job_ptr); if (job_ptr && job_ptr->alloc_resp_port && job_ptr->alloc_node && - job_ptr->resp_host && job_ptr->select_job 
&& - job_ptr->select_job->cpu_array_cnt) { + job_ptr->resp_host && job_ptr->job_resrcs && + job_ptr->job_resrcs->cpu_array_cnt) { slurm_addr * addr; resource_allocation_response_msg_t *msg_arg; - select_job_res_t *select_ptr = job_ptr->select_job; + job_resources_t *job_resrcs_ptr = job_ptr->job_resrcs; addr = xmalloc(sizeof(struct sockaddr_in)); slurm_set_addr(addr, job_ptr->alloc_resp_port, @@ -93,17 +93,17 @@ extern void srun_allocate (uint32_t job_id) msg_arg = xmalloc(sizeof(resource_allocation_response_msg_t)); msg_arg->job_id = job_ptr->job_id; msg_arg->node_list = xstrdup(job_ptr->nodes); - msg_arg->num_cpu_groups = select_ptr->cpu_array_cnt; + msg_arg->num_cpu_groups = job_resrcs_ptr->cpu_array_cnt; msg_arg->cpus_per_node = xmalloc(sizeof(uint16_t) * - select_ptr->cpu_array_cnt); + job_resrcs_ptr->cpu_array_cnt); memcpy(msg_arg->cpus_per_node, - select_ptr->cpu_array_value, - (sizeof(uint16_t) * select_ptr->cpu_array_cnt)); + job_resrcs_ptr->cpu_array_value, + (sizeof(uint16_t) * job_resrcs_ptr->cpu_array_cnt)); msg_arg->cpu_count_reps = xmalloc(sizeof(uint32_t) * - select_ptr->cpu_array_cnt); + job_resrcs_ptr->cpu_array_cnt); memcpy(msg_arg->cpu_count_reps, - select_ptr->cpu_array_reps, - (sizeof(uint32_t) * select_ptr->cpu_array_cnt)); + job_resrcs_ptr->cpu_array_reps, + (sizeof(uint32_t) * job_resrcs_ptr->cpu_array_cnt)); msg_arg->node_cnt = job_ptr->node_cnt; msg_arg->select_jobinfo = select_g_select_jobinfo_copy( job_ptr->select_jobinfo); diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c index b25e869661e3d9ea8ee046e5c00c7f27e5e0c1ad..e243e250891385a8b979401cffad25a1750f1315 100644 --- a/src/slurmctld/step_mgr.c +++ b/src/slurmctld/step_mgr.c @@ -451,11 +451,11 @@ _pick_step_nodes (struct job_record *job_ptr, int mem_blocked_nodes = 0, mem_blocked_cpus = 0; ListIterator step_iterator; struct step_record *step_p; - select_job_res_t *select_ptr = job_ptr->select_job; + job_resources_t *job_resrcs_ptr = job_ptr->job_resrcs; - 
xassert(select_ptr); - xassert(select_ptr->cpus); - xassert(select_ptr->cpus_used); + xassert(job_resrcs_ptr); + xassert(job_resrcs_ptr->cpus); + xassert(job_resrcs_ptr->cpus_used); *return_code = SLURM_SUCCESS; if (job_ptr->node_bitmap == NULL) { @@ -469,8 +469,8 @@ _pick_step_nodes (struct job_record *job_ptr, bit_and (nodes_avail, up_node_bitmap); if (step_spec->mem_per_cpu && - ((select_ptr->memory_allocated == NULL) || - (select_ptr->memory_used == NULL))) { + ((job_resrcs_ptr->memory_allocated == NULL) || + (job_resrcs_ptr->memory_used == NULL))) { error("_pick_step_nodes: lack memory allocation details " "to enforce memory limits for job %u", job_ptr->job_id); step_spec->mem_per_cpu = 0; @@ -543,17 +543,17 @@ _pick_step_nodes (struct job_record *job_ptr, } node_inx = -1; - i_first = bit_ffs(select_ptr->node_bitmap); - i_last = bit_fls(select_ptr->node_bitmap); + i_first = bit_ffs(job_resrcs_ptr->node_bitmap); + i_last = bit_fls(job_resrcs_ptr->node_bitmap); for (i=i_first; i<=i_last; i++) { - if (!bit_test(select_ptr->node_bitmap, i)) + if (!bit_test(job_resrcs_ptr->node_bitmap, i)) continue; node_inx++; if (!bit_test(nodes_avail, i)) continue; /* node now DOWN */ - avail_cpus = select_ptr->cpus[node_inx] - - select_ptr->cpus_used[node_inx]; - total_cpus = select_ptr->cpus[node_inx]; + avail_cpus = job_resrcs_ptr->cpus[node_inx] - + job_resrcs_ptr->cpus_used[node_inx]; + total_cpus = job_resrcs_ptr->cpus[node_inx]; if (cpus_per_task > 0) { avail_tasks = avail_cpus / cpus_per_task; total_tasks = total_cpus / cpus_per_task; @@ -562,15 +562,15 @@ _pick_step_nodes (struct job_record *job_ptr, total_tasks = step_spec->num_tasks; } if (step_spec->mem_per_cpu) { - avail_mem = select_ptr-> + avail_mem = job_resrcs_ptr-> memory_allocated[node_inx] - - select_ptr->memory_used[node_inx]; + job_resrcs_ptr->memory_used[node_inx]; task_cnt = avail_mem / step_spec->mem_per_cpu; if (cpus_per_task > 0) task_cnt /= cpus_per_task; avail_tasks = MIN(avail_tasks, task_cnt); - 
total_mem = select_ptr-> + total_mem = job_resrcs_ptr-> memory_allocated[node_inx]; task_cnt = total_mem / step_spec->mem_per_cpu; if (cpus_per_task > 0) @@ -611,12 +611,12 @@ _pick_step_nodes (struct job_record *job_ptr, if (step_spec->mem_per_cpu) { int node_inx = 0, usable_mem; - for (i=bit_ffs(select_ptr->node_bitmap); i<node_record_count; + for (i=bit_ffs(job_resrcs_ptr->node_bitmap); i<node_record_count; i++) { - if (!bit_test(select_ptr->node_bitmap, i)) + if (!bit_test(job_resrcs_ptr->node_bitmap, i)) continue; - usable_mem = select_ptr->memory_allocated[node_inx] - - select_ptr->memory_used[node_inx]; + usable_mem = job_resrcs_ptr->memory_allocated[node_inx] - + job_resrcs_ptr->memory_used[node_inx]; task_cnt = usable_mem / step_spec->mem_per_cpu; if (cpus_per_task > 0) task_cnt /= cpus_per_task; @@ -629,14 +629,14 @@ _pick_step_nodes (struct job_record *job_ptr, } bit_clear(nodes_avail, i); mem_blocked_nodes++; - task_cnt = select_ptr-> + task_cnt = job_resrcs_ptr-> memory_allocated[node_inx] / step_spec->mem_per_cpu; mem_blocked_cpus += MIN(task_cnt, - select_ptr-> + job_resrcs_ptr-> cpus[node_inx]); } - if (++node_inx >= select_ptr->nhosts) + if (++node_inx >= job_resrcs_ptr->nhosts) break; } } @@ -779,12 +779,12 @@ _pick_step_nodes (struct job_record *job_ptr, /* if user specifies step needs a specific processor count and * all nodes have the same processor count, just translate this to * a node count */ - if (step_spec->cpu_count && job_ptr->select_job && - (job_ptr->select_job->cpu_array_cnt == 1) && - (job_ptr->select_job->cpu_array_value)) { + if (step_spec->cpu_count && job_ptr->job_resrcs && + (job_ptr->job_resrcs->cpu_array_cnt == 1) && + (job_ptr->job_resrcs->cpu_array_value)) { i = (step_spec->cpu_count + - (job_ptr->select_job->cpu_array_value[0] - 1)) / - job_ptr->select_job->cpu_array_value[0]; + (job_ptr->job_resrcs->cpu_array_value[0] - 1)) / + job_ptr->job_resrcs->cpu_array_value[0]; step_spec->node_count = (i > step_spec->node_count) ? 
i : step_spec->node_count ; //step_spec->cpu_count = 0; @@ -894,7 +894,7 @@ static int _count_cpus(bitstr_t *bitmap) * Add the specified task count for a specific node in the job's * and step's allocation */ static void _pick_step_cores(struct step_record *step_ptr, - select_job_res_t *select_ptr, + job_resources_t *job_resrcs_ptr, int job_node_inx, uint16_t task_cnt) { int bit_offset, core_inx, i, sock_inx; @@ -904,11 +904,11 @@ static void _pick_step_cores(struct step_record *step_ptr, static int last_core_inx; if (!step_ptr->core_bitmap_job) { - step_ptr->core_bitmap_job = bit_alloc(bit_size(select_ptr-> + step_ptr->core_bitmap_job = bit_alloc(bit_size(job_resrcs_ptr-> core_bitmap)); } - if (get_select_job_res_cnt(select_ptr, job_node_inx, &sockets, &cores)) - fatal("get_select_job_res_cnt"); + if (get_job_resources_cnt(job_resrcs_ptr, job_node_inx, &sockets, &cores)) + fatal("get_job_resources_cnt"); if (task_cnt == (cores * sockets)) use_all_cores = true; @@ -920,18 +920,18 @@ static void _pick_step_cores(struct step_record *step_ptr, /* select idle cores first */ for (core_inx=0; core_inx<cores; core_inx++) { for (sock_inx=0; sock_inx<sockets; sock_inx++) { - bit_offset = get_select_job_res_offset(select_ptr, + bit_offset = get_job_resources_offset(job_resrcs_ptr, job_node_inx, sock_inx, core_inx); if (bit_offset < 0) - fatal("get_select_job_res_offset"); - if (!bit_test(select_ptr->core_bitmap, bit_offset)) + fatal("get_job_resources_offset"); + if (!bit_test(job_resrcs_ptr->core_bitmap, bit_offset)) continue; if ((use_all_cores == false) && - bit_test(select_ptr->core_bitmap_used, bit_offset)) + bit_test(job_resrcs_ptr->core_bitmap_used, bit_offset)) continue; - bit_set(select_ptr->core_bitmap_used, bit_offset); + bit_set(job_resrcs_ptr->core_bitmap_used, bit_offset); bit_set(step_ptr->core_bitmap_job, bit_offset); #if 0 info("step alloc N:%d S:%dC :%d", @@ -952,13 +952,13 @@ static void _pick_step_cores(struct step_record *step_ptr, for (i=0; i<cores; i++) 
{ core_inx = (last_core_inx + i) % cores; for (sock_inx=0; sock_inx<sockets; sock_inx++) { - bit_offset = get_select_job_res_offset(select_ptr, + bit_offset = get_job_resources_offset(job_resrcs_ptr, job_node_inx, sock_inx, core_inx); if (bit_offset < 0) - fatal("get_select_job_res_offset"); - if (!bit_test(select_ptr->core_bitmap, bit_offset)) + fatal("get_job_resources_offset"); + if (!bit_test(job_resrcs_ptr->core_bitmap, bit_offset)) continue; if (bit_test(step_ptr->core_bitmap_job, bit_offset)) continue; /* already taken by this step */ @@ -978,29 +978,29 @@ static void _pick_step_cores(struct step_record *step_ptr, extern void step_alloc_lps(struct step_record *step_ptr) { struct job_record *job_ptr = step_ptr->job_ptr; - select_job_res_t *select_ptr = job_ptr->select_job; + job_resources_t *job_resrcs_ptr = job_ptr->job_resrcs; int cpus_alloc; int i_node, i_first, i_last; int job_node_inx = -1, step_node_inx = -1; bool pick_step_cores = true; - xassert(select_ptr); - xassert(select_ptr->cpus); - xassert(select_ptr->cpus_used); + xassert(job_resrcs_ptr); + xassert(job_resrcs_ptr->cpus); + xassert(job_resrcs_ptr->cpus_used); if (step_ptr->step_layout == NULL) /* batch step */ return; - i_first = bit_ffs(select_ptr->node_bitmap); - i_last = bit_fls(select_ptr->node_bitmap); + i_first = bit_ffs(job_resrcs_ptr->node_bitmap); + i_last = bit_fls(job_resrcs_ptr->node_bitmap); if (i_first == -1) /* empty bitmap */ return; #ifdef HAVE_BG pick_step_cores = false; #else - xassert(select_ptr->core_bitmap); - xassert(select_ptr->core_bitmap_used); + xassert(job_resrcs_ptr->core_bitmap); + xassert(job_resrcs_ptr->core_bitmap_used); if (step_ptr->core_bitmap_job) { /* "scontrol reconfig" of live system */ pick_step_cores = false; @@ -1008,39 +1008,39 @@ extern void step_alloc_lps(struct step_record *step_ptr) (step_ptr->cpu_count == job_ptr->total_procs)) { /* Step uses all of job's cores * Just copy the bitmap to save time */ - step_ptr->core_bitmap_job = 
bit_copy(select_ptr->core_bitmap); + step_ptr->core_bitmap_job = bit_copy(job_resrcs_ptr->core_bitmap); pick_step_cores = false; } #endif if (step_ptr->mem_per_cpu && - ((select_ptr->memory_allocated == NULL) || - (select_ptr->memory_used == NULL))) { + ((job_resrcs_ptr->memory_allocated == NULL) || + (job_resrcs_ptr->memory_used == NULL))) { error("step_alloc_lps: lack memory allocation details " "to enforce memory limits for job %u", job_ptr->job_id); step_ptr->mem_per_cpu = 0; } for (i_node = i_first; i_node <= i_last; i_node++) { - if (!bit_test(select_ptr->node_bitmap, i_node)) + if (!bit_test(job_resrcs_ptr->node_bitmap, i_node)) continue; job_node_inx++; if (!bit_test(step_ptr->step_node_bitmap, i_node)) continue; step_node_inx++; - if (job_node_inx >= select_ptr->nhosts) + if (job_node_inx >= job_resrcs_ptr->nhosts) fatal("step_alloc_lps: node index bad"); /* NOTE: The --overcommit option can result in * cpus_used[] having a higher value than cpus[] */ cpus_alloc = step_ptr->step_layout->tasks[step_node_inx] * step_ptr->cpus_per_task; - select_ptr->cpus_used[job_node_inx] += cpus_alloc; + job_resrcs_ptr->cpus_used[job_node_inx] += cpus_alloc; if (step_ptr->mem_per_cpu) { - select_ptr->memory_used[job_node_inx] += + job_resrcs_ptr->memory_used[job_node_inx] += (step_ptr->mem_per_cpu * cpus_alloc); } if (pick_step_cores) { - _pick_step_cores(step_ptr, select_ptr, + _pick_step_cores(step_ptr, job_resrcs_ptr, job_node_inx, step_ptr->step_layout-> tasks[step_node_inx]); @@ -1050,8 +1050,8 @@ extern void step_alloc_lps(struct step_record *step_ptr) if (slurm_get_debug_flags() & DEBUG_FLAG_STEPS) { info("step alloc of %s procs: %u of %u", node_record_table_ptr[i_node].name, - select_ptr->cpus_used[job_node_inx], - select_ptr->cpus[job_node_inx]); + job_resrcs_ptr->cpus_used[job_node_inx], + job_resrcs_ptr->cpus[job_node_inx]); } if (step_node_inx == (step_ptr->step_layout->node_cnt - 1)) break; @@ -1065,22 +1065,22 @@ extern void step_alloc_lps(struct step_record 
*step_ptr) static void _dump_step_layout(struct step_record *step_ptr) { struct job_record* job_ptr = step_ptr->job_ptr; - select_job_res_t *select_ptr = job_ptr->select_job; + job_resources_t *job_resrcs_ptr = job_ptr->job_resrcs; int i, bit_inx, core_inx, node_inx, rep, sock_inx; if ((step_ptr->core_bitmap_job == NULL) || - (select_ptr == NULL) || (select_ptr->cores_per_socket == NULL)) + (job_resrcs_ptr == NULL) || (job_resrcs_ptr->cores_per_socket == NULL)) return; info("===================="); info("step_id:%u.%u", job_ptr->job_id, step_ptr->step_id); - for (i=0, bit_inx= 0, node_inx=0; node_inx<select_ptr->nhosts; i++) { - for (rep=0; rep<select_ptr->sock_core_rep_count[i]; rep++) { + for (i=0, bit_inx= 0, node_inx=0; node_inx<job_resrcs_ptr->nhosts; i++) { + for (rep=0; rep<job_resrcs_ptr->sock_core_rep_count[i]; rep++) { for (sock_inx=0; - sock_inx<select_ptr->sockets_per_node[i]; + sock_inx<job_resrcs_ptr->sockets_per_node[i]; sock_inx++) { for (core_inx=0; - core_inx<select_ptr->cores_per_socket[i]; + core_inx<job_resrcs_ptr->cores_per_socket[i]; core_inx++) { if (bit_test(step_ptr-> core_bitmap_job, @@ -1101,78 +1101,78 @@ static void _dump_step_layout(struct step_record *step_ptr) static void _step_dealloc_lps(struct step_record *step_ptr) { struct job_record *job_ptr = step_ptr->job_ptr; - select_job_res_t *select_ptr = job_ptr->select_job; + job_resources_t *job_resrcs_ptr = job_ptr->job_resrcs; int cpus_alloc; int i_node, i_first, i_last; int job_node_inx = -1, step_node_inx = -1; - xassert(select_ptr); - xassert(select_ptr->cpus); - xassert(select_ptr->cpus_used); + xassert(job_resrcs_ptr); + xassert(job_resrcs_ptr->cpus); + xassert(job_resrcs_ptr->cpus_used); if (step_ptr->step_layout == NULL) /* batch step */ return; - i_first = bit_ffs(select_ptr->node_bitmap); - i_last = bit_fls(select_ptr->node_bitmap); + i_first = bit_ffs(job_resrcs_ptr->node_bitmap); + i_last = bit_fls(job_resrcs_ptr->node_bitmap); if (i_first == -1) /* empty bitmap */ 
return; if (step_ptr->mem_per_cpu && - ((select_ptr->memory_allocated == NULL) || - (select_ptr->memory_used == NULL))) { + ((job_resrcs_ptr->memory_allocated == NULL) || + (job_resrcs_ptr->memory_used == NULL))) { error("_step_dealloc_lps: lack memory allocation details " "to enforce memory limits for job %u", job_ptr->job_id); step_ptr->mem_per_cpu = 0; } for (i_node = i_first; i_node <= i_last; i_node++) { - if (!bit_test(select_ptr->node_bitmap, i_node)) + if (!bit_test(job_resrcs_ptr->node_bitmap, i_node)) continue; job_node_inx++; if (!bit_test(step_ptr->step_node_bitmap, i_node)) continue; step_node_inx++; - if (job_node_inx >= select_ptr->nhosts) + if (job_node_inx >= job_resrcs_ptr->nhosts) fatal("_step_dealloc_lps: node index bad"); cpus_alloc = step_ptr->step_layout->tasks[step_node_inx] * step_ptr->cpus_per_task; - if (select_ptr->cpus_used[job_node_inx] >= cpus_alloc) - select_ptr->cpus_used[job_node_inx] -= cpus_alloc; + if (job_resrcs_ptr->cpus_used[job_node_inx] >= cpus_alloc) + job_resrcs_ptr->cpus_used[job_node_inx] -= cpus_alloc; else { error("_step_dealloc_lps: cpu underflow for %u.%u", job_ptr->job_id, step_ptr->step_id); - select_ptr->cpus_used[job_node_inx] = 0; + job_resrcs_ptr->cpus_used[job_node_inx] = 0; } if (step_ptr->mem_per_cpu) { uint32_t mem_use = step_ptr->mem_per_cpu * cpus_alloc; - if (select_ptr->memory_used[job_node_inx] >= mem_use) { - select_ptr->memory_used[job_node_inx] -= + if (job_resrcs_ptr->memory_used[job_node_inx] >= mem_use) { + job_resrcs_ptr->memory_used[job_node_inx] -= mem_use; } else { error("_step_dealloc_lps: " "mem underflow for %u.%u", job_ptr->job_id, step_ptr->step_id); - select_ptr->memory_used[job_node_inx] = 0; + job_resrcs_ptr->memory_used[job_node_inx] = 0; } } if (slurm_get_debug_flags() & DEBUG_FLAG_STEPS) { info("step dealloc of %s procs: %u of %u", node_record_table_ptr[i_node].name, - select_ptr->cpus_used[job_node_inx], - select_ptr->cpus[job_node_inx]); + 
job_resrcs_ptr->cpus_used[job_node_inx], + job_resrcs_ptr->cpus[job_node_inx]); } if (step_node_inx == (step_ptr->step_layout->node_cnt - 1)) break; } #ifndef HAVE_BG - xassert(select_ptr->core_bitmap); - xassert(select_ptr->core_bitmap_used); + xassert(job_resrcs_ptr->core_bitmap); + xassert(job_resrcs_ptr->core_bitmap_used); if (step_ptr->core_bitmap_job) { /* Mark the job's cores as no longer in use */ bit_not(step_ptr->core_bitmap_job); - bit_and(select_ptr->core_bitmap_used, + bit_and(job_resrcs_ptr->core_bitmap_used, step_ptr->core_bitmap_job); /* no need for bit_not(step_ptr->core_bitmap_job); */ FREE_NULL_BITMAP(step_ptr->core_bitmap_job); @@ -1479,15 +1479,15 @@ extern slurm_step_layout_t *step_layout_create(struct step_record *step_ptr, int pos = -1; int first_bit, last_bit; struct job_record *job_ptr = step_ptr->job_ptr; - select_job_res_t *select_ptr = job_ptr->select_job; + job_resources_t *job_resrcs_ptr = job_ptr->job_resrcs; - xassert(select_ptr); - xassert(select_ptr->cpus); - xassert(select_ptr->cpus_used); + xassert(job_resrcs_ptr); + xassert(job_resrcs_ptr->cpus); + xassert(job_resrcs_ptr->cpus_used); if (step_ptr->mem_per_cpu && - ((select_ptr->memory_allocated == NULL) || - (select_ptr->memory_used == NULL))) { + ((job_resrcs_ptr->memory_allocated == NULL) || + (job_resrcs_ptr->memory_used == NULL))) { error("step_layout_create: lack memory allocation details " "to enforce memory limits for job %u", job_ptr->job_id); step_ptr->mem_per_cpu = 0; @@ -1500,19 +1500,19 @@ extern slurm_step_layout_t *step_layout_create(struct step_record *step_ptr, for (i = first_bit; i <= last_bit; i++) { if (bit_test(step_ptr->step_node_bitmap, i)) { /* find out the position in the job */ - pos = bit_get_pos_num(select_ptr->node_bitmap, i); + pos = bit_get_pos_num(job_resrcs_ptr->node_bitmap, i); if (pos == -1) return NULL; - if (pos >= select_ptr->nhosts) + if (pos >= job_resrcs_ptr->nhosts) fatal("step_layout_create: node index bad"); if (step_ptr->exclusive) { 
- usable_cpus = select_ptr->cpus[pos] - - select_ptr->cpus_used[pos]; + usable_cpus = job_resrcs_ptr->cpus[pos] - + job_resrcs_ptr->cpus_used[pos]; } else - usable_cpus = select_ptr->cpus[pos]; + usable_cpus = job_resrcs_ptr->cpus[pos]; if (step_ptr->mem_per_cpu) { - usable_mem = select_ptr->memory_allocated[pos]- - select_ptr->memory_used[pos]; + usable_mem = job_resrcs_ptr->memory_allocated[pos]- + job_resrcs_ptr->memory_used[pos]; usable_mem /= step_ptr->mem_per_cpu; usable_cpus = MIN(usable_cpus, usable_mem); } diff --git a/src/squeue/print.c b/src/squeue/print.c index 55b9c8d620ca9a70bb40902778c2c5df2ec3e379..3415e2a969464e2fe0c0b0911a8c83520a4eb440 100644 --- a/src/squeue/print.c +++ b/src/squeue/print.c @@ -662,14 +662,14 @@ int _print_job_num_procs(job_info_t * job, int width, bool right, char* suffix) if (job == NULL) /* Print the Header instead */ _print_str("CPUS", width, right, true); else { - if (job->select_job_res && - (job->select_job_res->cpu_array_cnt > 0) && - (job->select_job_res->cpu_array_value) && - (job->select_job_res->cpu_array_reps)) { + if (job->job_resources && + (job->job_resources->cpu_array_cnt > 0) && + (job->job_resources->cpu_array_value) && + (job->job_resources->cpu_array_reps)) { uint32_t cnt = 0, i; - for (i=0; i<job->select_job_res->cpu_array_cnt; i++) { - cnt += job->select_job_res->cpu_array_value[i] * - job->select_job_res->cpu_array_reps[i]; + for (i=0; i<job->job_resources->cpu_array_cnt; i++) { + cnt += job->job_resources->cpu_array_value[i] * + job->job_resources->cpu_array_reps[i]; } convert_num_unit((float)cnt, tmp_char, sizeof(tmp_char), UNIT_NONE);