diff --git a/NEWS b/NEWS index 9b386ff3ba5f962552adaac6c24ba1ecebda192b..0ad9c0af29e03c6884369f3bad24fc7ac343469f 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,7 @@ documents those changes that are of interest to users and admins. -- Maintain actually job step run time with suspend/resume use. -- Allow slurm.conf options to appear multiple times. SLURM will use the last instance of any particular option. + -- Add version number to node state save file, requires cold-start (slurmctld -c) * Changes in SLURM 1.2.0-pre5 ============================= diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 4a370cb25c024c8a1069bcd646d6b98f7bb5449b..db1b8eca991d63759e06c3a314f6494dec4a1de5 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -699,11 +699,11 @@ typedef struct job_step_info_response_msg { typedef struct node_info { char *name; /* node name */ uint16_t node_state; /* see enum node_states */ - uint32_t cpus; /* configured count of cpus running on + uint16_t cpus; /* configured count of cpus running on * the node */ - uint32_t sockets; /* number of sockets per node */ - uint32_t cores; /* number of cores per CPU */ - uint32_t threads; /* number of threads per core */ + uint16_t sockets; /* number of sockets per node */ + uint16_t cores; /* number of cores per CPU */ + uint16_t threads; /* number of threads per core */ uint32_t real_memory; /* configured MB of real memory on the node */ uint32_t tmp_disk; /* configured MB of total disk in TMP_FS */ uint32_t weight; /* arbitrary priority of node for scheduling */ diff --git a/src/common/read_config.c b/src/common/read_config.c index 83180d94aef2207e8118177b059c9d1bd136a19f..f2062b0347d8fc54ca1031353ee61521cfa92495 100644 --- a/src/common/read_config.c +++ b/src/common/read_config.c @@ -97,10 +97,10 @@ typedef struct names_ll_s { char *hostname; /* NodeHostname */ char *address; /* NodeAddr */ uint16_t port; - uint32_t cpus; - uint32_t sockets; - uint32_t cores; - uint32_t threads; + uint16_t cpus; + uint16_t sockets; 
+ uint16_t cores; + uint16_t threads; slurm_addr addr; bool addr_initialized; struct names_ll_s *next_alias; @@ -260,15 +260,15 @@ static int parse_nodename(void **dest, slurm_parser_enum_t type, static s_p_options_t _nodename_options[] = { {"NodeHostname", S_P_STRING}, {"NodeAddr", S_P_STRING}, - {"CoresPerSocket", S_P_UINT32}, + {"CoresPerSocket", S_P_UINT16}, {"Feature", S_P_STRING}, {"Port", S_P_UINT16}, - {"Procs", S_P_UINT32}, + {"Procs", S_P_UINT16}, {"RealMemory", S_P_UINT32}, {"Reason", S_P_STRING}, - {"Sockets", S_P_UINT32}, + {"Sockets", S_P_UINT16}, {"State", S_P_STRING}, - {"ThreadsPerCore", S_P_UINT32}, + {"ThreadsPerCore", S_P_UINT16}, {"TmpDisk", S_P_UINT32}, {"Weight", S_P_UINT32}, {NULL} @@ -313,8 +313,8 @@ static int parse_nodename(void **dest, slurm_parser_enum_t type, if (!s_p_get_string(&n->addresses, "NodeAddr", tbl)) n->addresses = xstrdup(n->hostnames); - if (!s_p_get_uint32(&n->cores, "CoresPerSocket", tbl) - && !s_p_get_uint32(&n->cores, "CoresPerSocket", dflt)) { + if (!s_p_get_uint16(&n->cores, "CoresPerSocket", tbl) + && !s_p_get_uint16(&n->cores, "CoresPerSocket", dflt)) { n->cores = 1; no_cores = true; } @@ -330,8 +330,8 @@ static int parse_nodename(void **dest, slurm_parser_enum_t type, n->port = SLURMD_PORT; } - if (!s_p_get_uint32(&n->cpus, "Procs", tbl) - && !s_p_get_uint32(&n->cpus, "Procs", dflt)) { + if (!s_p_get_uint16(&n->cpus, "Procs", tbl) + && !s_p_get_uint16(&n->cpus, "Procs", dflt)) { n->cpus = 1; no_cpus = true; } @@ -343,8 +343,8 @@ static int parse_nodename(void **dest, slurm_parser_enum_t type, if (!s_p_get_string(&n->reason, "Reason", tbl)) s_p_get_string(&n->reason, "Reason", dflt); - if (!s_p_get_uint32(&n->sockets, "Sockets", tbl) - && !s_p_get_uint32(&n->sockets, "Sockets", dflt)) { + if (!s_p_get_uint16(&n->sockets, "Sockets", tbl) + && !s_p_get_uint16(&n->sockets, "Sockets", dflt)) { n->sockets = 1; no_sockets = true; } @@ -353,8 +353,8 @@ static int parse_nodename(void **dest, slurm_parser_enum_t type, && 
!s_p_get_string(&n->state, "State", dflt)) n->state = NULL; - if (!s_p_get_uint32(&n->threads, "ThreadsPerCore", tbl) - && !s_p_get_uint32(&n->threads, "ThreadsPerCore", dflt)) { + if (!s_p_get_uint16(&n->threads, "ThreadsPerCore", tbl) + && !s_p_get_uint16(&n->threads, "ThreadsPerCore", dflt)) { n->threads = 1; no_threads = true; } @@ -663,8 +663,8 @@ static int _get_hash_idx(const char *s) static void _push_to_hashtbls(char *alias, char *hostname, char *address, uint16_t port, - uint32_t cpus, uint32_t sockets, - uint32_t cores, uint32_t threads) + uint16_t cpus, uint16_t sockets, + uint16_t cores, uint16_t threads) { int hostname_idx, alias_idx; names_ll_t *p, *new; @@ -948,8 +948,8 @@ extern int slurm_conf_get_addr(const char *node_name, slurm_addr *address) * Returns SLURM_SUCCESS on success, SLURM_FAILURE on failure. */ extern int slurm_conf_get_cpus_sct(const char *node_name, - uint32_t *cpus, uint32_t *sockets, - uint32_t *cores, uint32_t *threads) + uint16_t *cpus, uint16_t *sockets, + uint16_t *cores, uint16_t *threads) { int idx; names_ll_t *p; diff --git a/src/common/read_config.h b/src/common/read_config.h index 4ed982693f8f0d4bff1d5d135222c10025b4780e..e3f0b11c45a34735ca0d671dfbf27ba804744fb9 100644 --- a/src/common/read_config.h +++ b/src/common/read_config.h @@ -99,10 +99,10 @@ typedef struct slurm_conf_node { char *addresses; char *feature; /* arbitrary list of features associated */ uint16_t port; - uint32_t cpus; /* count of cpus running on the node */ - uint32_t sockets; /* number of sockets per node */ - uint32_t cores; /* number of cores per CPU */ - uint32_t threads; /* number of threads per core */ + uint16_t cpus; /* count of cpus running on the node */ + uint16_t sockets; /* number of sockets per node */ + uint16_t cores; /* number of cores per CPU */ + uint16_t threads; /* number of threads per core */ uint32_t real_memory; /* MB real memory on the node */ char *reason; char *state; @@ -255,14 +255,14 @@ extern int 
slurm_conf_get_addr(const char *node_name, slurm_addr *address); /* * slurm_conf_get_cpus_sct - - * Return the cpus, sockets, cores, and threads for a given NodeName + * Return the cpus, sockets, cores, and threads configured for a given NodeName * Returns SLURM_SUCCESS on success, SLURM_FAILURE on failure. * * NOTE: Caller must NOT be holding slurm_conf_lock(). */ extern int slurm_conf_get_cpus_sct(const char *node_name, - uint32_t *procs, uint32_t *sockets, - uint32_t *cores, uint32_t *threads); + uint16_t *procs, uint16_t *sockets, + uint16_t *cores, uint16_t *threads); /* * init_slurm_conf - initialize or re-initialize the slurm configuration diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index 29f8893de10dd65476a2bdbfa134bccbd71d8b19..fdc500cb90df8a23f1d7d59680ce0f550cc468b0 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -596,10 +596,10 @@ typedef struct file_bcast_msg { typedef struct slurm_node_registration_status_msg { time_t timestamp; char *node_name; - uint32_t cpus; - uint32_t sockets; - uint32_t cores; - uint32_t threads; + uint16_t cpus; + uint16_t sockets; + uint16_t cores; + uint16_t threads; uint32_t real_memory_size; uint32_t temporary_disk_space; uint32_t job_count; /* number of associate job_id's */ diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 8f7dad2950cb761b8d28bf76e2448e69ca8f265e..934d5e2ffcd41ed8e9054365bbb3469c1a2a3cfb 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -1023,10 +1023,10 @@ _pack_node_registration_status_msg(slurm_node_registration_status_msg_t * pack_time(msg->timestamp, buffer); pack32((uint32_t)msg->status, buffer); packstr(msg->node_name, buffer); - pack32((uint32_t)msg->cpus, buffer); - pack32((uint32_t)msg->sockets, buffer); - pack32((uint32_t)msg->cores, buffer); - pack32((uint32_t)msg->threads, buffer); + pack16((uint32_t)msg->cpus, buffer); + 
pack16((uint32_t)msg->sockets, buffer); + pack16((uint32_t)msg->cores, buffer); + pack16((uint32_t)msg->threads, buffer); pack32((uint32_t)msg->real_memory_size, buffer); pack32((uint32_t)msg->temporary_disk_space, buffer); pack32((uint32_t)msg->job_count, buffer); @@ -1059,10 +1059,10 @@ _unpack_node_registration_status_msg(slurm_node_registration_status_msg_t /* load the data values */ safe_unpack32(&node_reg_ptr->status, buffer); safe_unpackstr_xmalloc(&node_reg_ptr->node_name, &uint16_tmp, buffer); - safe_unpack32(&node_reg_ptr->cpus, buffer); - safe_unpack32(&node_reg_ptr->sockets, buffer); - safe_unpack32(&node_reg_ptr->cores, buffer); - safe_unpack32(&node_reg_ptr->threads, buffer); + safe_unpack16(&node_reg_ptr->cpus, buffer); + safe_unpack16(&node_reg_ptr->sockets, buffer); + safe_unpack16(&node_reg_ptr->cores, buffer); + safe_unpack16(&node_reg_ptr->threads, buffer); safe_unpack32(&node_reg_ptr->real_memory_size, buffer); safe_unpack32(&node_reg_ptr->temporary_disk_space, buffer); safe_unpack32(&node_reg_ptr->job_count, buffer); @@ -1325,10 +1325,10 @@ _unpack_node_info_members(node_info_t * node, Buf buffer) safe_unpackstr_xmalloc(&node->name, &uint16_tmp, buffer); safe_unpack16(&node->node_state, buffer); - safe_unpack32(&node->cpus, buffer); - safe_unpack32(&node->sockets, buffer); - safe_unpack32(&node->cores, buffer); - safe_unpack32(&node->threads, buffer); + safe_unpack16(&node->cpus, buffer); + safe_unpack16(&node->sockets, buffer); + safe_unpack16(&node->cores, buffer); + safe_unpack16(&node->threads, buffer); safe_unpack32(&node->real_memory, buffer); safe_unpack32(&node->tmp_disk, buffer); safe_unpack32(&node->weight, buffer); diff --git a/src/plugins/task/affinity/dist_tasks.c b/src/plugins/task/affinity/dist_tasks.c index ec38c81730fcac0ed03bbe118bc3e209e606940c..711a4b745d342f64587c2131069701797afed8f3 100644 --- a/src/plugins/task/affinity/dist_tasks.c +++ b/src/plugins/task/affinity/dist_tasks.c @@ -107,7 +107,7 @@ static void 
_cr_update_reservation(int reserve, uint32_t *reserved, #define BLOCK_MAP(index) _block_map(index, conf->block_map) #define BLOCK_MAP_INV(index) _block_map(index, conf->block_map_inv) -static uint32_t _block_map(uint32_t index, uint32_t *map); +static uint16_t _block_map(uint16_t index, uint16_t *map); /* * lllp_distribution @@ -1202,14 +1202,14 @@ void _print_tasks_per_lllp (void) * IN - index to map * IN - map to use */ -static uint32_t _block_map(uint32_t index, uint32_t *map) +static uint16_t _block_map(uint16_t index, uint16_t *map) { if (map == NULL) { return index; } /* make sure bit falls in map */ if (index >= conf->block_map_size) { - debug3("wrapping index %d into block_map_size of %d", + debug3("wrapping index %u into block_map_size of %u", index, conf->block_map_size); index = index % conf->block_map_size; } diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 70e32c47fac9bec8854da340268b001c9d244020..165c5cad3ef84c03c8c609347523073fdf58abcc 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -71,6 +71,9 @@ #define _DEBUG 0 #define MAX_RETRIES 10 +/* Change NODE_STATE_VERSION value when changing the state save format */ +#define NODE_STATE_VERSION "VER001" + /* Global variables */ List config_list = NULL; /* list of config_record entries */ struct node_record *node_record_table_ptr = NULL; /* node records */ @@ -235,8 +238,9 @@ int dump_all_node_state ( void ) DEF_TIMERS; START_TIMER; - /* write header: time */ - pack_time (time (NULL), buffer); + /* write header: version, time */ + packstr(NODE_STATE_VERSION, buffer); + pack_time(time (NULL), buffer); /* write node records to buffer */ lock_slurmctld (node_read_lock); @@ -310,10 +314,10 @@ _dump_node_state (struct node_record *dump_node_ptr, Buf buffer) packstr (dump_node_ptr->name, buffer); packstr (dump_node_ptr->reason, buffer); pack16 (dump_node_ptr->node_state, buffer); - pack32 (dump_node_ptr->cpus, buffer); - pack32 (dump_node_ptr->sockets, buffer); - 
pack32 (dump_node_ptr->cores, buffer); - pack32 (dump_node_ptr->threads, buffer); + pack16 (dump_node_ptr->cpus, buffer); + pack16 (dump_node_ptr->sockets, buffer); + pack16 (dump_node_ptr->cores, buffer); + pack16 (dump_node_ptr->threads, buffer); pack32 (dump_node_ptr->real_memory, buffer); pack32 (dump_node_ptr->tmp_disk, buffer); } @@ -331,12 +335,14 @@ extern int load_all_node_state ( bool state_only ) char *node_name, *reason = NULL, *data = NULL, *state_file; int data_allocated, data_read = 0, error_code = 0, node_cnt = 0; uint16_t node_state, name_len; - uint32_t cpus = 1, sockets = 1, cores = 1, threads = 1; + uint16_t cpus = 1, sockets = 1, cores = 1, threads = 1; uint32_t real_memory, tmp_disk, data_size = 0; struct node_record *node_ptr; int state_fd; time_t time_stamp; Buf buffer; + char *ver_str = NULL; + uint16_t ver_str_len; /* read the file */ state_file = xstrdup (slurmctld_conf.state_save_location); @@ -372,6 +378,27 @@ extern int load_all_node_state ( bool state_only ) unlock_state_files (); buffer = create_buf (data, data_size); + + /* + * Check the data version so that when the format changes, we + * don't try to unpack data using the wrong format routines + */ + if (size_buf(buffer) >= sizeof(uint16_t) + strlen(NODE_STATE_VERSION)) { + char *ptr = get_buf_data(buffer); + + if (memcmp( &ptr[sizeof(uint16_t)], NODE_STATE_VERSION, 3) == 0) { + safe_unpackstr_xmalloc( &ver_str, &ver_str_len, buffer); + debug3("Version string in node_state header is %s", + ver_str); + } + } + if (strcmp(ver_str, NODE_STATE_VERSION) != 0) { + error("Can not recover node state, data version incompatable"); + xfree(ver_str); + free_buf(buffer); + return EFAULT; + } + xfree(ver_str); safe_unpack_time (&time_stamp, buffer); while (remaining_buf (buffer) > 0) { @@ -379,10 +406,10 @@ extern int load_all_node_state ( bool state_only ) safe_unpackstr_xmalloc (&node_name, &name_len, buffer); safe_unpackstr_xmalloc (&reason, &name_len, buffer); safe_unpack16 (&node_state, 
buffer); - safe_unpack32 (&cpus, buffer); - safe_unpack32 (&sockets, buffer); - safe_unpack32 (&cores, buffer); - safe_unpack32 (&threads, buffer); + safe_unpack16 (&cpus, buffer); + safe_unpack16 (&sockets, buffer); + safe_unpack16 (&cores, buffer); + safe_unpack16 (&threads, buffer); safe_unpack32 (&real_memory, buffer); safe_unpack32 (&tmp_disk, buffer); base_state = node_state & NODE_STATE_BASE; @@ -743,18 +770,18 @@ static void _pack_node (struct node_record *dump_node_ptr, Buf buffer) pack16 (dump_node_ptr->node_state, buffer); if (slurmctld_conf.fast_schedule) { /* Only data from config_record used for scheduling */ - pack32 (dump_node_ptr->config_ptr->cpus, buffer); - pack32 (dump_node_ptr->config_ptr->cores, buffer); - pack32 (dump_node_ptr->config_ptr->sockets, buffer); - pack32 (dump_node_ptr->config_ptr->threads, buffer); + pack16 (dump_node_ptr->config_ptr->cpus, buffer); + pack16 (dump_node_ptr->config_ptr->cores, buffer); + pack16 (dump_node_ptr->config_ptr->sockets, buffer); + pack16 (dump_node_ptr->config_ptr->threads, buffer); pack32 (dump_node_ptr->config_ptr->real_memory, buffer); pack32 (dump_node_ptr->config_ptr->tmp_disk, buffer); } else { /* Individual node data used for scheduling */ - pack32 (dump_node_ptr->cpus, buffer); - pack32 (dump_node_ptr->cores, buffer); - pack32 (dump_node_ptr->sockets, buffer); - pack32 (dump_node_ptr->threads, buffer); + pack16 (dump_node_ptr->cpus, buffer); + pack16 (dump_node_ptr->cores, buffer); + pack16 (dump_node_ptr->sockets, buffer); + pack16 (dump_node_ptr->threads, buffer); pack32 (dump_node_ptr->real_memory, buffer); pack32 (dump_node_ptr->tmp_disk, buffer); } @@ -1068,8 +1095,8 @@ static bool _valid_node_state_change(uint16_t old, uint16_t new) * NOTE: READ lock_slurmctld config before entry */ extern int -validate_node_specs (char *node_name, uint32_t cpus, - uint32_t sockets, uint32_t cores, uint32_t threads, +validate_node_specs (char *node_name, uint16_t cpus, + uint16_t sockets, uint16_t cores, 
uint16_t threads, uint32_t real_memory, uint32_t tmp_disk, uint32_t job_count, uint32_t status) { @@ -1087,6 +1114,32 @@ validate_node_specs (char *node_name, uint32_t cpus, config_ptr = node_ptr->config_ptr; error_code = 0; +#if 1 + /* Disable these tests if you want to emulate a system + * with different hardware than configured (sockets, cores + * and threads) */ + if (sockets < config_ptr->sockets) { + error("Node %s has low socket count %u", node_name, sockets); + error_code = EINVAL; + reason_down = "Low socket count"; + } + node_ptr->sockets = sockets; + + if (cores < config_ptr->cores) { + error("Node %s has low core count %u", node_name, cores); + error_code = EINVAL; + reason_down = "Low core count"; + } + node_ptr->cores = cores; + + if (threads < config_ptr->threads) { + error("Node %s has low thread count %u", node_name, threads); + error_code = EINVAL; + reason_down = "Low thread count"; + } + node_ptr->threads = threads; +#endif + if (cpus < config_ptr->cpus) { error ("Node %s has low cpu count %u", node_name, cpus); error_code = EINVAL; @@ -1100,9 +1153,6 @@ validate_node_specs (char *node_name, uint32_t cpus, } } node_ptr->cpus = cpus; - node_ptr->sockets = sockets; - node_ptr->cores = cores; - node_ptr->threads = threads; if (real_memory < config_ptr->real_memory) { error ("Node %s has low real_memory size %u", diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 65204a85785f5c3835c74a98cb34d5340970646a..f7c7c0d50d8aa210d6993fd92324278d9bd132d6 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -156,10 +156,10 @@ extern int bg_recover; /* state recovery mode */ struct config_record { uint32_t magic; /* magic cookie to test data integrity */ - uint32_t cpus; /* count of processors running on the node */ - uint32_t sockets; /* number of sockets per node */ - uint32_t cores; /* number of cores per CPU */ - uint32_t threads; /* number of threads per core */ + uint16_t cpus; /* count of processors running on 
the node */ + uint16_t sockets; /* number of sockets per node */ + uint16_t cores; /* number of cores per CPU */ + uint16_t threads; /* number of threads per core */ uint32_t real_memory; /* MB real memory on the node */ uint32_t tmp_disk; /* MB total storage in TMP_FS file system */ uint32_t weight; /* arbitrary priority of node for @@ -178,10 +178,10 @@ struct node_record { * NODE_STATE_NO_RESPOND if not * responding */ time_t last_response; /* last response from the node */ - uint32_t cpus; /* count of processors on the node */ - uint32_t sockets; /* number of sockets per node */ - uint32_t cores; /* number of cores per CPU */ - uint32_t threads; /* number of threads per core */ + uint16_t cpus; /* count of processors on the node */ + uint16_t sockets; /* number of sockets per node */ + uint16_t cores; /* number of cores per CPU */ + uint16_t threads; /* number of threads per core */ uint32_t real_memory; /* MB real memory on the node */ uint32_t tmp_disk; /* MB total disk in TMP_FS */ struct config_record *config_ptr; /* configuration spec ptr */ @@ -1356,10 +1356,10 @@ extern void validate_jobs_on_node ( char *node_name, uint32_t *job_count, * global: node_record_table_ptr - pointer to global node table */ extern int validate_node_specs (char *node_name, - uint32_t cpus, - uint32_t sockets, - uint32_t cores, - uint32_t threads, + uint16_t cpus, + uint16_t sockets, + uint16_t cores, + uint16_t threads, uint32_t real_memory, uint32_t tmp_disk, uint32_t job_count, uint32_t status); diff --git a/src/slurmd/common/slurmstepd_init.c b/src/slurmd/common/slurmstepd_init.c index bf1ba6270c598b55c37a37b9e65028a674f1c40c..591f17f0e3a28cb19722e6bd689906d488720392 100644 --- a/src/slurmd/common/slurmstepd_init.c +++ b/src/slurmd/common/slurmstepd_init.c @@ -41,9 +41,9 @@ extern void pack_slurmd_conf_lite(slurmd_conf_t *conf, Buf buffer) { xassert(conf != NULL); packstr(conf->hostname, buffer); - pack32(conf->sockets, buffer); - pack32(conf->cores, buffer); - 
pack32(conf->threads, buffer); + pack16(conf->sockets, buffer); + pack16(conf->cores, buffer); + pack16(conf->threads, buffer); packstr(conf->spooldir, buffer); packstr(conf->node_name, buffer); packstr(conf->logfile, buffer); @@ -62,9 +62,9 @@ extern int unpack_slurmd_conf_lite_no_alloc(slurmd_conf_t *conf, Buf buffer) uint16_t uint16_tmp; uint32_t uint32_tmp; safe_unpackstr_xmalloc(&conf->hostname, &uint16_tmp, buffer); - safe_unpack32(&conf->sockets, buffer); - safe_unpack32(&conf->cores, buffer); - safe_unpack32(&conf->threads, buffer); + safe_unpack16(&conf->sockets, buffer); + safe_unpack16(&conf->cores, buffer); + safe_unpack16(&conf->threads, buffer); safe_unpackstr_xmalloc(&conf->spooldir, &uint16_tmp, buffer); safe_unpackstr_xmalloc(&conf->node_name, &uint16_tmp, buffer); safe_unpackstr_xmalloc(&conf->logfile, &uint16_tmp, buffer); diff --git a/src/slurmd/slurmd/get_mach_stat.c b/src/slurmd/slurmd/get_mach_stat.c index 2093d8b0e6754c91577651f5689a5d87e201d94e..b2e778d737b026bd45b2a0bce7d634534287ccf5 100644 --- a/src/slurmd/slurmd/get_mach_stat.c +++ b/src/slurmd/slurmd/get_mach_stat.c @@ -78,10 +78,10 @@ static char* _cpuinfo_path = "/proc/cpuinfo"; -int compute_block_map(uint32_t numproc, - uint32_t **block_map, uint32_t **block_map_inv); +int compute_block_map(uint16_t numproc, + uint16_t **block_map, uint16_t **block_map_inv); int chk_cpuinfo_str(char *buffer, char *keyword, char **valptr); -int chk_cpuinfo_uint32(char *buffer, char *keyword, uint32_t *val); +int chk_cpuinfo_uint16(char *buffer, char *keyword, uint16_t *val); int chk_cpuinfo_float(char *buffer, char *keyword, float *val); /* #define DEBUG_DETAIL 1 */ /* enable detailed debugging within SLURM */ @@ -102,13 +102,13 @@ int main(int argc, char * argv[]) { int error_code; - uint32_t sockets, cores, threads; - uint32_t block_map_size; - uint32_t *block_map, *block_map_inv; + uint16_t sockets, cores, threads; + uint16_t block_map_size; + uint16_t *block_map, *block_map_inv; struct 
config_record this_node; char node_name[MAX_SLURM_NAME]; float speed; - uint32_t testnumproc = 0; + uint16_t testnumproc = 0; if (argc > 1) { _cpuinfo_path = argv[1]; @@ -183,7 +183,7 @@ getnodename (char *name, size_t len) * return code - 0 if no error, otherwise errno */ extern int -get_procs(uint32_t *procs) +get_procs(uint16_t *procs) { #ifdef LPAR_INFO_FORMAT2 /* AIX 5.3 only */ @@ -195,7 +195,7 @@ get_procs(uint32_t *procs) return EINVAL; } - *procs = (uint32_t)info.online_vcpus; + *procs = (uint16_t)info.online_vcpus; #else /* !LPAR_INFO_FORMAT2 */ # ifdef _SC_NPROCESSORS_ONLN @@ -208,7 +208,7 @@ get_procs(uint32_t *procs) return EINVAL; } - *procs = my_proc_tally; + *procs = (uint16_t) my_proc_tally; # else # ifdef HAVE_SYSCTLBYNAME int ncpu; @@ -219,7 +219,7 @@ get_procs(uint32_t *procs) error("get_procs: error running sysctl(HW_NCPU)"); return EINVAL; } - *procs = ncpu; + *procs = (uint16_t) ncpu; # else /* !HAVE_SYSCTLBYNAME */ *procs = 1; # endif /* HAVE_SYSCTLBYNAME */ @@ -387,15 +387,15 @@ int chk_cpuinfo_str(char *buffer, char *keyword, char **valptr) return true; } -/* chk_cpuinfo_uint32 +/* chk_cpuinfo_uint16 * check a line of cpuinfo data (buffer) for a keyword. If it - * exists, return the uint32 value for that keyword in *valptr. + * exists, return the uint16 value for that keyword in *valptr. 
* Input: buffer - single line of cpuinfo data * keyword - keyword to check for - * Output: valptr - uint32 value corresponding to keyword + * Output: valptr - uint16 value corresponding to keyword * return code - true if keyword found, false if not found */ -int chk_cpuinfo_uint32(char *buffer, char *keyword, uint32_t *val) +int chk_cpuinfo_uint16(char *buffer, char *keyword, uint16_t *val) { char *valptr; if (chk_cpuinfo_str(buffer, keyword, &valptr)) { @@ -467,41 +467,41 @@ get_speed(float *speed) */ typedef struct cpuinfo { int seen; - uint32_t id; - uint32_t physid; - uint32_t physcnt; - uint32_t siblings; - uint32_t cores; - uint32_t coreid; - uint32_t corecnt; + uint16_t id; + uint16_t physid; + uint16_t physcnt; + uint16_t siblings; + uint16_t cores; + uint16_t coreid; + uint16_t corecnt; } cpuinfo_t; static cpuinfo_t *cpuinfo = NULL; /* array of CPU information for get_cpuinfo */ /* Note: file static for qsort/_compare_cpus*/ extern int -get_cpuinfo(uint32_t numproc, - uint32_t *p_sockets, uint32_t *p_cores, uint32_t *p_threads, - uint32_t *block_map_size, - uint32_t **block_map, uint32_t **block_map_inv) +get_cpuinfo(uint16_t numproc, + uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads, + uint16_t *block_map_size, + uint16_t **block_map, uint16_t **block_map_inv) { FILE *cpu_info_file; char buffer[128]; int retval; - uint32_t curcpu, sockets, cores, threads; - uint32_t numcpu = 0; /* number of cpus seen */ - uint32_t numphys = 0; /* number of unique "physical id"s */ - uint32_t numcores = 0; /* number of unique "cores id"s */ - - uint32_t maxsibs = 0; /* maximum value of "siblings" */ - uint32_t maxcores = 0; /* maximum value of "cores" */ - uint32_t minsibs = INT_MAX; /* minimum value of "siblings" */ - uint32_t mincores = INT_MAX; /* minimum value of "cores" */ - - uint32_t maxcpuid = 0; /* maximum CPU ID ("processor") */ - uint32_t maxphysid = 0; /* maximum "physical id" */ - uint32_t maxcoreid = 0; /* maximum "core id" */ - uint32_t mincpuid 
= INT_MAX; /* minimum CPU ID ("processor") */ - uint32_t minphysid = INT_MAX; /* minimum "physical id" */ - uint32_t mincoreid = INT_MAX; /* minimum "core id" */ + uint16_t curcpu, sockets, cores, threads; + uint16_t numcpu = 0; /* number of cpus seen */ + uint16_t numphys = 0; /* number of unique "physical id"s */ + uint16_t numcores = 0; /* number of unique "cores id"s */ + + uint16_t maxsibs = 0; /* maximum value of "siblings" */ + uint16_t maxcores = 0; /* maximum value of "cores" */ + uint16_t minsibs = 0xffff; /* minimum value of "siblings" */ + uint16_t mincores = 0xffff; /* minimum value of "cores" */ + + uint16_t maxcpuid = 0; /* maximum CPU ID ("processor") */ + uint16_t maxphysid = 0; /* maximum "physical id" */ + uint16_t maxcoreid = 0; /* maximum "core id" */ + uint16_t mincpuid = 0xffff; /* minimum CPU ID ("processor") */ + uint16_t minphysid = 0xffff; /* minimum "physical id" */ + uint16_t mincoreid = 0xffff; /* minimum "core id" */ #ifdef DEBUG_DETAIL int i; #endif @@ -524,31 +524,31 @@ get_cpuinfo(uint32_t numproc, memset(cpuinfo, 0, numproc * sizeof(cpuinfo_t)); curcpu = 0; while (fgets(buffer, sizeof(buffer), cpu_info_file) != NULL) { - uint32_t val; - if (chk_cpuinfo_uint32(buffer, "processor", &val)) { + uint16_t val; + if (chk_cpuinfo_uint16(buffer, "processor", &val)) { curcpu = val; cpuinfo[val].seen = 1; cpuinfo[val].id = val; numcpu++; maxcpuid = MAX(maxcpuid, val); mincpuid = MIN(mincpuid, val); - } else if (chk_cpuinfo_uint32(buffer, "physical id", &val)) { + } else if (chk_cpuinfo_uint16(buffer, "physical id", &val)) { cpuinfo[curcpu].physid = val; if (cpuinfo[val].physcnt == 0) numphys++; cpuinfo[val].physcnt++; maxphysid = MAX(maxphysid, val); minphysid = MIN(minphysid, val); - } else if (chk_cpuinfo_uint32(buffer, "core id", &val)) { + } else if (chk_cpuinfo_uint16(buffer, "core id", &val)) { cpuinfo[curcpu].coreid = val; if (cpuinfo[val].corecnt == 0) numcores++; cpuinfo[val].corecnt++; maxcoreid = MAX(maxcoreid, val); mincoreid = 
MIN(mincoreid, val); - } else if (chk_cpuinfo_uint32(buffer, "siblings", &val)) { + } else if (chk_cpuinfo_uint16(buffer, "siblings", &val)) { cpuinfo[curcpu].siblings = val; maxsibs = MAX(maxsibs, val) ; minsibs = MIN(minsibs, val) ; - } else if (chk_cpuinfo_uint32(buffer, "cpu cores", &val)) { + } else if (chk_cpuinfo_uint16(buffer, "cpu cores", &val)) { cpuinfo[curcpu].cores = val; maxcores = MAX(maxcores, val); mincores = MIN(mincores, val); @@ -685,7 +685,7 @@ get_cpuinfo(uint32_t numproc, /* physical cpu comparison with void * arguments to allow use with * libc qsort() */ -static int _icmp(uint32_t a, uint32_t b) +static int _icmp(uint16_t a, uint16_t b) { if (a < b) { return -1; @@ -697,8 +697,8 @@ static int _icmp(uint32_t a, uint32_t b) } int _compare_cpus(const void *a1, const void *b1) { - uint32_t *a = (uint32_t *) a1; - uint32_t *b = (uint32_t *) b1; + uint16_t *a = (uint16_t *) a1; + uint16_t *b = (uint16_t *) b1; int cmp; cmp = -1 * _icmp(cpuinfo[*a].seen,cpuinfo[*b].seen); /* seen to front */ @@ -717,24 +717,24 @@ int _compare_cpus(const void *a1, const void *b1) { return cmp; } -int compute_block_map(uint32_t numproc, - uint32_t **block_map, uint32_t **block_map_inv) +int compute_block_map(uint16_t numproc, + uint16_t **block_map, uint16_t **block_map_inv) { - uint32_t i; + uint16_t i; /* Compute abstract->machine block mapping (and inverse) */ if (block_map) { - *block_map = xmalloc(numproc * sizeof(uint32_t)); - memset(*block_map, 0, numproc * sizeof(uint32_t)); + *block_map = xmalloc(numproc * sizeof(uint16_t)); + memset(*block_map, 0, numproc * sizeof(uint16_t)); for (i = 0; i < numproc; i++) { (*block_map)[i] = i; } - qsort(*block_map, numproc, sizeof(uint32_t), &_compare_cpus); + qsort(*block_map, numproc, sizeof(uint16_t), &_compare_cpus); } if (block_map_inv) { - *block_map_inv = xmalloc(numproc * sizeof(uint32_t)); - memset(*block_map_inv, 0, numproc * sizeof(uint32_t)); + *block_map_inv = xmalloc(numproc * sizeof(uint16_t)); + 
memset(*block_map_inv, 0, numproc * sizeof(uint16_t)); for (i = 0; i < numproc; i++) { - uint32_t idx = (*block_map)[i]; + uint16_t idx = (*block_map)[i]; (*block_map_inv)[idx] = i; } } @@ -768,7 +768,7 @@ int compute_block_map(uint32_t numproc, debug3("\n"); debug3("Physical Socket ID: "); for (i = 0; i < numproc; i++) { - uint32_t id = (*block_map)[i]; + uint16_t id = (*block_map)[i]; debug3("%3d", cpuinfo[id].physid); } debug3("\n"); diff --git a/src/slurmd/slurmd/get_mach_stat.h b/src/slurmd/slurmd/get_mach_stat.h index 3024f2660be417eeae2afc71e92043b00ffa3e9b..8211ae78c57aa28ea00d05a5eba56bb8a5979b19 100644 --- a/src/slurmd/slurmd/get_mach_stat.h +++ b/src/slurmd/slurmd/get_mach_stat.h @@ -51,11 +51,11 @@ # include <inttypes.h> #endif /* HAVE_CONFIG_H */ -extern int get_procs(uint32_t *procs); -extern int get_cpuinfo(uint32_t numproc, - uint32_t *sockets, uint32_t *cores, uint32_t *threads, - uint32_t *block_map_size, - uint32_t **block_map, uint32_t **block_map_inv); +extern int get_procs(uint16_t *procs); +extern int get_cpuinfo(uint16_t numproc, + uint16_t *sockets, uint16_t *cores, uint16_t *threads, + uint16_t *block_map_size, + uint16_t **block_map, uint16_t **block_map_inv); extern int get_mach_name(char *node_name); extern int get_memory(uint32_t *real_memory); extern int get_tmp_disk(uint32_t *tmp_disk, char *tmp_fs); diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c index 8d3d076c46c446005bd0dcf10f7a34f2adffe8ad..f05f5d4156750197f1759128b66ccb88376ba802 100644 --- a/src/slurmd/slurmd/slurmd.c +++ b/src/slurmd/slurmd/slurmd.c @@ -436,7 +436,7 @@ _fill_registration_msg(slurm_node_registration_status_msg_t *msg) msg->real_memory_size = conf->real_memory_size; msg->temporary_disk_space = conf->tmp_disk_space; - debug3("Procs=%u, S=%u, C=%u, T=%u, Memory=%u, TmpDisk=%u", + debug3("Procs=%u Sockets=%u Cores=%u Threads=%u Memory=%u TmpDisk=%u", msg->cpus, msg->sockets, msg->cores, msg->threads, msg->real_memory_size, 
msg->temporary_disk_space); diff --git a/src/slurmd/slurmd/slurmd.h b/src/slurmd/slurmd/slurmd.h index aea083aeea5571a752fa8fd8f427669b5b09b7c7..4b95aab47ef1d994e5fa8578381cad649687e897 100644 --- a/src/slurmd/slurmd/slurmd.h +++ b/src/slurmd/slurmd/slurmd.h @@ -74,23 +74,23 @@ typedef struct slurmd_config { char ***argv; /* pointer to argument vector */ int *argc; /* pointer to argument count */ char *hostname; /* local hostname */ - uint32_t cpus; /* lowest-level logical processors */ - uint32_t sockets; /* sockets count */ - uint32_t cores; /* core count */ - uint32_t threads; /* thread per core count */ - uint32_t conf_cpus; /* conf file logical processors */ - uint32_t conf_sockets; /* conf file sockets count */ - uint32_t conf_cores; /* conf file core count */ - uint32_t conf_threads; /* conf file thread per core count */ - uint32_t actual_cpus; /* actual logical processors */ - uint32_t actual_sockets; /* actual sockets count */ - uint32_t actual_cores; /* actual core count */ - uint32_t actual_threads; /* actual thread per core count */ + uint16_t cpus; /* lowest-level logical processors */ + uint16_t sockets; /* sockets count */ + uint16_t cores; /* core count */ + uint16_t threads; /* thread per core count */ + uint16_t conf_cpus; /* conf file logical processors */ + uint16_t conf_sockets; /* conf file sockets count */ + uint16_t conf_cores; /* conf file core count */ + uint16_t conf_threads; /* conf file thread per core count */ + uint16_t actual_cpus; /* actual logical processors */ + uint16_t actual_sockets; /* actual sockets count */ + uint16_t actual_cores; /* actual core count */ + uint16_t actual_threads; /* actual thread per core count */ uint32_t real_memory_size; /* amount of real memory */ uint32_t tmp_disk_space; /* size of temporary disk */ - uint32_t block_map_size; /* size of block map */ - uint32_t *block_map; /* abstract->machine block map */ - uint32_t *block_map_inv; /* machine->abstract (inverse) map */ + uint16_t block_map_size; /* 
size of block map */ + uint16_t *block_map; /* abstract->machine block map */ + uint16_t *block_map_inv; /* machine->abstract (inverse) map */ uint16_t cr_type; /* Consumable Resource Type: * * CR_SOCKET, CR_CORE, CR_MEMORY, * * CR_DEFAULT, etc. */