From 1b4ac02a4b14e457dc84dfd4d06c43a1b6318e18 Mon Sep 17 00:00:00 2001 From: Morris Jette <jette@schedmd.com> Date: Fri, 17 Jan 2014 09:25:07 -0800 Subject: [PATCH] Pass slurmd version string to slurmctld This augments an earlier patch that picked up the protocol version number, which lets us distinguish between 2.6.0 and 2.6.4 instead of just being able to tell it is version 2.6.x. --- slurm/slurm.h.in | 4 +- src/api/front_end_info.c | 2 +- src/api/node_info.c | 2 +- src/common/node_conf.c | 1 + src/common/node_conf.h | 1 + src/common/slurm_protocol_defs.c | 3 + src/common/slurm_protocol_defs.h | 1 + src/common/slurm_protocol_pack.c | 103 +++++++++++++++++++++++++++++-- src/common/slurm_protocol_util.c | 12 ---- src/common/slurm_protocol_util.h | 3 - src/slurmctld/front_end.c | 6 +- src/slurmctld/node_mgr.c | 10 ++- src/slurmctld/slurmctld.h | 1 + src/slurmd/slurmd/slurmd.c | 3 + 14 files changed, 127 insertions(+), 25 deletions(-) diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 4e5581edd60..dacb41f74b7 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -1591,7 +1591,6 @@ typedef struct node_info { char *node_hostname; /* node's hostname (optional) */ uint16_t node_state; /* see enum node_states */ char *os; /* operating system currently running */ - uint16_t protocol_version; /* Slurm version number */ uint32_t real_memory; /* configured MB of real memory on the node */ char *reason; /* reason for node being DOWN or DRAINING */ time_t reason_time; /* Time stamp when reason was set, ignore if @@ -1607,6 +1606,7 @@ typedef struct node_info { * use * slurm_get_select_nodeinfo() * to access contents */ + char *version; /* Slurm version number */ } node_info_t; typedef struct node_info_msg { @@ -1630,7 +1630,6 @@ typedef struct front_end_info { char *deny_users; /* denied user string */ char *name; /* node name */ uint16_t node_state; /* see enum node_states */ - uint16_t protocol_version; /* Slurm version number */ char *reason; /* reason for node being DOWN or * DRAINING */ time_t reason_time; /* Time stamp when reason was set, @@ -1638,6 +1637,7 @@ typedef struct front_end_info { uint32_t reason_uid; /* User that set the reason, * ignore if no reason is set. */ time_t slurmd_start_time; /* Time of slurmd startup */ + char *version; /* Slurm version number */ } front_end_info_t; typedef struct front_end_info_msg { diff --git a/src/api/front_end_info.c b/src/api/front_end_info.c index 9e32f4e3e87..06462c1ad84 100644 --- a/src/api/front_end_info.c +++ b/src/api/front_end_info.c @@ -138,7 +138,7 @@ slurm_sprint_front_end_table (front_end_info_t * front_end_ptr, node_state_string(my_state), drain_str); xstrcat(out, tmp_line); snprintf(tmp_line, sizeof(tmp_line), "Version=%s ", - protocol_to_version(front_end_ptr->protocol_version)); + front_end_ptr->version); xstrcat(out, tmp_line); if (front_end_ptr->reason_time) { char *user_name = uid_to_string(front_end_ptr->reason_uid); diff --git a/src/api/node_info.c b/src/api/node_info.c index 897baa7df63..a618a65a8fd 100644 --- a/src/api/node_info.c +++ b/src/api/node_info.c @@ -242,7 +242,7 @@ slurm_sprint_node_table (node_info_t * node_ptr, snprintf(tmp_line, sizeof(tmp_line), "NodeAddr=%s NodeHostName=%s Version=%s", node_ptr->node_addr, node_ptr->node_hostname, - protocol_to_version(node_ptr->protocol_version)); + node_ptr->version); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); diff --git a/src/common/node_conf.c b/src/common/node_conf.c index 20d1683a666..84d022824f7 100644 --- a/src/common/node_conf.c +++ b/src/common/node_conf.c @@ -999,6 +999,7 @@ extern void purge_node_rec (struct node_record *node_ptr) xfree(node_ptr->os); xfree(node_ptr->part_pptr); xfree(node_ptr->reason); + xfree(node_ptr->version); acct_gather_energy_destroy(node_ptr->energy); ext_sensors_destroy(node_ptr->ext_sensors); select_g_select_nodeinfo_free(node_ptr->select_nodeinfo); diff --git a/src/common/node_conf.h b/src/common/node_conf.h index 9768ca47a1c..1f0d2ca2887 100644 --- a/src/common/node_conf.h +++ b/src/common/node_conf.h @@ -159,6 +159,7 @@ struct node_record { * to access contents */ uint32_t cpu_load; /* CPU load * 100 */ uint16_t protocol_version; /* Slurm version number */ + char *version; /* Slurm version */ }; extern struct node_record *node_record_table_ptr; /* ptr to node records */ extern int node_record_count; /* count in node_record_table_ptr */ diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index d96fef5579a..7cc01ec3400 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -585,6 +585,7 @@ extern void slurm_free_node_registration_status_msg( xfree(msg->step_id); if (msg->switch_nodeinfo) switch_g_free_node_info(&msg->switch_nodeinfo); + xfree(msg->version); xfree(msg); } } @@ -2252,6 +2253,7 @@ extern void slurm_free_front_end_info_members(front_end_info_t * front_end) xfree(front_end->deny_users); xfree(front_end->name); xfree(front_end->reason); + xfree(front_end->version); } } @@ -2294,6 +2296,7 @@ extern void slurm_free_node_info_members(node_info_t * node) xfree(node->reason); select_g_select_nodeinfo_free(node->select_nodeinfo); node->select_nodeinfo = NULL; + xfree(node->version); } } diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index df579e00df0..41786be79b0 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -1034,6 +1034,7 @@ typedef struct slurm_node_registration_status_msg { time_t timestamp; uint32_t tmp_disk; uint32_t up_time; /* seconds since reboot */ + char *version; } slurm_node_registration_status_msg_t; diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 8385d208ac0..4c7d1f22265 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -2567,7 +2567,45 @@ _pack_node_registration_status_msg(slurm_node_registration_status_msg_t * uint32_t gres_info_size = 0; xassert(msg != NULL); - if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_14_03_PROTOCOL_VERSION) { + pack_time(msg->timestamp, buffer); + pack_time(msg->slurmd_start_time, buffer); + pack32(msg->status, buffer); + packstr(msg->node_name, buffer); + packstr(msg->arch, buffer); + packstr(msg->os, buffer); + pack16(msg->cpus, buffer); + pack16(msg->boards, buffer); + pack16(msg->sockets, buffer); + pack16(msg->cores, buffer); + pack16(msg->threads, buffer); + pack32(msg->real_memory, buffer); + pack32(msg->tmp_disk, buffer); + pack32(msg->up_time, buffer); + pack32(msg->hash_val, buffer); + pack32(msg->cpu_load, buffer); + + pack32(msg->job_count, buffer); + for (i = 0; i < msg->job_count; i++) { + pack32(msg->job_id[i], buffer); + } + for (i = 0; i < msg->job_count; i++) { + pack32(msg->step_id[i], buffer); + } + pack16(msg->startup, buffer); + if (msg->startup) + switch_g_pack_node_info(msg->switch_nodeinfo, buffer, + protocol_version); + if (msg->gres_info) + gres_info_size = get_buf_offset(msg->gres_info); + pack32(gres_info_size, buffer); + if (gres_info_size) { + packmem(get_buf_data(msg->gres_info), gres_info_size, + buffer); + } + acct_gather_energy_pack(msg->energy, buffer, protocol_version); + packstr(msg->version, buffer); + } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { pack_time(msg->timestamp, buffer); pack_time(msg->slurmd_start_time, buffer); pack32(msg->status, buffer); @@ -2625,7 +2663,64 @@ _unpack_node_registration_status_msg(slurm_node_registration_status_msg_t node_reg_ptr = xmalloc(sizeof(slurm_node_registration_status_msg_t)); *msg = node_reg_ptr; - if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_14_03_PROTOCOL_VERSION) { + /* unpack timestamp of snapshot */ + safe_unpack_time(&node_reg_ptr->timestamp, buffer); + safe_unpack_time(&node_reg_ptr->slurmd_start_time, buffer); + /* load the data values */ + safe_unpack32(&node_reg_ptr->status, buffer); + safe_unpackstr_xmalloc(&node_reg_ptr->node_name, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&node_reg_ptr->arch, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&node_reg_ptr->os, &uint32_tmp, buffer); + safe_unpack16(&node_reg_ptr->cpus, buffer); + safe_unpack16(&node_reg_ptr->boards, buffer); + safe_unpack16(&node_reg_ptr->sockets, buffer); + safe_unpack16(&node_reg_ptr->cores, buffer); + safe_unpack16(&node_reg_ptr->threads, buffer); + safe_unpack32(&node_reg_ptr->real_memory, buffer); + safe_unpack32(&node_reg_ptr->tmp_disk, buffer); + safe_unpack32(&node_reg_ptr->up_time, buffer); + safe_unpack32(&node_reg_ptr->hash_val, buffer); + safe_unpack32(&node_reg_ptr->cpu_load, buffer); + + safe_unpack32(&node_reg_ptr->job_count, buffer); + node_reg_ptr->job_id = + xmalloc(sizeof(uint32_t) * node_reg_ptr->job_count); + for (i = 0; i < node_reg_ptr->job_count; i++) { + safe_unpack32(&node_reg_ptr->job_id[i], buffer); + } + node_reg_ptr->step_id = + xmalloc(sizeof(uint32_t) * node_reg_ptr->job_count); + for (i = 0; i < node_reg_ptr->job_count; i++) { + safe_unpack32(&node_reg_ptr->step_id[i], buffer); + } + + safe_unpack16(&node_reg_ptr->startup, buffer); + if (node_reg_ptr->startup + && (switch_g_alloc_node_info( + &node_reg_ptr->switch_nodeinfo) + || switch_g_unpack_node_info( + node_reg_ptr->switch_nodeinfo, buffer, + protocol_version))) + goto unpack_error; + + safe_unpack32(&gres_info_size, buffer); + if (gres_info_size) { + safe_unpackmem_xmalloc(&gres_info, &uint32_tmp, buffer); + if (gres_info_size != uint32_tmp) + goto unpack_error; + node_reg_ptr->gres_info = create_buf(gres_info, + gres_info_size); + } + if (acct_gather_energy_unpack(&node_reg_ptr->energy, buffer, + protocol_version) + != SLURM_SUCCESS) + goto unpack_error; + safe_unpackstr_xmalloc(&node_reg_ptr->version, + &uint32_tmp, buffer); + } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { /* unpack timestamp of snapshot */ safe_unpack_time(&node_reg_ptr->timestamp, buffer); safe_unpack_time(&node_reg_ptr->slurmd_start_time, buffer); @@ -3006,7 +3101,7 @@ _unpack_node_info_members(node_info_t * node, Buf buffer, buffer); safe_unpackstr_xmalloc(&node->node_addr, &uint32_tmp, buffer); safe_unpack16(&node->node_state, buffer); - safe_unpack16(&node->protocol_version, buffer); + safe_unpackstr_xmalloc(&node->version, &uint32_tmp, buffer); safe_unpack16(&node->cpus, buffer); safe_unpack16(&node->boards, buffer); @@ -9325,7 +9420,7 @@ _unpack_front_end_info_members(front_end_info_t *front_end, Buf buffer, buffer); safe_unpackstr_xmalloc(&front_end->name, &uint32_tmp, buffer); safe_unpack16(&front_end->node_state, buffer); - safe_unpack16(&front_end->protocol_version, buffer); + safe_unpackstr_xmalloc(&front_end->version, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&front_end->reason, &uint32_tmp, buffer); safe_unpack_time(&front_end->reason_time, buffer); diff --git a/src/common/slurm_protocol_util.c b/src/common/slurm_protocol_util.c index bd51c711071..62e2d687fb7 100644 --- a/src/common/slurm_protocol_util.c +++ b/src/common/slurm_protocol_util.c @@ -191,15 +191,3 @@ void slurm_print_launch_task_msg(launch_tasks_request_msg_t *msg, char *name) msg->global_task_ids[node_id][i]); } } - -/* Given a Slurm protocol version, return the version of Slurm that uses it */ -extern char *protocol_to_version(uint16_t protocol_version) -{ - if (protocol_version == SLURM_14_03_PROTOCOL_VERSION) - return "14.03"; - if (protocol_version == SLURM_2_6_PROTOCOL_VERSION) - return "2.6"; - if (protocol_version == SLURM_2_5_PROTOCOL_VERSION) - return "2.5"; - return "N/A"; -} diff --git a/src/common/slurm_protocol_util.h b/src/common/slurm_protocol_util.h index a22c2c91d3b..98965c72dc7 100644 --- a/src/common/slurm_protocol_util.h +++ b/src/common/slurm_protocol_util.h @@ -90,7 +90,4 @@ update_header(header_t * header, uint32_t msg_length); extern void slurm_print_launch_task_msg(launch_tasks_request_msg_t * msg, char *name); -/* Given a Slurm protocol version, return the version of Slurm that uses it */ -extern char *protocol_to_version(uint16_t protocol_version); - #endif /* !_SLURM_PROTOCOL_UTIL_H */ diff --git a/src/slurmctld/front_end.c b/src/slurmctld/front_end.c index 580e031bb65..815ef94efe5 100644 --- a/src/slurmctld/front_end.c +++ b/src/slurmctld/front_end.c @@ -137,7 +137,7 @@ static void _pack_front_end(struct front_end_record *dump_front_end_ptr, packstr(dump_front_end_ptr->deny_users, buffer); packstr(dump_front_end_ptr->name, buffer); pack16(dump_front_end_ptr->node_state, buffer); - pack16(dump_front_end_ptr->protocol_version, buffer); + packstr(dump_front_end_ptr->version, buffer); packstr(dump_front_end_ptr->reason, buffer); pack_time(dump_front_end_ptr->reason_time, buffer); @@ -421,13 +421,17 @@ extern void purge_front_end_state(void) for (i = 0, front_end_ptr = front_end_nodes; i < front_end_node_cnt; i++, front_end_ptr++) { xassert(front_end_ptr->magic == FRONT_END_MAGIC); + xfree(front_end_ptr->allow_gids); xfree(front_end_ptr->allow_groups); + xfree(front_end_ptr->allow_uids); xfree(front_end_ptr->allow_users); xfree(front_end_ptr->comm_name); + xfree(front_end_ptr->deny_gids); xfree(front_end_ptr->deny_groups); xfree(front_end_ptr->deny_users); xfree(front_end_ptr->name); xfree(front_end_ptr->reason); + xfree(front_end_ptr->version); } xfree(front_end_nodes); front_end_node_cnt = 0; diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 288b2f74275..c26f8aa6b3a 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -800,7 +800,7 @@ static void _pack_node (struct node_record *dump_node_ptr, Buf buffer, packstr (dump_node_ptr->node_hostname, buffer); packstr (dump_node_ptr->comm_name, buffer); pack16 (dump_node_ptr->node_state, buffer); - pack16 (dump_node_ptr->protocol_version, buffer); + packstr (dump_node_ptr->version, buffer); /* On a bluegene system always use the regular node * infomation not what is in the config_ptr. */ @@ -1822,6 +1822,10 @@ extern int validate_node_specs(slurm_node_registration_status_msg_t *reg_msg, error_code = SLURM_SUCCESS; node_ptr->protocol_version = protocol_version; + xfree(node_ptr->version); + node_ptr->version = reg_msg->version; + reg_msg->version = NULL; + if (cr_flag == NO_VAL) { cr_flag = 0; /* call is no-op for select/linear and bluegene */ if (select_g_get_info_from_plugin(SELECT_CR_PLUGIN, @@ -2198,6 +2202,10 @@ extern int validate_nodes_via_front_end( return ESLURM_INVALID_NODE_NAME; front_end_ptr->protocol_version = protocol_version; + xfree(front_end_ptr->version); + front_end_ptr->version = reg_msg->version; + reg_msg->version = NULL; + if (reg_msg->status == ESLURMD_PROLOG_FAILED) { error("Prolog failed on node %s", reg_msg->node_name); /* Do NOT set the node DOWN here. Unlike non-front-end systems, diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 781cdd0fee4..1711864c47b 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -284,6 +284,7 @@ typedef struct front_end_record { uint32_t reason_uid; /* User that set the reason, ignore if * no reason is set. */ time_t slurmd_start_time; /* Time of slurmd startup */ + char *version; /* Slurm version */ } front_end_record_t; extern front_end_record_t *front_end_nodes; diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c index c406b066748..1b4f221d78f 100644 --- a/src/slurmd/slurmd/slurmd.c +++ b/src/slurmd/slurmd/slurmd.c @@ -613,6 +613,9 @@ _fill_registration_msg(slurm_node_registration_status_msg_t *msg) Buf gres_info; msg->node_name = xstrdup (conf->node_name); + msg->version = xstrdup (PACKAGE_VERSION); + + msg->cpus = conf->cpus; msg->boards = conf->boards; msg->sockets = conf->sockets; -- GitLab