diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 4e5581edd60299b1df2767d4d52f98b7bbc84436..dacb41f74b7ce7e6e81cd71818950b229c99bcf0 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -1591,7 +1591,6 @@ typedef struct node_info { char *node_hostname; /* node's hostname (optional) */ uint16_t node_state; /* see enum node_states */ char *os; /* operating system currently running */ - uint16_t protocol_version; /* Slurm version number */ uint32_t real_memory; /* configured MB of real memory on the node */ char *reason; /* reason for node being DOWN or DRAINING */ time_t reason_time; /* Time stamp when reason was set, ignore if @@ -1607,6 +1606,7 @@ typedef struct node_info { * use * slurm_get_select_nodeinfo() * to access contents */ + char *version; /* Slurm version number */ } node_info_t; typedef struct node_info_msg { @@ -1630,7 +1630,6 @@ typedef struct front_end_info { char *deny_users; /* denied user string */ char *name; /* node name */ uint16_t node_state; /* see enum node_states */ - uint16_t protocol_version; /* Slurm version number */ char *reason; /* reason for node being DOWN or * DRAINING */ time_t reason_time; /* Time stamp when reason was set, @@ -1638,6 +1637,7 @@ typedef struct front_end_info { uint32_t reason_uid; /* User that set the reason, * ignore if no reason is set. */ time_t slurmd_start_time; /* Time of slurmd startup */ + char *version; /* Slurm version number */ } front_end_info_t; typedef struct front_end_info_msg { diff --git a/src/api/front_end_info.c b/src/api/front_end_info.c index 9e32f4e3e87bddb045bb887caebfb1772673e1f0..06462c1ad84b88f96506b3b2a1d95b85c7af296c 100644 --- a/src/api/front_end_info.c +++ b/src/api/front_end_info.c @@ -138,7 +138,7 @@ slurm_sprint_front_end_table (front_end_info_t * front_end_ptr, node_state_string(my_state), drain_str); xstrcat(out, tmp_line); snprintf(tmp_line, sizeof(tmp_line), "Version=%s ", - protocol_to_version(front_end_ptr->protocol_version)); + front_end_ptr->version); xstrcat(out, tmp_line); if (front_end_ptr->reason_time) { char *user_name = uid_to_string(front_end_ptr->reason_uid); diff --git a/src/api/node_info.c b/src/api/node_info.c index 897baa7df63d9b461dd25c94629d24811e729466..a618a65a8fdeff6ee4857b142178f5c1170b8baa 100644 --- a/src/api/node_info.c +++ b/src/api/node_info.c @@ -242,7 +242,7 @@ slurm_sprint_node_table (node_info_t * node_ptr, snprintf(tmp_line, sizeof(tmp_line), "NodeAddr=%s NodeHostName=%s Version=%s", node_ptr->node_addr, node_ptr->node_hostname, - protocol_to_version(node_ptr->protocol_version)); + node_ptr->version); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); diff --git a/src/common/node_conf.c b/src/common/node_conf.c index 20d1683a666c5e0028c435a89861c5e77447c7a9..84d022824f7ad90e5b76b6c18e1b5c674285e815 100644 --- a/src/common/node_conf.c +++ b/src/common/node_conf.c @@ -999,6 +999,7 @@ extern void purge_node_rec (struct node_record *node_ptr) xfree(node_ptr->os); xfree(node_ptr->part_pptr); xfree(node_ptr->reason); + xfree(node_ptr->version); acct_gather_energy_destroy(node_ptr->energy); ext_sensors_destroy(node_ptr->ext_sensors); select_g_select_nodeinfo_free(node_ptr->select_nodeinfo); diff --git a/src/common/node_conf.h b/src/common/node_conf.h index 9768ca47a1c379df2f55a467b2d52a47e499a540..1f0d2ca2887cf5cb9cf8bdc191186186ab27a79a 100644 --- a/src/common/node_conf.h +++ b/src/common/node_conf.h @@ -159,6 +159,7 @@ struct node_record { * to access contents */ uint32_t cpu_load; /* CPU load * 100 */ uint16_t protocol_version; /* Slurm version number */ + char *version; /* Slurm version */ }; extern struct node_record *node_record_table_ptr; /* ptr to node records */ extern int node_record_count; /* count in node_record_table_ptr */ diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index d96fef5579a3e6bb5631a666dac335f0393b3c74..7cc01ec340026492d090aa5349745a61859a3ecb 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -585,6 +585,7 @@ extern void slurm_free_node_registration_status_msg( xfree(msg->step_id); if (msg->switch_nodeinfo) switch_g_free_node_info(&msg->switch_nodeinfo); + xfree(msg->version); xfree(msg); } } @@ -2252,6 +2253,7 @@ extern void slurm_free_front_end_info_members(front_end_info_t * front_end) xfree(front_end->deny_users); xfree(front_end->name); xfree(front_end->reason); + xfree(front_end->version); } } @@ -2294,6 +2296,7 @@ extern void slurm_free_node_info_members(node_info_t * node) xfree(node->reason); select_g_select_nodeinfo_free(node->select_nodeinfo); node->select_nodeinfo = NULL; + xfree(node->version); } } diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index df579e00df0b67f657796ef2aafc7f63165adb8d..41786be79b051ddef0843d79c19d82e7a37ffcb5 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -1034,6 +1034,7 @@ typedef struct slurm_node_registration_status_msg { time_t timestamp; uint32_t tmp_disk; uint32_t up_time; /* seconds since reboot */ + char *version; } slurm_node_registration_status_msg_t; diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 8385d208ac003f5d335cb903873a8ea656007bf7..4c7d1f222650d26291bc598dba7adc615edb99ec 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -2567,7 +2567,45 @@ _pack_node_registration_status_msg(slurm_node_registration_status_msg_t * uint32_t gres_info_size = 0; xassert(msg != NULL); - if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_14_03_PROTOCOL_VERSION) { + pack_time(msg->timestamp, buffer); + pack_time(msg->slurmd_start_time, buffer); + pack32(msg->status, buffer); + packstr(msg->node_name, buffer); + packstr(msg->arch, buffer); + packstr(msg->os, buffer); + pack16(msg->cpus, buffer); + pack16(msg->boards, buffer); + pack16(msg->sockets, buffer); + pack16(msg->cores, buffer); + pack16(msg->threads, buffer); + pack32(msg->real_memory, buffer); + pack32(msg->tmp_disk, buffer); + pack32(msg->up_time, buffer); + pack32(msg->hash_val, buffer); + pack32(msg->cpu_load, buffer); + + pack32(msg->job_count, buffer); + for (i = 0; i < msg->job_count; i++) { + pack32(msg->job_id[i], buffer); + } + for (i = 0; i < msg->job_count; i++) { + pack32(msg->step_id[i], buffer); + } + pack16(msg->startup, buffer); + if (msg->startup) + switch_g_pack_node_info(msg->switch_nodeinfo, buffer, + protocol_version); + if (msg->gres_info) + gres_info_size = get_buf_offset(msg->gres_info); + pack32(gres_info_size, buffer); + if (gres_info_size) { + packmem(get_buf_data(msg->gres_info), gres_info_size, + buffer); + } + acct_gather_energy_pack(msg->energy, buffer, protocol_version); + packstr(msg->version, buffer); + } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { pack_time(msg->timestamp, buffer); pack_time(msg->slurmd_start_time, buffer); pack32(msg->status, buffer); @@ -2625,7 +2663,64 @@ _unpack_node_registration_status_msg(slurm_node_registration_status_msg_t node_reg_ptr = xmalloc(sizeof(slurm_node_registration_status_msg_t)); *msg = node_reg_ptr; - if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_14_03_PROTOCOL_VERSION) { + /* unpack timestamp of snapshot */ + safe_unpack_time(&node_reg_ptr->timestamp, buffer); + safe_unpack_time(&node_reg_ptr->slurmd_start_time, buffer); + /* load the data values */ + safe_unpack32(&node_reg_ptr->status, buffer); + safe_unpackstr_xmalloc(&node_reg_ptr->node_name, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&node_reg_ptr->arch, + &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&node_reg_ptr->os, &uint32_tmp, buffer); + safe_unpack16(&node_reg_ptr->cpus, buffer); + safe_unpack16(&node_reg_ptr->boards, buffer); + safe_unpack16(&node_reg_ptr->sockets, buffer); + safe_unpack16(&node_reg_ptr->cores, buffer); + safe_unpack16(&node_reg_ptr->threads, buffer); + safe_unpack32(&node_reg_ptr->real_memory, buffer); + safe_unpack32(&node_reg_ptr->tmp_disk, buffer); + safe_unpack32(&node_reg_ptr->up_time, buffer); + safe_unpack32(&node_reg_ptr->hash_val, buffer); + safe_unpack32(&node_reg_ptr->cpu_load, buffer); + + safe_unpack32(&node_reg_ptr->job_count, buffer); + node_reg_ptr->job_id = + xmalloc(sizeof(uint32_t) * node_reg_ptr->job_count); + for (i = 0; i < node_reg_ptr->job_count; i++) { + safe_unpack32(&node_reg_ptr->job_id[i], buffer); + } + node_reg_ptr->step_id = + xmalloc(sizeof(uint32_t) * node_reg_ptr->job_count); + for (i = 0; i < node_reg_ptr->job_count; i++) { + safe_unpack32(&node_reg_ptr->step_id[i], buffer); + } + + safe_unpack16(&node_reg_ptr->startup, buffer); + if (node_reg_ptr->startup + && (switch_g_alloc_node_info( + &node_reg_ptr->switch_nodeinfo) + || switch_g_unpack_node_info( + node_reg_ptr->switch_nodeinfo, buffer, + protocol_version))) + goto unpack_error; + + safe_unpack32(&gres_info_size, buffer); + if (gres_info_size) { + safe_unpackmem_xmalloc(&gres_info, &uint32_tmp, buffer); + if (gres_info_size != uint32_tmp) + goto unpack_error; + node_reg_ptr->gres_info = create_buf(gres_info, + gres_info_size); + } + if (acct_gather_energy_unpack(&node_reg_ptr->energy, buffer, + protocol_version) + != SLURM_SUCCESS) + goto unpack_error; + safe_unpackstr_xmalloc(&node_reg_ptr->version, + &uint32_tmp, buffer); + } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { /* unpack timestamp of snapshot */ safe_unpack_time(&node_reg_ptr->timestamp, buffer); safe_unpack_time(&node_reg_ptr->slurmd_start_time, buffer); @@ -3006,7 +3101,7 @@ _unpack_node_info_members(node_info_t * node, Buf buffer, buffer); safe_unpackstr_xmalloc(&node->node_addr, &uint32_tmp, buffer); safe_unpack16(&node->node_state, buffer); - safe_unpack16(&node->protocol_version, buffer); + safe_unpackstr_xmalloc(&node->version, &uint32_tmp, buffer); safe_unpack16(&node->cpus, buffer); safe_unpack16(&node->boards, buffer); @@ -9325,7 +9420,7 @@ _unpack_front_end_info_members(front_end_info_t *front_end, Buf buffer, buffer); safe_unpackstr_xmalloc(&front_end->name, &uint32_tmp, buffer); safe_unpack16(&front_end->node_state, buffer); - safe_unpack16(&front_end->protocol_version, buffer); + safe_unpackstr_xmalloc(&front_end->version, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&front_end->reason, &uint32_tmp, buffer); safe_unpack_time(&front_end->reason_time, buffer); diff --git a/src/common/slurm_protocol_util.c b/src/common/slurm_protocol_util.c index bd51c711071ae2c63c09030766fab9a1a56135f5..62e2d687fb75ea3222dac3ef548ef7cb620fd6b4 100644 --- a/src/common/slurm_protocol_util.c +++ b/src/common/slurm_protocol_util.c @@ -191,15 +191,3 @@ void slurm_print_launch_task_msg(launch_tasks_request_msg_t *msg, char *name) msg->global_task_ids[node_id][i]); } } - -/* Given a Slurm protocol version, return the version of Slurm that uses it */ -extern char *protocol_to_version(uint16_t protocol_version) -{ - if (protocol_version == SLURM_14_03_PROTOCOL_VERSION) - return "14.03"; - if (protocol_version == SLURM_2_6_PROTOCOL_VERSION) - return "2.6"; - if (protocol_version == SLURM_2_5_PROTOCOL_VERSION) - return "2.5"; - return "N/A"; -} diff --git a/src/common/slurm_protocol_util.h b/src/common/slurm_protocol_util.h index a22c2c91d3b6dc1686cbefb4a4c0905348eb7d3d..98965c72dc73bd6b97201487dee861b54d79794c 100644 --- a/src/common/slurm_protocol_util.h +++ b/src/common/slurm_protocol_util.h @@ -90,7 +90,4 @@ update_header(header_t * header, uint32_t msg_length); extern void slurm_print_launch_task_msg(launch_tasks_request_msg_t * msg, char *name); -/* Given a Slurm protocol version, return the version of Slurm that uses it */ -extern char *protocol_to_version(uint16_t protocol_version); - #endif /* !_SLURM_PROTOCOL_UTIL_H */ diff --git a/src/slurmctld/front_end.c b/src/slurmctld/front_end.c index 580e031bb6541895d9e179cd6f1f485a33006ce1..815ef94efe5c8170e7ec8f41b0b7fbb3e9ee76dd 100644 --- a/src/slurmctld/front_end.c +++ b/src/slurmctld/front_end.c @@ -137,7 +137,7 @@ static void _pack_front_end(struct front_end_record *dump_front_end_ptr, packstr(dump_front_end_ptr->deny_users, buffer); packstr(dump_front_end_ptr->name, buffer); pack16(dump_front_end_ptr->node_state, buffer); - pack16(dump_front_end_ptr->protocol_version, buffer); + packstr(dump_front_end_ptr->version, buffer); packstr(dump_front_end_ptr->reason, buffer); pack_time(dump_front_end_ptr->reason_time, buffer); @@ -421,13 +421,17 @@ extern void purge_front_end_state(void) for (i = 0, front_end_ptr = front_end_nodes; i < front_end_node_cnt; i++, front_end_ptr++) { xassert(front_end_ptr->magic == FRONT_END_MAGIC); + xfree(front_end_ptr->allow_gids); xfree(front_end_ptr->allow_groups); + xfree(front_end_ptr->allow_uids); xfree(front_end_ptr->allow_users); xfree(front_end_ptr->comm_name); + xfree(front_end_ptr->deny_gids); xfree(front_end_ptr->deny_groups); xfree(front_end_ptr->deny_users); xfree(front_end_ptr->name); xfree(front_end_ptr->reason); + xfree(front_end_ptr->version); } xfree(front_end_nodes); front_end_node_cnt = 0; diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 288b2f74275c27662716b2b77c987ff972eb542d..c26f8aa6b3a4cafeda4e6baea3e7cc440a2a65aa 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -800,7 +800,7 @@ static void _pack_node (struct node_record *dump_node_ptr, Buf buffer, packstr (dump_node_ptr->node_hostname, buffer); packstr (dump_node_ptr->comm_name, buffer); pack16 (dump_node_ptr->node_state, buffer); - pack16 (dump_node_ptr->protocol_version, buffer); + packstr (dump_node_ptr->version, buffer); /* On a bluegene system always use the regular node * infomation not what is in the config_ptr. */ @@ -1822,6 +1822,10 @@ extern int validate_node_specs(slurm_node_registration_status_msg_t *reg_msg, error_code = SLURM_SUCCESS; node_ptr->protocol_version = protocol_version; + xfree(node_ptr->version); + node_ptr->version = reg_msg->version; + reg_msg->version = NULL; + if (cr_flag == NO_VAL) { cr_flag = 0; /* call is no-op for select/linear and bluegene */ if (select_g_get_info_from_plugin(SELECT_CR_PLUGIN, @@ -2198,6 +2202,10 @@ extern int validate_nodes_via_front_end( return ESLURM_INVALID_NODE_NAME; front_end_ptr->protocol_version = protocol_version; + xfree(front_end_ptr->version); + front_end_ptr->version = reg_msg->version; + reg_msg->version = NULL; + if (reg_msg->status == ESLURMD_PROLOG_FAILED) { error("Prolog failed on node %s", reg_msg->node_name); /* Do NOT set the node DOWN here. Unlike non-front-end systems, diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 781cdd0fee4a3393b5fd9a194d04964a6ea63f67..1711864c47bb8c6f0c1de1c4a031fe51ba065be7 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -284,6 +284,7 @@ typedef struct front_end_record { uint32_t reason_uid; /* User that set the reason, ignore if * no reason is set. */ time_t slurmd_start_time; /* Time of slurmd startup */ + char *version; /* Slurm version */ } front_end_record_t; extern front_end_record_t *front_end_nodes; diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c index c406b0667489c4a9998ff87c2527bb60a27ce822..1b4f221d78f71611efb3911229bf5e5e076e61b1 100644 --- a/src/slurmd/slurmd/slurmd.c +++ b/src/slurmd/slurmd/slurmd.c @@ -613,6 +613,9 @@ _fill_registration_msg(slurm_node_registration_status_msg_t *msg) Buf gres_info; msg->node_name = xstrdup (conf->node_name); + msg->version = xstrdup (PACKAGE_VERSION); + + msg->cpus = conf->cpus; msg->boards = conf->boards; msg->sockets = conf->sockets;