diff --git a/src/api/config_info.c b/src/api/config_info.c index cf1ac553feb0c3af8720272b5b1f4b486d0cfc78..e63c7286a18d6f56fe807e57b4644e9e3773d59f 100644 --- a/src/api/config_info.c +++ b/src/api/config_info.c @@ -43,7 +43,9 @@ void slurm_print_ctl_conf ( FILE* out, slurm_ctl_conf_info_msg_t * slurm_ctl_con make_time_str ((time_t *)&slurm_ctl_conf_ptr->last_update, time_str); fprintf(out, "Configuration updated at %s\n", time_str); + fprintf(out, "BackupAddr = %s\n", slurm_ctl_conf_ptr->backup_addr); fprintf(out, "BackupController = %s\n", slurm_ctl_conf_ptr->backup_controller); + fprintf(out, "ControlAddr = %s\n", slurm_ctl_conf_ptr->control_addr); fprintf(out, "ControlMachine = %s\n", slurm_ctl_conf_ptr->control_machine); fprintf(out, "Epilog = %s\n", slurm_ctl_conf_ptr->epilog); fprintf(out, "FastSchedule = %u\n", slurm_ctl_conf_ptr->fast_schedule); diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 5b8ac8def1e3d9fb203d0390f8e79f5e50da4641..675fbf58a6880ed8eb1668c9790a9168a905a507 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -87,21 +87,21 @@ slurm_protocol_config_t *slurm_get_api_config() int slurm_api_set_default_config() { - if ((slurmctld_conf.control_machine == NULL) || + if ((slurmctld_conf.control_addr == NULL) || (slurmctld_conf.slurmctld_port == 0)) { read_slurm_port_config(); - if ((slurmctld_conf.control_machine == NULL) || + if ((slurmctld_conf.control_addr == NULL) || (slurmctld_conf.slurmctld_port == 0)) fatal ("Unable to establish control machine or port"); } slurm_set_addr(&proto_conf_default.primary_controller, slurmctld_conf.slurmctld_port, - slurmctld_conf.control_machine); - if (slurmctld_conf.backup_controller) { + slurmctld_conf.control_addr); + if (slurmctld_conf.backup_addr) { slurm_set_addr(&proto_conf_default.secondary_controller, slurmctld_conf.slurmctld_port, - slurmctld_conf.backup_controller); + slurmctld_conf.backup_addr); } proto_conf = &proto_conf_default; @@ -120,8 +120,8 @@ int read_slurm_port_config() { FILE *slurm_spec_file; /* pointer to input data file */ char in_line[BUF_SIZE]; /* input line */ - char *control_machine = NULL; - char *backup_controller = NULL; + char *control_addr = NULL, *control_machine = NULL; + char *backup_addr = NULL, *backup_controller = NULL; int error_code, i, j, line_num = 0; int slurmctld_port = 0, slurmd_port = 0; struct servent *servent; @@ -181,8 +181,10 @@ int read_slurm_port_config() /* parse what is left */ /* overall slurm configuration parameters */ error_code = slurm_parser(in_line, - "ControlMachine=", 's', &control_machine, + "BackupAddr=", 's', &backup_addr, "BackupController=", 's', &backup_controller, + "ControlAddr=", 's', &control_addr, + "ControlMachine=", 's', &control_machine, "SlurmctldPort=", 'd',&slurmctld_port, "SlurmdPort=", 'd', &slurmd_port, "END"); @@ -193,10 +195,25 @@ int read_slurm_port_config() if (slurmctld_conf.control_machine == NULL) slurmctld_conf.control_machine = control_machine; + if (slurmctld_conf.control_addr == NULL) { + if (control_addr) + slurmctld_conf.control_addr = control_addr; + else + slurmctld_conf.control_addr = control_machine; + } + if (slurmctld_conf.backup_controller == NULL) slurmctld_conf.backup_controller = backup_controller; + if (slurmctld_conf.backup_addr == NULL) { + if (backup_addr) + slurmctld_conf.backup_addr = backup_addr; + else + slurmctld_conf.backup_addr = backup_controller; + } + if (slurmctld_port) slurmctld_conf.slurmctld_port = slurmctld_port; + if (slurmd_port) slurmctld_conf.slurmd_port = slurmd_port; } diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index 662d1d29558360bb9e45c9829c55c11ca15f5b3f..27ad29e3017ba867a74ed69766abbfd55ee2915b 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -68,8 +68,12 @@ void slurm_free_return_code_msg(return_code_msg_t * msg) void slurm_free_ctl_conf(slurm_ctl_conf_info_msg_t * build_ptr) { if (build_ptr) { + if (build_ptr->backup_addr) + xfree(build_ptr->backup_addr); if (build_ptr->backup_controller) xfree(build_ptr->backup_controller); + if (build_ptr->control_addr) + xfree(build_ptr->control_addr); if (build_ptr->control_machine) xfree(build_ptr->control_machine); if (build_ptr->epilog) diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index 0d35ed24cccf9eab9feddd22669e7ec3a8dcda22..9bd788339b2b759797781aa96dc5b81db5f73ee9 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -343,7 +343,9 @@ typedef struct partition_info { struct slurm_ctl_conf { uint32_t last_update; /* last update time of the build parameters */ + char *backup_addr; /* comm path of slurmctld secondary server */ char *backup_controller; /* name of slurmctld secondary server */ + char *control_addr; /* comm path of slurmctld primary server */ char *control_machine; /* name of slurmctld primary server */ char *epilog; /* pathname of job epilog */ uint32_t first_job_id; /* first slurm generated job_id to assign */ diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 9693b7a27c3a15529f3d17f25a49a0caa10fa2b7..f07b309cf53fbc90b86b913a632791aad38eaa23 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -1069,7 +1069,9 @@ int unpack_job_info_members ( job_info_t * job , Buf buffer ) void pack_slurm_ctl_conf ( slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer ) { pack_time (build_ptr->last_update, buffer); + packstr (build_ptr->backup_addr, buffer); packstr (build_ptr->backup_controller, buffer); + packstr (build_ptr->control_addr, buffer); packstr (build_ptr->control_machine, buffer); packstr (build_ptr->epilog, buffer); pack16 (build_ptr->fast_schedule, buffer); @@ -1101,7 +1103,9 @@ int unpack_slurm_ctl_conf ( slurm_ctl_conf_info_msg_t **build_buffer_ptr, Buf bu /* load the data values */ /* unpack timestamp of snapshot */ unpack_time (&build_ptr->last_update, buffer); + unpackstr_xmalloc (&build_ptr->backup_addr, &uint16_tmp, buffer); unpackstr_xmalloc (&build_ptr->backup_controller, &uint16_tmp, buffer); + unpackstr_xmalloc (&build_ptr->control_addr, &uint16_tmp, buffer); unpackstr_xmalloc (&build_ptr->control_machine, &uint16_tmp, buffer); unpackstr_xmalloc (&build_ptr->epilog, &uint16_tmp, buffer); unpack16 (&build_ptr->fast_schedule, buffer); diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index 2ebfaa0b35c1d97bb079a718eaf8607613d6aa3c..40a03065a7adb05d57186638de825ef35bd30233 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -1810,7 +1810,9 @@ init_ctld_conf ( slurm_ctl_conf_t * conf_ptr ) struct servent *servent; conf_ptr->last_update = time (NULL) ; + conf_ptr->backup_addr = NULL ; conf_ptr->backup_controller = NULL ; + conf_ptr->control_addr = NULL ; conf_ptr->control_machine = NULL ; conf_ptr->epilog = NULL ; conf_ptr->fast_schedule = 1 ; @@ -1848,7 +1850,9 @@ void fill_ctld_conf ( slurm_ctl_conf_t * conf_ptr ) { conf_ptr->last_update = slurmctld_conf.last_update ; + conf_ptr->backup_addr = slurmctld_conf.backup_addr ; conf_ptr->backup_controller = slurmctld_conf.backup_controller ; + conf_ptr->control_addr = slurmctld_conf.control_addr ; conf_ptr->control_machine = slurmctld_conf.control_machine ; conf_ptr->epilog = slurmctld_conf.epilog ; conf_ptr->fast_schedule = slurmctld_conf.fast_schedule ; diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index ba112aec9e0af7df76a06486e9844e983f172b08..8426ea0ac94c5c678feb3fe9217c55a70072386f 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -886,7 +886,8 @@ set_slurmd_addr (void) if (strlen (node_record_table_ptr[i].name) == 0) continue; slurm_set_addr (& node_record_table_ptr[i].slurm_addr, - slurmctld_conf.slurmd_port, node_record_table_ptr[i].name); + slurmctld_conf.slurmd_port, + node_record_table_ptr[i].comm_name); } return; diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index f8a318fe60d8c917ef9884956062eae9521242f8..397314dd625ef1df4eb88d88be0e662ac18e9973 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -43,6 +43,7 @@ #include <src/common/list.h> #include <src/common/macros.h> #include <src/common/parse_spec.h> +#include <src/common/xstring.h> #include <src/slurmctld/locks.h> #include <src/slurmctld/slurmctld.h> @@ -248,7 +249,8 @@ parse_config_spec (char *in_line) int fast_schedule = -1, hash_base = -1, heartbeat_interval = -1; int inactive_limit = -1, kill_wait = -1; int ret2service = -1, slurmctld_timeout = -1, slurmd_timeout = -1; - char *backup_controller = NULL, *control_machine = NULL, *epilog = NULL; + char *backup_addr = NULL, *backup_controller = NULL; + char *control_addr = NULL, *control_machine = NULL, *epilog = NULL; char *prioritize = NULL, *prolog = NULL, *state_save_location = NULL, *tmp_fs = NULL; char *slurmctld_port = NULL, *slurmd_port = NULL; char *job_credential_private_key = NULL , *job_credential_public_certificate = NULL; @@ -257,7 +259,9 @@ parse_config_spec (char *in_line) struct stat sbuf; error_code = slurm_parser(in_line, + "BackupAddr=", 's', &backup_addr, "BackupController=", 's', &backup_controller, + "ControlAddr=", 's', &control_addr, "ControlMachine=", 's', &control_machine, "Epilog=", 's', &epilog, "FastSchedule=", 'd', &fast_schedule, @@ -281,12 +285,32 @@ parse_config_spec (char *in_line) if (error_code) return error_code; + if ( backup_addr ) { + if ( slurmctld_conf.backup_addr ) + xfree (slurmctld_conf.backup_addr); + slurmctld_conf.backup_addr = backup_addr; + } else if ( backup_controller ) { + if ( slurmctld_conf.backup_addr ) + xfree (slurmctld_conf.backup_addr); + slurmctld_conf.backup_addr = xstrdup (backup_controller); + } + if ( backup_controller ) { if ( slurmctld_conf.backup_controller ) xfree (slurmctld_conf.backup_controller); slurmctld_conf.backup_controller = backup_controller; } + if ( control_addr ) { + if ( slurmctld_conf.control_addr ) + xfree (slurmctld_conf.control_addr); + slurmctld_conf.control_addr = control_addr; + } else if ( control_machine ) { + if ( slurmctld_conf.control_addr ) + xfree (slurmctld_conf.control_addr); + slurmctld_conf.control_addr = xstrdup (control_machine); + } + if ( control_machine ) { if ( slurmctld_conf.control_machine ) xfree (slurmctld_conf.control_machine); @@ -397,14 +421,15 @@ parse_config_spec (char *in_line) */ static int parse_node_spec (char *in_line) { - char *node_name, *state, *feature, *this_node_name; + char *node_addr, *node_name, *state, *feature; + char *this_node_addr , *this_node_name; int error_code, first, i; int state_val, cpus_val, real_memory_val, tmp_disk_val, weight_val; struct node_record *node_record_point; struct config_record *config_point = NULL; - hostlist_t host_list = NULL; + hostlist_t addr_list = NULL, host_list = NULL; - node_name = state = feature = (char *) NULL; + node_addr = node_name = state = feature = (char *) NULL; cpus_val = real_memory_val = state_val = NO_VAL; tmp_disk_val = weight_val = NO_VAL; if ((error_code = load_string (&node_name, "NodeName=", in_line))) @@ -413,8 +438,9 @@ parse_node_spec (char *in_line) { return 0; /* no node info */ error_code = slurm_parser(in_line, - "Procs=", 'd', &cpus_val, "Feature=", 's', &feature, + "NodeAddr=", 's', &node_addr, + "Procs=", 'd', &cpus_val, "RealMemory=", 'd', &real_memory_val, "State=", 's', &state, "TmpDisk=", 'd', &tmp_disk_val, @@ -442,8 +468,15 @@ parse_node_spec (char *in_line) { } } + if ( node_addr && + ((addr_list = hostlist_create (node_addr)) == NULL)) { + error ("hostlist_create error for %s: %m", node_addr); + error_code = errno; + goto cleanup; + } + if ( (host_list = hostlist_create (node_name)) == NULL) { - error ("hostlist_create error for %s, %m", node_name); + error ("hostlist_create error for %s: %m", node_name); error_code = errno; goto cleanup; } @@ -513,6 +546,18 @@ parse_node_spec (char *in_line) { (state_val != NODE_STATE_UNKNOWN)) node_record_point->node_state = state_val; node_record_point->last_response = time (NULL); + if (node_addr) + this_node_addr = hostlist_shift (addr_list); + else + this_node_addr = NULL; + if (this_node_addr) { + strncpy (node_record_point->comm_name, + this_node_addr, MAX_NAME_LEN); + free (this_node_addr); + } else { + strncpy (node_record_point->comm_name, + node_record_point->name, MAX_NAME_LEN); + } } else { error ("parse_node_spec: reconfiguration for node %s ignored.", @@ -524,6 +569,8 @@ parse_node_spec (char *in_line) { /* xfree allocated storage */ if (state) xfree(state); + if (addr_list) + hostlist_destroy (addr_list); hostlist_destroy (host_list); return error_code; diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 75aa952d163900b81e846645d6b9e57254ca30e1..a7dd85ead9eabb6142791d1509d8c55324182d29 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -123,6 +123,7 @@ struct node_record { uint32_t tmp_disk; /* actual megabytes of total disk in TMP_FS */ struct config_record *config_ptr; /* configuration specification for this node */ struct part_record *partition_ptr; /* partition for this node */ + char comm_name[MAX_NAME_LEN]; /* communications path name of the node */ struct sockaddr_in slurm_addr; /* network address */ }; extern struct node_record *node_record_table_ptr; /* location of the node records */