diff --git a/NEWS b/NEWS index d4b2cbbf15be0b6327250384ce8427a496f1f6b0..5315e8f68255aaa625b052e1b2f04ad6b64ebad5 100644 --- a/NEWS +++ b/NEWS @@ -47,6 +47,8 @@ documents those changes that are of interest to users and administrators. -- Avoid buffer overflow in time_str2secs. -- Calculate suspended time for suspended steps. -- Add null check for step_ptr->step_node_bitmap in _pick_step_nodes. + -- Fix multi-cluster srun issue after 'scontrol reconfigure' was called. + -- Fix accessing response_cluster_rec outside of write locks. * Changes in Slurm 18.08.5-2 ============================ diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index ce51525192aee533adc8edda410b6125c79aa8c9..c84c1d2effe1abf54b969bbb89e39c08d076377f 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -17884,7 +17884,6 @@ extern void update_job_fed_details(struct job_record *job_ptr) fed_mgr_get_cluster_id(job_ptr->job_id)); } - /* * Set the allocation response with the current cluster's information and the * job's allocated node's addr's if the allocation is being filled by a cluster @@ -17912,23 +17911,6 @@ set_remote_working_response(resource_allocation_response_msg_t *resp, fed_mgr_cluster_rec) { resp->working_cluster_rec = fed_mgr_cluster_rec; } else { - if (!response_cluster_rec) { - response_cluster_rec = - xmalloc(sizeof(slurmdb_cluster_rec_t)); - response_cluster_rec->name = - xstrdup(slurmctld_conf.cluster_name); - if (slurmctld_conf.slurmctld_addr) { - response_cluster_rec->control_host = - slurmctld_conf.slurmctld_addr; - } else { - response_cluster_rec->control_host = - slurmctld_conf.control_addr[0]; - } - response_cluster_rec->control_port = - slurmctld_conf.slurmctld_port; - response_cluster_rec->rpc_version = - SLURM_PROTOCOL_VERSION; - } resp->working_cluster_rec = response_cluster_rec; } diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index 
20c2e9c8d8ce207ea65713833860becebfd5a8f9..41807dbdbcec9e72ec001f50c537a101b67327c1 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -142,6 +142,27 @@ static void _sync_part_prio(void); static int _update_preempt(uint16_t old_enable_preempt); +/* + * Set up the global response_cluster_rec once; strings are xstrdup'd copies. + */ +static void _set_response_cluster_rec(void) +{ + if (response_cluster_rec) + return; + + response_cluster_rec = xmalloc(sizeof(slurmdb_cluster_rec_t)); + response_cluster_rec->name = xstrdup(slurmctld_conf.cluster_name); + if (slurmctld_conf.slurmctld_addr) { + response_cluster_rec->control_host = + xstrdup(slurmctld_conf.slurmctld_addr); + } else { + response_cluster_rec->control_host = + xstrdup(slurmctld_conf.control_addr[0]); + } + response_cluster_rec->control_port = slurmctld_conf.slurmctld_port; + response_cluster_rec->rpc_version = SLURM_PROTOCOL_VERSION; +} + /* Verify that Slurm directories are secure, not world writable */ static void _stat_slurm_dirs(void) { @@ -1467,6 +1488,8 @@ int read_slurm_conf(int recover, bool reconfig) if (reconfig && (slurm_mcs_reconfig() != SLURM_SUCCESS)) fatal("Failed to reconfigure mcs plugin"); + _set_response_cluster_rec(); + slurmctld_conf.last_update = time(NULL); END_TIMER2("read_slurm_conf"); return error_code;