From 10d7b272200a87337cd68f988f788271472d0d08 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Thu, 4 Mar 2004 19:12:55 +0000
Subject: [PATCH] Added some missing read locks in slurmctld's references to
 its configuration data structure. This eliminates risks associated with
 re-reading slurm.conf.

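The pattern applied throughout is to take a read lock on the
configuration portion of the slurmctld locks before dereferencing
slurmctld_conf, copy out whatever values are needed, then release the
lock so a concurrent reconfiguration (SIGHUP or the reconfigure RPC)
cannot change the data mid-use. A minimal sketch of that pattern
(illustrative only, not a hunk from this patch; it reuses the
slurmctld_lock_t interface exactly as the hunks below do):

    /* Locks: Read config */
    slurmctld_lock_t config_read_lock =
        { READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
    uint16_t port;

    lock_slurmctld(config_read_lock);
    port = slurmctld_conf.slurmctld_port;   /* copy while locked */
    unlock_slurmctld(config_read_lock);
    /* use the local copy; avoid holding the lock across blocking calls */
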
---
 NEWS                          |   2 +
 src/slurmctld/agent.c         |   4 +-
 src/slurmctld/controller.c    |  62 +++++++++------
 src/slurmctld/job_mgr.c       |  27 +++++--
 src/slurmctld/job_scheduler.c |   4 +-
 src/slurmctld/node_mgr.c      |  23 ++++--
 src/slurmctld/partition_mgr.c |   5 +-
 src/slurmctld/proc_req.c      | 139 +++++++++++++++++++++++-----------
 src/slurmctld/read_config.c   |   2 +
 src/slurmctld/sched_upcalls.c |  30 +++++++-
 10 files changed, 209 insertions(+), 89 deletions(-)

diff --git a/NEWS b/NEWS
index dd8fc4ee6f9..264375298a1 100644
--- a/NEWS
+++ b/NEWS
@@ -29,6 +29,8 @@ documents those changes that are of interest to users and admins.
  -- Added -q,--quit-on-interrupt option to srun.
  -- Elan switch plugin now starts neterr resolver thread on all Elan3
     systems (QsNet and QsNetII).
+ -- Added some missing read locks for references to slurmctld's
+    configuration data structure.
 
 * Changes in SLURM 0.3.0.0-pre6
 ===============================
diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c
index 94583e6843e..5dda5b4968a 100644
--- a/src/slurmctld/agent.c
+++ b/src/slurmctld/agent.c
@@ -451,9 +451,9 @@ static void _notify_slurmctld_nodes(agent_info_t *agent_ptr,
 		int no_resp_cnt, int retry_cnt)
 {
 #if AGENT_IS_THREAD
-	/* Locks: Write job and write node */
+	/* Locks: Read config, write job, write node */
 	slurmctld_lock_t node_write_lock =
-	    { NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };
+	    { READ_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };
 #endif
 	thd_t *thread_ptr = agent_ptr->thread_struct;
 	int i;
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 5eeb586d389..f199f76eda3 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -365,15 +365,20 @@ static void *_slurmctld_signal_hand(void *no_data)
 	int sig;
 	int error_code;
 	sigset_t set;
+	/* Locks: Read configuration */
+	slurmctld_lock_t config_read_lock = { 
+		READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
 	/* Locks: Write configuration, job, node, and partition */
-	slurmctld_lock_t config_write_lock = { WRITE_LOCK, WRITE_LOCK,
-		WRITE_LOCK, WRITE_LOCK
-	};
+	slurmctld_lock_t config_write_lock = { 
+		WRITE_LOCK, WRITE_LOCK, WRITE_LOCK, WRITE_LOCK };
 
 	(void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
 	(void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
 
+	lock_slurmctld(config_read_lock);
 	create_pidfile(slurmctld_conf.slurmctld_pidfile);
+	unlock_slurmctld(config_read_lock);
+
 	/* Make sure no required signals are ignored (possibly inherited) */
 	_default_sigaction(SIGINT);
 	_default_sigaction(SIGTERM);
@@ -401,13 +406,13 @@ static void *_slurmctld_signal_hand(void *no_data)
 			 */
 			lock_slurmctld(config_write_lock);
 			error_code = read_slurm_conf(0);
-			unlock_slurmctld(config_write_lock);
 			if (error_code)
 				error("read_slurm_conf: %s",
 				      slurm_strerror(error_code));
 			else {
 				_update_cred_key();
 			}
+			unlock_slurmctld(config_write_lock);
 			break;
 		case SIGABRT:	/* abort */
 			info("SIGABRT received");
@@ -448,6 +453,9 @@ static void *_slurmctld_rpc_mgr(void *no_data)
 	pthread_attr_t thread_attr_rpc_req;
 	int no_thread;
 	connection_arg_t *conn_arg;
+	/* Locks: Read config */
+	slurmctld_lock_t config_read_lock = { 
+		READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
 
 	(void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
 	(void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
@@ -467,10 +475,12 @@ static void *_slurmctld_rpc_mgr(void *no_data)
 #endif
 
 	/* initialize port for RPCs */
+	lock_slurmctld(config_read_lock);
 	if ((sockfd = slurm_init_msg_engine_port(slurmctld_conf.
 						 slurmctld_port))
 	    == SLURM_SOCKET_ERROR)
 		fatal("slurm_init_msg_engine_port error %m");
+	unlock_slurmctld(config_read_lock);
 
 	/*
 	 * Process incoming RPCs indefinitely
@@ -604,18 +614,18 @@ static void *_slurmctld_background(void *no_data)
 	static time_t last_assert_primary_time;
 	time_t now;
 
-	/* Locks: Read job */
-	slurmctld_lock_t job_read_lock = { NO_LOCK, READ_LOCK,
-		NO_LOCK, NO_LOCK
-	};
-	/* Locks: Write job, write node, read partition */
-	slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK,
-		WRITE_LOCK, READ_LOCK
-	};
-	/* Locks: Write node */
-	slurmctld_lock_t node_write_lock = { NO_LOCK, NO_LOCK,
-		WRITE_LOCK, NO_LOCK
-	};
+	/* Locks: Read config */
+	slurmctld_lock_t config_read_lock = { 
+		READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
+	/* Locks: Read config, read job */
+	slurmctld_lock_t job_read_lock = { 
+		READ_LOCK, READ_LOCK, NO_LOCK, NO_LOCK };
+	/* Locks: Read config, write job, write node, read partition */
+	slurmctld_lock_t job_write_lock = { 
+		READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };
+	/* Locks: Read config, write node */
+	slurmctld_lock_t node_write_lock = { 
+		READ_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK };
 	/* Locks: Write partition */
 	slurmctld_lock_t part_write_lock = { 
 		NO_LOCK, NO_LOCK, NO_LOCK, WRITE_LOCK };
@@ -713,6 +723,7 @@ static void *_slurmctld_background(void *no_data)
 		 * A network or security problem could result in 
 		 * the backup controller assuming control even 
 		 * while the real primary controller is running */
+		lock_slurmctld(config_read_lock);
 		if (slurmctld_conf.slurmctld_timeout   &&
 		    slurmctld_conf.backup_addr         &&
 		    slurmctld_conf.backup_addr[0]      &&
@@ -723,6 +734,7 @@ static void *_slurmctld_background(void *no_data)
 			last_assert_primary_time = now;
 			(void) _shutdown_backup_controller(0);
 		}
+		unlock_slurmctld(config_read_lock);
 
 	}
 	debug3("_slurmctld_background shutting down");
@@ -918,7 +930,8 @@ static void _usage(char *prog_name)
  * Tell the backup_controller to relinquish control, primary control_machine 
  *	has resumed operation
  * wait_time - How long to wait for backup controller to write state, seconds
- * RET 0 or an error code 
+ * RET 0 or an error code
+ * NOTE: READ lock_slurmctld config before entry (or be single-threaded)
  */
 static int _shutdown_backup_controller(int wait_time)
 {
@@ -926,7 +939,7 @@ static int _shutdown_backup_controller(int wait_time)
 	slurm_msg_t req;
 
 	if ((slurmctld_conf.backup_addr == NULL) ||
-	    (strlen(slurmctld_conf.backup_addr) == 0)) {
+	    (slurmctld_conf.backup_addr[0] == '\0')) {
 		debug("No backup controller to shutdown");
 		return SLURM_SUCCESS;
 	}
@@ -960,7 +973,8 @@ static int _shutdown_backup_controller(int wait_time)
 	return SLURM_SUCCESS;
 }
 
-/* Reset the job credential key based upon configuration parameters */
+/* Reset the job credential key based upon configuration parameters
+ * NOTE: READ lock_slurmctld config before entry */
 static void _update_cred_key(void) 
 {
 	slurm_cred_ctx_key_update(slurmctld_config.cred_ctx, 
@@ -968,7 +982,8 @@ static void _update_cred_key(void)
 }
 
 /* Reset slurmctld logging based upon configuration parameters
- * uses common slurmctld_conf data structure */
+ *   uses common slurmctld_conf data structure
+ * NOTE: READ lock_slurmctld config before entry */
 void update_logging(void) 
 {
 	/* Preserve execute line arguments (if any) */
@@ -997,7 +1012,8 @@ void update_logging(void)
 		  slurmctld_conf.slurmctld_logfile);
 }
 
-/* Kill the currently running slurmctld */
+/* Kill the currently running slurmctld
+ * NOTE: No need to lock the config data since we are still single-threaded */
 static void
 _kill_old_slurmctld(void)
 {
@@ -1016,6 +1032,7 @@ _kill_old_slurmctld(void)
 	}
 }
 
+/* NOTE: No need to lock the config data since we are still single-threaded */
 static void
 _init_pidfile(void)
 {
@@ -1035,7 +1052,8 @@ _init_pidfile(void)
 }
 
 /*
- * create state directory as needed and "cd" to it 
+ * create state directory as needed and "cd" to it
+ * NOTE: No need to lock the config data since we are still single-threaded 
  */
 static int
 _set_slurmctld_state_loc(void)
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 6bde5c217f4..6c55070a938 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -257,7 +257,8 @@ int dump_all_job_state(void)
 		xassert (job_ptr->magic == JOB_MAGIC);
 		_dump_job_state(job_ptr, buffer);
 	}
-	unlock_slurmctld(job_read_lock);
+	/* Maintain config lock until we get the state_save_location *\
+	\* unlock_slurmctld(job_read_lock);         - see below      */
 	list_iterator_destroy(job_iterator);
 
 	/* write the buffer to file */
@@ -267,6 +268,8 @@ int dump_all_job_state(void)
 	xstrcat(reg_file, "/job_state");
 	new_file = xstrdup(slurmctld_conf.state_save_location);
 	xstrcat(new_file, "/job_state.new");
+	unlock_slurmctld(job_read_lock);
+
 	lock_state_files();
 	log_fd = creat(new_file, 0600);
 	if (log_fd == 0) {
@@ -1105,7 +1108,9 @@ int init_job_conf(void)
 }
 
 /* rehash_jobs - Create or rebuild the job rehash table. Actually for now we 
- * just preserve it */
+ * just preserve it
+ * NOTE: run lock_slurmctld before entry: Read config, write job
+ */
 void rehash_jobs(void)
 {
 	if (job_hash == NULL) {
@@ -1150,7 +1155,8 @@ void rehash_jobs(void)
  *	and cpu_count_reps={4,2,2}
  * globals: job_list - pointer to global job list 
  *	list_part - global list of partition info
- *	default_part_loc - pointer to default partition 
+ *	default_part_loc - pointer to default partition
+ * NOTE: lock_slurmctld on entry: Read config, Write job, Write node, Read part
  */
 int job_allocate(job_desc_msg_t * job_specs, uint32_t * new_job_id,
 		 char **node_list, uint16_t * num_cpu_groups,
@@ -1800,6 +1806,7 @@ static int _write_data_to_file(char *file_name, char *data)
  * IN job_ptr - pointer to job for which data is required
  * OUT env_size - number of elements to read
  * RET point to array of string pointers containing environment variables
+ * NOTE: READ lock_slurmctld config before entry
  */
 char **get_job_env(struct job_record *job_ptr, uint16_t * env_size)
 {
@@ -1819,6 +1826,7 @@ char **get_job_env(struct job_record *job_ptr, uint16_t * env_size)
  * get_job_script - return the script for a given job
  * IN job_ptr - pointer to job for which data is required
  * RET point to string containing job script
+ * NOTE: READ lock_slurmctld config before entry
  */
 char *get_job_script(struct job_record *job_ptr)
 {
@@ -2070,6 +2078,7 @@ static char *_copy_nodelist_no_dup(char *node_list)
  * job_time_limit - terminate jobs which have exceeded their time limit
  * global: job_list - pointer global job list
  *	last_job_update - time of last job table update
+ * NOTE: READ lock_slurmctld config before entry
  */
 void job_time_limit(void)
 {
@@ -2453,6 +2462,7 @@ static void _pack_job_details(struct job_details *detail_ptr, Buf buffer)
  *	the jobs must have completed at least MIN_JOB_AGE minutes ago
  * global: job_list - global job table
  *	last_job_update - time of last job table update
+ * NOTE: READ lock_slurmctld config before entry
  */
 void purge_old_job(void)
 {
@@ -2594,7 +2604,8 @@ static void _reset_step_bitmaps(struct job_record *job_ptr)
 	return;
 }
 
-/* update first assigned job id as needed on reconfigure */
+/* update first assigned job id as needed on reconfigure
+ * NOTE: READ lock_slurmctld config before entry */
 void reset_first_job_id(void)
 {
 	if (job_id_sequence < slurmctld_conf.first_job_id)
@@ -3150,6 +3161,7 @@ old_job_info(uint32_t uid, uint32_t job_id, char **node_list,
  * Synchronize the batch job in the system with their files.
  * All pending batch jobs must have script and environment files
  * No other jobs should have such files
+ * NOTE: READ lock_slurmctld config before entry
  */
 int sync_job_files(void)
 {
@@ -3164,7 +3176,9 @@ int sync_job_files(void)
 }
 
 /* Append to the batch_dirs list the job_id's associated with 
- *	every batch job directory in existence */
+ *	every batch job directory in existence
+ * NOTE: READ lock_slurmctld config before entry
+ */
 static void _get_batch_job_dir_ids(List batch_dirs)
 {
 	DIR *f_dir;
@@ -3239,7 +3253,8 @@ static void _del_batch_list_rec(void *x)
 	xfree(x);
 }
 
-/* Remove all batch_dir entries in the list */
+/* Remove all batch_dir entries in the list
+ * NOTE: READ lock_slurmctld config before entry */
 static void _remove_defunct_batch_dirs(List batch_dirs)
 {
 	ListIterator batch_dir_inx;
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index d794b02b057..b45d5263064 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -141,9 +141,9 @@ int schedule(void)
 	int i, j, error_code, failed_part_cnt, job_queue_size, job_cnt = 0;
 	struct job_record *job_ptr;
 	struct part_record **failed_parts;
-	/* Locks: Write job, write node, read partition */
+	/* Locks: Read config, write job, write node, read partition */
 	slurmctld_lock_t job_write_lock =
-	    { NO_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };
+	    { READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };
 
 	lock_slurmctld(job_write_lock);
 	/* Avoid resource fragmentation if important */
diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c
index 4e5dad52440..e2d41bb0e75 100644
--- a/src/slurmctld/node_mgr.c
+++ b/src/slurmctld/node_mgr.c
@@ -393,6 +393,7 @@ _dump_node_state (struct node_record *dump_node_ptr, Buf buffer)
  *	Data goes into common storage.
  * IN state_only - if true over-write only node state and reason fields
  * RET 0 or error code
+ * NOTE: READ lock_slurmctld config before entry
  */
 extern int load_all_node_state ( bool state_only )
 {
@@ -547,7 +548,8 @@ find_node_record (char *name)
  * _hash_index - return a hash table index for the given node name 
  * IN name = the node's name
  * RET the hash table index
- * global: slurmctld_conf.hash_base - numbering base for sequence numbers
+ * global: slurmctld_conf.hash_base - numbering base for sequence numbers;
+ * 		if not set (while being updated), a sequential search is used
  */
 static int _hash_index (char *name) 
 {
@@ -765,6 +767,7 @@ extern int node_name2bitmap (char *node_names, bool best_effort,
  * global: node_record_table_ptr - pointer to global node table
  * NOTE: the caller must xfree the buffer at *buffer_ptr
  * NOTE: change slurm_load_node() in api/node_info.c when data format changes
+ * NOTE: READ lock_slurmctld config before entry
  */
 void pack_all_node (char **buffer_ptr, int *buffer_size) 
 {
@@ -810,6 +813,7 @@ void pack_all_node (char **buffer_ptr, int *buffer_size)
  * IN/OUT buffer - buffer where data is placed, pointers automatically updated
  * NOTE: if you make any changes here be sure to make the corresponding 
  *	changes to load_node_config in api/node_info.c
+ * NOTE: READ lock_slurmctld config before entry
  */
 static void _pack_node (struct node_record *dump_node_ptr, Buf buffer) 
 {
@@ -863,14 +867,17 @@ void rehash_node (void)
 }
 
 
-/* set_slurmd_addr - establish the slurm_addr for the slurmd on each node
- *	Uses common data structures. */
+/*
+ * set_slurmd_addr - establish the slurm_addr for the slurmd on each node
+ *	Uses common data structures.
+ * NOTE: READ lock_slurmctld config before entry
+ */
 void set_slurmd_addr (void) 
 {
 	int i;
 
 	for (i = 0; i < node_record_count; i++) {
-		if (strlen (node_record_table_ptr[i].name) == 0)
+		if (node_record_table_ptr[i].name[0] == '\0')
 			continue;
 		slurm_set_addr (& node_record_table_ptr[i].slurm_addr, 
 				slurmctld_conf.slurmd_port, 
@@ -1095,6 +1102,7 @@ static bool _valid_node_state_change(enum node_states old, enum node_states new)
  * IN status - node status code
  * RET 0 if no error, ENOENT if no such node, EINVAL if values too low
  * global: node_record_table_ptr - pointer to global node table
+ * NOTE: READ lock_slurmctld config before entry
  */
 int 
 validate_node_specs (char *node_name, uint32_t cpus, 
@@ -1231,8 +1239,11 @@ validate_node_specs (char *node_name, uint32_t cpus,
 	return error_code;
 }
 
-/* node_did_resp - record that the specified node is responding
- * IN name - name of the node */
+/*
+ * node_did_resp - record that the specified node is responding
+ * IN name - name of the node
+ * NOTE: READ lock_slurmctld config before entry
+ */ 
 void node_did_resp (char *name)
 {
 	struct node_record *node_ptr;
diff --git a/src/slurmctld/partition_mgr.c b/src/slurmctld/partition_mgr.c
index 230184e96be..78ba558b314 100644
--- a/src/slurmctld/partition_mgr.c
+++ b/src/slurmctld/partition_mgr.c
@@ -252,7 +252,8 @@ int dump_all_part_state(void)
 		_dump_part_state(part_ptr, buffer);
 	}
 	list_iterator_destroy(part_iterator);
-	unlock_slurmctld(part_read_lock);
+	/* Maintain config read lock until we copy state_save_location *\
+	\* unlock_slurmctld(part_read_lock);          - see below      */
 
 	/* write the buffer to file */
 	old_file = xstrdup(slurmctld_conf.state_save_location);
@@ -261,6 +262,7 @@ int dump_all_part_state(void)
 	xstrcat(reg_file, "/part_state");
 	new_file = xstrdup(slurmctld_conf.state_save_location);
 	xstrcat(new_file, "/part_state.new");
+	unlock_slurmctld(part_read_lock);
 	lock_state_files();
 	log_fd = creat(new_file, 0600);
 	if (log_fd == 0) {
@@ -337,6 +339,7 @@ static void _dump_part_state(struct part_record *part_ptr, Buf buffer)
  * load_all_part_state - load the partition state from file, recover on 
  *	slurmctld restart. execute this after loading the configuration 
  *	file data.
+ * NOTE: READ lock_slurmctld config before entry
  */
 int load_all_part_state(void)
 {
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index 707c9d9a185..8be85cc2fe3 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -61,6 +61,7 @@
 #define BUF_SIZE	  1024	/* Temporary buffer size */
 
 static void         _fill_ctld_conf(slurm_ctl_conf_t * build_ptr);
+static void         _free_ctld_conf(slurm_ctl_conf_t * build_ptr);
 static inline bool 	_is_super_user(uid_t uid);
 static void         _kill_job_on_msg_fail(uint32_t job_id);
 static int          _make_step_cred(struct step_record *step_rec, 
@@ -214,7 +215,8 @@ void slurmctld_req (slurm_msg_t * msg)
 		slurm_free_delete_part_msg(msg->data);
 		break;
 	case REQUEST_NODE_REGISTRATION_STATUS:
-		error("slurmctld is talking with itself. SlurmctldPort == SlurmdPort");
+		error("slurmctld is talking with itself. "
+			"SlurmctldPort == SlurmdPort");
 		slurm_send_rc_msg(msg, EINVAL);
 		break;
 	default:
@@ -232,56 +234,99 @@ void slurmctld_req (slurm_msg_t * msg)
 void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr)
 {
 	conf_ptr->last_update         = time(NULL);
-	conf_ptr->authtype            = slurmctld_conf.authtype;
-	conf_ptr->backup_addr         = slurmctld_conf.backup_addr;
-	conf_ptr->backup_controller   = slurmctld_conf.backup_controller;
-	conf_ptr->control_addr        = slurmctld_conf.control_addr;
-	conf_ptr->control_machine     = slurmctld_conf.control_machine;
-	conf_ptr->epilog              = slurmctld_conf.epilog;
+	conf_ptr->authtype            = xstrdup(slurmctld_conf.authtype);
+	conf_ptr->backup_addr         = xstrdup(slurmctld_conf.backup_addr);
+	conf_ptr->backup_controller   = xstrdup(slurmctld_conf.
+					backup_controller);
+	conf_ptr->control_addr        = xstrdup(slurmctld_conf.control_addr);
+	conf_ptr->control_machine     = xstrdup(slurmctld_conf.
+					control_machine);
+	conf_ptr->epilog              = xstrdup(slurmctld_conf.epilog);
 	conf_ptr->fast_schedule       = slurmctld_conf.fast_schedule;
 	conf_ptr->first_job_id        = slurmctld_conf.first_job_id;
 	conf_ptr->hash_base           = slurmctld_conf.hash_base;
 	conf_ptr->heartbeat_interval  = slurmctld_conf.heartbeat_interval;
 	conf_ptr->inactive_limit      = slurmctld_conf.inactive_limit;
-	conf_ptr->job_comp_loc        = slurmctld_conf.job_comp_loc;
-	conf_ptr->job_comp_type       = slurmctld_conf.job_comp_type;
-	conf_ptr->job_credential_private_key = 
-			slurmctld_conf.job_credential_private_key;
-	conf_ptr->job_credential_public_certificate = 
-			slurmctld_conf.job_credential_public_certificate;
+	conf_ptr->job_comp_loc        = xstrdup(slurmctld_conf.job_comp_loc);
+	conf_ptr->job_comp_type       = xstrdup(slurmctld_conf.
+					job_comp_type);
+	conf_ptr->job_credential_private_key = xstrdup(slurmctld_conf.
+					job_credential_private_key);
+	conf_ptr->job_credential_public_certificate = xstrdup(slurmctld_conf.
+					job_credential_public_certificate);
 	conf_ptr->kill_wait           = slurmctld_conf.kill_wait;
 	conf_ptr->max_job_cnt         = slurmctld_conf.max_job_cnt;
 	conf_ptr->min_job_age         = slurmctld_conf.min_job_age;
-	conf_ptr->plugindir           = slurmctld_conf.plugindir;
-	conf_ptr->prolog              = slurmctld_conf.prolog;
+	conf_ptr->plugindir           = xstrdup(slurmctld_conf.plugindir);
+	conf_ptr->prolog              = xstrdup(slurmctld_conf.prolog);
 	conf_ptr->ret2service         = slurmctld_conf.ret2service;
-	conf_ptr->schedauth           = slurmctld_conf.schedauth;
+	conf_ptr->schedauth           = xstrdup(slurmctld_conf.schedauth);
 	conf_ptr->schedport           = slurmctld_conf.schedport;
-	conf_ptr->schedtype           = slurmctld_conf.schedtype;
+	conf_ptr->schedtype           = xstrdup(slurmctld_conf.schedtype);
 	conf_ptr->slurm_user_id       = slurmctld_conf.slurm_user_id;
-	conf_ptr->slurm_user_name     = slurmctld_conf.slurm_user_name;
+	conf_ptr->slurm_user_name     = xstrdup(slurmctld_conf.
+					slurm_user_name);
 	conf_ptr->slurmctld_debug     = slurmctld_conf.slurmctld_debug;
-	conf_ptr->slurmctld_logfile   = slurmctld_conf.slurmctld_logfile;
-	conf_ptr->slurmctld_pidfile   = slurmctld_conf.slurmctld_pidfile;
+	conf_ptr->slurmctld_logfile   = xstrdup(slurmctld_conf.
+					slurmctld_logfile);
+	conf_ptr->slurmctld_pidfile   = xstrdup(slurmctld_conf.
+					slurmctld_pidfile);
 	conf_ptr->slurmctld_port      = slurmctld_conf.slurmctld_port;
 	conf_ptr->slurmctld_timeout   = slurmctld_conf.slurmctld_timeout;
 	conf_ptr->slurmd_debug        = slurmctld_conf.slurmd_debug;
-	conf_ptr->slurmd_logfile      = slurmctld_conf.slurmd_logfile;
-	conf_ptr->slurmd_pidfile      = slurmctld_conf.slurmd_pidfile;
+	conf_ptr->slurmd_logfile      = xstrdup(slurmctld_conf.
+					slurmd_logfile);
+	conf_ptr->slurmd_pidfile      = xstrdup(slurmctld_conf.
+					slurmd_pidfile);
 	conf_ptr->slurmd_port         = slurmctld_conf.slurmd_port;
-	conf_ptr->slurmd_spooldir     = slurmctld_conf.slurmd_spooldir;
+	conf_ptr->slurmd_spooldir     = xstrdup(slurmctld_conf.
+					slurmd_spooldir);
 	conf_ptr->slurmd_timeout      = slurmctld_conf.slurmd_timeout;
-	conf_ptr->slurm_conf          = slurmctld_conf.slurm_conf;
-	conf_ptr->state_save_location = slurmctld_conf.state_save_location;
-	conf_ptr->switch_type         = slurmctld_conf.switch_type;
-	conf_ptr->tmp_fs              = slurmctld_conf.tmp_fs;
+	conf_ptr->slurm_conf          = xstrdup(slurmctld_conf.slurm_conf);
+	conf_ptr->state_save_location = xstrdup(slurmctld_conf.
+					state_save_location);
+	conf_ptr->switch_type         = xstrdup(slurmctld_conf.switch_type);
+	conf_ptr->tmp_fs              = xstrdup(slurmctld_conf.tmp_fs);
 	conf_ptr->wait_time           = slurmctld_conf.wait_time;
 	return;
 }
 
+/* _free_ctld_conf - free memory allocated by _fill_ctld_conf */
+static void _free_ctld_conf(slurm_ctl_conf_t * conf_ptr)
+{
+	xfree(conf_ptr->authtype);
+	xfree(conf_ptr->backup_addr);
+	xfree(conf_ptr->backup_controller);
+	xfree(conf_ptr->control_addr);
+	xfree(conf_ptr->control_machine);
+	xfree(conf_ptr->epilog);
+	xfree(conf_ptr->job_comp_loc);
+	xfree(conf_ptr->job_comp_type);
+	xfree(conf_ptr->job_credential_private_key);
+	xfree(conf_ptr->job_credential_public_certificate);
+	xfree(conf_ptr->plugindir);
+	xfree(conf_ptr->prolog);
+	xfree(conf_ptr->schedauth);
+	xfree(conf_ptr->schedtype);
+	xfree(conf_ptr->slurm_user_name);
+	xfree(conf_ptr->slurmctld_logfile);
+	xfree(conf_ptr->slurmctld_pidfile);
+	xfree(conf_ptr->slurmd_logfile);
+	xfree(conf_ptr->slurmd_pidfile);
+	xfree(conf_ptr->slurmd_spooldir);
+	xfree(conf_ptr->slurm_conf);
+	xfree(conf_ptr->state_save_location);
+	xfree(conf_ptr->switch_type);
+	xfree(conf_ptr->tmp_fs);
+}
+
 /* return true if supplied uid is a super-user: root, self, or SlurmUser */
 static inline bool _is_super_user(uid_t uid)
 {
+	/* A READ lock on the config would be ideal here, but
+	 * slurm_user_id should be identical to getuid() anyway.
+	 * Privileged calls should be coming from user root too,
+	 * so we forgo the overhead here. */
 	if ( (uid == 0) || 
 	     (uid == slurmctld_conf.slurm_user_id) ||
 	     (uid == getuid()) )
@@ -340,9 +385,9 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg)
 	uint32_t *cpus_per_node = NULL, *cpu_count_reps = NULL;
 	uint32_t job_id = 0;
 	resource_allocation_response_msg_t alloc_msg;
-	/* Locks: Write job, write node, read partition */
+	/* Locks: Read config, write job, write node, read partition */
 	slurmctld_lock_t job_write_lock = { 
-		NO_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };
+		READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };
 	uid_t uid;
 	uint16_t node_cnt = 0;
 	slurm_addr *node_addr = NULL;
@@ -554,6 +599,7 @@ static void _slurm_rpc_dump_conf(slurm_msg_t * msg)
 
 		/* send message */
 		slurm_send_node_msg(msg->conn_fd, &response_msg);
+		_free_ctld_conf(&config_tbl);
 	}
 }
 
@@ -605,9 +651,9 @@ static void _slurm_rpc_dump_nodes(slurm_msg_t * msg)
 	int dump_size;
 	slurm_msg_t response_msg;
 	last_update_msg_t *last_time_msg = (last_update_msg_t *) msg->data;
-	/* Locks: Read node */
+	/* Locks: Read config, read node */
 	slurmctld_lock_t node_read_lock = { 
-		NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK };
+		READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK };
 
 	START_TIMER;
 	debug2("Processing RPC: REQUEST_NODE_INFO");
@@ -732,9 +778,9 @@ static void _slurm_rpc_job_step_kill(slurm_msg_t * msg)
 	DEF_TIMERS;
 	job_step_kill_msg_t *job_step_kill_msg =
 	    (job_step_kill_msg_t *) msg->data;
-	/* Locks: Write job, write node */
+	/* Locks: Read config, write job, write node */
 	slurmctld_lock_t job_write_lock = { 
-		NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };
+		READ_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };
 	uid_t uid;
 
 	START_TIMER;
@@ -1080,9 +1126,9 @@ static void _slurm_rpc_node_registration(slurm_msg_t * msg)
 	int error_code = SLURM_SUCCESS;
 	slurm_node_registration_status_msg_t *node_reg_stat_msg =
 	    (slurm_node_registration_status_msg_t *) msg->data;
-	/* Locks: Write job and node */
+	/* Locks: Read config, write job, write node */
 	slurmctld_lock_t job_write_lock = { 
-		NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };
+		READ_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };
 	uid_t uid;
 
 	START_TIMER;
@@ -1248,6 +1294,14 @@ static void _slurm_rpc_reconfigure_controller(slurm_msg_t * msg)
 	if (error_code == SLURM_SUCCESS) {
 		lock_slurmctld(config_write_lock);
 		error_code = read_slurm_conf(0);
+		if (error_code == SLURM_SUCCESS) {
+			_update_cred_key();
+			if (slurmctld_config.daemonize &&
+			    chdir(slurmctld_conf.state_save_location) < 0) {
+				error("chdir to %s error %m",
+					slurmctld_conf.state_save_location);
+			}
+		}
 		unlock_slurmctld(config_write_lock);
 		if (error_code == SLURM_SUCCESS) {
 			lock_slurmctld(node_read_lock);
@@ -1255,14 +1309,6 @@ static void _slurm_rpc_reconfigure_controller(slurm_msg_t * msg)
 			unlock_slurmctld(node_read_lock);
 		}
 	}
-	if (error_code == SLURM_SUCCESS) {  /* Stuff to do after unlock */
-		_update_cred_key();
-		if (slurmctld_config.daemonize && 
-		    chdir(slurmctld_conf.state_save_location) < 0) {
-			error("chdir to %s error %m",
-			      slurmctld_conf.state_save_location);
-		}
-	}
 	END_TIMER;
 
 	/* return result */
@@ -1523,9 +1569,9 @@ static void _slurm_rpc_update_partition(slurm_msg_t * msg)
 	int error_code = SLURM_SUCCESS;
 	DEF_TIMERS;
 	update_part_msg_t *part_desc_ptr = (update_part_msg_t *) msg->data;
-	/* Locks: Read node, write partition */
+	/* Locks: Read config, read node, write partition */
 	slurmctld_lock_t part_write_lock = { 
-		NO_LOCK, NO_LOCK, READ_LOCK, WRITE_LOCK };
+		READ_LOCK, NO_LOCK, READ_LOCK, WRITE_LOCK };
 	uid_t uid;
 
 	START_TIMER;
@@ -1611,7 +1657,8 @@ static void _slurm_rpc_delete_partition(slurm_msg_t * msg)
 	}
 }
 
-/* Reset the job credential key based upon configuration parameters */
+/* Reset the job credential key based upon configuration parameters.
+ * NOTE: READ lock_slurmctld config before entry */
 static void _update_cred_key(void) 
 {
 	slurm_cred_ctx_key_update(slurmctld_config.cred_ctx, 
diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c
index 1b8685c3eff..1cf74dce3f0 100644
--- a/src/slurmctld/read_config.c
+++ b/src/slurmctld/read_config.c
@@ -1059,6 +1059,7 @@ static void _validate_node_proc_count(void)
  * switch_state_begin - Recover or initialize switch state
  * IN recover - If set, recover switch state as previously saved
  * RET 0 if no error, otherwise an error code
+ * Don't need the slurmctld_conf lock since nothing else is running to update it
  */ 
 int switch_state_begin(int recover)
 {
@@ -1071,6 +1072,7 @@ int switch_state_begin(int recover)
 /*
  * switch_state_fini - save switch state and shutdown switch
  * RET 0 if no error, otherwise an error code
+ * Don't need the slurmctld_conf lock since nothing else is running to update it
  */ 
 int switch_state_fini(void)
 {
diff --git a/src/slurmctld/sched_upcalls.c b/src/slurmctld/sched_upcalls.c
index d330c3e2bcd..383390bd064 100644
--- a/src/slurmctld/sched_upcalls.c
+++ b/src/slurmctld/sched_upcalls.c
@@ -139,7 +139,16 @@ static void * sched_get_node_mod_time( 	sched_obj_list_t, int32_t, char * );
 const u_int16_t
 sched_get_port( void )
 {
-	return slurmctld_conf.schedport;
+	u_int16_t port;
+	/* Locks: Read config */
+	slurmctld_lock_t config_read_lock = { 
+		READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
+
+	lock_slurmctld(config_read_lock);
+	port = slurmctld_conf.schedport;
+	unlock_slurmctld(config_read_lock);
+
+	return port;
 }
 
 /* ************************************************************************ */
@@ -148,7 +157,20 @@ sched_get_port( void )
 const char * const
 sched_get_auth( void )
 {
-	return slurmctld_conf.schedauth;
+	static char auth[128];
+	/* Locks: Read config */
+	slurmctld_lock_t config_read_lock = { 
+		READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
+
+	lock_slurmctld(config_read_lock);
+	strncpy(auth, slurmctld_conf.schedauth, 128);
+	if (auth[127] != '\0') {
+		auth[127] = '\0';
+		error("slurmctld_conf.schedauth truncated");
+	}
+	unlock_slurmctld(config_read_lock);
+
+	return auth;
 }
 
 /* ************************************************************************ */
@@ -1077,9 +1099,9 @@ sched_cancel_job( const uint32_t job_id )
 {
 	int rc = SLURM_SUCCESS;
 	
-	/* Lock on job data, no other locks needed */
+	/* Locks: Read config, write jobs */
 	slurmctld_lock_t job_write_lock = {
-		NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };
+		READ_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };
 
 	/*
 	 * The nice way to do things would be to send SIGTERM, wait for
-- 
GitLab