From ad9c2413a735a6b15a0133ac5068d546c52de9a1 Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Thu, 9 Jul 2015 11:02:24 -0700
Subject: [PATCH] Change slurmctld thread count against limit

The slurmctld logic throttles some RPCs so that only one of them
can execute at a time in order to reduce contention for the job,
partition, and node locks (only one of the affected RPCs can execute
at any time anyway, and this lets other RPC types run). While an
RPC is stuck in the throttle function, do not count that thread
against the slurmctld thread limit.
bug 1794
---
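Note for reviewers: below is a minimal, self-contained sketch of the
throttle pattern this patch changes. It uses plain pthreads in place of
Slurm's slurm_mutex_* wrappers, and the throttle_fini() counterpart is
assumed from context rather than copied from the source; only
server_thread_incr()/server_thread_decr() and the decr/wait/incr
sequence correspond directly to the diff below.

#include <pthread.h>

static pthread_mutex_t throttle_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  throttle_cond  = PTHREAD_COND_INITIALIZER;

static pthread_mutex_t thread_count_lock = PTHREAD_MUTEX_INITIALIZER;
static int server_thread_count;

/* Increment the count of threads held against the daemon's limit */
static void server_thread_incr(void)
{
	pthread_mutex_lock(&thread_count_lock);
	server_thread_count++;
	pthread_mutex_unlock(&thread_count_lock);
}

/* Decrement the count of threads held against the daemon's limit */
static void server_thread_decr(void)
{
	pthread_mutex_lock(&thread_count_lock);
	if (server_thread_count > 0)
		server_thread_count--;
	pthread_mutex_unlock(&thread_count_lock);
}

/* Serialize RPCs of one type. A thread parked in pthread_cond_wait()
 * is not contending for the job/partition/node locks, so it is
 * released from the thread limit and re-counted once it wakes. */
static void throttle_start(int *active_rpc_cnt)
{
	pthread_mutex_lock(&throttle_mutex);
	while (1) {
		if (*active_rpc_cnt == 0) {
			(*active_rpc_cnt)++;
			break;
		}
		server_thread_decr();	/* parked, not contending */
		pthread_cond_wait(&throttle_cond, &throttle_mutex);
		server_thread_incr();	/* runnable again */
	}
	pthread_mutex_unlock(&throttle_mutex);
}

/* Hypothetical counterpart: release the throttle and wake waiters */
static void throttle_fini(int *active_rpc_cnt)
{
	pthread_mutex_lock(&throttle_mutex);
	(*active_rpc_cnt)--;
	pthread_cond_broadcast(&throttle_cond);
	pthread_mutex_unlock(&throttle_mutex);
}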
 NEWS                       |  2 ++
 src/slurmctld/controller.c | 26 ++++++++++++++++----------
 src/slurmctld/proc_req.c   |  6 ++++++
 src/slurmctld/slurmctld.h  |  8 +++++++-
 4 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/NEWS b/NEWS
index 82c9f8ed657..a3f85146e49 100644
--- a/NEWS
+++ b/NEWS
@@ -3,6 +3,8 @@ documents those changes that are of interest to users and administrators.
 
 * Changes in Slurm 14.11.9
 ==========================
+ -- Do not count slurmctld threads waiting in a "throttle" lock against the
+    daemon's thread limit as they are not contending for resources.
 
 * Changes in Slurm 14.11.8
 ==========================
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 7ddb2558b6d..4dc0a816371 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -214,7 +214,6 @@ static int          _accounting_mark_all_nodes_down(char *reason);
 static void *       _assoc_cache_mgr(void *no_data);
 static void         _become_slurm_user(void);
 static void         _default_sigaction(int sig);
-inline static void  _free_server_thread(void);
 static void         _init_config(void);
 static void         _init_pidfile(void);
 static void         _kill_old_slurmctld(void);
@@ -514,9 +513,7 @@ int main(int argc, char *argv[])
 		/*
 		 * create attached thread to process RPCs
 		 */
-		slurm_mutex_lock(&slurmctld_config.thread_count_lock);
-		slurmctld_config.server_thread_count++;
-		slurm_mutex_unlock(&slurmctld_config.thread_count_lock);
+		server_thread_incr();
 		slurm_attr_init(&thread_attr);
 		while (pthread_create(&slurmctld_config.thread_id_rpc,
 				      &thread_attr, _slurmctld_rpc_mgr,
@@ -965,7 +962,7 @@ static void *_slurmctld_rpc_mgr(void *no_data)
 		if (select(max_fd+1, &rfds, NULL, NULL, NULL) == -1) {
 			if (errno != EINTR)
 				error("slurm_accept_msg_conn select: %m");
-			_free_server_thread();
+			server_thread_decr();
 			continue;
 		}
 		/* find one to process */
@@ -986,7 +983,7 @@ static void *_slurmctld_rpc_mgr(void *no_data)
 		    SLURM_SOCKET_ERROR) {
 			if (errno != EINTR)
 				error("slurm_accept_msg_conn: %m");
-			_free_server_thread();
+			server_thread_decr();
 			continue;
 		}
 		fd_set_close_on_exec(newsockfd);
@@ -1025,7 +1022,7 @@ static void *_slurmctld_rpc_mgr(void *no_data)
 	for (i=0; i<nports; i++)
 		(void) slurm_shutdown_msg_engine(sockfd[i]);
 	xfree(sockfd);
-	_free_server_thread();
+	server_thread_decr();
 	pthread_exit((void *) 0);
 	return NULL;
 }
@@ -1076,7 +1073,7 @@ static void *_service_connection(void *arg)
 cleanup:
 	slurm_free_msg(msg);
 	xfree(arg);
-	_free_server_thread();
+	server_thread_decr();
 	return return_code;
 }
 
@@ -1123,7 +1120,8 @@ static bool _wait_for_server_thread(void)
 	return rc;
 }
 
-static void _free_server_thread(void)
+/* Decrement slurmctld thread count (as applies to thread limit) */
+extern void server_thread_decr(void)
 {
 	slurm_mutex_lock(&slurmctld_config.thread_count_lock);
 	if (slurmctld_config.server_thread_count > 0)
@@ -1134,7 +1132,15 @@ static void _free_server_thread(void)
 	slurm_mutex_unlock(&slurmctld_config.thread_count_lock);
 }
 
-static int _accounting_cluster_ready()
+/* Increment slurmctld thread count (as applies to thread limit) */
+extern void server_thread_incr(void)
+{
+	slurm_mutex_lock(&slurmctld_config.thread_count_lock);
+	slurmctld_config.server_thread_count++;
+	slurm_mutex_unlock(&slurmctld_config.thread_count_lock);
+}
+
+static int _accounting_cluster_ready(void)
 {
 	int rc = SLURM_ERROR;
 	time_t event_time = time(NULL);
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index 92e3697b114..1a23dd4238a 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -565,7 +565,13 @@ static void _throttle_start(int *active_rpc_cnt)
 			(*active_rpc_cnt)++;
 			break;
 		}
+
+		/* While an RPC is being throttled due to a running RPC of the
+		 * same type, do not count that thread against the daemon's
+		 * thread limit */
+		server_thread_decr();
 		pthread_cond_wait(&throttle_cond, &throttle_mutex);
+		server_thread_incr();
 	}
 	slurm_mutex_unlock(&throttle_mutex);
 	if (LOTS_OF_AGENTS)
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index aefc4bd12df..e4e691d9b66 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -1866,13 +1866,19 @@ extern void set_cluster_cpus(void);
 /* sends all jobs in eligible state to accounting.  Only needed at
  * first registration
  */
-extern int send_jobs_to_accounting();
+extern int send_jobs_to_accounting(void);
 
 /* send all nodes in a down like state to accounting.  Only needed at
  * first registration
  */
 extern int send_nodes_to_accounting(time_t event_time);
 
+/* Decrement slurmctld thread count (as applies to thread limit) */
+extern void server_thread_decr(void);
+
+/* Increment slurmctld thread count (as applies to thread limit) */
+extern void server_thread_incr(void);
+
 /* Set a job's alias_list string */
 extern void set_job_alias_list(struct job_record *job_ptr);
 
-- 
GitLab