From b1d827eab9b756b80963b3841ea21d9b17dd5fc0 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Fri, 4 Jul 2003 16:46:52 +0000
Subject: [PATCH] Add WaitTime, MaxJobCount and MinJobAge configuration
 parameters. WaitTime sets srun's default value for --wait. MaxJobCount sets
 the maximum job count for slurmctld (replacing #define MAX_JOB_COUNT).
 MinJobAge sets the minimum job purge age for slurmctld (replacing #define
 MIN_JOB_AGE).

---
 src/common/read_config.c         | 30 +++++++++++++++++++++++++++++-
 src/common/slurm_protocol_api.c  | 13 +++++++++++++
 src/common/slurm_protocol_api.h  |  6 ++++++
 src/common/slurm_protocol_pack.c |  6 ++++++
 src/slurmctld/controller.c       |  3 +++
 src/slurmctld/job_mgr.c          | 15 +++++++++------
 src/slurmctld/read_config.c      | 27 ++++++++++++++++++---------
 src/slurmctld/slurmctld.h        | 22 ++++++++++++----------
 src/srun/opt.c                   |  3 ++-
 9 files changed, 98 insertions(+), 27 deletions(-)

diff --git a/src/common/read_config.c b/src/common/read_config.c
index a521223ecd0..52c5af72abd 100644
--- a/src/common/read_config.c
+++ b/src/common/read_config.c
@@ -112,6 +112,8 @@ init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr)
 	xfree (ctl_conf_ptr->job_credential_private_key);
 	xfree (ctl_conf_ptr->job_credential_public_certificate);
 	ctl_conf_ptr->kill_wait			= (uint16_t) NO_VAL;
+	ctl_conf_ptr->max_job_cnt		= (uint16_t) NO_VAL;
+	ctl_conf_ptr->min_job_age		= (uint16_t) NO_VAL;
 	xfree (ctl_conf_ptr->plugindir);
 	xfree (ctl_conf_ptr->prioritize);
 	xfree (ctl_conf_ptr->prolog);
@@ -133,6 +135,7 @@ init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr)
 	xfree (ctl_conf_ptr->plugindir);
 	xfree (ctl_conf_ptr->authtype );
 	xfree (ctl_conf_ptr->tmp_fs);
+	ctl_conf_ptr->wait_time			= (uint16_t) NO_VAL;
 	return;
 }
 
@@ -159,6 +162,7 @@ parse_config_spec (char *in_line, slurm_ctl_conf_t *ctl_conf_ptr)
 	int inactive_limit = -1, kill_wait = -1;
 	int ret2service = -1, slurmctld_timeout = -1, slurmd_timeout = -1;
 	int slurmctld_debug = -1, slurmd_debug = -1;
+	int max_job_cnt = -1, min_job_age = -1, wait_time = -1;
 	char *backup_addr = NULL, *backup_controller = NULL;
 	char *control_addr = NULL, *control_machine = NULL, *epilog = NULL;
 	char *prioritize = NULL, *prolog = NULL;
@@ -189,6 +193,8 @@ parse_config_spec (char *in_line, slurm_ctl_conf_t *ctl_conf_ptr)
 		"JobCredentialPublicCertificate=", 's', 
 					&job_credential_public_certificate,
 		"KillWait=", 'd', &kill_wait,
+		"MaxJobCount=", 'd', &max_job_cnt,
+		"MinJobAge=", 'd', &min_job_age,
 		"PluginDir=", 's', &plugindir,
 		"Prioritize=", 's', &prioritize,
 		"Prolog=", 's', &prolog,
@@ -207,6 +213,7 @@ parse_config_spec (char *in_line, slurm_ctl_conf_t *ctl_conf_ptr)
 		"SlurmdTimeout=", 'd', &slurmd_timeout,
 		"StateSaveLocation=", 's', &state_save_location,
 		"TmpFS=", 's', &tmp_fs,
+		"WaitTime=", 'd', &wait_time,
 		"END");
 
 	if (error_code)
@@ -296,6 +303,18 @@ parse_config_spec (char *in_line, slurm_ctl_conf_t *ctl_conf_ptr)
 		ctl_conf_ptr->kill_wait = kill_wait;
 	}
 
+	if ( max_job_cnt != -1) {
+		if ( ctl_conf_ptr->max_job_cnt != (uint16_t) NO_VAL)
+			error (MULTIPLE_VALUE_MSG, "MaxJobCount");
+		ctl_conf_ptr->max_job_cnt = max_job_cnt;
+	}
+
+	if ( min_job_age != -1) {
+		if ( ctl_conf_ptr->min_job_age != (uint16_t) NO_VAL)
+			error (MULTIPLE_VALUE_MSG, "MinJobAge");
+		ctl_conf_ptr->min_job_age = min_job_age;
+	}
+
 	if ( plugindir ) {
 		if ( ctl_conf_ptr->plugindir ) {
 			error( MULTIPLE_VALUE_MSG, "PluginDir" );
@@ -449,6 +468,12 @@ parse_config_spec (char *in_line, slurm_ctl_conf_t *ctl_conf_ptr)
 		ctl_conf_ptr->tmp_fs = tmp_fs;
 	}
 
+	if ( wait_time != -1) {
+		if ( ctl_conf_ptr->wait_time != (uint16_t) NO_VAL)
+			error (MULTIPLE_VALUE_MSG, "WaitTime");
+		ctl_conf_ptr->wait_time = wait_time;
+	}
+
 	if ( job_credential_private_key ) {
 		if ( ctl_conf_ptr->job_credential_private_key ) {
 			error (MULTIPLE_VALUE_MSG, "JobCredentialPrivateKey");
@@ -747,7 +772,10 @@ validate_config (slurm_ctl_conf_t *ctl_conf_ptr)
 		_normalize_debug_level(&ctl_conf_ptr->slurmd_debug);
 
 	if (ctl_conf_ptr->kill_wait == (uint16_t) NO_VAL)
-		ctl_conf_ptr->kill_wait = 30;
+		ctl_conf_ptr->kill_wait = DEFAULT_KILL_WAIT;
+
+	if (ctl_conf_ptr->wait_time == (uint16_t) NO_VAL)
+		ctl_conf_ptr->wait_time = DEFAULT_WAIT_TIME;
 }
 
 /* Normalize supplied debug level to be in range per log.h definitions */
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index c22585e5d38..4632ded8f70 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -105,6 +105,7 @@ int slurm_api_set_default_config()
                 goto cleanup;
 
         read_slurm_conf_ctl(&slurmctld_conf);
+
         if ((slurmctld_conf.control_addr == NULL) ||
             (slurmctld_conf.slurmctld_port == 0)) {
                 error("Unable to establish control machine or port");
@@ -157,6 +158,18 @@ uint32_t slurm_get_slurm_user_id(void)
         return slurmctld_conf.slurm_user_id;
 }
 
+/* slurm_get_wait_time - return wait_time from slurmctld_conf object,
+ * calling slurm_api_set_default_config() first if slurmd_port is still 0
+ * RET uint16_t        - wait_time (result of that call is not checked)
+ */
+uint16_t slurm_get_wait_time(void)
+{
+        if (slurmctld_conf.slurmd_port == 0)  /* ==0 if config unread */
+                slurm_api_set_default_config();
+
+        return slurmctld_conf.wait_time;
+}
+
 /**********************************************************************\
  * general message management functions used by slurmctld, slurmd
 \**********************************************************************/
diff --git a/src/common/slurm_protocol_api.h b/src/common/slurm_protocol_api.h
index 6406bcb845c..805eb9fd2a6 100644
--- a/src/common/slurm_protocol_api.h
+++ b/src/common/slurm_protocol_api.h
@@ -99,6 +99,12 @@ short int inline slurm_get_slurmd_port(void);
  */
 uint32_t slurm_get_slurm_user_id(void);
 
+/* slurm_get_wait_time
+ * returns wait_time from slurmctld_conf object
+ * RET uint16_t        - wait_time
+ */
+uint16_t slurm_get_wait_time(void);
+
 /**********************************************************************\
  * general message management functions used by slurmctld, slurmd
 \**********************************************************************/
diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c
index cd864d77ef8..a71d220a489 100644
--- a/src/common/slurm_protocol_pack.c
+++ b/src/common/slurm_protocol_pack.c
@@ -1491,6 +1491,8 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer)
 	pack16(build_ptr->heartbeat_interval, buffer);
 	pack16(build_ptr->inactive_limit, buffer);
 	pack16(build_ptr->kill_wait, buffer);
+	pack16(build_ptr->max_job_cnt, buffer);
+	pack16(build_ptr->min_job_age, buffer);
 	packstr(build_ptr->plugindir, buffer);
 	packstr(build_ptr->prioritize, buffer);
 	packstr(build_ptr->prolog, buffer);
@@ -1511,6 +1513,7 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer)
 	packstr(build_ptr->slurm_conf, buffer);
 	packstr(build_ptr->state_save_location, buffer);
 	packstr(build_ptr->tmp_fs, buffer);
+	pack16(build_ptr->wait_time, buffer);
 	packstr(build_ptr->job_credential_private_key, buffer);
 	packstr(build_ptr->job_credential_public_certificate, buffer);
 }
@@ -1543,6 +1546,8 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **
 	safe_unpack16(&build_ptr->heartbeat_interval, buffer);
 	safe_unpack16(&build_ptr->inactive_limit, buffer);
 	safe_unpack16(&build_ptr->kill_wait, buffer);
+	safe_unpack16(&build_ptr->max_job_cnt, buffer);
+	safe_unpack16(&build_ptr->min_job_age, buffer);
 	safe_unpackstr_xmalloc(&build_ptr->plugindir, &uint16_tmp, buffer);
 	safe_unpackstr_xmalloc(&build_ptr->prioritize, &uint16_tmp, buffer);
 	safe_unpackstr_xmalloc(&build_ptr->prolog, &uint16_tmp, buffer);
@@ -1570,6 +1575,7 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **
 	safe_unpackstr_xmalloc(&build_ptr->state_save_location,
 			       &uint16_tmp, buffer);
 	safe_unpackstr_xmalloc(&build_ptr->tmp_fs, &uint16_tmp, buffer);
+	safe_unpack16(&build_ptr->wait_time, buffer);
 	safe_unpackstr_xmalloc(&build_ptr->job_credential_private_key,
 			       &uint16_tmp, buffer);
 	safe_unpackstr_xmalloc(&build_ptr->
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 8b06b68e8b0..4cbe0c9946a 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -2058,6 +2058,8 @@ void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr)
 	conf_ptr->job_credential_public_certificate = 
 			slurmctld_conf.job_credential_public_certificate;
 	conf_ptr->kill_wait           = slurmctld_conf.kill_wait;
+	conf_ptr->max_job_cnt         = slurmctld_conf.max_job_cnt;
+	conf_ptr->min_job_age         = slurmctld_conf.min_job_age;
 	conf_ptr->plugindir           = slurmctld_conf.plugindir;
 	conf_ptr->prioritize          = slurmctld_conf.prioritize;
 	conf_ptr->prolog              = slurmctld_conf.prolog;
@@ -2078,6 +2080,7 @@ void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr)
 	conf_ptr->slurm_conf          = slurmctld_conf.slurm_conf;
 	conf_ptr->state_save_location = slurmctld_conf.state_save_location;
 	conf_ptr->tmp_fs              = slurmctld_conf.tmp_fs;
+	conf_ptr->wait_time           = slurmctld_conf.wait_time;
 	return;
 }
 
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index f3726236bd6..f51c4a625ba 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -69,7 +69,7 @@
 #define STEP_FLAG 0xbbbb
 #define TOP_PRIORITY 0xffff0000	/* large, but leave headroom for higher */
 
-#define JOB_HASH_INX(_job_id)	(_job_id % MAX_JOB_COUNT)
+#define JOB_HASH_INX(_job_id)	(_job_id % DEFAULT_MAX_JOB_COUNT)
 
 #define YES_OR_NO(_in_string)	\
 		(( strcmp ((_in_string),"YES"))? \
@@ -83,8 +83,8 @@ time_t last_job_update;		/* time of last update to job records */
 static int default_prio = TOP_PRIORITY;
 static int job_count;		/* job's in the system */
 static long job_id_sequence = -1;	/* first job_id to assign new job */
-static struct job_record *job_hash[MAX_JOB_COUNT];
-static struct job_record *job_hash_over[MAX_JOB_COUNT];
+static struct job_record *job_hash[DEFAULT_MAX_JOB_COUNT];
+static struct job_record *job_hash_over[DEFAULT_MAX_JOB_COUNT];
 static int max_hash_over = 0;
 
 /* Local functions */
@@ -154,7 +154,7 @@ struct job_record *create_job_record(int *error_code)
 	struct job_record *job_record_point;
 	struct job_details *job_details_point;
 
-	if (job_count >= MAX_JOB_COUNT) {
+	if (job_count >= DEFAULT_MAX_JOB_COUNT) {
 		error("create_job_record: job_count exceeds limit");
 		*error_code = EAGAIN;
 		return NULL;
@@ -770,7 +770,7 @@ void _add_job_hash(struct job_record *job_ptr)
 
 	inx = JOB_HASH_INX(job_ptr->job_id);
 	if (job_hash[inx]) {
-		if (max_hash_over >= MAX_JOB_COUNT)
+		if (max_hash_over >= DEFAULT_MAX_JOB_COUNT)
 			fatal("Job hash table overflow");
 		job_hash_over[max_hash_over++] = job_ptr;
 	} else
@@ -2064,9 +2064,12 @@ static int _list_find_job_id(void *job_entry, void *key)
  */
 static int _list_find_job_old(void *job_entry, void *key)
 {
-	time_t min_age = time(NULL) - MIN_JOB_AGE;
+	time_t min_age = time(NULL) - slurmctld_conf.min_job_age;
 	struct job_record *job_ptr = (struct job_record *)job_entry;
 
+	if (slurmctld_conf.min_job_age == 0)
+		return 0;	/* No job record purging */
+
 	if (job_ptr->end_time > min_age)
 		return 0;	/* Too new to purge */
 
diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c
index 3cc4b36b01c..474f4c5c992 100644
--- a/src/slurmctld/read_config.c
+++ b/src/slurmctld/read_config.c
@@ -813,31 +813,37 @@ static void _set_config_defaults(slurm_ctl_conf_t * ctl_conf_ptr)
 		   "read_slurm_conf: backup_controller value not specified.");
 
 	if (ctl_conf_ptr->fast_schedule == (uint16_t) NO_VAL)
-		ctl_conf_ptr->fast_schedule = 1;
+		ctl_conf_ptr->fast_schedule = DEFAULT_FAST_SCHEDULE;
 
 	if (ctl_conf_ptr->first_job_id == (uint32_t) NO_VAL)
-		ctl_conf_ptr->first_job_id = 1;
+		ctl_conf_ptr->first_job_id = DEFAULT_FIRST_JOB_ID;
 
 	if (ctl_conf_ptr->hash_base == (uint16_t) NO_VAL)
-		ctl_conf_ptr->hash_base = 10;
+		ctl_conf_ptr->hash_base = DEFAULT_HASH_BASE;
 
 	if (ctl_conf_ptr->heartbeat_interval == (uint16_t) NO_VAL)
-		ctl_conf_ptr->heartbeat_interval = 60;
+		ctl_conf_ptr->heartbeat_interval = DEFAULT_HEARTBEAT_INTERVAL;
 
 	if (ctl_conf_ptr->inactive_limit == (uint16_t) NO_VAL)
-		ctl_conf_ptr->inactive_limit = 0;	/* unlimited */
+		ctl_conf_ptr->inactive_limit = DEFAULT_INACTIVE_LIMIT;
 
 	if (ctl_conf_ptr->kill_wait == (uint16_t) NO_VAL)
-		ctl_conf_ptr->kill_wait = 30;
+		ctl_conf_ptr->kill_wait = DEFAULT_KILL_WAIT;
+
+	if (ctl_conf_ptr->max_job_cnt == (uint16_t) NO_VAL)
+		ctl_conf_ptr->max_job_cnt = DEFAULT_MAX_JOB_COUNT;
+
+	if (ctl_conf_ptr->min_job_age == (uint16_t) NO_VAL)
+		ctl_conf_ptr->min_job_age = DEFAULT_MIN_JOB_AGE;
 
 	if (ctl_conf_ptr->ret2service == (uint16_t) NO_VAL)
-		ctl_conf_ptr->ret2service = 0;
+		ctl_conf_ptr->ret2service = DEFAULT_RETURN_TO_SERVICE;
 
 	if (ctl_conf_ptr->slurmctld_timeout == (uint16_t) NO_VAL)
-		ctl_conf_ptr->slurmctld_timeout = 300;
+		ctl_conf_ptr->slurmctld_timeout = DEFAULT_SLURMCTLD_TIMEOUT;
 
 	if (ctl_conf_ptr->slurmd_timeout == (uint16_t) NO_VAL)
-		ctl_conf_ptr->slurmd_timeout = 300;
+		ctl_conf_ptr->slurmd_timeout = DEFAULT_SLURMD_TIMEOUT;
 
 	if (ctl_conf_ptr->state_save_location == NULL)
 		ctl_conf_ptr->state_save_location =
@@ -845,6 +851,9 @@ static void _set_config_defaults(slurm_ctl_conf_t * ctl_conf_ptr)
 
 	if (ctl_conf_ptr->tmp_fs == NULL)
 		ctl_conf_ptr->tmp_fs = xstrdup(DEFAULT_TMP_FS);
+
+	if (ctl_conf_ptr->wait_time == (uint16_t) NO_VAL)
+		ctl_conf_ptr->wait_time = DEFAULT_WAIT_TIME;
 }
 
 
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index fefe3141c82..2f527cc3ab2 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -96,16 +96,18 @@
  * Update the group uid_t access list as needed */
 #define	PERIODIC_GROUP_CHECK	600
 
-/* Default temporary storage for slurm state and user files */
-#define DEFAULT_TMP_FS	"/tmp"
-
-/* Don't accept more jobs once there are MAX_JOB_COUNT in the system
- * This should prevent exhausting memory */
-#define MAX_JOB_COUNT 2000
-
-/* Purge OK for jobs over MIN_JOB_AGE seconds old (since completion)
- * This should prevent exhausting memory */
-#define MIN_JOB_AGE 300
+/* Default configuration file values */
+#define DEFAULT_FAST_SCHEDULE       1
+#define DEFAULT_FIRST_JOB_ID        1
+#define DEFAULT_HASH_BASE           10
+#define DEFAULT_HEARTBEAT_INTERVAL  60
+#define DEFAULT_INACTIVE_LIMIT      0
+#define DEFAULT_MAX_JOB_COUNT       2000
+#define DEFAULT_MIN_JOB_AGE         300
+#define DEFAULT_RETURN_TO_SERVICE   0
+#define DEFAULT_SLURMCTLD_TIMEOUT   300
+#define DEFAULT_SLURMD_TIMEOUT      300
+#define DEFAULT_TMP_FS              "/tmp"
 
 extern slurm_ctl_conf_t slurmctld_conf;
 
diff --git a/src/srun/opt.c b/src/srun/opt.c
index c923b63cd35..c1e7bf11aa0 100644
--- a/src/srun/opt.c
+++ b/src/srun/opt.c
@@ -46,6 +46,7 @@
 
 #include "src/common/list.h"
 #include "src/common/log.h"
+#include "src/common/slurm_protocol_api.h"
 #include "src/common/xmalloc.h"
 #include "src/common/xstring.h"
 
@@ -584,7 +585,7 @@ static void _opt_default()
 	opt.allocate	= false;
 	opt.attach	= NULL;
 	opt.join	= false;
-	opt.max_wait	= 0;
+	opt.max_wait	= slurm_get_wait_time();
 
 	_verbose = 0;
 	opt.slurmd_debug = LOG_LEVEL_QUIET;
-- 
GitLab