From 39ca450e35d26786c07fa49f35c2d6f88daa9d44 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Fri, 5 Sep 2008 21:04:59 +0000
Subject: [PATCH] Add configuration parameter CompleteWait to control how long
 to wait for     a job's completion before allocating already released
 resources to pending     jobs. This can be used to reduce fragmentation of
 resources.

---
 NEWS                             |  3 +++
 doc/html/configurator.html.in    |  1 +
 doc/man/man5/slurm.conf.5        | 16 ++++++++++++-
 slurm/slurm.h.in                 |  2 ++
 src/api/config_info.c            |  2 ++
 src/common/read_config.c         | 14 ++++++++---
 src/common/read_config.h         |  1 +
 src/common/slurm_protocol_pack.c |  2 ++
 src/slurmctld/proc_req.c         |  1 +
 src/sview/popups.c               | 40 +++++++++++++++++++++++---------
 10 files changed, 67 insertions(+), 15 deletions(-)

diff --git a/NEWS b/NEWS
index 6eee814bcd3..03bed7dd7de 100644
--- a/NEWS
+++ b/NEWS
@@ -12,6 +12,9 @@ documents those changes that are of interest to users and admins.
  -- Added configuration parameter PrologSlurmctld, which can be used to boot
     nodes into a particular state for each job. See "man slurm.conf" for 
     details.
+ -- Add configuration parameter CompleteWait to control how long to wait for 
+    a job's completion before allocating already released resources to pending
+    jobs. This can be used to reduce fragmentation of resources.
 
 * Changes in SLURM 1.4.0-pre1
 =============================
diff --git a/doc/html/configurator.html.in b/doc/html/configurator.html.in
index 613a844d9b5..01fdda1e9c6 100644
--- a/doc/html/configurator.html.in
+++ b/doc/html/configurator.html.in
@@ -197,6 +197,7 @@ function displayfile()
    "#HealthCheckProgram= <br>" +
    "InactiveLimit=" + document.config.inactive_limit.value + "<br>" +
    "MinJobAge=" + document.config.min_job_age.value + "<br>" +
+   "#CompleteWait=0 <br>" +
    "KillWait=" + document.config.kill_wait.value + "<br>" +
    "#MessageTimeout=10 <br>" +
    "SlurmctldTimeout=" + document.config.slurmctld_timeout.value + "<br>" +
diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
index 4aad0ae536b..dfc2cb84e15 100644
--- a/doc/man/man5/slurm.conf.5
+++ b/doc/man/man5/slurm.conf.5
@@ -1,4 +1,4 @@
-.TH "slurm.conf" "5" "September 2008" "slurm.conf 1.3" "Slurm configuration file"
+.TH "slurm.conf" "5" "September 2008" "slurm.conf 1.4" "Slurm configuration file"
 
 .SH "NAME"
 slurm.conf \- Slurm configuration file 
@@ -176,6 +176,20 @@ The name by which this SLURM managed cluster is known for accounting
 purposes. This is needed distinguish between accounting data from 
 multiple clusters being recorded in a single database.
 
+.TP
+\fBCompleteWait\fR
+The time, in seconds, given for a job to remain in COMPLETING state
+before any additional jobs are scheduled. 
+If set to zero, pending jobs will be started as soon as possible.
+Since a COMPLETING job's resources are released for use by other 
+jobs as soon as the \fBEpilog\fR completes on each individual node, 
+this can result in very fragmented resource allocations. 
+In order to minimize fragmentation of resources without substantial
+performance degradation, a value equal to \fBKillWait\fR plus two
+is recommended.
+The default value is 0 seconds for fastest job scheduling.
+The value may not exceed 65533.
+
 .TP
 \fBControlAddr\fR
 Name that \fBControlMachine\fR should be referred to in 
diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in
index c752d1c0bc8..510a6fe2647 100644
--- a/slurm/slurm.h.in
+++ b/slurm/slurm.h.in
@@ -961,6 +961,8 @@ typedef struct slurm_ctl_conf {
 	uint16_t cache_groups;	/* cache /etc/groups to avoid initgroups(2) */
 	char *checkpoint_type;	/* checkpoint plugin type */
 	char *cluster_name;     /* general name of the entire cluster */
+	uint16_t complete_wait;	/* seconds to wait for job completion before
+				 * scheduling another job */
 	char *control_addr;	/* comm path of slurmctld primary server */
 	char *control_machine;	/* name of slurmctld primary server */
 	char *crypto_type;	/* cryptographic signature plugin */
diff --git a/src/api/config_info.c b/src/api/config_info.c
index b147f822472..9b21664013c 100644
--- a/src/api/config_info.c
+++ b/src/api/config_info.c
@@ -148,6 +148,8 @@ void slurm_print_ctl_conf ( FILE* out,
 		slurm_ctl_conf_ptr->checkpoint_type);
 	fprintf(out, "ClusterName             = %s\n",
 		slurm_ctl_conf_ptr->cluster_name);
+	fprintf(out, "CompleteWait            = %u\n", 
+		slurm_ctl_conf_ptr->complete_wait);
 	fprintf(out, "ControlAddr             = %s\n", 
 		slurm_ctl_conf_ptr->control_addr);
 	fprintf(out, "ControlMachine          = %s\n", 
diff --git a/src/common/read_config.c b/src/common/read_config.c
index a07d99c00df..58ad7042c37 100644
--- a/src/common/read_config.c
+++ b/src/common/read_config.c
@@ -138,6 +138,7 @@ s_p_options_t slurm_conf_options[] = {
 	{"CheckpointType", S_P_STRING},
 	{"CacheGroups", S_P_UINT16},
 	{"ClusterName", S_P_STRING},
+	{"CompleteWait", S_P_UINT16},
 	{"ControlAddr", S_P_STRING},
 	{"ControlMachine", S_P_STRING},
 	{"CryptoType", S_P_STRING},
@@ -1213,6 +1214,7 @@ init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr)
 	ctl_conf_ptr->cache_groups		= 0;
 	xfree (ctl_conf_ptr->checkpoint_type);
 	xfree (ctl_conf_ptr->cluster_name);
+	ctl_conf_ptr->complete_wait		= (uint16_t) NO_VAL;
 	xfree (ctl_conf_ptr->control_addr);
 	xfree (ctl_conf_ptr->control_machine);
 	xfree (ctl_conf_ptr->crypto_type);
@@ -1523,6 +1525,9 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl)
 
 	s_p_get_string(&conf->cluster_name, "ClusterName", hashtbl);
 
+	if (!s_p_get_uint16(&conf->complete_wait, "CompleteWait", hashtbl))
+		conf->complete_wait = DEFAULT_COMPLETE_WAIT;
+
 	if (!s_p_get_string(&conf->control_machine, "ControlMachine", hashtbl))
 		fatal ("validate_and_set_defaults: "
 		       "ControlMachine not specified.");
@@ -1580,7 +1585,8 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl)
 
 	if (s_p_get_uint32(&conf->def_mem_per_task, "DefMemPerCPU", hashtbl))
 		conf->def_mem_per_task |= MEM_PER_CPU;
-	else if (!s_p_get_uint32(&conf->def_mem_per_task, "DefMemPerNode", hashtbl))
+	else if (!s_p_get_uint32(&conf->def_mem_per_task, "DefMemPerNode", 
+				 hashtbl))
 		conf->def_mem_per_task = DEFAULT_MEM_PER_CPU;
 
 	if (!s_p_get_boolean((bool *) &conf->disable_root_jobs, 
@@ -1707,8 +1713,10 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl)
 	if (!s_p_get_uint16(&conf->get_env_timeout, "GetEnvTimeout", hashtbl))
 		conf->get_env_timeout = DEFAULT_GET_ENV_TIMEOUT;
 
-	s_p_get_uint16(&conf->health_check_interval, "HealthCheckInterval", hashtbl);
-	s_p_get_string(&conf->health_check_program, "HealthCheckProgram", hashtbl);
+	s_p_get_uint16(&conf->health_check_interval, "HealthCheckInterval", 
+		       hashtbl);
+	s_p_get_string(&conf->health_check_program, "HealthCheckProgram", 
+		       hashtbl);
 
 	if (!s_p_get_uint16(&conf->kill_wait, "KillWait", hashtbl))
 		conf->kill_wait = DEFAULT_KILL_WAIT;
diff --git a/src/common/read_config.h b/src/common/read_config.h
index 987294c2ebf..39915150911 100644
--- a/src/common/read_config.h
+++ b/src/common/read_config.h
@@ -54,6 +54,7 @@ extern char *default_plugstack;
 #define DEFAULT_ACCOUNTING_STORAGE_TYPE "accounting_storage/none"
 #define DEFAULT_AUTH_TYPE          "auth/none"
 #define DEFAULT_CACHE_GROUPS        0
+#define DEFAULT_COMPLETE_WAIT       0
 #define DEFAULT_CRYPTO_TYPE        "crypto/openssl"
 #define DEFAULT_EPILOG_MSG_TIME     2000
 #define DEFAULT_FAST_SCHEDULE       1
diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c
index 85832294dbd..d1aa1988450 100644
--- a/src/common/slurm_protocol_pack.c
+++ b/src/common/slurm_protocol_pack.c
@@ -2241,6 +2241,7 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer)
 	pack16(build_ptr->cache_groups, buffer);
 	packstr(build_ptr->checkpoint_type, buffer);
 	packstr(build_ptr->cluster_name, buffer);
+	pack16(build_ptr->complete_wait, buffer);
 	packstr(build_ptr->control_addr, buffer);
 	packstr(build_ptr->control_machine, buffer);
 	packstr(build_ptr->crypto_type, buffer);
@@ -2394,6 +2395,7 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **
 	safe_unpackstr_xmalloc(&build_ptr->checkpoint_type, &uint32_tmp,
 			       buffer);
 	safe_unpackstr_xmalloc(&build_ptr->cluster_name, &uint32_tmp, buffer);
+	safe_unpack16(&build_ptr->complete_wait, buffer);
 	safe_unpackstr_xmalloc(&build_ptr->control_addr, &uint32_tmp, buffer);
 	safe_unpackstr_xmalloc(&build_ptr->control_machine, &uint32_tmp,
 			       buffer);
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index 65ae919a1bb..6a977229a4d 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -358,6 +358,7 @@ void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr)
 	conf_ptr->cache_groups        = conf->cache_groups;
 	conf_ptr->checkpoint_type     = xstrdup(conf->checkpoint_type);
 	conf_ptr->cluster_name        = xstrdup(conf->cluster_name);
+	conf_ptr->complete_wait       = conf->complete_wait;
 	conf_ptr->control_addr        = xstrdup(conf->control_addr);
 	conf_ptr->control_machine     = xstrdup(conf->control_machine);
 	conf_ptr->crypto_type         = xstrdup(conf->crypto_type);
diff --git a/src/sview/popups.c b/src/sview/popups.c
index 6415cc5c0b4..9ffb523359e 100644
--- a/src/sview/popups.c
+++ b/src/sview/popups.c
@@ -258,6 +258,11 @@ static void _layout_ctl_conf(GtkTreeStore *treestore,
 	add_display_treestore_line(update, treestore, &iter, 
 				   "CheckpointType",
 				   slurm_ctl_conf_ptr->checkpoint_type);
+	snprintf(temp_str, sizeof(temp_str), "%u", 
+		 slurm_ctl_conf_ptr->complete_wait);
+	add_display_treestore_line(update, treestore, &iter, 
+				   "CompleteWait", 
+				   temp_str);
 	add_display_treestore_line(update, treestore, &iter, 
 				   "ControlAddr", 
 				   slurm_ctl_conf_ptr->control_addr);
@@ -267,11 +272,17 @@ static void _layout_ctl_conf(GtkTreeStore *treestore,
 	add_display_treestore_line(update, treestore, &iter, 
 				   "CryptoType", 
 				   slurm_ctl_conf_ptr->crypto_type);
-	snprintf(temp_str, sizeof(temp_str), "%u", 
-		 slurm_ctl_conf_ptr->def_mem_per_task);
-	add_display_treestore_line(update, treestore, &iter, 
-				   "DefMemPerTask", 
-				   temp_str);
+	if (slurm_ctl_conf_ptr->def_mem_per_task & MEM_PER_CPU) {
+		snprintf(temp_str, sizeof(temp_str), "%u", 
+			 slurm_ctl_conf_ptr->def_mem_per_task & (~MEM_PER_CPU));
+		add_display_treestore_line(update, treestore, &iter, 
+					   "DefMemPerCPU", temp_str);
+	} else {
+		snprintf(temp_str, sizeof(temp_str), "%u", 
+			 slurm_ctl_conf_ptr->def_mem_per_task);
+		add_display_treestore_line(update, treestore, &iter, 
+					   "DefMemPerNode", temp_str);
+	}
 	add_display_treestore_line(update, treestore, &iter, 
 				   "Epilog", 
 				   slurm_ctl_conf_ptr->epilog);
@@ -324,7 +335,8 @@ static void _layout_ctl_conf(GtkTreeStore *treestore,
 
 	add_display_treestore_line(update, treestore, &iter, 
 				   "JobCredentialPrivateKey", 
-				   slurm_ctl_conf_ptr->job_credential_private_key);
+				   slurm_ctl_conf_ptr->
+				   job_credential_private_key);
 	add_display_treestore_line(update, treestore, &iter, 
 				   "JobCredentialPublicCertificate", 
 				   slurm_ctl_conf_ptr->
@@ -346,11 +358,17 @@ static void _layout_ctl_conf(GtkTreeStore *treestore,
 	add_display_treestore_line(update, treestore, &iter, 
 				   "MaxJobCount", 
 				   temp_str);
-	snprintf(temp_str, sizeof(temp_str), "%u", 
-		 slurm_ctl_conf_ptr->max_mem_per_task);
-	add_display_treestore_line(update, treestore, &iter, 
-				   "MaxMemPerTask", 
-				   temp_str);
+	if (slurm_ctl_conf_ptr->max_mem_per_task & MEM_PER_CPU) {
+		snprintf(temp_str, sizeof(temp_str), "%u", 
+			 slurm_ctl_conf_ptr->max_mem_per_task & (~MEM_PER_CPU));
+		add_display_treestore_line(update, treestore, &iter, 
+					   "MaxMemPerCPU", temp_str);
+	} else {
+		snprintf(temp_str, sizeof(temp_str), "%u", 
+			 slurm_ctl_conf_ptr->max_mem_per_task);
+		add_display_treestore_line(update, treestore, &iter, 
+					   "MaxMemPerNode", temp_str);
+	}
 	snprintf(temp_str, sizeof(temp_str), "%u", 
 		 slurm_ctl_conf_ptr->msg_timeout);
 	add_display_treestore_line(update, treestore, &iter, 
-- 
GitLab