From 0dea4c1c75d1682afd8e16c400a764bf4a1a7480 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Mon, 23 Feb 2009 23:24:58 +0000
Subject: [PATCH] Add support for SchedulerParameters=interval=<sec> to control
 the time     interval between executions of the backfill scheduler logic.

---
 NEWS                                  |  2 ++
 doc/man/man5/slurm.conf.5             |  9 +++++++++
 src/common/slurm_protocol_api.c       | 17 +++++++++++++++++
 src/common/slurm_protocol_api.h       |  6 +++++-
 src/plugins/sched/backfill/backfill.c | 20 +++++++++++++-------
 5 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/NEWS b/NEWS
index 5a5811e0bc0..31d7dfc4507 100644
--- a/NEWS
+++ b/NEWS
@@ -32,6 +32,8 @@ documents those changes that are of interest to users and admins.
     management. Add resv_port_cnt and resv_ports fields to the job step 
     data structures. Add environment variable SLURM_STEP_RESV_PORTS to
     show what ports are reserved for a job step.
+ -- Add support for SchedulerParameters=interval:<sec> to control the time
+    interval between executions of the backfill scheduler logic.
  -- NOTE: Cold-start (without preserving state) required for upgrade from 
     version 1.4.0-pre7.
 
diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
index 5608d04e0b7..a3edff41a01 100644
--- a/doc/man/man5/slurm.conf.5
+++ b/doc/man/man5/slurm.conf.5
@@ -1025,6 +1025,15 @@ and
 
 would run \fBxterm\fR with the title set to the SLURM jobid.
 
+.TP
+\fBSchedulerParameters\fR
+The interpretation of this parameter varies by \fBSchedulerType\fR.
+In the case of \fBSchedulerType=sched/backfill\fR, there is one
+optional argument of the form "interval:#", where "#" is the number of
+seconds between iterations. Higher values result in less overhead
+but also less responsiveness. The default value is 5 seconds on BlueGene
+systems and 10 seconds otherwise.
+
 .TP
 \fBSchedulerPort\fR
 The port number on which slurmctld should listen for connection requests.
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 633fa0f3bb8..c645de8b359 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -1275,6 +1275,23 @@ extern uint16_t slurm_get_root_filter(void)
 	}
 	return root_filter;
 }
+
+/* slurm_get_sched_params
+ * RET char * - Copy of SchedulerParameters taken while the config is locked
+ *	(NULL when running as slurmdbd), MUST be xfreed by caller */
+extern char *slurm_get_sched_params(void)
+{
+	char *params = NULL;
+	slurm_ctl_conf_t *conf;
+
+	if (!slurmdbd_conf) {
+		conf = slurm_conf_lock();
+		params = xstrdup(conf->sched_params);
+		slurm_conf_unlock();
+	}
+	return params;
+}
+
 /* slurm_get_sched_port
  * RET uint16_t  - Value of SchedulerPort */
 extern uint16_t slurm_get_sched_port(void)
diff --git a/src/common/slurm_protocol_api.h b/src/common/slurm_protocol_api.h
index 13df3a70137..d9c7830ba18 100644
--- a/src/common/slurm_protocol_api.h
+++ b/src/common/slurm_protocol_api.h
@@ -3,7 +3,7 @@
  *	definitions
  *****************************************************************************
  *  Copyright (C) 2002-2006 The Regents of the University of California.
- *  Copyright (C) 2008 Lawrence Livermore National Security.
+ *  Copyright (C) 2008-2009 Lawrence Livermore National Security.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  *  Written by Kevin Tew <tew1@llnl.gov>, et. al.
  *  CODE-OCEC-09-009. All rights reserved.
@@ -424,6 +424,10 @@ char *slurm_get_proctrack_type(void);
  * RET uint16_t  - Value of SchedulerRootFilter */
 extern uint16_t slurm_get_root_filter(void);
 
+/* slurm_get_sched_params
+ * RET char * - Value of SchedulerParameters, MUST be xfreed by caller */
+extern char *slurm_get_sched_params(void);
+
 /* slurm_get_sched_port
  * RET uint16_t  - Value of SchedulerPort */
 extern uint16_t slurm_get_sched_port(void);
diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c
index 09414d02c4e..191daa95c73 100644
--- a/src/plugins/sched/backfill/backfill.c
+++ b/src/plugins/sched/backfill/backfill.c
@@ -91,10 +91,6 @@ static bool new_work      = false;
 static bool stop_backfill = false;
 static pthread_mutex_t thread_flag_mutex = PTHREAD_MUTEX_INITIALIZER;
 
-/* Backfill scheduling has considerable overhead, 
- *	so only attempt it every BACKFILL_INTERVAL seconds.
- * Much of the scheduling for BlueGene happens through backfill,
- *	so we run it more frequently. */
 #ifndef BACKFILL_INTERVAL
 #  ifdef HAVE_BG
 #    define BACKFILL_INTERVAL	5
@@ -176,14 +172,24 @@ extern void stop_backfill_agent(void)
 extern void *backfill_agent(void *args)
 {
 	struct timeval tv1, tv2;
-	char tv_str[20];
+	char tv_str[20], *sched_params, *tmp_ptr;
 	time_t now;
-	int i, iter;
+	int backfill_interval = 0, i, iter;
 	static time_t last_backfill_time = 0;
 	/* Read config, and partitions; Write jobs and nodes */
 	slurmctld_lock_t all_locks = {
 		READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };
 
+	sched_params = slurm_get_sched_params();
+	if (sched_params && (tmp_ptr=strstr(sched_params, "interval:")))
+		backfill_interval = atoi(tmp_ptr+9);
+	else
+		backfill_interval = BACKFILL_INTERVAL;
+	if (backfill_interval < 1) {
+		fatal("Invalid backfill scheduler interval: %d", 
+		      backfill_interval);
+	}
+
 	while (!stop_backfill) {
 		iter = (BACKFILL_CHECK_SEC * 1000000) /
 		       STOP_CHECK_USEC;
@@ -197,7 +203,7 @@ extern void *backfill_agent(void *args)
 		/* Avoid resource fragmentation if important */
 		if (job_is_completing())
 			continue;
-		if ((difftime(now, last_backfill_time) < BACKFILL_INTERVAL) ||
+		if ((difftime(now, last_backfill_time) < backfill_interval) ||
 		    stop_backfill || (!_more_work()))
 			continue;
 		last_backfill_time = now;
-- 
GitLab