From 866fa7ce5bae53a0e65b1d2d9747ea34211d18cb Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Fri, 29 Aug 2008 18:14:04 +0000
Subject: [PATCH] Modify power save mode so that nodes can be powered off when
 idle. See     https://computing.llnl.gov/linux/slurm/power_save.html or    
 "man slurm.conf" (SuspendProgram and related parameters) for more    
 information. This is the final installment of the work: update some
 documentation,     increase default ResumeRate, and reduce frequency of
 retrying     batch launch state check.

---
 NEWS                      |  1 +
 doc/html/power_save.shtml | 14 ++++++++------
 doc/man/man5/slurm.conf.5 |  2 +-
 src/common/read_config.h  |  2 +-
 src/slurmctld/agent.c     | 17 ++++++++++++-----
 5 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/NEWS b/NEWS
index e9a2535f462..f89fee5d8a9 100644
--- a/NEWS
+++ b/NEWS
@@ -6,6 +6,7 @@ documents those changes that are of interest to users and admins.
  -- Remove srun's --ctrl-comm-ifhn-addr option (for PMI/MPICH2). It is no
     longer needed.
  -- Modify power save mode so that nodes can be powered off when idle. See
+    https://computing.llnl.gov/linux/slurm/power_save.html or 
     "man slurm.conf" (SuspendProgram and related parameters) for more 
     information.
 
diff --git a/doc/html/power_save.shtml b/doc/html/power_save.shtml
index 0f4a8de4867..68f1b4654fe 100644
--- a/doc/html/power_save.shtml
+++ b/doc/html/power_save.shtml
@@ -3,18 +3,20 @@
 <h1>Power Saving Guide</h1>
 <p>SLURM provides an integrated power saving mechanism beginning with 
 version 1.2.7.
+Beginning with version 1.4.0, nodes can be fully powered down rather 
+than only having their power consumption reduced.
 Nodes that remain idle for an configurable period of time can be placed 
 in a power saving mode. 
 The nodes will be restored to normal operation once work is assigned to them.
-Power saving is accomplished using a <i>cpufreq</i> governor that can change
-CPU frequency and voltage.
+Power saving can be accomplished either using a <i>cpufreq</i> governor 
+that can change CPU frequency and voltage or by powering down the node.
 Note that the <i>cpufreq</i> driver must be enabled in the Linux kernel 
 configuration. 
 While the "ondemand" governor can be configured to operate at all
 times to automatically alter the CPU performance based upon workload, 
 SLURM provides somewhat greater flexibility for power management on a
 cluster. 
-Of particular note, SLURM can alter the governors across the cluster
+Of particular note, SLURM can power nodes up or down 
 at a configurable rate to prevent rapid changes in power demands. 
 For example, starting a 1000 node job on an idle cluster could result 
 in an instantaneous surge in power demand of multiple megawatts without 
@@ -41,10 +43,10 @@ The default value is 60.
 Use this to prevent rapid drops in power requirements.</li>
 
 <li><b>ResumeRate</b>:
-Maximum number of nodes to be placed into power saving mode 
+Maximum number of nodes to be removed from power saving mode 
 per minute. 
 A value of zero results in no limits being imposed.
-The default value is 60.
+The default value is 300.
 Use this to prevent rapid increasses in power requirements.</li>
 
 <li><b>SuspendProgram</b>:
@@ -117,6 +119,6 @@ nodes are in power save mode using messages of this sort:
 You can also configure SLURM without SuspendProgram or ResumeProgram values
 to assess the potential impact of power saving mode before enabling it.</p>
 
-<p style="text-align:center;">Last modified 14 May 2007</p>
+<p style="text-align:center;">Last modified 29 August 2008</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
index b0a6509fa02..6d00f9e0e18 100644
--- a/doc/man/man5/slurm.conf.5
+++ b/doc/man/man5/slurm.conf.5
@@ -766,7 +766,7 @@ The value is number of nodes per minute and it can be used to prevent
 power surges if a large number of nodes in power save mode are 
 assigned work at the same time (e.g. a large job starts).
 A value of zero results in no limits being imposed.
-The default value is 60 nodes per minute.
+The default value is 300 nodes per minute.
 Related configuration options include \fBResumeProgram\fR, \fBSuspendRate\fR,
 \fBSuspendTime\fR, \fBSuspendProgram\fR, \fBSuspendExcNodes\fR, and
 \fBSuspendExcParts\fR.
diff --git a/src/common/read_config.h b/src/common/read_config.h
index 5c6e52774cf..987294c2ebf 100644
--- a/src/common/read_config.h
+++ b/src/common/read_config.h
@@ -87,7 +87,7 @@ extern char *default_plugstack;
 #endif
 #define DEFAULT_PROPAGATE_PRIO_PROCESS 0
 #define DEFAULT_RETURN_TO_SERVICE   0
-#define DEFAULT_RESUME_RATE         60
+#define DEFAULT_RESUME_RATE         300
 #define DEFAULT_SAVE_STATE_LOC      "/tmp"
 #define DEFAULT_SCHEDROOTFILTER     1
 #define DEFAULT_SCHEDULER_PORT      7321
diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c
index 5a7c9963860..e12ccddbf5c 100644
--- a/src/slurmctld/agent.c
+++ b/src/slurmctld/agent.c
@@ -153,6 +153,8 @@ typedef struct task_info {
 
 typedef struct queued_request {
 	agent_arg_t* agent_arg_ptr;	/* The queued request */
+	time_t       first_attempt;	/* Time of first check for batch 
+					 * launch RPC *only* */
 	time_t       last_attempt;	/* Time of last xmit attempt */
 } queued_request_t;
 
@@ -1459,6 +1461,11 @@ static bool _batch_launch_defer(queued_request_t *queued_req_ptr)
 	if (agent_arg_ptr->msg_type != REQUEST_BATCH_JOB_LAUNCH)
 		return false;
 
+	if (difftime(now, queued_req_ptr->last_attempt) < 5) {
+		/* Reduce overhead by only testing once every 5 secs */
+		return false;
+	}
+
 	launch_msg_ptr = (batch_job_launch_msg_t *)agent_arg_ptr->msg_args;
 	hostlist_deranged_string(agent_arg_ptr->hostlist, 
 				 sizeof(hostname), hostname);
@@ -1472,14 +1479,14 @@ static bool _batch_launch_defer(queued_request_t *queued_req_ptr)
 
 	if (((node_ptr->node_state & NODE_STATE_POWER_SAVE) == 0) &&
 	    ((node_ptr->node_state & NODE_STATE_NO_RESPOND) == 0)) {
-info("agent ready to send batch request to %s", hostname);
 		queued_req_ptr->last_attempt = (time_t) 0;
 		return false;
 	}
 
-	if (queued_req_ptr->last_attempt == 0)
-		queued_req_ptr->last_attempt = now;
-	else if (difftime(now, queued_req_ptr->last_attempt) >= 
+	if (queued_req_ptr->last_attempt == 0) {
+		queued_req_ptr->first_attempt = now;
+		queued_req_ptr->last_attempt  = now;
+	} else if (difftime(now, queued_req_ptr->first_attempt) >= 
 				BATCH_START_TIME) {
 		error("agent waited too long for node %s to come up, "
 		      "sending batch request anyway...");
@@ -1487,6 +1494,6 @@ info("agent ready to send batch request to %s", hostname);
 		return false;
 	}
 
-info("agent waiting to send batch request to %s", hostname);
+	queued_req_ptr->last_attempt  = now;
 	return true;
 }
-- 
GitLab