From 2833b51720f5d23b58457133d416404c504a3737 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Tue, 8 May 2007 23:06:38 +0000
Subject: [PATCH] Add support for power saving mode (experimental code to
 reduce     clock on nodes that stay in the IDLE state).

---
 NEWS                             |   2 +
 doc/man/man1/sinfo.1             |   5 +-
 doc/man/man1/smap.1              |   3 +
 slurm/slurm.h.in                 |   1 +
 src/common/slurm_protocol_defs.c |   6 +
 src/slurmctld/Makefile.am        |   1 +
 src/slurmctld/Makefile.in        |   4 +-
 src/slurmctld/controller.c       |  31 ++-
 src/slurmctld/power_save.c       | 385 +++++++++++++++++++++++++++++++
 src/slurmctld/slurmctld.h        |  11 +-
 10 files changed, 436 insertions(+), 13 deletions(-)
 create mode 100644 src/slurmctld/power_save.c

diff --git a/NEWS b/NEWS
index 07a76fe903a..ab1b6a67765 100644
--- a/NEWS
+++ b/NEWS
@@ -11,6 +11,8 @@ documents those changes that are of interest to users and admins.
  -- Add ability to change MaxNodes and ExcNodeList for pending job 
     using scontrol.
  -- Purge zombie processes spawned via event triggers.
+ -- Add support for power saving mode (experimental code to reduce
+    clock on nodes that stay in the IDLE state).
 
 * Changes in SLURM 1.2.6
 ========================
diff --git a/doc/man/man1/sinfo.1 b/doc/man/man1/sinfo.1
index 369af1eabed..889ad726145 100644
--- a/doc/man/man1/sinfo.1
+++ b/doc/man/man1/sinfo.1
@@ -1,4 +1,4 @@
-.TH SINFO "1" "November 2006" "sinfo 1.2" "Slurm components"
+.TH SINFO "1" "May 2007" "sinfo 1.2" "Slurm components"
 
 .SH "NAME"
 sinfo \- view information about SLURM nodes and partitions.
@@ -347,6 +347,9 @@ node is presently not responding and will not be allocated
 any new work.  If the node remains non\-responsive, it will
 be placed in the \fBDOWN\fR state (except in the case of
 \fBDRAINED\fR, \fBDRAINING\fR, or \fBCOMPLETING\fR nodes).
+If the node state code is followed by "~", this indicates
+the node is presently in a power saving mode (typically
+running at reduced frequency).
 .TP 12
 \fBALLOCATED\fR
 The node has been allocated to one or more jobs.
diff --git a/doc/man/man1/smap.1 b/doc/man/man1/smap.1
index 5df8bbbbbcc..85bfbd9ae48 100644
--- a/doc/man/man1/smap.1
+++ b/doc/man/man1/smap.1
@@ -327,6 +327,9 @@ node is presently not responding and will not be allocated
 any new work.  If the node remains non\-responsive, it will
 be placed in the \fBDOWN\fR state (except in the case of
 \fBDRAINED\fR, \fBDRAINING\fR, or \fBCOMPLETING\fR nodes).
+If the node state code is followed by "~", this indicates
+the node is presently in a power saving mode (typically
+running at reduced frequency).
 .TP 12
 \fBALLOCATED\fR
 The node has been allocated to one or more jobs.
diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in
index 047705185fe..a3fa7cc9054 100644
--- a/slurm/slurm.h.in
+++ b/slurm/slurm.h.in
@@ -376,6 +376,7 @@ enum node_states {
 #define NODE_STATE_DRAIN      0x0200	/* node not be be allocated work */
 #define NODE_STATE_COMPLETING 0x0400	/* node is completing allocated job */
 #define NODE_STATE_NO_RESPOND 0x0800	/* node is not responding */
+#define NODE_STATE_POWER_SAVE 0x1000	/* node in power save mode */
 
 /* used to define the size of the credential.signature size
  * used to define the key size of the io_stream_header_t
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c
index 185e7f0c241..707eab8dc3e 100644
--- a/src/common/slurm_protocol_defs.c
+++ b/src/common/slurm_protocol_defs.c
@@ -636,6 +636,7 @@ char *node_state_string(enum node_states inx)
 	bool drain_flag   = (inx & NODE_STATE_DRAIN);
 	bool comp_flag    = (inx & NODE_STATE_COMPLETING);
 	bool no_resp_flag = (inx & NODE_STATE_NO_RESPOND);
+	bool power_flag   = (inx & NODE_STATE_POWER_SAVE);
 
 	inx = (uint16_t) (inx & NODE_STATE_BASE);
 
@@ -670,6 +671,8 @@ char *node_state_string(enum node_states inx)
 	if (inx == NODE_STATE_IDLE) {
 		if (no_resp_flag)
 			return "IDLE*";
+		if (power_flag)
+			return "IDLE~";
 		return "IDLE";
 	}
 	if (inx == NODE_STATE_UNKNOWN) {
@@ -685,6 +688,7 @@ char *node_state_string_compact(enum node_states inx)
 	bool drain_flag   = (inx & NODE_STATE_DRAIN);
 	bool comp_flag    = (inx & NODE_STATE_COMPLETING);
 	bool no_resp_flag = (inx & NODE_STATE_NO_RESPOND);
+	bool power_flag   = (inx & NODE_STATE_POWER_SAVE);
 
 	inx = (uint16_t) (inx & NODE_STATE_BASE);
 
@@ -719,6 +723,8 @@ char *node_state_string_compact(enum node_states inx)
 	if (inx == NODE_STATE_IDLE) {
 		if (no_resp_flag)
 			return "IDLE*";
+		if (power_flag)
+			return "IDLE~";
 		return "IDLE";
 	}
 	if (inx == NODE_STATE_UNKNOWN) {
diff --git a/src/slurmctld/Makefile.am b/src/slurmctld/Makefile.am
index 82f88bb07da..5852f1e58ea 100644
--- a/src/slurmctld/Makefile.am
+++ b/src/slurmctld/Makefile.am
@@ -27,6 +27,7 @@ slurmctld_SOURCES = 	\
 	partition_mgr.c \
 	ping_nodes.c	\
 	ping_nodes.h	\
+	power_save.c	\
 	proc_req.c	\
 	proc_req.h	\
 	read_config.c	\
diff --git a/src/slurmctld/Makefile.in b/src/slurmctld/Makefile.in
index a26fcd931bb..09375b9a16e 100644
--- a/src/slurmctld/Makefile.in
+++ b/src/slurmctld/Makefile.in
@@ -77,7 +77,7 @@ am_slurmctld_OBJECTS = agent.$(OBJEXT) backup.$(OBJEXT) \
 	controller.$(OBJEXT) job_mgr.$(OBJEXT) job_scheduler.$(OBJEXT) \
 	locks.$(OBJEXT) node_mgr.$(OBJEXT) node_scheduler.$(OBJEXT) \
 	partition_mgr.$(OBJEXT) ping_nodes.$(OBJEXT) \
-	proc_req.$(OBJEXT) read_config.$(OBJEXT) \
+	power_save.$(OBJEXT) proc_req.$(OBJEXT) read_config.$(OBJEXT) \
 	sched_plugin.$(OBJEXT) srun_comm.$(OBJEXT) \
 	state_save.$(OBJEXT) step_mgr.$(OBJEXT) trigger_mgr.$(OBJEXT)
 slurmctld_OBJECTS = $(am_slurmctld_OBJECTS)
@@ -303,6 +303,7 @@ slurmctld_SOURCES = \
 	partition_mgr.c \
 	ping_nodes.c	\
 	ping_nodes.h	\
+	power_save.c	\
 	proc_req.c	\
 	proc_req.h	\
 	read_config.c	\
@@ -402,6 +403,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/node_scheduler.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/partition_mgr.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ping_nodes.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/power_save.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc_req.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/read_config.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sched_plugin.Po@am__quote@
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 38ef75a4973..6661434c3f7 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -174,7 +174,7 @@ typedef struct connection_arg {
 int main(int argc, char *argv[])
 {
 	int error_code;
-	pthread_attr_t thread_attr_save, thread_attr_sig, thread_attr_rpc;
+	pthread_attr_t thread_attr;
 	struct stat stat_buf;
 
 	/*
@@ -324,31 +324,41 @@ int main(int argc, char *argv[])
 		slurm_mutex_lock(&slurmctld_config.thread_count_lock);
 		slurmctld_config.server_thread_count++;
 		slurm_mutex_unlock(&slurmctld_config.thread_count_lock);
-		slurm_attr_init(&thread_attr_rpc);
+		slurm_attr_init(&thread_attr);
 		if (pthread_create(&slurmctld_config.thread_id_rpc, 
-				&thread_attr_rpc,_slurmctld_rpc_mgr, NULL))
+				&thread_attr,_slurmctld_rpc_mgr, NULL))
 			fatal("pthread_create error %m");
-		slurm_attr_destroy(&thread_attr_rpc);
+		slurm_attr_destroy(&thread_attr);
 
 		/*
 		 * create attached thread for signal handling
 		 */
-		slurm_attr_init(&thread_attr_sig);
+		slurm_attr_init(&thread_attr);
 		if (pthread_create(&slurmctld_config.thread_id_sig,
-				 &thread_attr_sig, _slurmctld_signal_hand,
+				 &thread_attr, _slurmctld_signal_hand,
 				 NULL))
 			fatal("pthread_create %m");
-		slurm_attr_destroy(&thread_attr_sig);
+		slurm_attr_destroy(&thread_attr);
 
 		/*
 		 * create attached thread for state save
 		 */
-		slurm_attr_init(&thread_attr_save);
+		slurm_attr_init(&thread_attr);
 		if (pthread_create(&slurmctld_config.thread_id_save,
-				&thread_attr_save, slurmctld_state_save,
+				&thread_attr, slurmctld_state_save,
 				NULL))
 			fatal("pthread_create %m");
-		slurm_attr_destroy(&thread_attr_save);
+		slurm_attr_destroy(&thread_attr);
+
+		/*
+		 * create attached thread for node power management
+		 */
+		slurm_attr_init(&thread_attr);
+		if (pthread_create(&slurmctld_config.thread_id_power,
+				&thread_attr, init_power_save,
+				NULL))
+			fatal("pthread_create %m");
+		slurm_attr_destroy(&thread_attr);
 
 		/*
 		 * process slurm background activities, could run as pthread
@@ -360,6 +370,7 @@ int main(int argc, char *argv[])
 		pthread_join(slurmctld_config.thread_id_sig,  NULL);
 		pthread_join(slurmctld_config.thread_id_rpc,  NULL);
 		pthread_join(slurmctld_config.thread_id_save, NULL);
+		pthread_join(slurmctld_config.thread_id_power,NULL);
 		if (select_g_state_save(slurmctld_conf.state_save_location)
 				!= SLURM_SUCCESS )
 			error("failed to save node selection state");
diff --git a/src/slurmctld/power_save.c b/src/slurmctld/power_save.c
new file mode 100644
index 00000000000..e8e9c1b29ff
--- /dev/null
+++ b/src/slurmctld/power_save.c
@@ -0,0 +1,385 @@
+/*****************************************************************************\
+ *  power_save.c - support node power saving mode. Nodes which have been 
+ *  idle for an extended period of time will be placed into a power saving 
+ *  mode by running an arbitrary script (typically to set frequency governor).
+ *  When the node is restored to normal operation, another script will be 
+ *  executed. Many parameters are available to control this mode of operation.
+ *****************************************************************************
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Morris Jette <jette1@llnl.gov>
+ *  UCRL-CODE-226842.
+ *  
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *  
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *
+ *  In addition, as a special exception, the copyright holders give permission 
+ *  to link the code of portions of this program with the OpenSSL library under
+ *  certain conditions as described in each individual source file, and 
+ *  distribute linked combinations including the two. You must obey the GNU 
+ *  General Public License in all respects for all of the code used other than 
+ *  OpenSSL. If you modify file(s) with this exception, you may extend this 
+ *  exception to your version of the file(s), but you are not obligated to do 
+ *  so. If you do not wish to do so, delete this exception statement from your
+ *  version.  If you delete this exception statement from all source files in 
+ *  the program, then also delete it here.
+ *  
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *  
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
+\*****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include "src/common/bitstring.h"
+#include "src/common/xstring.h"
+#include "src/slurmctld/locks.h"
+#include "src/slurmctld/slurmctld.h"
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/* NOTE: These parameters will be moved into the slurm.conf file in version 1.3
+ * Directly modify the default values here in order to enable this capability
+ * in SLURM version 1.2. */
+
+/* Node becomes eligible for power saving mode after being idle for
+ * this number of seconds. A negative disables power saving mode. */
+#define DEFAULT_IDLE_TIME	-1
+
+/* Maximum number of nodes to be placed into or removed from power saving mode
+ * per minute. Use this to prevent rapid changing in power requirements.
+ * Note that up to DEFAULT_SUSPEND_RATE + DEFAULT_RESUME_RATE processes may 
+ * be created at the same time, so use reasonable limits. A value of zero 
+ * results in no limits being imposed. */
+#define DEFAULT_SUSPEND_RATE	60
+#define DEFAULT_RESUME_RATE	60
+
+/* Programs to be executed to place nodes into or out of power saving mode. These
+ * are run as user SlurmUser. The hostname of the node to be modified will be
+ * passed as an argument to the program. */
+#define DEFAULT_SUSPEND_PROGRAM	"/home/jette/slurm.way/sbin/slurm.node.suspend"
+#define DEFAULT_RESUME_PROGRAM	"/home/jette/slurm.way/sbin/slurm.node.resume"
+
+/* Individual nodes or all nodes in selected partitions can be excluded from
+ * being placed into power saving mode. SLURM hostlist expressions can be used.
+ * Multiple partitions may be listed with a comma separator. */
+#define DEFAULT_EXCLUDE_SUSPEND_NODES		NULL
+#define DEFAULT_EXCLUDE_SUSPEND_PARTITIONS	NULL
+
+int idle_time, suspend_rate, resume_rate;	/* set in _init_power_config() */
+char *suspend_prog = NULL, *resume_prog = NULL;
+char *exc_nodes = NULL, *exc_parts = NULL;
+
+/* Working state used while scanning nodes in _do_power_work() */
+bitstr_t *exc_node_bitmap = NULL;	/* nodes never put in power save */
+int suspend_cnt, resume_cnt;		/* compared against the rate limits */
+
+static void  _do_power_work(void);
+static void  _do_resume(char *host);
+static void  _do_suspend(char *host);
+static int   _init_power_config(void);
+static void  _kill_zombies(void);
+static pid_t _run_prog(char *prog, char *arg);
+static bool  _valid_prog(char *file_name);
+
+/* Perform any power change work to nodes (caller holds node write lock) */
+static void _do_power_work(void)
+{
+	static time_t last_work_scan = 0, last_log = 0;	/* kept across calls */
+	int i, wake_cnt = 0, sleep_cnt = 0, susp_total = 0;
+	time_t now = time(NULL), delta_t;
+	uint16_t base_state, susp_state;
+	bitstr_t *wake_node_bitmap = NULL, *sleep_node_bitmap = NULL;
+	struct node_record *node_ptr;
+
+	/* Build bitmaps identifying each node which should change state */
+	for (i=0; i<node_record_count; i++) {
+		node_ptr = &node_record_table_ptr[i];
+		base_state = node_ptr->node_state & NODE_STATE_BASE;
+		susp_state = node_ptr->node_state & NODE_STATE_POWER_SAVE;
+		if (susp_state)
+			susp_total++;
+		if (susp_state
+		&&  ((base_state == NODE_STATE_ALLOCATED)
+		||   (node_ptr->last_idle > (now - idle_time)))) {
+			if (wake_cnt == 0)
+				wake_node_bitmap = bit_alloc(node_record_count);
+			wake_cnt++;
+			bit_set(wake_node_bitmap, i);
+		}
+		if ((susp_state == 0)
+		&&  (base_state == NODE_STATE_IDLE)
+		&&  (node_ptr->last_idle < (now - idle_time))
+		&&  ((exc_node_bitmap == NULL) ||
+		     (bit_test(exc_node_bitmap, i) == 0))) {
+			if (sleep_cnt == 0)
+				sleep_node_bitmap = bit_alloc(node_record_count);
+			sleep_cnt++;
+			bit_set(sleep_node_bitmap, i);
+		}
+	}
+	if ((susp_total > 0) && ((now - last_log) > 300)) {
+		info("Power save mode %d nodes", susp_total);
+		last_log = now;	/* log at most once per 300 seconds */
+	}
+	if ((wake_cnt == 0) && (sleep_cnt == 0))
+		goto fini;		/* No work to be done now */
+/* FIXME: Consider re-running wake command on nodes just in case
+ * some wake requests failed. This is the place to do such work. */
+	/* Age the counts of recent state changes: linear decay over 60 sec */
+	delta_t = now - last_work_scan;
+	if (delta_t >= 60) {
+		suspend_cnt = 0;
+		resume_cnt  = 0;
+	} else {
+		float rate = (60 - delta_t) / 60.0;
+		suspend_cnt *= rate;
+		resume_cnt  *= rate;
+	}
+	last_work_scan = now;
+
+	/* Perform work up to limits, counting each change made */
+	for (i=0; i<node_record_count; i++) {
+		node_ptr = &node_record_table_ptr[i];
+		if (sleep_node_bitmap && (suspend_cnt < suspend_rate)
+		&&  bit_test(sleep_node_bitmap, i)) {
+			_do_suspend(node_ptr->name);
+			node_ptr->node_state |= NODE_STATE_POWER_SAVE;
+			last_node_update = now;
+			suspend_cnt++;
+		}
+		if (wake_node_bitmap && (resume_cnt < resume_rate)
+		&&  bit_test(wake_node_bitmap, i)) {
+			_do_resume(node_ptr->name);
+			node_ptr->node_state &= (~NODE_STATE_POWER_SAVE);
+			last_node_update = now;
+			resume_cnt++;
+		}
+	}
+fini:	FREE_NULL_BITMAP(wake_node_bitmap);
+	FREE_NULL_BITMAP(sleep_node_bitmap);
+}
+static void _do_resume(char *host)	/* IN host - name of node to wake */
+{
+	debug("power_save: waking node %s", host);
+	(void) _run_prog(resume_prog, host);	/* child pid is not tracked */
+}
+
+static void _do_suspend(char *host)	/* IN host - name of node to suspend */
+{
+	debug("power_save: suspending node %s", host);
+	(void) _run_prog(suspend_prog, host);	/* child pid is not tracked */
+}
+
+/* Fork a child process to execute "prog" with "arg" as its only argument.
+ * The child is not waited for here; _kill_zombies() reaps it later.
+ * RET the child's pid, or -1 if prog/arg is NULL or fork() fails */
+static pid_t _run_prog(char *prog, char *arg)
+{
+	char *pname;
+	pid_t child;
+
+	if ((prog == NULL) || (arg == NULL))
+		return (pid_t) -1;	/* nothing configured to run */
+	pname = strrchr(prog, '/');	/* argv[0] is the program's basename */
+	pname = pname ? (pname + 1) : prog;
+
+	child = fork();
+	if (child == 0) {
+		int i;
+		for (i=0; i<128; i++)	/* close files inherited from parent */
+			close(i);
+		execl(prog, pname, arg, (char *) NULL);
+		/* exec failed: _exit() skips the parent's atexit handlers */
+		_exit(1);
+	} else if (child < 0)
+		error("fork: %m");
+	return child;
+}
+
+/* Reap all exited children without blocking. Individual pids are not
+ * tracked; capture "child" in _run_prog() if that is ever needed. */
+static void  _kill_zombies(void)
+{
+	pid_t reaped;
+	do
+		reaped = waitpid(-1, NULL, WNOHANG);
+	while (reaped > 0);
+}
+
+/* Initialize power_save module parameters from the DEFAULT_* values.
+ * Return 0 on valid configuration to run power saving,
+ * otherwise log the problem and return -1 */
+static int _init_power_config(void)
+{
+	idle_time     = DEFAULT_IDLE_TIME;
+	suspend_rate  = DEFAULT_SUSPEND_RATE;
+	resume_rate   = DEFAULT_RESUME_RATE;
+	if (DEFAULT_SUSPEND_PROGRAM)
+		suspend_prog = xstrdup(DEFAULT_SUSPEND_PROGRAM);
+	if (DEFAULT_RESUME_PROGRAM)
+		resume_prog = xstrdup(DEFAULT_RESUME_PROGRAM);
+	if (DEFAULT_EXCLUDE_SUSPEND_NODES)
+		exc_nodes = xstrdup(DEFAULT_EXCLUDE_SUSPEND_NODES);
+	if (DEFAULT_EXCLUDE_SUSPEND_PARTITIONS)
+		exc_parts = xstrdup(DEFAULT_EXCLUDE_SUSPEND_PARTITIONS);
+
+	if (idle_time < 0) {	/* not an error */
+		debug("power_save module disabled, idle_time < 0");
+		return -1;
+	}
+	if (suspend_rate < 1) {
+		error("power_save module disabled, suspend_rate < 1");
+		return -1;
+	}
+	if (resume_rate < 1) {
+		error("power_save module disabled, resume_rate < 1");
+		return -1;
+	}
+	if (suspend_prog == NULL)
+		info("WARNING: power_save module has NULL suspend program");
+	else if (!_valid_prog(suspend_prog)) {
+		error("power_save module disabled, invalid suspend program %s",
+			suspend_prog);
+		return -1;
+	}
+	if (resume_prog == NULL)
+		info("WARNING: power_save module has NULL resume program");
+	else if (!_valid_prog(resume_prog)) {
+		error("power_save module disabled, invalid resume program %s",
+			resume_prog);
+		return -1;
+	}
+
+	if (exc_nodes
+	&&  (node_name2bitmap(exc_nodes, false, &exc_node_bitmap))) {
+		error("power_save module disabled, "
+			"invalid excluded nodes %s", exc_nodes);
+		return -1;
+	}
+
+	if (exc_parts) {
+		char *tmp, *one_part, *part_list;
+		struct part_record *part_ptr;
+		int rc = 0;
+
+		part_list = xstrdup(exc_parts);	/* strtok_r modifies its input */
+		one_part = strtok_r(part_list, ",", &tmp);
+		while (one_part != NULL) {
+			part_ptr = find_part_record(one_part);
+			if (!part_ptr) {
+				error("power_save module disabled, "
+					"invalid excluded partition %s",
+					one_part);	/* name that was not found */
+				rc = -1;
+				break;
+			}
+			if (exc_node_bitmap)	/* merge with nodes already excluded */
+				bit_or(exc_node_bitmap, part_ptr->node_bitmap);
+			else
+				exc_node_bitmap = bit_copy(part_ptr->node_bitmap);
+			one_part = strtok_r(NULL, ",", &tmp);
+		}
+		xfree(part_list);
+		if (rc)
+			return rc;
+	}
+
+	if (exc_node_bitmap) {
+		char *tmp = bitmap2node_name(exc_node_bitmap);
+		debug("power_save module, excluded nodes %s", tmp);
+		xfree(tmp);
+	}
+
+	return 0;
+}
+
+/* Validate a configured program. It must be named by an absolute
+ * pathname, be a regular file, and not be writable by group or other.
+ * Each failure is logged at debug level.
+ * RET true if the program passes every check, otherwise false */
+static bool _valid_prog(char *file_name)
+{
+	struct stat st;
+	bool valid = false;
+
+	if (*file_name != '/') {
+		debug("program %s not absolute pathname", file_name);
+	} else if (stat(file_name, &st) != 0) {
+		debug("program %s not found", file_name);
+	} else if (S_ISREG(st.st_mode) == 0) {
+		debug("program %s not regular file", file_name);
+	} else if ((st.st_mode & 022) != 0) {
+		debug("program %s has group or world write permission",
+			file_name);
+	} else {
+		valid = true;
+	}
+
+	return valid;
+}
+
+/*
+ * init_power_save - thread entry point for the power save module.
+ *	Loads the module's configuration, then rescans node state until
+ *	slurmctld shutdown begins, at which point the thread terminates.
+ *	Input and output are unused.
+ */
+extern void *init_power_save(void *arg)
+{
+	/* Locks: Read config, node, and partitions */
+	slurmctld_lock_t config_read_lock = {
+		READ_LOCK, NO_LOCK, READ_LOCK, READ_LOCK };
+	/* Locks: Write node, read jobs and partitions */
+	slurmctld_lock_t node_write_lock = {
+		NO_LOCK, READ_LOCK, WRITE_LOCK, READ_LOCK };
+	time_t last_scan = 0, this_scan;
+	int config_rc;
+
+	lock_slurmctld(config_read_lock);
+	config_rc = _init_power_config();
+	unlock_slurmctld(config_read_lock);
+
+	/* An unusable configuration skips the loop and goes straight to
+	 * the cleanup below */
+	while ((config_rc == 0) && (slurmctld_config.shutdown_time == 0)) {
+		sleep(1);
+		/* reap any child processes that have finished */
+		_kill_zombies();
+
+		/* Scan once every 60 seconds, or sooner when node state
+		 * was updated at or after the previous scan */
+		this_scan = time(NULL);
+		if ((last_node_update >= last_scan)
+		||  (this_scan >= (last_scan + 60))) {
+			lock_slurmctld(node_write_lock);
+			_do_power_work();
+			unlock_slurmctld(node_write_lock);
+			last_scan = this_scan;
+		}
+	}
+
+	/* Free all allocated memory */
+	xfree(suspend_prog);
+	xfree(resume_prog);
+	xfree(exc_nodes);
+	xfree(exc_parts);
+	FREE_NULL_BITMAP(exc_node_bitmap);
+	return NULL;
+}
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index 8719e674341..89505b445ee 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -3,7 +3,7 @@
  *
  *  $Id$
  *****************************************************************************
- *  Copyright (C) 2002-2006 The Regents of the University of California.
+ *  Copyright (C) 2002-2007 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  *  Written by Morris Jette <jette1@llnl.gov> et. al.
  *  UCRL-CODE-226842.
@@ -141,12 +141,14 @@ typedef struct slurmctld_config {
 	pthread_t thread_id_main;
 	pthread_t thread_id_save;
 	pthread_t thread_id_sig;
+	pthread_t thread_id_power;
 	pthread_t thread_id_rpc;
 #else
 	int thread_count_lock;
 	int thread_id_main;
 	int thread_id_save;
 	int thread_id_sig;
+	int thread_id_power;
 	int thread_id_rpc;
 #endif
 } slurmctld_config_t;
@@ -675,6 +677,13 @@ extern int init_node_conf ();
  */
 extern int init_part_conf (void);
 
+/*
+ * init_power_save - initialize the power save module. Started as a 
+ *	pthread. Terminates automatically at slurmctld shutdown time.
+ *	Input and output are unused.
+ */
+extern void *init_power_save(void *arg);
+
 /*
  * is_node_down - determine if the specified node's state is DOWN
  * IN name - name of the node
-- 
GitLab