From da29f2dccad4d00dd142732eea127146b8c541a9 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Tue, 12 May 2009 23:45:29 +0000
Subject: [PATCH] Fix logic so that we clear POWER_SAVE flag on nodes on
 reconfig as appropriate

---
 src/slurmctld/power_save.c  | 37 ++++++++++++++++++++---------------
 src/slurmctld/read_config.c | 39 +++++++++++++++++++++++++++++--------
 2 files changed, 52 insertions(+), 24 deletions(-)

diff --git a/src/slurmctld/power_save.c b/src/slurmctld/power_save.c
index 6e9b892bab0..f693a20d785 100644
--- a/src/slurmctld/power_save.c
+++ b/src/slurmctld/power_save.c
@@ -345,36 +345,38 @@ static int _init_power_config(void)
 	slurm_conf_unlock();
 
 	if (idle_time < 0) {	/* not an error */
-		debug("power_save module disabled, idle_time < 0");
+		debug("power_save module disabled, SuspendTime < 0");
 		return -1;
 	}
 	if (suspend_rate < 1) {
-		error("power_save module disabled, suspend_rate < 1");
+		error("power_save module disabled, SuspendRate < 1");
 		return -1;
 	}
 	if (resume_rate < 1) {
-		error("power_save module disabled, resume_rate < 1");
+		error("power_save module disabled, ResumeRate < 1");
 		return -1;
 	}
-	if (suspend_prog == NULL)
-		info("WARNING: power_save module has NULL suspend program");
-	else if (!_valid_prog(suspend_prog)) {
-		error("power_save module disabled, invalid suspend program %s",
+	if (suspend_prog == NULL) {
+		error("power_save module disabled, NULL SuspendProgram");
+		return -1;
+	} else if (!_valid_prog(suspend_prog)) {
+		error("power_save module disabled, invalid SuspendProgram %s",
 			suspend_prog);
 		return -1;
 	}
-	if (resume_prog == NULL)
-		info("WARNING: power_save module has NULL resume program");
-	else if (!_valid_prog(resume_prog)) {
-		error("power_save module disabled, invalid resume program %s",
+	if (resume_prog == NULL) {
+		error("power_save module disabled, NULL ResumeProgram");
+		return -1;
+	} else if (!_valid_prog(resume_prog)) {
+		error("power_save module disabled, invalid ResumeProgram %s",
 			resume_prog);
 		return -1;
 	}
 
-	if (exc_nodes
-	&&  (node_name2bitmap(exc_nodes, false, &exc_node_bitmap))) {
+	if (exc_nodes &&
+	    (node_name2bitmap(exc_nodes, false, &exc_node_bitmap))) {
 		error("power_save module disabled, "
-			"invalid excluded nodes %s", exc_nodes);
+			"invalid SuspendExcNodes %s", exc_nodes);
 		return -1;
 	}
 
@@ -389,7 +391,7 @@ static int _init_power_config(void)
 			part_ptr = find_part_record(one_part);
 			if (!part_ptr) {
 				error("power_save module disabled, "
-					"invalid excluded partition %s",
+					"invalid SuspendExcPart %s",
 					one_part);
 				rc = -1;
 				break;
@@ -468,8 +470,11 @@ extern void *init_power_save(void *arg)
 		}
 
 		if ((last_config != slurmctld_conf.last_update) &&
-		    (_init_power_config()))
+		    (_init_power_config())) {
+			info("power_save mode has been disabled due to "
+			     "configuration changes");
 			goto fini;
+		}
 
 		/* Only run every 60 seconds or after
 		 * a node state change, whichever 
diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c
index 4f2f12d7bf5..2785fc22f37 100644
--- a/src/slurmctld/read_config.c
+++ b/src/slurmctld/read_config.c
@@ -17,7 +17,7 @@
  *  any later version.
  *
  *  In addition, as a special exception, the copyright holders give permission 
- *  to link the code of portions of this program with the OpenSSL library under 
+ *  to link the code of portions of this program with the OpenSSL library under
  *  certain conditions as described in each individual source file, and 
  *  distribute linked combinations including the two. You must obey the GNU 
  *  General Public License in all respects for all of the code used other than 
@@ -840,10 +840,10 @@ int read_slurm_conf(int recover)
 
 	_build_bitmaps_pre_select();
 	if ((select_g_node_init(node_record_table_ptr, node_record_count)
-	     != SLURM_SUCCESS) 
-	    || (select_g_block_init(part_list) != SLURM_SUCCESS) 
-	    || (select_g_state_restore(state_save_dir) != SLURM_SUCCESS) 
-	    || (select_g_job_init(job_list) != SLURM_SUCCESS)) {
+	     != SLURM_SUCCESS)						||
+	    (select_g_block_init(part_list) != SLURM_SUCCESS)		||
+	    (select_g_state_restore(state_save_dir) != SLURM_SUCCESS)	||
+	    (select_g_job_init(job_list) != SLURM_SUCCESS)) {
 		fatal("failed to initialize node selection plugin state, "
 		      "Clean start required.");
 	}
@@ -909,10 +909,17 @@ int read_slurm_conf(int recover)
 /* Restore node state and size information from saved records.
  * If a node was re-configured to be down or drained, we set those states */
 static int _restore_node_state(struct node_record *old_node_table_ptr, 
-				int old_node_record_count)
+			       int old_node_record_count)
 {
 	struct node_record *node_ptr;
 	int i, rc = SLURM_SUCCESS;
+	hostset_t hs = NULL;
+	slurm_ctl_conf_t *conf = slurm_conf_lock();
+	bool power_save_mode = false;
+
+	if (conf->suspend_program && conf->resume_program)
+		power_save_mode = true;
+	slurm_conf_unlock();
 
 	for (i = 0; i < old_node_record_count; i++) {
 		uint16_t drain_flag = false, down_flag = false;
@@ -920,7 +927,8 @@ static int _restore_node_state(struct node_record *old_node_table_ptr,
 		if (node_ptr == NULL)
 			continue;
 
-		if ((node_ptr->node_state & NODE_STATE_BASE) == NODE_STATE_DOWN)
+		if ((node_ptr->node_state & NODE_STATE_BASE) == 
+		    NODE_STATE_DOWN)
 			down_flag = true;
 		if (node_ptr->node_state & NODE_STATE_DRAIN)
 			drain_flag = true;
@@ -931,7 +939,15 @@ static int _restore_node_state(struct node_record *old_node_table_ptr,
 		}
 		if (drain_flag)
 			node_ptr->node_state |= NODE_STATE_DRAIN; 
-			
+		if ((node_ptr->node_state & NODE_STATE_POWER_SAVE) &&
+		    (!power_save_mode)) {
+			node_ptr->node_state &= (~NODE_STATE_POWER_SAVE);
+			if (hs)
+				hostset_insert(hs, node_ptr->name);
+			else
+				hs = hostset_create(node_ptr->name);
+		}
+
 		node_ptr->last_response = old_node_table_ptr[i].last_response;
 		if (old_node_table_ptr[i].port != node_ptr->config_ptr->cpus) {
 			rc = ESLURM_NEED_RESTART;
@@ -966,6 +982,13 @@ static int _restore_node_state(struct node_record *old_node_table_ptr,
 			old_node_table_ptr[i].os = NULL;
 		}
 	}
+
+	if (hs) {
+		char node_names[128];
+		hostset_ranged_string(hs, sizeof(node_names), node_names);
+		info("Cleared POWER_SAVE flag from nodes %s", node_names);
+		hostset_destroy(hs);
+	}
 	return rc;
 }
 
-- 
GitLab