From da29f2dccad4d00dd142732eea127146b8c541a9 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Tue, 12 May 2009 23:45:29 +0000 Subject: [PATCH] Fix logic so that we clear POWER_SAVE flag on nodes on reconfig as appropriate --- src/slurmctld/power_save.c | 37 ++++++++++++++++++++--------------- src/slurmctld/read_config.c | 39 +++++++++++++++++++++++++++++-------- 2 files changed, 52 insertions(+), 24 deletions(-) diff --git a/src/slurmctld/power_save.c b/src/slurmctld/power_save.c index 6e9b892bab0..f693a20d785 100644 --- a/src/slurmctld/power_save.c +++ b/src/slurmctld/power_save.c @@ -345,36 +345,38 @@ static int _init_power_config(void) slurm_conf_unlock(); if (idle_time < 0) { /* not an error */ - debug("power_save module disabled, idle_time < 0"); + debug("power_save module disabled, SuspendTime < 0"); return -1; } if (suspend_rate < 1) { - error("power_save module disabled, suspend_rate < 1"); + error("power_save module disabled, SuspendRate < 1"); return -1; } if (resume_rate < 1) { - error("power_save module disabled, resume_rate < 1"); + error("power_save module disabled, ResumeRate < 1"); return -1; } - if (suspend_prog == NULL) - info("WARNING: power_save module has NULL suspend program"); - else if (!_valid_prog(suspend_prog)) { - error("power_save module disabled, invalid suspend program %s", + if (suspend_prog == NULL) { + error("power_save module disabled, NULL SuspendProgram"); + return -1; + } else if (!_valid_prog(suspend_prog)) { + error("power_save module disabled, invalid SuspendProgram %s", suspend_prog); return -1; } - if (resume_prog == NULL) - info("WARNING: power_save module has NULL resume program"); - else if (!_valid_prog(resume_prog)) { - error("power_save module disabled, invalid resume program %s", + if (resume_prog == NULL) { + error("power_save module disabled, NULL ResumeProgram"); + return -1; + } else if (!_valid_prog(resume_prog)) { + error("power_save module disabled, invalid ResumeProgram %s", resume_prog); return -1; } - if (exc_nodes - && (node_name2bitmap(exc_nodes, false, &exc_node_bitmap))) { + if (exc_nodes && + (node_name2bitmap(exc_nodes, false, &exc_node_bitmap))) { error("power_save module disabled, " - "invalid excluded nodes %s", exc_nodes); + "invalid SuspendExcNodes %s", exc_nodes); return -1; } @@ -389,7 +391,7 @@ static int _init_power_config(void) part_ptr = find_part_record(one_part); if (!part_ptr) { error("power_save module disabled, " - "invalid excluded partition %s", + "invalid SuspendExcPart %s", one_part); rc = -1; break; @@ -468,8 +470,11 @@ extern void *init_power_save(void *arg) } if ((last_config != slurmctld_conf.last_update) && - (_init_power_config())) + (_init_power_config())) { + info("power_save mode has been disabled due to " + "configuration changes"); goto fini; + } /* Only run every 60 seconds or after * a node state change, whichever diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index 4f2f12d7bf5..2785fc22f37 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -17,7 +17,7 @@ * any later version. * * In addition, as a special exception, the copyright holders give permission - * to link the code of portions of this program with the OpenSSL library under + * to link the code of portions of this program with the OpenSSL library under * certain conditions as described in each individual source file, and * distribute linked combinations including the two. You must obey the GNU * General Public License in all respects for all of the code used other than @@ -840,10 +840,10 @@ int read_slurm_conf(int recover) _build_bitmaps_pre_select(); if ((select_g_node_init(node_record_table_ptr, node_record_count) - != SLURM_SUCCESS) - || (select_g_block_init(part_list) != SLURM_SUCCESS) - || (select_g_state_restore(state_save_dir) != SLURM_SUCCESS) - || (select_g_job_init(job_list) != SLURM_SUCCESS)) { + != SLURM_SUCCESS) || + (select_g_block_init(part_list) != SLURM_SUCCESS) || + (select_g_state_restore(state_save_dir) != SLURM_SUCCESS) || + (select_g_job_init(job_list) != SLURM_SUCCESS)) { fatal("failed to initialize node selection plugin state, " "Clean start required."); } @@ -909,10 +909,17 @@ int read_slurm_conf(int recover) /* Restore node state and size information from saved records. * If a node was re-configured to be down or drained, we set those states */ static int _restore_node_state(struct node_record *old_node_table_ptr, - int old_node_record_count) + int old_node_record_count) { struct node_record *node_ptr; int i, rc = SLURM_SUCCESS; + hostset_t hs = NULL; + slurm_ctl_conf_t *conf = slurm_conf_lock(); + bool power_save_mode = false; + + if (conf->suspend_program && conf->resume_program) + power_save_mode = true; + slurm_conf_unlock(); for (i = 0; i < old_node_record_count; i++) { uint16_t drain_flag = false, down_flag = false; @@ -920,7 +927,8 @@ static int _restore_node_state(struct node_record *old_node_table_ptr, if (node_ptr == NULL) continue; - if ((node_ptr->node_state & NODE_STATE_BASE) == NODE_STATE_DOWN) + if ((node_ptr->node_state & NODE_STATE_BASE) == + NODE_STATE_DOWN) down_flag = true; if (node_ptr->node_state & NODE_STATE_DRAIN) drain_flag = true; @@ -931,7 +939,15 @@ static int _restore_node_state(struct node_record *old_node_table_ptr, } if (drain_flag) node_ptr->node_state |= NODE_STATE_DRAIN; - + if ((node_ptr->node_state & NODE_STATE_POWER_SAVE) && + (!power_save_mode)) { + node_ptr->node_state &= (~NODE_STATE_POWER_SAVE); + if (hs) + hostset_insert(hs, node_ptr->name); + else + hs = hostset_create(node_ptr->name); + } + node_ptr->last_response = old_node_table_ptr[i].last_response; if (old_node_table_ptr[i].port != node_ptr->config_ptr->cpus) { rc = ESLURM_NEED_RESTART; @@ -966,6 +982,13 @@ static int _restore_node_state(struct node_record *old_node_table_ptr, old_node_table_ptr[i].os = NULL; } } + + if (hs) { + char node_names[128]; + hostset_ranged_string(hs, sizeof(node_names), node_names); + info("Cleared POWER_SAVE flag from nodes %s", node_names); + hostset_destroy(hs); + } return rc; } -- GitLab