diff --git a/NEWS b/NEWS index a00107ad8a0b132d57f80043deabe89facfcaeaf..822325058be8c0f8d524dde76e70cb22b1b3297d 100644 --- a/NEWS +++ b/NEWS @@ -62,6 +62,9 @@ documents those changes that are of interest to users and administrators. then pass to the ResumeProgram the job ID assigned to the nodes in the SLURM_JOB_ID environment variable. -- Allow a node's PowerUp state flag to be cleared using update_node RPC. + -- capmc_suspend/resume - If a request to modify NUMA or MCDRAM state on a set of + nodes or reboot a set of nodes fails, then just requeue the job and abort the + entire operation rather than trying to operate on individual nodes. * Changes in Slurm 16.05.4 ========================== diff --git a/contribs/cray/capmc_resume.c b/contribs/cray/capmc_resume.c index e61fb63312337a8ff2ed0bda76b66ed686cd248a..3f1a0f0727a5296e7450416a6f9b7b77a64b7914 100644 --- a/contribs/cray/capmc_resume.c +++ b/contribs/cray/capmc_resume.c @@ -56,7 +56,6 @@ #include "slurm/slurm.h" #include "slurm/slurm_errno.h" -#include "src/common/hostlist.h" #include "src/common/log.h" #include "src/common/macros.h" #include "src/common/parse_config.h" @@ -90,10 +89,6 @@ static char *prog_name = NULL; static char *mcdram_mode = NULL, *numa_mode = NULL; static char *syscfg_path = NULL; -static pthread_mutex_t thread_cnt_mutex = PTHREAD_MUTEX_INITIALIZER; -static pthread_cond_t thread_cnt_cond = PTHREAD_COND_INITIALIZER; -static int thread_cnt = 0; - /* NOTE: Keep this table synchronized with the table in * src/plugins/node_features/knl_cray/node_features_knl_cray.c */ static s_p_options_t knl_conf_file_options[] = { @@ -114,7 +109,6 @@ static s_p_options_t knl_conf_file_options[] = { /* Static functions */ static s_p_hashtbl_t *_config_make_tbl(char *filename); static uint32_t *_json_parse_nids(json_object *jobj, char *key, int *num); -static void *_node_update(void *args); static void _read_config(void); static char *_run_script(char **script_argv, int *status); static int _tot_wait(struct 
timeval *start_time); @@ -404,125 +398,6 @@ static int _update_all_nodes(char *host_list) return rc; } -static void *_node_update(void *args) -{ - char *node_name = (char *) args; - char *argv[10], nid_str[32], *resp_msg; - int i, nid = -1, status = 0; - bool node_reinit_sent = false, node_state_sent; - - for (i = 0; node_name[i]; i++) { - if ((node_name[i] >= '0') && (node_name[i] <= '9')) { - nid = strtol(node_name + i, NULL, 10); - break; - } - } - if (nid < 0) { - error("%s: No valid NID: %s", prog_name, node_name); - goto fini; - } - bit_set(node_bitmap, nid); - snprintf(nid_str, sizeof(nid_str), "%d", nid); - - if (mcdram_mode) { - /* Update MCDRAM mode. - * Example: "capmc set_mcdram_cfg –n 43 –m cache" */ - argv[0] = "capmc"; - argv[1] = "set_mcdram_cfg"; - argv[2] = "-m"; - argv[3] = mcdram_mode; - argv[4] = "-n"; - argv[5] = nid_str; - argv[6] = NULL; - node_state_sent = false; - for (i = 0; ((i < NODE_STATE_RETRIES) && !node_state_sent); - i++) { - resp_msg = _run_script(argv, &status); - if ((status == 0) || - (resp_msg && - strcasestr(resp_msg, "Success"))) { - debug("%s: set_mcdram_cfg sent to %s", - prog_name, nid_str); - node_state_sent = true; - } else { - error("%s: capmc(%s,%s,%s,%s,%s): %d %s", - prog_name, argv[1], argv[2], argv[3], - argv[4], argv[5], status, resp_msg); - sleep(1); - } - xfree(resp_msg); - } - } - - if (numa_mode) { - /* Update NUMA mode. 
- * Example: "capmc set_numa_cfg –m a2a –n 43" */ - argv[0] = "capmc"; - argv[1] = "set_numa_cfg"; - argv[2] = "-m"; - argv[3] = numa_mode; - argv[4] = "-n"; - argv[5] = nid_str; - argv[6] = NULL; - node_state_sent = false; - for (i = 0; ((i < NODE_STATE_RETRIES) && !node_state_sent); - i++) { - resp_msg = _run_script(argv, &status); - if ((status == 0) || - (resp_msg && - strcasestr(resp_msg, "Success"))) { - debug("%s: set_numa_cfg sent to %s", - prog_name, nid_str); - node_state_sent = true; - } else { - error("%s: capmc(%s,%s,%s,%s,%s): %d %s", - prog_name, argv[1], argv[2], argv[3], - argv[4], argv[5], status, resp_msg); - sleep(1); - } - xfree(resp_msg); - } - } - - /* Request node restart. - * Example: "capmc node_reinit –n 43" */ - argv[0] = "capmc"; - argv[1] = "node_reinit"; - argv[2] = "-n"; - argv[3] = nid_str; - argv[4] = NULL; -// argv[4] = "-r"; /* Future option: Reason */ -// argv[5] = "Change KNL mode"; - for (i = 0; ((i < NODE_REINIT_RETRIES) && !node_reinit_sent); i++) { - resp_msg = _run_script(argv, &status); - if ((status == 0) || - (resp_msg && strcasestr(resp_msg, "Success"))) { - debug("%s: node_reinit sent to %s", prog_name, nid_str); - node_reinit_sent = true; - } else { - error("%s: capmc(%s,%s,%s): %d %s", prog_name, - argv[1], argv[2], argv[3], status, resp_msg); - sleep(1); - } - xfree(resp_msg); - } - - if (!node_reinit_sent) { - char *scontrol_input = NULL; - xstrfmtcat(scontrol_input, - "%s/bin/scontrol update nodename=%s state=DOWN Reason=reboot_failure", - SLURM_PREFIX, node_name); - (void) system(scontrol_input); - xfree(scontrol_input); - } - -fini: slurm_mutex_lock(&thread_cnt_mutex); - thread_cnt--; - pthread_cond_signal(&thread_cnt_cond); - slurm_mutex_unlock(&thread_cnt_mutex); - return NULL; -} - static uint32_t *_json_parse_nids(json_object *jobj, char *key, int *num) { json_object *j_array = NULL; @@ -601,10 +476,6 @@ int main(int argc, char *argv[]) char *features, *save_ptr = NULL, *tok; update_node_msg_t node_msg; int 
rc = SLURM_SUCCESS; - hostlist_t hl = NULL; - char *node_name; - pthread_attr_t attr_work; - pthread_t thread_work = 0; xstrfmtcat(prog_name, "%s[%u]", argv[0], (uint32_t) getpid()); _read_config(); @@ -633,7 +504,7 @@ int main(int argc, char *argv[]) !strcasecmp(tok, "snc4")) { xfree(numa_mode); numa_mode = xstrdup(tok); - } else if (!strcasecmp(tok, "cache") || + } else if (!strcasecmp(tok, "cache") || !strcasecmp(tok, "split") || !strcasecmp(tok, "equal") || !strcasecmp(tok, "flat")) { @@ -645,50 +516,31 @@ int main(int argc, char *argv[]) xfree(features); } - /* Attempt to update and restart all nodes in a single capmc call, - * attempt to update and restart individual nodes only if that fails. */ + /* Attempt to update and restart all nodes in a single capmc call */ node_bitmap = bit_alloc(100000); if (_update_all_nodes(argv[1]) != 0) { - /* Spawn threads to change MCDRAM and NUMA states and start node - * reboot process */ - if ((hl = hostlist_create(argv[1])) == NULL) { - error("%s: Invalid hostlist (%s)", prog_name, argv[1]); - exit(2); - } - while ((node_name = hostlist_pop(hl))) { - slurm_mutex_lock(&thread_cnt_mutex); - while (1) { - if (thread_cnt <= MAX_THREADS) { - thread_cnt++; - break; - } else { /* wait for state change and retry */ - pthread_cond_wait(&thread_cnt_cond, - &thread_cnt_mutex); - } - } - slurm_mutex_unlock(&thread_cnt_mutex); - - slurm_attr_init(&attr_work); - (void) pthread_attr_setdetachstate - (&attr_work, PTHREAD_CREATE_DETACHED); - if (pthread_create(&thread_work, &attr_work, - _node_update, (void *) node_name)) { - _node_update((void *) node_name); - } - slurm_attr_destroy(&attr_work); + /* Could not reboot nodes. 
+ * Requeue/hold the job we were trying to start */ + uint32_t job_id = 0; + char *job_id_str = getenv("SLURM_JOB_ID"); + if (job_id_str) + job_id = strtol(job_id_str, NULL, 10); + if (job_id) + (void) slurm_requeue(job_id, JOB_REQUEUE_HOLD); + + /* Return the nodes to service */ + slurm_init_update_node_msg(&node_msg); + node_msg.node_names = argv[1]; + node_msg.node_state = NODE_STATE_POWER_SAVE | + NODE_STATE_POWER_UP; + rc = slurm_update_node(&node_msg); + if (rc != SLURM_SUCCESS) { + error("%s: slurm_update_node(\'%s\', \'IDLE\'): %s\n", + prog_name, argv[1], + slurm_strerror(slurm_get_errno())); } - hostlist_destroy(hl); - /* Wait for work threads to complete */ - slurm_mutex_lock(&thread_cnt_mutex); - while (1) { - if (thread_cnt == 0) - break; - else /* wait for state change and retry */ - pthread_cond_wait(&thread_cnt_cond, - &thread_cnt_mutex); - } - slurm_mutex_unlock(&thread_cnt_mutex); + exit(1); } xfree(mcdram_mode); xfree(numa_mode); diff --git a/contribs/cray/capmc_suspend.c b/contribs/cray/capmc_suspend.c index 330400a0d5b1f2c7d16361bd316d2bffd7563bae..1d88ea344763fa757673c00ab843ef01195fc188 100644 --- a/contribs/cray/capmc_suspend.c +++ b/contribs/cray/capmc_suspend.c @@ -107,10 +107,7 @@ static s_p_options_t knl_conf_file_options[] = { {NULL} }; -static bool _check_node_state(int nid, char *nid_str, char *state); static s_p_hashtbl_t *_config_make_tbl(char *filename); -static uint32_t *_json_parse_nids(json_object *jobj, char *key, int *num); -static void *_node_update(void *args); static void _read_config(void); static char *_run_script(char **script_argv, int *status); static int _tot_wait(struct timeval *start_time); @@ -276,137 +273,6 @@ static char *_run_script(char **script_argv, int *status) return resp; } -static uint32_t *_json_parse_nids(json_object *jobj, char *key, int *num) -{ - json_object *j_array = NULL; - json_object *j_value = NULL; - enum json_type j_type; - uint32_t *ents; - int i, cnt; - - *num = 0; - 
json_object_object_get_ex(jobj, key, &j_array); - if (!j_array) { - debug("%s: key=%s not found in nid specification", - prog_name, key); - return NULL; - } - - cnt = json_object_array_length(j_array); - ents = xmalloc(sizeof(uint32_t) * cnt); - for (i = 0; i < cnt; i++) { - j_value = json_object_array_get_idx(j_array, i); - j_type = json_object_get_type(j_value); - if (j_type != json_type_int) { - error("%s: Unable to parse nid specification", - prog_name); - break; - } else { - ents[i] = (uint32_t) json_object_get_int64(j_value); - *num = i + 1; - } - } - return ents; -} - -static bool _check_node_state(int nid, char *nid_str, char *state) -{ - bool node_state_ok = false; - char *argv[10], *resp_msg; - int i, nid_cnt, status = 0; - uint32_t *nid_array; - json_object *j; - - argv[0] = "capmc"; - argv[1] = "node_status"; - argv[2] = "-n"; - argv[3] = nid_str; - argv[4] = NULL; - resp_msg = _run_script(argv, &status); - if (status != 0) { - error("%s: capmc(%s,%s,%s): %d %s", prog_name, - argv[1], argv[2], argv[3], status, resp_msg); - xfree(resp_msg); - return node_state_ok; - } - j = json_tokener_parse(resp_msg); - if (j == NULL) { - error("%s: json parser failed on %s", prog_name, resp_msg); - xfree(resp_msg); - return node_state_ok; - } - xfree(resp_msg); - - nid_cnt = 0; - nid_array = _json_parse_nids(j, "off", &nid_cnt); - json_object_put(j); /* Frees json memory */ - for (i = 0; i < nid_cnt; i++) { - if (nid_array[i] == nid) { - node_state_ok = true; - break; - } - } - xfree(nid_array); - - return node_state_ok; -} - -static void *_node_update(void *args) -{ - char *node_name = (char *) args; - char *argv[10], nid_str[32], *resp_msg; - int i, nid = -1, status = 0; - bool node_state_ok, node_off_sent = false; - time_t poll_start; - - for (i = 0; node_name[i]; i++) { - if ((node_name[i] >= '0') && (node_name[i] <= '9')) { - nid = strtol(node_name + i, NULL, 10); - break; - } - } - if (nid < 0) { - error("%s: No valid NID: %s", prog_name, node_name); - return 
NULL; - } - snprintf(nid_str, sizeof(nid_str), "%d", nid); - - /* Request node power down. - * Example: "capmc node_off –n 43" */ - argv[0] = "capmc"; - argv[1] = "node_off"; - argv[2] = "-n"; - argv[3] = nid_str; - argv[4] = NULL; - for (i = 0; ((i < NODE_OFF_RETRIES) && !node_off_sent); i++) { - resp_msg = _run_script(argv, &status); - if ((status == 0) || - (resp_msg && strcasestr(resp_msg, "Success"))) { - debug("%s: node_off sent to %s", prog_name, argv[3]); - node_off_sent = true; - } else { - error("%s: capmc(%s,%s,%s): %d %s", prog_name, - argv[1], argv[2], argv[3], status, resp_msg); - sleep(1); - } - xfree(resp_msg); - } - - /* Wait for node in "off" state */ - poll_start = time(NULL); - while (!node_state_ok && - (difftime(time(NULL), poll_start) < NODE_OFF_STATE_WAIT)) { - sleep(capmc_poll_freq); - node_state_ok = _check_node_state(nid, nid_str, "off"); - } - - slurm_mutex_lock(&thread_cnt_mutex); - thread_cnt--; - pthread_cond_signal(&thread_cnt_cond); - slurm_mutex_unlock(&thread_cnt_mutex); - return NULL; -} - /* Convert node name string to equivalent nid string */ static char *_node_names_2_nid_list(char *node_names) { @@ -484,10 +350,6 @@ static int _update_all_nodes(char *node_names) int main(int argc, char *argv[]) { log_options_t log_opts = LOG_OPTS_INITIALIZER; - hostlist_t hl = NULL; - char *node_name; - pthread_attr_t attr_work; - pthread_t thread_work = 0; xstrfmtcat(prog_name, "%s[%u]", argv[0], (uint32_t) getpid()); _read_config(); @@ -497,37 +359,9 @@ int main(int argc, char *argv[]) log_opts.logfile_level += 3; (void) log_init(argv[0], log_opts, LOG_DAEMON, log_file); - /* Attempt to shutdown all nodes in a single capmc call, - * attempt to shutdown individual nodes only if that fails. 
*/ - if (_update_all_nodes(argv[1]) != 0) { - if ((hl = hostlist_create(argv[1])) == NULL) { - error("%s: Invalid hostlist (%s)", prog_name, argv[1]); - exit(2); - } - while ((node_name = hostlist_pop(hl))) { - slurm_mutex_lock(&thread_cnt_mutex); - while (1) { - if (thread_cnt <= MAX_THREADS) { - thread_cnt++; - break; - } else { /* wait for state change and retry */ - pthread_cond_wait(&thread_cnt_cond, - &thread_cnt_mutex); - } - } - slurm_mutex_unlock(&thread_cnt_mutex); - - slurm_attr_init(&attr_work); - (void) pthread_attr_setdetachstate - (&attr_work, PTHREAD_CREATE_DETACHED); - if (pthread_create(&thread_work, &attr_work, - _node_update, (void *) node_name)) { - _node_update((void *) node_name); - } - slurm_attr_destroy(&attr_work); - } - hostlist_destroy(hl); - } + /* Attempt to shutdown all nodes in a single capmc call. */ + if (_update_all_nodes(argv[1]) != 0) + exit(1); /* Wait for work threads to complete */ slurm_mutex_lock(&thread_cnt_mutex);