From 62631b3fa3f172b716f9860291bb0da075003b3d Mon Sep 17 00:00:00 2001 From: Morris Jette <jette@schedmd.com> Date: Fri, 8 Jan 2016 15:51:47 -0800 Subject: [PATCH] Add active feature list infrastructure --- src/common/node_conf.c | 152 ++++++++++++++++++++++++++++++++++-- src/common/node_conf.h | 13 ++- src/slurmctld/node_mgr.c | 76 +++++++++++++----- src/slurmctld/read_config.c | 12 ++- 4 files changed, 221 insertions(+), 32 deletions(-) diff --git a/src/common/node_conf.c b/src/common/node_conf.c index 4340c21a866..1544686b7e1 100644 --- a/src/common/node_conf.c +++ b/src/common/node_conf.c @@ -89,13 +89,17 @@ time_t last_node_update = (time_t) 0; /* time of last update */ struct node_record *node_record_table_ptr = NULL; /* node records */ xhash_t* node_hash_table = NULL; int node_record_count = 0; /* count in node_record_table_ptr */ - uint16_t *cr_node_num_cores = NULL; uint32_t *cr_node_cores_offset = NULL; -static void _add_config_feature(char *feature, bitstr_t *node_bitmap); +/* Local function declarations */ +static void _add_config_feature(List feature_list, char *feature, + bitstr_t *node_bitmap); +static void _add_config_feature_inx(List feature_list, char *feature, + int node_inx); static int _build_single_nodeline_info(slurm_conf_node_t *node_ptr, struct config_record *config_ptr); +static void _copy_feature_list(void); static int _delete_config_record (void); #if _DEBUG static void _dump_hash (void); @@ -110,14 +114,15 @@ static int _list_find_config (void *config_entry, void *key); static int _list_find_feature (void *feature_entry, void *key); -static void _add_config_feature(char *feature, bitstr_t *node_bitmap) +static void _add_config_feature(List feature_list, char *feature, + bitstr_t *node_bitmap) { node_feature_t *feature_ptr; ListIterator feature_iter; bool match = false; /* If feature already in avail_feature_list, just update the bitmap */ - feature_iter = list_iterator_create(avail_feature_list); + feature_iter = 
list_iterator_create(feature_list); while ((feature_ptr = (node_feature_t *) list_next(feature_iter))) { if (strcmp(feature, feature_ptr->name)) continue; @@ -132,10 +137,37 @@ static void _add_config_feature(char *feature, bitstr_t *node_bitmap) feature_ptr->magic = FEATURE_MAGIC; feature_ptr->name = xstrdup(feature); feature_ptr->node_bitmap = bit_copy(node_bitmap); - list_append(avail_feature_list, feature_ptr); + list_append(feature_list, feature_ptr); } } +static void _add_config_feature_inx(List feature_list, char *feature, + int node_inx) +{ + node_feature_t *feature_ptr; + ListIterator feature_iter; + bool match = false; + + /* If feature already in feature_list, just set the bit for node_inx */ + feature_iter = list_iterator_create(feature_list); + while ((feature_ptr = (node_feature_t *) list_next(feature_iter))) { + if (strcmp(feature, feature_ptr->name)) + continue; + bit_set(feature_ptr->node_bitmap, node_inx); + match = true; + break; + } + list_iterator_destroy(feature_iter); + + if (!match) { /* Need to create new feature_list record */ + feature_ptr = xmalloc(sizeof(node_feature_t)); + feature_ptr->magic = FEATURE_MAGIC; + feature_ptr->name = xstrdup(feature); + feature_ptr->node_bitmap = bit_alloc(node_record_count); + bit_set(feature_ptr->node_bitmap, node_inx); + list_append(feature_list, feature_ptr); + } +} /* * _build_single_nodeline_info - From the slurm.conf reader, build table, @@ -692,8 +724,111 @@ extern int build_all_nodeline_info (bool set_bitmap) return max_rc; } -/* Given a config_record with it's bitmap already set, - * build avail_feature_list */ +/* Rebuild active_feature_list for given node bitmap */ +extern void build_active_feature_list(bitstr_t *node_bitmap, + char *active_features) +{ + node_feature_t *feature_ptr; + ListIterator feature_iter; + char *tmp_str, *token, *last = NULL; + + /* Clear these nodes from each active_feature_list record (node_bitmap + * is inverted as a mask, then inverted back); restore as needed */ + feature_iter = 
list_iterator_create(active_feature_list); + bit_not(node_bitmap); + while ((feature_ptr = (node_feature_t *) list_next(feature_iter))) { + bit_and(feature_ptr->node_bitmap, node_bitmap); + } + list_iterator_destroy(feature_iter); + bit_not(node_bitmap); + + if (active_features) { + tmp_str = xstrdup(active_features); + token = strtok_r(tmp_str, ",", &last); + while (token) { + _add_config_feature(active_feature_list, token, + node_bitmap); + token = strtok_r(NULL, ",", &last); + } + xfree(tmp_str); + } +} + +/* Clear active_feature_list, + * then copy avail_feature_list into active_feature_list */ +static void _copy_feature_list(void) +{ + node_feature_t *active_feature_ptr, *avail_feature_ptr; + ListIterator feature_iter; + + (void) list_delete_all(active_feature_list, &_list_find_feature, NULL); + + feature_iter = list_iterator_create(avail_feature_list); + while ((avail_feature_ptr = (node_feature_t *)list_next(feature_iter))){ + active_feature_ptr = xmalloc(sizeof(node_feature_t)); + active_feature_ptr->magic = FEATURE_MAGIC; + active_feature_ptr->name = xstrdup(avail_feature_ptr->name); + active_feature_ptr->node_bitmap = + bit_copy(avail_feature_ptr->node_bitmap); + list_append(active_feature_list, active_feature_ptr); + } + list_iterator_destroy(feature_iter); +} + +/* Rebuild active_feature_list for given node index, + * IN node_inx - Node index, if -1 then copy avail_feature_list into + * active_feature_list, if -2 then log state (only if _DEBUG) + */ +extern void build_active_feature_list2(int node_inx, char *active_features) +{ + node_feature_t *feature_ptr; + ListIterator feature_iter; + char *tmp_str, *token, *last = NULL; + + if (node_inx == -1) { + _copy_feature_list(); + return; + } + if (node_inx == -2) { +#if _DEBUG + feature_iter = list_iterator_create(active_feature_list); + while ((feature_ptr = (node_feature_t *) + list_next(feature_iter))) { + info("ACTIVE FEATURE: NAME:%s CNT:%d", + feature_ptr->name, + bit_set_count(feature_ptr->node_bitmap)); + } + 
list_iterator_destroy(feature_iter); +#endif + return; + } + + if ((node_inx < 0) || (node_inx >= node_record_count)) { + error("%s: Invalid node_inx:%d", __func__, node_inx); + return; + } + + /* Clear this node from each active_feature_list record, + * then restore as needed */ + feature_iter = list_iterator_create(active_feature_list); + while ((feature_ptr = (node_feature_t *) list_next(feature_iter))) { + bit_clear(feature_ptr->node_bitmap, node_inx); + } + list_iterator_destroy(feature_iter); + + if (active_features) { + tmp_str = xstrdup(active_features); + token = strtok_r(tmp_str, ",", &last); + while (token) { + _add_config_feature_inx(active_feature_list, token, + node_inx); + token = strtok_r(NULL, ",", &last); + } + xfree(tmp_str); + } +} + +/* Rebuild avail_feature_list for given node configuration structure */ extern void build_avail_feature_list(struct config_record *config_ptr) { node_feature_t *feature_ptr; @@ -714,7 +849,8 @@ extern void build_avail_feature_list(struct config_record *config_ptr) tmp_str = xstrdup(config_ptr->feature); token = strtok_r(tmp_str, ",", &last); while (token) { - _add_config_feature(token, config_ptr->node_bitmap); + _add_config_feature(avail_feature_list, token, + config_ptr->node_bitmap); token = strtok_r(NULL, ",", &last); } xfree(tmp_str); diff --git a/src/common/node_conf.h b/src/common/node_conf.h index bef76371e69..78efa433b68 100644 --- a/src/common/node_conf.h +++ b/src/common/node_conf.h @@ -240,8 +240,17 @@ extern int build_all_nodeline_info (bool set_bitmap); */ extern int build_all_frontend_info (bool is_slurmd_context); -/* Given a config_record with it's bitmap already set, - * build avail_feature_list */ +/* Rebuild active_feature_list for given node bitmap */ +extern void build_active_feature_list(bitstr_t *node_bitmap, + char *active_features); + +/* Rebuild active_feature_list for given node index, + * IN node_inx - Node index, if -1 then copy avail_feature_list into + * active_feature_list, if -2 then log 
state (only if _DEBUG) + */ +extern void build_active_feature_list2(int node_inx, char *active_features); + +/* Rebuild avail_feature_list for given node configuration structure */ extern void build_avail_feature_list(struct config_record *config_ptr); /* diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index d76bda53087..780a9489705 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -111,7 +111,10 @@ static void _pack_node(struct node_record *dump_node_ptr, Buf buffer, static void _sync_bitmaps(struct node_record *node_ptr, int job_count); static void _update_config_ptr(bitstr_t *bitmap, struct config_record *config_ptr); -static int _update_node_features(char *node_names, char *features); +static int _update_node_active_features(char *node_names, + char *active_features); +static int _update_node_avail_features(char *node_names, + char *avail_features); static int _update_node_gres(char *node_names, char *gres); static int _update_node_weight(char *node_names, uint32_t weight); static bool _valid_node_state_change(uint32_t old, uint32_t new); @@ -1347,7 +1350,8 @@ int update_node ( update_node_msg_t * update_node_msg ) node_ptr->features = xstrdup(update_node_msg->features); } - /* _update_node_features() logs and updates config */ + /* _update_node_avail_features() logs and updates + * avail_feature_list */ } if (update_node_msg->features_act && @@ -1363,7 +1367,8 @@ int update_node ( update_node_msg_t * update_node_msg ) node_ptr->features_act = xstrdup(update_node_msg->features_act); } - /* _update_node_features() logs and updates config */ + /* _update_node_active_features() logs and updates + * active_feature_list */ } if (update_node_msg->gres) { @@ -1614,9 +1619,15 @@ int update_node ( update_node_msg_t * update_node_msg ) FREE_NULL_HOSTLIST(hostname_list); last_node_update = now; + if ((error_code == 0) && (update_node_msg->features_act)) { + error_code = _update_node_active_features( + update_node_msg->node_names, + 
update_node_msg->features_act); + } if ((error_code == 0) && (update_node_msg->features)) { - error_code = _update_node_features(update_node_msg->node_names, - update_node_msg->features); + error_code = _update_node_avail_features( + update_node_msg->node_names, + update_node_msg->features); } if ((error_code == 0) && (update_node_msg->gres)) { error_code = _update_node_gres(update_node_msg->node_names, @@ -1668,8 +1679,8 @@ extern void restore_node_features(int recover) error("Node %s Features(%s) differ from slurm.conf", node_ptr->name, node_ptr->features); if (recover == 2) { - _update_node_features(node_ptr->name, - node_ptr->features); + _update_node_avail_features(node_ptr->name, + node_ptr->features); } else { xfree(node_ptr->features); node_ptr->features = xstrdup(node_ptr-> @@ -1677,7 +1688,7 @@ extern void restore_node_features(int recover) feature); } } - +//FIXME ?? /* We lose the gres information updated manually and always * use the information from slurm.conf */ (void) gres_plugin_node_reconfig(node_ptr->name, @@ -1762,7 +1773,6 @@ static int _update_node_weight(char *node_names, uint32_t weight) new_config_ptr->node_bitmap = bit_copy(tmp_bitmap); new_config_ptr->nodes = bitmap2node_name(tmp_bitmap); - build_avail_feature_list(new_config_ptr); _update_config_ptr(tmp_bitmap, new_config_ptr); /* Update remaining records */ @@ -1783,13 +1793,37 @@ static int _update_node_weight(char *node_names, uint32_t weight) } /* - * _update_node_features - Update features associated with nodes - * build new config list records as needed + * _update_node_active_features - Update active features associated with nodes * IN node_names - List of nodes to update - * IN features - New features value + * IN active_features - New active features value * RET: SLURM_SUCCESS or error code */ -static int _update_node_features(char *node_names, char *features) +static int _update_node_active_features(char *node_names, char *active_features) +{ + bitstr_t *node_bitmap = NULL; + 
int rc; + + rc = node_name2bitmap(node_names, false, &node_bitmap); + if (rc) { + info("%s: invalid node_name (%s)", __func__, node_names); + return rc; + } + build_active_feature_list(node_bitmap, active_features); + FREE_NULL_BITMAP(node_bitmap); + + info("%s: nodes %s active features set to: %s", + __func__, node_names, active_features); + return SLURM_SUCCESS; +} + +/* + * _update_node_avail_features - Update available features associated with + * nodes, build new config list records as needed + * IN node_names - List of nodes to update + * IN avail_features - New available features value + * RET: SLURM_SUCCESS or error code + */ +static int _update_node_avail_features(char *node_names, char *avail_features) { bitstr_t *node_bitmap = NULL, *tmp_bitmap; ListIterator config_iterator; @@ -1799,7 +1833,7 @@ static int _update_node_features(char *node_names, char *features) rc = node_name2bitmap(node_names, false, &node_bitmap); if (rc) { - info("_update_node_features: invalid node_name"); + info("%s: invalid node_name (%s)", __func__, node_names); return rc; } @@ -1821,8 +1855,8 @@ static int _update_node_features(char *node_names, char *features) } else if (tmp_cnt == config_cnt) { /* all nodes changed, update in situ */ xfree(config_ptr->feature); - if (features && features[0]) - config_ptr->feature = xstrdup(features); + if (avail_features && avail_features[0]) + config_ptr->feature = xstrdup(avail_features); build_avail_feature_list(config_ptr); } else { /* partial update, split config_record */ @@ -1830,8 +1864,10 @@ static int _update_node_features(char *node_names, char *features) if (first_new == NULL) first_new = new_config_ptr; xfree(new_config_ptr->feature); - if (features && features[0]) - new_config_ptr->feature = xstrdup(features); + if (avail_features && avail_features[0]) { + new_config_ptr->feature = + xstrdup(avail_features); + } new_config_ptr->node_bitmap = bit_copy(tmp_bitmap); new_config_ptr->nodes = bitmap2node_name(tmp_bitmap); @@ -1850,8 
+1886,8 @@ static int _update_node_features(char *node_names, char *features) list_iterator_destroy(config_iterator); FREE_NULL_BITMAP(node_bitmap); - info("_update_node_features: nodes %s features set to: %s", - node_names, features); + info("%s: nodes %s available features set to: %s", + __func__, node_names, avail_features); return SLURM_SUCCESS; } diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index 196eff948f0..b2d9d03fad0 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -339,8 +339,8 @@ static int _build_bitmaps(void) /* scan all nodes and identify which are up, idle and * their configuration, resync DRAINED vs. DRAINING state */ - for (i=0, node_ptr=node_record_table_ptr; - i<node_record_count; i++, node_ptr++) { + for (i = 0, node_ptr = node_record_table_ptr; + i < node_record_count; i++, node_ptr++) { uint32_t drain_flag, job_cnt; if (node_ptr->name[0] == '\0') @@ -366,12 +366,20 @@ static int _build_bitmaps(void) bit_set(node_ptr->config_ptr->node_bitmap, i); } + /* Build active and available feature lists used for scheduling */ config_iterator = list_iterator_create(config_list); while ((config_ptr = (struct config_record *) list_next(config_iterator))) { build_avail_feature_list(config_ptr); } list_iterator_destroy(config_iterator); + build_active_feature_list2(-1, NULL); /* Copy avail list to active */ + for (i = 0, node_ptr = node_record_table_ptr; + i < node_record_count; i++, node_ptr++) { + if (node_ptr->features_act) + build_active_feature_list2(i, node_ptr->features_act); + } + build_active_feature_list2(-2, NULL); /* Log active list */ return error_code; } -- GitLab