From 1b6c4ecf0ff91cd5b3ef58d492d11ba9bdea014a Mon Sep 17 00:00:00 2001 From: Morris Jette <jette@schedmd.com> Date: Wed, 5 Oct 2016 10:14:51 -0600 Subject: [PATCH] node_features/knl_cray plugin: KNL type check If a node is initially determined to NOT be of type KNL, then don't ever look for it's MCDRAM or NUMA modes in cnselect output when dealing with job launch failures or unexpected node reboots. --- .../node_features/knl_cray/node_features_knl_cray.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/plugins/node_features/knl_cray/node_features_knl_cray.c b/src/plugins/node_features/knl_cray/node_features_knl_cray.c index 688ca2c5f77..ddaf007f754 100644 --- a/src/plugins/node_features/knl_cray/node_features_knl_cray.c +++ b/src/plugins/node_features/knl_cray/node_features_knl_cray.c @@ -160,6 +160,7 @@ static uint32_t capmc_poll_freq = 45; /* capmc state polling frequency */ static uint32_t capmc_retries = DEFAULT_CAPMC_RETRIES; static uint32_t capmc_timeout = 0; /* capmc command timeout in msec */ static bitstr_t *capmc_node_bitmap = NULL; /* Nodes found by capmc */ +static bitstr_t *knl_node_bitmap = NULL; /* KNL nodes found by capmc */ static char *cnselect_path = NULL; static bool debug_flag = false; static uint16_t allow_mcdram = KNL_MCDRAM_FLAG; @@ -1300,9 +1301,7 @@ static void _update_all_node_features( char node_name[32], *prefix; int i, node_inx, width = 5; uint64_t mcdram_size; - bitstr_t *knl_node_bitmap; - knl_node_bitmap = bit_alloc(node_record_count); if ((node_record_table_ptr == NULL) || (node_record_table_ptr->name == NULL)) { prefix = xstrdup("nid"); @@ -1319,6 +1318,8 @@ static void _update_all_node_features( } } if (mcdram_cap) { + if (!knl_node_bitmap) + knl_node_bitmap = bit_alloc(node_record_count); for (i = 0; i < mcdram_cap_cnt; i++) { snprintf(node_name, sizeof(node_name), "%s%.*d", prefix, width, mcdram_cap[i].nid); @@ -1396,7 +1397,6 @@ static void _update_all_node_features( } xfree(prefix); - FREE_NULL_BITMAP(knl_node_bitmap); } /* Update a specific node's features and features_act fields based upon @@ -1514,13 +1514,16 @@ static void _update_node_features2(struct node_record *node_ptr, uint64_t mcdram_size; xassert(node_ptr); + node_idx = node_ptr - node_record_table_ptr; + if (!knl_node_bitmap || !bit_test(knl_node_bitmap, node_idx)) + return; /* Not KNL node */ + nid = strtol(node_ptr->name + 3, &end_ptr, 10); if ((end_ptr[0] != '\0') || (nid < 0) || (nid >= 100000)) { error("%s: Invalid node name (%s)", __func__, node_ptr->name); return; } - node_idx = node_ptr - node_record_table_ptr; _strip_knl_opts(&node_ptr->features_act); if (mcdram_cfg2) { @@ -1739,6 +1742,7 @@ extern int fini(void) xfree(mcdram_per_node); xfree(syscfg_path); FREE_NULL_BITMAP(capmc_node_bitmap); + FREE_NULL_BITMAP(knl_node_bitmap); return SLURM_SUCCESS; } -- GitLab