diff --git a/NEWS b/NEWS index c63109b112735cb9b61d7fe03fcdf10bfc5a44c5..b6b2eb9035671f582d690e55c6cdacfe1473a8a4 100644 --- a/NEWS +++ b/NEWS @@ -112,6 +112,7 @@ documents those changes that are of interest to users and administrators. time. -- Note when a job finishes in the slurmd to avoid a race when launching a batch job takes longer than it takes to finish. + -- Improve slurmd startup on large systems (> 10000 nodes). * Changes in Slurm 17.02.4 ========================== diff --git a/src/common/node_conf.c b/src/common/node_conf.c index 9f815a5a306dbecd917f996807f040a195351de2..922b3d8c62a58a851e345c2d441dcf1aef5870e9 100644 --- a/src/common/node_conf.c +++ b/src/common/node_conf.c @@ -96,6 +96,7 @@ static struct node_record * _find_node_record (char *name,bool test_alias,bool log_missing); static void _list_delete_config (void *config_entry); static int _list_find_config (void *config_entry, void *key); +static const char* _node_record_hash_identity (void* item); /* * _build_single_nodeline_info - From the slurm.conf reader, build table, @@ -231,7 +232,7 @@ static int _build_single_nodeline_info(slurm_conf_node_t *node_ptr, } /* find_node_record locks this to get the * alias so we need to unlock */ - node_rec = find_node_record(alias); + node_rec = find_node_record2(alias); if (node_rec == NULL) { node_rec = create_node_record(config_ptr, alias); @@ -400,6 +401,16 @@ static int _list_find_config (void *config_entry, void *key) return 0; } +/* + * xhash helper function to index node_record per name field + * in node_hash_table + */ +static const char* _node_record_hash_identity (void* item) +{ + struct node_record *node_ptr = (struct node_record *) item; + return node_ptr->name; +} + /* * bitmap2hostlist - given a bitmap, build a hostlist * IN bitmap - bitmap pointer @@ -686,10 +697,21 @@ extern struct node_record *create_node_record ( if (!node_record_table_ptr) { node_record_table_ptr = (struct node_record *) xmalloc (new_buffer_size); - } else if 
(old_buffer_size != new_buffer_size) + } else if (old_buffer_size != new_buffer_size) { xrealloc (node_record_table_ptr, new_buffer_size); + /* + * xrealloc may have moved the node table, so rebuild the hash here; + * otherwise it would hold only stale pointers into the old table. + */ + rehash_node(); + } node_ptr = node_record_table_ptr + (node_record_count++); node_ptr->name = xstrdup(node_name); + if (!node_hash_table) + node_hash_table = xhash_init(_node_record_hash_identity, + NULL, NULL, 0); + xhash_add(node_hash_table, node_ptr); + node_ptr->config_ptr = config_ptr; /* these values will be overwritten when the node actually registers */ node_ptr->cpus = config_ptr->cpus; @@ -800,15 +822,6 @@ static struct node_record *_find_node_record (char *name, bool test_alias, return NULL; } -/* - * xhash helper function to index node_record per name field - * in node_hash_table - */ -const char* node_record_hash_identity (void* item) { - struct node_record *node_ptr = (struct node_record *) item; - return node_ptr->name; -} - /* * init_node_conf - initialize the node configuration tables and values. * this should be called before creating any node or configuration @@ -988,7 +1001,7 @@ extern void rehash_node (void) struct node_record *node_ptr = node_record_table_ptr; xhash_free (node_hash_table); - node_hash_table = xhash_init(node_record_hash_identity, + node_hash_table = xhash_init(_node_record_hash_identity, NULL, NULL, 0); for (i = 0; i < node_record_count; i++, node_ptr++) { if ((node_ptr->name == NULL) ||