diff --git a/NEWS b/NEWS index dd70abc76fe130d7d4da2ef0da1c04013d99a5d3..821c3bb13e0a147cadbaaec17c4f3daa616114da 100644 --- a/NEWS +++ b/NEWS @@ -13,6 +13,8 @@ documents those changes that are of interest to users and admins. your code. -- salloc's --wait=<secs> option deprecated by --immediate=<secs> option to match the srun command. + -- Multiple features can be specified when creating a reservation. Use "&" + (AND) or "|" (OR) separators between the feature names. * Changes in SLURM 2.1.0-pre1 ============================= diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1 index bfec4682e8c6b341ad0402f8dcc8e52d19dadab3..bc61bac079eea1b124e0fbbc865914cb096ddcb3 100644 --- a/doc/man/man1/scontrol.1 +++ b/doc/man/man1/scontrol.1 @@ -651,7 +651,7 @@ Reservation is for specific nodes (output only) .TP \fIFeatures\fP=<features> Set the reservation's required node features. Multiple values -may be comma separated if all features are required (AND operation) or +may be "&" separated if all features are required (AND operation) or separated by "|" if any of the specified features are required (OR operation). Value may be cleared with blank data value, "Features=". 
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index d4f248fc443a21acb44d557b0036a33b206d8168..20ddd60022564502b9738710e5e219691e8cfb47 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -1376,6 +1376,11 @@ extern int build_feature_list(struct job_record *job_ptr) list_append(detail_ptr->feature_list, feat); } break; + } else if (tmp_requested[i] == ',') { + info("Job %u invalid constraint %s", + job_ptr->job_id, detail_ptr->features); + xfree(tmp_requested); + return ESLURM_INVALID_FEATURE; } else if (feature == NULL) { feature = &tmp_requested[i]; } @@ -1411,7 +1416,7 @@ static int _valid_feature_list(uint32_t job_id, List feature_list) } feat_iter = list_iterator_create(feature_list); - while((feat_ptr = (struct feature_record *)list_next(feat_iter))) { + while ((feat_ptr = (struct feature_record *)list_next(feat_iter))) { if (feat_ptr->op_code == FEATURE_OP_XOR) { if (bracket == 0) xstrcat(buf, "["); @@ -1445,28 +1450,23 @@ static int _valid_feature_list(uint32_t job_id, List feature_list) static int _valid_node_feature(char *feature) { - int i, rc = ESLURM_INVALID_FEATURE; - ListIterator config_iterator; - struct config_record *config_ptr; - - config_iterator = list_iterator_create(config_list); - if (config_iterator == NULL) - fatal("list_iterator_create malloc failure"); - while ((config_ptr = (struct config_record *) - list_next(config_iterator))) { - if (config_ptr->feature_array == NULL) + int rc = ESLURM_INVALID_FEATURE; + struct features_record *feature_ptr; + ListIterator feature_iter; + + /* Search the global feature_list for a record whose + * name matches the requested feature */ + feature_iter = list_iterator_create(feature_list); + if (feature_iter == NULL) + fatal("list_inerator_create malloc failure"); + while ((feature_ptr = (struct features_record *) + list_next(feature_iter))) { + if (strcmp(feature_ptr->name, feature)) continue; - for (i=0; ; i++) { - if (config_ptr->feature_array[i] == NULL) - 
break; - if (strcmp(feature, config_ptr->feature_array[i])) - continue; - rc = SLURM_SUCCESS; - break; - } - if (rc == SLURM_SUCCESS) - break; + rc = SLURM_SUCCESS; + break; } - list_iterator_destroy(config_iterator); + list_iterator_destroy(feature_iter); + return rc; } diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 9bd16c516610a6bdaeaf1cdd97f9e83e79c7df6f..c183e6936e01272e0c420c34dcb6117cfb4cd3a4 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -79,7 +79,8 @@ #define NODE_STATE_VERSION "VER003" /* Global variables */ -List config_list = NULL; /* list of config_record entries */ +List config_list = NULL; /* list of config_record entries */ +List feature_list = NULL; /* list of features_record entries */ struct node_record *node_record_table_ptr = NULL; /* node records */ struct node_record **node_hash_table = NULL; /* node_record hash table */ time_t last_bitmap_update = (time_t) NULL; /* time of last node creation @@ -92,13 +93,16 @@ bitstr_t *power_node_bitmap = NULL; /* bitmap of powered down nodes */ bitstr_t *share_node_bitmap = NULL; /* bitmap of sharable nodes */ bitstr_t *up_node_bitmap = NULL; /* bitmap of non-down nodes */ +static void _add_config_feature(char *feature, bitstr_t *node_bitmap); static int _delete_config_record (void); static void _dump_node_state (struct node_record *dump_node_ptr, Buf buffer); static struct node_record * _find_alias_node_record (char *name); static int _hash_index (char *name); static void _list_delete_config (void *config_entry); +static void _list_delete_feature (void *feature_entry); static int _list_find_config (void *config_entry, void *key); +static int _list_find_feature (void *feature_entry, void *key); static void _make_node_down(struct node_record *node_ptr, time_t event_time); static void _node_did_resp(struct node_record *node_ptr); @@ -240,8 +244,8 @@ create_node_record (struct config_record *config_ptr, char *node_name) static int _delete_config_record (void) { 
last_node_update = time (NULL); - (void) list_delete_all (config_list, &_list_find_config, - "universal_key"); + (void) list_delete_all (config_list, &_list_find_config, NULL); + (void) list_delete_all (feature_list, &_list_find_feature, NULL); return SLURM_SUCCESS; } @@ -712,8 +716,9 @@ int init_node_conf (void) if (config_list) /* delete defunct configuration entries */ (void) _delete_config_record (); else { - config_list = list_create (_list_delete_config); - if (config_list == NULL) + config_list = list_create (_list_delete_config); + feature_list = list_create (_list_delete_feature); + if ((config_list == NULL) || (feature_list == NULL)) fatal("list_create malloc failure"); } @@ -742,26 +747,57 @@ static void _list_delete_config (void *config_entry) xassert(config_ptr); xassert(config_ptr->magic == CONFIG_MAGIC); xfree (config_ptr->feature); - build_config_feature_array(config_ptr); + build_config_feature_list(config_ptr); xfree (config_ptr->nodes); FREE_NULL_BITMAP (config_ptr->node_bitmap); xfree (config_ptr); } +/* _list_delete_feature - delete an entry from the feature list, + * see list.h for documentation */ +static void _list_delete_feature (void *feature_entry) +{ + struct features_record *feature_ptr = (struct features_record *) + feature_entry; + + xassert(feature_ptr); + xassert(feature_ptr->magic == FEATURE_MAGIC); + xfree (feature_ptr->name); + FREE_NULL_BITMAP (feature_ptr->node_bitmap); + xfree (feature_ptr); +} /* * _list_find_config - find an entry in the config list, see list.h for * documentation - * IN key - is "universal_key" for all config - * RET 1 if key == "universal_key", 0 otherwise + * IN key - is NULL for all config + * RET 1 if key == NULL, 0 otherwise */ static int _list_find_config (void *config_entry, void *key) { - if (strcmp (key, "universal_key") == 0) + if (key == NULL) return 1; return 0; } +/* + * _list_find_feature - find an entry in the feature list, see list.h for + * documentation + * IN key - is feature name or NULL 
for all features + * RET 1 if found, 0 otherwise + */ +static int _list_find_feature (void *feature_entry, void *key) +{ + struct features_record *feature_ptr; + + if (key == NULL) + return 1; + + feature_ptr = (struct features_record *) feature_entry; + if (strcmp(feature_ptr->name, (char *) key) == 0) + return 1; + return 0; +} /* * node_name2bitmap - given a node name regular expression, build a bitmap @@ -1338,10 +1374,10 @@ static int _update_node_weight(char *node_names, uint32_t weight) new_config_ptr->feature = xstrdup(config_ptr-> feature); } - build_config_feature_array(new_config_ptr); new_config_ptr->node_bitmap = bit_copy(tmp_bitmap); new_config_ptr->nodes = bitmap2node_name(tmp_bitmap); + build_config_feature_list(new_config_ptr); _update_config_ptr(tmp_bitmap, new_config_ptr); /* Update remaining records */ @@ -1406,7 +1442,7 @@ static int _update_node_features(char *node_names, char *features) config_ptr->feature = xstrdup(features); else config_ptr->feature = NULL; - build_config_feature_array(config_ptr); + build_config_feature_list(config_ptr); } else { /* partial update, split config_record */ new_config_ptr = create_config_record(); @@ -1422,10 +1458,10 @@ static int _update_node_features(char *node_names, char *features) new_config_ptr->weight = config_ptr->weight; if (features[0]) new_config_ptr->feature = xstrdup(features); - build_config_feature_array(new_config_ptr); new_config_ptr->node_bitmap = bit_copy(tmp_bitmap); new_config_ptr->nodes = bitmap2node_name(tmp_bitmap); + build_config_feature_list(new_config_ptr); _update_config_ptr(tmp_bitmap, new_config_ptr); /* Update remaining records */ @@ -2606,6 +2642,8 @@ void node_fini(void) if (config_list) { list_destroy(config_list); config_list = NULL; + list_destroy(feature_list); + feature_list = NULL; } for (i=0; i< node_record_count; i++) { @@ -2658,35 +2696,69 @@ extern int send_nodes_to_accounting(time_t event_time) return rc; } -/* Given a config_record, clear any existing 
feature_array and - * if feature is set, then rebuild feature_array - * Filter out any white-space from the feature string */ -extern void build_config_feature_array(struct config_record *config_ptr) +static void _add_config_feature(char *feature, bitstr_t *node_bitmap) +{ + struct features_record *feature_ptr; + ListIterator feature_iter; + bool match = false; + + /* If feature already exists in feature_list, just update the bitmap */ + feature_iter = list_iterator_create(feature_list); + if (feature_iter == NULL) + fatal("list_iterator_create malloc failure"); + while ((feature_ptr = (struct features_record *) + list_next(feature_iter))) { + if (strcmp(feature, feature_ptr->name)) + continue; + bit_or(feature_ptr->node_bitmap, node_bitmap); + match = true; + break; + } + list_iterator_destroy(feature_iter); + + if (!match) { /* Need to create new feature_list record */ + feature_ptr = xmalloc(sizeof(struct features_record)); + feature_ptr->magic = FEATURE_MAGIC; + feature_ptr->name = xstrdup(feature); + feature_ptr->node_bitmap = bit_copy(node_bitmap); + list_append(feature_list, feature_ptr); + } +} + +/* Given a config_record with its bitmap already set, update feature_list */ +extern void build_config_feature_list(struct config_record *config_ptr) { + struct features_record *feature_ptr; + ListIterator feature_iter; int i, j; char *tmp_str, *token, *last = NULL; - /* clear any old feature_array */ - if (config_ptr->feature_array) { - for (i=0; config_ptr->feature_array[i]; i++) - xfree(config_ptr->feature_array[i]); - xfree(config_ptr->feature_array); + /* Clear these nodes from the feature_list record, + * then restore as needed */ + feature_iter = list_iterator_create(feature_list); + if (feature_iter == NULL) + fatal("list_inerator_create malloc failure"); + bit_not(config_ptr->node_bitmap); + while ((feature_ptr = (struct features_record *) + list_next(feature_iter))) { + bit_and(feature_ptr->node_bitmap, config_ptr->node_bitmap); } + 
list_iterator_destroy(feature_iter); + bit_not(config_ptr->node_bitmap); if (config_ptr->feature) { i = strlen(config_ptr->feature) + 1; /* oversized */ - config_ptr->feature_array = xmalloc(i * sizeof(char *)); tmp_str = xmalloc(i); + /* Remove white space from feature specification */ for (i=0, j=0; config_ptr->feature[i]; i++) { if (!isspace(config_ptr->feature[i])) tmp_str[j++] = config_ptr->feature[i]; } if (i != j) strcpy(config_ptr->feature, tmp_str); - i = 0; token = strtok_r(tmp_str, ",", &last); while (token) { - config_ptr->feature_array[i++] = xstrdup(token); + _add_config_feature(token, config_ptr->node_bitmap); token = strtok_r(NULL, ",", &last); } xfree(tmp_str); diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index c0eb5f64fc4b1b898206122fe292f5d5a663c408..437c352e0cb066221c9de69f58f660ea9b8d55e2 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -88,9 +88,8 @@ struct node_set { /* set of nodes with same configuration */ uint32_t nodes; uint32_t weight; char *features; - char **feature_array; /* POINTER, NOT COPIED */ - bitstr_t *feature_bits; - bitstr_t *my_bitmap; + bitstr_t *feature_bits; /* XORed feature's position */ + bitstr_t *my_bitmap; /* node bitmap */ }; static int _build_node_list(struct job_record *job_ptr, @@ -98,6 +97,7 @@ static int _build_node_list(struct job_record *job_ptr, int *node_set_size); static void _filter_nodes_in_set(struct node_set *node_set_ptr, struct job_details *detail_ptr); +static int _list_find_feature(void *feature_entry, void *key); static int _match_feature(char *seek, struct node_set *node_set_ptr); static int _nodes_in_sets(bitstr_t *req_bitmap, struct node_set * node_set_ptr, @@ -108,11 +108,10 @@ static int _pick_best_nodes(struct node_set *node_set_ptr, struct part_record *part_ptr, uint32_t min_nodes, uint32_t max_nodes, uint32_t req_nodes, bool test_only); -static void _reset_feature_counts(struct job_details *details_ptr); -static bool 
_valid_feature_counts(struct job_details *details_ptr); +static bool _valid_feature_counts(struct job_details *detail_ptr, + bitstr_t *node_bitmap, bool *has_xor); static bitstr_t *_valid_features(struct job_details *detail_ptr, - struct config_record *config_ptr, - bool update_count); + struct config_record *config_ptr); /* @@ -241,18 +240,19 @@ extern void deallocate_nodes(struct job_record *job_ptr, bool timeout, */ static int _match_feature(char *seek, struct node_set *node_set_ptr) { - int i; + struct features_record *feat_ptr; if (seek == NULL) return 1; /* nothing to look for */ - if (node_set_ptr->feature_array == NULL) - return 0; /* nothing to find */ - for (i=0; node_set_ptr->feature_array[i]; i++) { - if (strcmp(seek, node_set_ptr->feature_array[i]) == 0) - return 1; /* this is it */ - } - return 0; /* not found */ + feat_ptr = list_find_first(feature_list, _list_find_feature, + (void *) seek); + if (feat_ptr == NULL) + return 0; /* no such feature */ + + if (bit_super_set(node_set_ptr->my_bitmap, feat_ptr->node_bitmap)) + return 1; /* nodes have this feature */ + return 0; } @@ -420,9 +420,6 @@ _get_req_features(struct node_set *node_set_ptr, int node_set_size, node_set_ptr[i].weight; tmp_node_set_ptr[tmp_node_set_size].features = xstrdup(node_set_ptr[i].features); - tmp_node_set_ptr[tmp_node_set_size]. - feature_array = - node_set_ptr[i].feature_array; tmp_node_set_ptr[tmp_node_set_size]. 
feature_bits = bit_copy(node_set_ptr[i].feature_bits); @@ -780,24 +777,26 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, avail_nodes = bit_set_count(avail_bitmap); tried_sched = false; /* need to test these nodes */ - if (shared && ((i+1) < node_set_size) && + if (shared && ((i+1) < node_set_size) && (node_set_ptr[i].weight == - node_set_ptr[i+1].weight)) { + node_set_ptr[i+1].weight) && + ((i+1) < node_set_size)) { /* Keep accumulating so we can pick the * most lightly loaded nodes */ continue; } + if ((avail_nodes < min_nodes) || + ((avail_nodes >= min_nodes) && + (avail_nodes < req_nodes) && + ((i+1) < node_set_size))) + continue; /* Keep accumulating nodes */ + if ((job_ptr->details->req_node_bitmap) && (!bit_super_set(job_ptr->details->req_node_bitmap, - avail_bitmap))) + avail_bitmap))) continue; - if ((avail_nodes < min_nodes) || - ((req_nodes > min_nodes) && - (avail_nodes < req_nodes))) - continue; /* Keep accumulating nodes */ - /* NOTE: select_g_job_test() is destructive of * avail_bitmap, so save a backup copy */ backup_bitmap = bit_copy(avail_bitmap); @@ -840,9 +839,9 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, } /* for (i = 0; i < node_set_size; i++) */ /* try to get req_nodes now for this feature */ - if (avail_bitmap && (!tried_sched) - && (avail_nodes >= min_nodes) - && ((job_ptr->details->req_node_bitmap == NULL) || + if (avail_bitmap && (!tried_sched) && + (avail_nodes >= min_nodes) && + ((job_ptr->details->req_node_bitmap == NULL) || bit_super_set(job_ptr->details->req_node_bitmap, avail_bitmap))) { pick_code = select_g_job_test(job_ptr, avail_bitmap, @@ -863,10 +862,10 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, * nodes available) */ if (total_bitmap) total_nodes = bit_set_count(total_bitmap); - if (total_bitmap - && (!runable_ever || !runable_avail) - && (total_nodes >= min_nodes) - && ((job_ptr->details->req_node_bitmap == NULL) || + if (total_bitmap && + 
(!runable_ever || !runable_avail) && + (total_nodes >= min_nodes) && + ((job_ptr->details->req_node_bitmap == NULL) || (bit_super_set(job_ptr->details->req_node_bitmap, total_bitmap)))) { if (!runable_avail) { @@ -1151,41 +1150,114 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only, return error_code; } -/* Clear tmp_cnt for all features of given job */ -static void _reset_feature_counts(struct job_details *details_ptr) +/* + * _list_find_feature - find an entry in the feature list, see list.h for + * documentation + * IN key - is feature name or NULL for all features + * RET 1 if found, 0 otherwise + */ +static int _list_find_feature(void *feature_entry, void *key) { - ListIterator feat_iter; - struct feature_record *feat_ptr; + struct features_record *feature_ptr; - if (details_ptr->feature_list == NULL) /* no constraints */ - return; + if (key == NULL) + return 1; - feat_iter = list_iterator_create(details_ptr->feature_list); - while ((feat_ptr = (struct feature_record *) list_next(feat_iter))) { - feat_ptr->tmp_cnt = 0; - } - list_iterator_destroy(feat_iter); + feature_ptr = (struct features_record *) feature_entry; + if (strcmp(feature_ptr->name, (char *) key) == 0) + return 1; + return 0; } -/* Verify that tmp_cnt >= count for all features of given job */ -static bool _valid_feature_counts(struct job_details *details_ptr) +/* + * _valid_feature_counts - validate a job's features can be satisfied + * by the selected nodes (NOTE: does not process XOR operators) + * IN detail_ptr - job details + * IN/OUT node_bitmap - nodes available for use, clear if unusable + * RET true if valid, false otherwise + */ +static bool _valid_feature_counts(struct job_details *detail_ptr, + bitstr_t *node_bitmap, bool *has_xor) { - ListIterator feat_iter; - struct feature_record *feat_ptr; - bool result = true; - - if (details_ptr->feature_list == NULL) /* no constraints */ - return result; - - feat_iter = list_iterator_create(details_ptr->feature_list); - while 
((feat_ptr = (struct feature_record *) list_next(feat_iter))) { - if (feat_ptr->tmp_cnt >= feat_ptr->count) - continue; - result = false; - break; + ListIterator job_feat_iter; + struct feature_record *job_feat_ptr; + struct features_record *feat_ptr; + int have_count = false, last_op = FEATURE_OP_AND; + bitstr_t *feature_bitmap, *tmp_bitmap; + bool rc = true; + + xassert(detail_ptr); + xassert(node_bitmap); + xassert(has_xor); + + *has_xor = false; + if (detail_ptr->feature_list == NULL) /* no constraints */ + return rc; + + feature_bitmap = bit_copy(node_bitmap); + if (feature_bitmap == NULL) + fatal("bit_copy malloc error"); + job_feat_iter = list_iterator_create(detail_ptr->feature_list); + if (job_feat_iter == NULL) + fatal("list_iterator_create malloc error"); + while ((job_feat_ptr = (struct feature_record *) + list_next(job_feat_iter))) { + feat_ptr = list_find_first(feature_list, _list_find_feature, + (void *) job_feat_ptr->name); + if (feat_ptr) { + if (last_op == FEATURE_OP_AND) + bit_and(feature_bitmap, feat_ptr->node_bitmap); + else if (last_op == FEATURE_OP_XOR) { + *has_xor = true; + bit_or(feature_bitmap, feat_ptr->node_bitmap); + } else /* FEATURE_OP_OR */ + bit_or(feature_bitmap, feat_ptr->node_bitmap); + } else { /* feature not found */ + if (last_op == FEATURE_OP_AND) { + bit_nclear(feature_bitmap, 0, + (node_record_count - 1)); + } + } + last_op = job_feat_ptr->op_code; + if (job_feat_ptr->count) + have_count = true; } - list_iterator_destroy(feat_iter); - return result; + list_iterator_destroy(job_feat_iter); + + if (have_count) { + job_feat_iter = list_iterator_create(detail_ptr-> + feature_list); + if (job_feat_iter == NULL) + fatal("list_iterator_create malloc error"); + while ((job_feat_ptr = (struct feature_record *) + list_next(job_feat_iter))) { + if (job_feat_ptr->count == 0) + continue; + feat_ptr = list_find_first(feature_list, + _list_find_feature, + (void *)job_feat_ptr->name); + if (!feat_ptr) { + rc = false; + break; + } + 
tmp_bitmap = bit_copy(feature_bitmap); + if (tmp_bitmap == NULL) + fatal("bit_copy malloc error"); + bit_and(tmp_bitmap, feat_ptr->node_bitmap); + if (bit_set_count(tmp_bitmap) < job_feat_ptr->count) + rc = false; + bit_free(tmp_bitmap); + if (!rc) + break; + } + list_iterator_destroy(job_feat_iter); + bit_free(feature_bitmap); + } else { + bit_and(node_bitmap, feature_bitmap); + bit_free(feature_bitmap); + } + + return rc; } /* @@ -1206,7 +1278,7 @@ extern int job_req_node_filter(struct job_record *job_ptr, multi_core_data_t *mc_ptr; struct node_record *node_ptr; struct config_record *config_ptr; - bitstr_t *feature_bitmap = NULL; + bool has_xor = false; if (detail_ptr == NULL) { error("job_req_node_filter: job %u has no details", @@ -1214,66 +1286,54 @@ extern int job_req_node_filter(struct job_record *job_ptr, return EINVAL; } - _reset_feature_counts(detail_ptr); mc_ptr = detail_ptr->mc_ptr; for (i=0; i< node_record_count; i++) { if (!bit_test(avail_bitmap, i)) continue; node_ptr = node_record_table_ptr + i; config_ptr = node_ptr->config_ptr; - feature_bitmap = _valid_features(detail_ptr, config_ptr, true); - if ((feature_bitmap == NULL) || - (!bit_test(feature_bitmap, 0))) { - bit_clear(avail_bitmap, i); - continue; - } - FREE_NULL_BITMAP(feature_bitmap); if (slurmctld_conf.fast_schedule) { - if ((detail_ptr->job_min_procs > - config_ptr->cpus ) - || ((detail_ptr->job_min_memory & (~MEM_PER_CPU)) > - config_ptr->real_memory) - || (detail_ptr->job_min_tmp_disk > + if ((detail_ptr->job_min_procs > config_ptr->cpus) || + ((detail_ptr->job_min_memory & (~MEM_PER_CPU)) > + config_ptr->real_memory) || + (detail_ptr->job_min_tmp_disk > config_ptr->tmp_disk)) { bit_clear(avail_bitmap, i); continue; } - if (mc_ptr - && ((mc_ptr->min_sockets > config_ptr->sockets ) - || (mc_ptr->min_cores > config_ptr->cores ) - || (mc_ptr->min_threads > config_ptr->threads ) - || (mc_ptr->job_min_sockets > config_ptr->sockets ) - || (mc_ptr->job_min_cores > config_ptr->cores ) - || 
(mc_ptr->job_min_threads > - config_ptr->threads ))) { + if (mc_ptr && + ((mc_ptr->min_sockets > config_ptr->sockets) || + (mc_ptr->min_cores > config_ptr->cores) || + (mc_ptr->min_threads > config_ptr->threads) || + (mc_ptr->job_min_sockets > config_ptr->sockets) || + (mc_ptr->job_min_cores > config_ptr->cores) || + (mc_ptr->job_min_threads > config_ptr->threads))){ bit_clear(avail_bitmap, i); continue; } } else { - if ((detail_ptr->job_min_procs > - node_ptr->cpus ) - || ((detail_ptr->job_min_memory & (~MEM_PER_CPU)) > - node_ptr->real_memory) - || (detail_ptr->job_min_tmp_disk > + if ((detail_ptr->job_min_procs > node_ptr->cpus) || + ((detail_ptr->job_min_memory & (~MEM_PER_CPU)) > + node_ptr->real_memory) || + (detail_ptr->job_min_tmp_disk > node_ptr->tmp_disk)) { bit_clear(avail_bitmap, i); continue; } - if (mc_ptr - && ((mc_ptr->min_sockets > node_ptr->sockets ) - || (mc_ptr->min_cores > node_ptr->cores ) - || (mc_ptr->min_threads > node_ptr->threads ) - || (mc_ptr->job_min_sockets > node_ptr->sockets ) - || (mc_ptr->job_min_cores > node_ptr->cores ) - || (mc_ptr->job_min_threads > node_ptr->threads ))) { + if (mc_ptr && + ((mc_ptr->min_sockets > node_ptr->sockets) || + (mc_ptr->min_cores > node_ptr->cores) || + (mc_ptr->min_threads > node_ptr->threads) || + (mc_ptr->job_min_sockets > node_ptr->sockets) || + (mc_ptr->job_min_cores > node_ptr->cores) || + (mc_ptr->job_min_threads > node_ptr->threads))) { bit_clear(avail_bitmap, i); continue; } } } - FREE_NULL_BITMAP(feature_bitmap); - if (!_valid_feature_counts(detail_ptr)) + if (!_valid_feature_counts(detail_ptr, avail_bitmap, &has_xor)) return EINVAL; return SLURM_SUCCESS; @@ -1304,11 +1364,13 @@ static int _build_node_list(struct job_record *job_ptr, multi_core_data_t *mc_ptr = detail_ptr->mc_ptr; bitstr_t *tmp_feature; uint32_t max_weight = 0; + bool has_xor = false; if (job_ptr->resv_name) { /* Limit node selection to those in selected reservation */ time_t start_res = time(NULL); - rc = 
job_test_resv(job_ptr, &start_res, false, &usable_node_mask); + rc = job_test_resv(job_ptr, &start_res, false, + &usable_node_mask); if (rc != SLURM_SUCCESS) { job_ptr->state_reason = WAIT_RESERVATION; xfree(job_ptr->state_desc); @@ -1344,6 +1406,18 @@ static int _build_node_list(struct job_record *job_ptr, fatal("bit_copy malloc failure"); bit_not(usable_node_mask); } + } else { + usable_node_mask = bit_alloc(node_record_count); + if (usable_node_mask == NULL) + fatal("bit_alloc malloc failure"); + bit_nset(usable_node_mask, 0, (node_record_count - 1)); + } + + if (!_valid_feature_counts(detail_ptr, usable_node_mask, &has_xor)) { + info("No job %u feature requirements can not be met", + job_ptr->job_id); + bit_free(usable_node_mask); + return ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE; } config_iterator = list_iterator_create(config_list); @@ -1354,18 +1428,18 @@ static int _build_node_list(struct job_record *job_ptr, list_next(config_iterator))) { config_filter = 0; - if ((detail_ptr->job_min_procs > config_ptr->cpus ) - || ((detail_ptr->job_min_memory & (~MEM_PER_CPU)) > - config_ptr->real_memory) - || (detail_ptr->job_min_tmp_disk > config_ptr->tmp_disk)) + if ((detail_ptr->job_min_procs > config_ptr->cpus ) || + ((detail_ptr->job_min_memory & (~MEM_PER_CPU)) > + config_ptr->real_memory) || + (detail_ptr->job_min_tmp_disk > config_ptr->tmp_disk)) config_filter = 1; - if (mc_ptr - && ((mc_ptr->min_sockets > config_ptr->sockets ) - || (mc_ptr->min_cores > config_ptr->cores ) - || (mc_ptr->min_threads > config_ptr->threads ) - || (mc_ptr->job_min_sockets > config_ptr->sockets ) - || (mc_ptr->job_min_cores > config_ptr->cores ) - || (mc_ptr->job_min_threads > config_ptr->threads ))) + if (mc_ptr && + ((mc_ptr->min_sockets > config_ptr->sockets ) || + (mc_ptr->min_cores > config_ptr->cores ) || + (mc_ptr->min_threads > config_ptr->threads ) || + (mc_ptr->job_min_sockets > config_ptr->sockets ) || + (mc_ptr->job_min_cores > config_ptr->cores ) || + 
(mc_ptr->job_min_threads > config_ptr->threads ))) config_filter = 1; /* since nodes can register with more resources than defined */ @@ -1392,8 +1466,8 @@ static int _build_node_list(struct job_record *job_ptr, } node_set_ptr[node_set_inx].nodes = bit_set_count(node_set_ptr[node_set_inx].my_bitmap); - if (check_node_config - && (node_set_ptr[node_set_inx].nodes != 0)) { + if (check_node_config && + (node_set_ptr[node_set_inx].nodes != 0)) { _filter_nodes_in_set(&node_set_ptr[node_set_inx], detail_ptr); } @@ -1402,11 +1476,18 @@ static int _build_node_list(struct job_record *job_ptr, continue; } - tmp_feature = _valid_features(job_ptr->details, config_ptr, - false); - if (tmp_feature == NULL) { - FREE_NULL_BITMAP(node_set_ptr[node_set_inx].my_bitmap); - continue; + if (has_xor) { + tmp_feature = _valid_features(job_ptr->details, + config_ptr); + if (tmp_feature == NULL) { + FREE_NULL_BITMAP(node_set_ptr[node_set_inx]. + my_bitmap); + continue; + } + } else { + /* We've already filtered for AND/OR features */ + tmp_feature = bit_alloc(MAX_FEATURES); + bit_set(tmp_feature, 0); } /* NOTE: Must bit_free(tmp_feature) to avoid memory leak */ @@ -1419,8 +1500,6 @@ static int _build_node_list(struct job_record *job_ptr, max_weight = MAX(max_weight, config_ptr->weight); node_set_ptr[node_set_inx].features = xstrdup(config_ptr->feature); - node_set_ptr[node_set_inx].feature_array = - config_ptr->feature_array; /* NOTE: NOT COPIED */ node_set_ptr[node_set_inx].feature_bits = tmp_feature; debug2("found %d usable nodes from config containing %s", node_set_ptr[node_set_inx].nodes, config_ptr->nodes); @@ -1468,8 +1547,6 @@ static int _build_node_list(struct job_record *job_ptr, node_set_ptr[i].weight + max_weight; node_set_ptr[node_set_inx].features = xstrdup(node_set_ptr[i].features); - node_set_ptr[node_set_inx].feature_array = - node_set_ptr[i].feature_array; node_set_ptr[node_set_inx].feature_bits = bit_copy(node_set_ptr[i].feature_bits); node_set_ptr[node_set_inx].my_bitmap 
= @@ -1510,18 +1587,18 @@ static void _filter_nodes_in_set(struct node_set *node_set_ptr, continue; node_con = node_record_table_ptr[i].config_ptr; - if ((job_con->job_min_procs <= node_con->cpus) - && ((job_con->job_min_memory & (~MEM_PER_CPU)) <= - node_con->real_memory) - && (job_con->job_min_tmp_disk <= node_con->tmp_disk)) + if ((job_con->job_min_procs <= node_con->cpus) && + ((job_con->job_min_memory & (~MEM_PER_CPU)) <= + node_con->real_memory) && + (job_con->job_min_tmp_disk <= node_con->tmp_disk)) job_ok = 1; - if (mc_ptr - && ((mc_ptr->min_sockets <= node_con->sockets) - && (mc_ptr->min_cores <= node_con->cores ) - && (mc_ptr->min_threads <= node_con->threads) - && (mc_ptr->job_min_sockets <= node_con->sockets) - && (mc_ptr->job_min_cores <= node_con->cores ) - && (mc_ptr->job_min_threads <= node_con->threads))) + if (mc_ptr && + ((mc_ptr->min_sockets <= node_con->sockets) && + (mc_ptr->min_cores <= node_con->cores ) && + (mc_ptr->min_threads <= node_con->threads) && + (mc_ptr->job_min_sockets <= node_con->sockets) && + (mc_ptr->job_min_cores <= node_con->cores ) && + (mc_ptr->job_min_threads <= node_con->threads))) job_mc_ptr_ok = 1; if (job_ok && (!mc_ptr || job_mc_ptr_ok)) continue; @@ -1539,18 +1616,18 @@ static void _filter_nodes_in_set(struct node_set *node_set_ptr, continue; node_ptr = &node_record_table_ptr[i]; - if ((job_con->job_min_procs <= node_ptr->cpus) - && ((job_con->job_min_memory & (~MEM_PER_CPU)) <= - node_ptr->real_memory) - && (job_con->job_min_tmp_disk <= node_ptr->tmp_disk)) + if ((job_con->job_min_procs <= node_ptr->cpus) && + ((job_con->job_min_memory & (~MEM_PER_CPU)) <= + node_ptr->real_memory) && + (job_con->job_min_tmp_disk <= node_ptr->tmp_disk)) job_ok = 1; - if (mc_ptr - && ((mc_ptr->min_sockets <= node_ptr->sockets) - && (mc_ptr->min_cores <= node_ptr->cores ) - && (mc_ptr->min_threads <= node_ptr->threads) - && (mc_ptr->job_min_sockets <= node_ptr->sockets) - && (mc_ptr->job_min_cores <= node_ptr->cores ) - && 
(mc_ptr->job_min_threads <= node_ptr->threads))) + if (mc_ptr && + ((mc_ptr->min_sockets <= node_ptr->sockets) && + (mc_ptr->min_cores <= node_ptr->cores ) && + (mc_ptr->min_threads <= node_ptr->threads) && + (mc_ptr->job_min_sockets <= node_ptr->sockets) && + (mc_ptr->job_min_cores <= node_ptr->cores ) && + (mc_ptr->job_min_threads <= node_ptr->threads))) job_mc_ptr_ok = 1; if (job_ok && (!mc_ptr || job_mc_ptr_ok)) continue; @@ -1639,12 +1716,10 @@ extern void build_node_details(struct job_record *job_ptr) } /* - * _valid_features - determine if the requested features are satisfied by - * the available nodes + * _valid_features - Determine if the requested features are satisfied by + * the available nodes. This is only used for XOR operators. * IN details_ptr - job requirement details, includes requested features * IN config_ptr - node's configuration record - * IN update_count - if set, then increment tmp_cnt (temporary counter) - * for matched features * RET NULL if request is not satisfied, otherwise a bitmap indicating * which mutually exclusive features are satisfied. For example * _valid_features("[fs1|fs2|fs3|fs4]", "fs3") returns a bitmap with @@ -1655,92 +1730,43 @@ extern void build_node_details(struct job_record *job_ptr) * mutually exclusive feature list. 
*/ static bitstr_t *_valid_features(struct job_details *details_ptr, - struct config_record *config_ptr, - bool update_count) + struct config_record *config_ptr) { bitstr_t *result_bits = (bitstr_t *) NULL; ListIterator feat_iter; - struct feature_record *feat_ptr; - bool found, test_names, result; - int last_op, position = 0; - int save_op = FEATURE_OP_AND, save_result = 1; + struct feature_record *job_feat_ptr; + struct features_record *feat_ptr; + int last_op = FEATURE_OP_AND, position = 0; + result_bits = bit_alloc(MAX_FEATURES); + if (result_bits == NULL) + fatal("bit_alloc malloc failure"); if (details_ptr->feature_list == NULL) { /* no constraints */ - result_bits = bit_alloc(MAX_FEATURES); bit_set(result_bits, 0); return result_bits; } - result = true; /* assume good for now */ - last_op = FEATURE_OP_AND; feat_iter = list_iterator_create(details_ptr->feature_list); - while ((feat_ptr = (struct feature_record *) list_next(feat_iter))) { - test_names = false; - found = false; - if (feat_ptr->count) { - found = true; - if (update_count) - test_names = true; - } else - test_names = true; - - if (test_names && config_ptr->feature_array) { - int i; - for (i=0; config_ptr->feature_array[i]; i++) { - if (strcmp(feat_ptr->name, - config_ptr->feature_array[i])) - continue; - found = true; - if (update_count && feat_ptr->count) - feat_ptr->tmp_cnt++; - break; - } - } - - if ((last_op == FEATURE_OP_XOR) || - (feat_ptr->op_code == FEATURE_OP_XOR)) { - if (position == 0) { - save_op = last_op; - save_result = result; - result = found; - } else - result |= found; - - if (!result_bits) - result_bits = bit_alloc(MAX_FEATURES); - - if (!found) - ; - else if (position < MAX_FEATURES) + if (feat_iter == NULL) + fatal("list_iterator_create malloc failure"); + while ((job_feat_ptr = (struct feature_record *) + list_next(feat_iter))) { + if ((job_feat_ptr->op_code == FEATURE_OP_XOR) || + (last_op == FEATURE_OP_XOR)) { + feat_ptr = list_find_first(feature_list, + 
_list_find_feature, + (void *)job_feat_ptr->name); + if (feat_ptr && + bit_super_set(config_ptr->node_bitmap, + feat_ptr->node_bitmap)) { bit_set(result_bits, position); - else - error("_valid_features: overflow"); - position++; - - if (feat_ptr->op_code != FEATURE_OP_XOR) { - if (save_op == FEATURE_OP_OR) - result |= save_result; - else /* (save_op == FEATURE_OP_AND) */ - result &= save_result; } - } else if (last_op == FEATURE_OP_OR) { - result |= found; - } else if (last_op == FEATURE_OP_AND) { - result &= found; + position++; } - last_op = feat_ptr->op_code; + last_op = job_feat_ptr->op_code; } list_iterator_destroy(feat_iter); - if (result) { - if (!result_bits) { - result_bits = bit_alloc(MAX_FEATURES); - bit_set(result_bits, 0); - } - } else { - FREE_NULL_BITMAP(result_bits); - } - return result_bits; } diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index 999ef3d405da3530e99f97463adc6aebae3eacf8..6e82e6ecec053163c89c206f631f6fd3697278d6 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -203,7 +203,6 @@ static int _build_bitmaps(void) config_iterator = list_iterator_create(config_list); if (config_iterator == NULL) fatal ("memory allocation failure"); - while ((config_ptr = (struct config_record *) list_next(config_iterator))) { FREE_NULL_BITMAP(config_ptr->node_bitmap); @@ -261,6 +260,16 @@ static int _build_bitmaps(void) if (node_ptr->config_ptr) bit_set(node_ptr->config_ptr->node_bitmap, i); } + + config_iterator = list_iterator_create(config_list); + if (config_iterator == NULL) + fatal ("memory allocation failure"); + while ((config_ptr = (struct config_record *) + list_next(config_iterator))) { + build_config_feature_list(config_ptr); + } + list_iterator_destroy(config_iterator); + return error_code; } @@ -539,7 +548,6 @@ static int _build_all_nodeline_info(void) config_ptr->weight = node->weight; if (node->feature) config_ptr->feature = xstrdup(node->feature); - build_config_feature_array(config_ptr); 
_build_single_nodeline_info(node, config_ptr, conf); } diff --git a/src/slurmctld/reservation.c b/src/slurmctld/reservation.c index 80bc8aa0eaafcdb07b748acd34ca9348b2e1d42a..22726fc1a0f48bf37fb794f72571ebdb3e18e6a2 100644 --- a/src/slurmctld/reservation.c +++ b/src/slurmctld/reservation.c @@ -1353,9 +1353,13 @@ extern int update_resv(resv_desc_msg_t *resv_desc_ptr) xfree(resv_ptr->features); } if (resv_desc_ptr->features) { - xfree(resv_ptr->features); - resv_ptr->features = resv_desc_ptr->features; - resv_desc_ptr->features = NULL; /* Nothing left to free */ + /* To support in the future, the reservation resources would + * need to be selected again. For now, administrator can + * delete this reservation and create a new one. */ + info("Attempt to change features of reservation %s", + resv_desc_ptr->name); + error_code = ESLURM_NOT_SUPPORTED; + goto update_failure; } if (resv_desc_ptr->users) { rc = _update_uid_list(resv_ptr, resv_desc_ptr->users); @@ -1453,7 +1457,7 @@ extern int update_resv(resv_desc_msg_t *resv_desc_ptr) goto update_failure; } _set_cpu_cnt(resv_ptr); - if((error_code = _set_assoc_list(resv_ptr)) != SLURM_SUCCESS) + if ((error_code = _set_assoc_list(resv_ptr)) != SLURM_SUCCESS) goto update_failure; slurm_make_time_str(&resv_ptr->start_time, start_time, @@ -2120,9 +2124,8 @@ static int _select_nodes(resv_desc_msg_t *resv_desc_ptr, { slurmctld_resv_t *resv_ptr; bitstr_t *node_bitmap; - struct node_record *node_ptr; ListIterator iter; - int i, j; + int i, rc = SLURM_SUCCESS; if (*part_ptr == NULL) { *part_ptr = default_part_loc; @@ -2152,26 +2155,70 @@ static int _select_nodes(resv_desc_msg_t *resv_desc_ptr, /* Satisfy feature specification */ if (resv_desc_ptr->features) { - /* FIXME: Just support a single feature name for now */ - node_ptr = node_record_table_ptr; - for (i=0; i<node_record_count; i++, node_ptr++) { - if (!bit_test(node_bitmap, i)) - continue; - if (!node_ptr->config_ptr->feature_array) { - bit_clear(node_bitmap, i); - continue; - 
} - for (j=0; node_ptr->config_ptr->feature_array[j]; j++){ - if (!strcmp(resv_desc_ptr->features, - node_ptr->config_ptr-> - feature_array[j])) + int op_code = FEATURE_OP_AND, last_op_code = FEATURE_OP_AND; + char *features = xstrdup(resv_desc_ptr->features); + char *sep_ptr, *token = features; + bitstr_t *feature_bitmap = bit_copy(node_bitmap); + struct features_record *feature_ptr; + ListIterator feature_iter; + bool match; + + if (feature_bitmap == NULL) + fatal("bit_copy malloc failure"); + + while (1) { + for (i=0; ; i++) { + if (token[i] == '\0') { + sep_ptr = NULL; break; + } else if (token[i] == '|') { + op_code = FEATURE_OP_OR; + token[i] = '\0'; + sep_ptr = &token[i]; + break; + } else if ((token[i] == '&') || + (token[i] == ',')) { + op_code = FEATURE_OP_AND; + token[i] = '\0'; + sep_ptr = &token[i]; + break; + } } - if (!node_ptr->config_ptr->feature_array[j]) { - bit_clear(node_bitmap, i); - continue; + + match = false; + feature_iter = list_iterator_create(feature_list); + if (feature_iter == NULL) + fatal("list_iterator_create malloc failure"); + while ((feature_ptr = (struct features_record *) + list_next(feature_iter))) { + if (strcmp(token, feature_ptr->name)) + continue; + if (last_op_code == FEATURE_OP_OR) { + bit_or(feature_bitmap, + feature_ptr->node_bitmap); + } else { + bit_and(feature_bitmap, + feature_ptr->node_bitmap); + } + match = true; + break; + } + list_iterator_destroy(feature_iter); + if (!match) { + info("reservation feature invalid: %s", token); + rc = ESLURM_INVALID_FEATURE; + bit_nclear(feature_bitmap, 0, + (node_record_count - 1)); + break; } + if (sep_ptr == NULL) + break; + token = sep_ptr + 1; + last_op_code = op_code; } + xfree(features); + bit_and(node_bitmap, feature_bitmap); + bit_free(feature_bitmap); } if ((resv_desc_ptr->flags & RESERVE_FLAG_MAINT) == 0) { @@ -2179,7 +2226,9 @@ static int _select_nodes(resv_desc_msg_t *resv_desc_ptr, bit_and(node_bitmap, avail_node_bitmap); } *resv_bitmap = NULL; - if 
(bit_set_count(node_bitmap) < resv_desc_ptr->node_cnt) + if (rc != SLURM_SUCCESS) + ; + else if (bit_set_count(node_bitmap) < resv_desc_ptr->node_cnt) verbose("reservation requests more nodes than are available"); else if ((i = bit_overlap(node_bitmap, idle_node_bitmap)) >= resv_desc_ptr->node_cnt) { /* Reserve idle nodes */ @@ -2188,7 +2237,8 @@ static int _select_nodes(resv_desc_msg_t *resv_desc_ptr, resv_desc_ptr->node_cnt); } else if (resv_desc_ptr->flags & RESERVE_FLAG_IGN_JOBS) { /* Reserve nodes that are idle first, then busy nodes */ - *resv_bitmap = _pick_idle_nodes2(node_bitmap, resv_desc_ptr); + *resv_bitmap = _pick_idle_nodes2(node_bitmap, + resv_desc_ptr); } else { /* Reserve nodes that are or will be idle. * This algorithm is slower than above logic that just @@ -2197,8 +2247,12 @@ static int _select_nodes(resv_desc_msg_t *resv_desc_ptr, } bit_free(node_bitmap); - if (*resv_bitmap == NULL) - return ESLURM_NODES_BUSY; + if (*resv_bitmap == NULL) { + if (rc == SLURM_SUCCESS) + rc = ESLURM_NODES_BUSY; + return rc; + } + resv_desc_ptr->node_list = bitmap2node_name(*resv_bitmap); return SLURM_SUCCESS; } diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 446f85fa1daf54efa960a35d09aafbbb36d234e3..48add9f6d12a48b878d4cc81c8776cdc5dae2611 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -164,8 +164,9 @@ extern int cluster_procs; /*****************************************************************************\ * NODE parameters and data structures \*****************************************************************************/ -#define CONFIG_MAGIC 0xc065eded -#define NODE_MAGIC 0x0de575ed +#define CONFIG_MAGIC 0xc065eded +#define FEATURE_MAGIC 0x34dfd8b5 +#define NODE_MAGIC 0x0de575ed struct config_record { uint32_t magic; /* magic cookie to test data integrity */ @@ -182,9 +183,15 @@ struct config_record { char *nodes; /* name of nodes with this configuration */ bitstr_t *node_bitmap; /* bitmap of nodes with this 
configuration */ }; - extern List config_list; /* list of config_record entries */ +struct features_record { + uint32_t magic; /* magic cookie to test data integrity */ + char *name; /* name of a feature */ + bitstr_t *node_bitmap; /* bitmap of nodes with this feature */ +}; +extern List feature_list; /* list of features_record entries */ + struct node_record { uint32_t magic; /* magic cookie for data integrity */ char *name; /* name of the node. NULL==defunct */ @@ -375,7 +382,6 @@ struct feature_record { char *name; /* name of feature */ uint16_t count; /* count of nodes with this feature */ uint8_t op_code; /* separator, see FEATURE_OP_ above */ - uint16_t tmp_cnt; /* temporary, allocated node counter */ }; /* job_details - specification of a job's constraints, @@ -664,9 +670,8 @@ extern void abort_job_on_node(uint32_t job_id, struct job_record *job_ptr, */ extern char * bitmap2node_name (bitstr_t *bitmap) ; -/* Given a config_record, clear any existing feature_array and - * if feature is set, then rebuild feature_array */ -extern void build_config_feature_array(struct config_record *config_ptr); +/* Given a config_record with it's bitmap already set, update feature_list */ +extern void build_config_feature_list(struct config_record *config_ptr); /* * create_config_record - create a config_record entry and set is values to