diff --git a/src/common/bitstring.c b/src/common/bitstring.c index 922137d435c99fb698686fdc36acd936ad6a7557..eac9231338c1cacf7f38733eab59d8a7e801e0b3 100644 --- a/src/common/bitstring.c +++ b/src/common/bitstring.c @@ -83,6 +83,7 @@ strong_alias(bit_unfmt_binmask, slurm_bit_unfmt_binmask); strong_alias(bit_fls, slurm_bit_fls); strong_alias(bit_fill_gaps, slurm_bit_fill_gaps); strong_alias(bit_super_set, slurm_bit_super_set); +strong_alias(bit_overlap, slurm_bit_overlap); strong_alias(bit_equal, slurm_bit_equal); strong_alias(bit_copy, slurm_bit_copy); strong_alias(bit_pick_cnt, slurm_bit_pick_cnt); @@ -338,7 +339,7 @@ bit_noc(bitstr_t *b, int n, int seed) } } - cnt = 0; /* start at beginning */ + cnt = 0; /* start at beginning */ for (bit = 0; bit < _bitstr_bits(b); bit++) { if (bit_test(b, bit)) { /* fail */ if (bit >= seed) @@ -499,6 +500,26 @@ bit_super_set(bitstr_t *b1, bitstr_t *b2) { return 1; } +/* + * return number of bits set in b1 that are also set in b2, 0 if no overlap + */ +extern int +bit_overlap(bitstr_t *b1, bitstr_t *b2) +{ + int count = 0; + bitstr_t *my_bitmap = NULL; + + _assert_bitstr_valid(b1); + _assert_bitstr_valid(b2); + assert(_bitstr_bits(b1) == _bitstr_bits(b2)); + + my_bitmap = bit_copy(b1); + bit_and(my_bitmap, b2); + count = bit_set_count(my_bitmap); + bit_free(my_bitmap); + + return count; +} /* * return 1 if b1 and b2 are identical, 0 otherwise */ @@ -1219,3 +1240,4 @@ bit_get_pos_num(bitstr_t *b, bitoff_t pos) return cnt; } + diff --git a/src/common/bitstring.h b/src/common/bitstring.h index c24e32800d35f9e2180d0bcdedf112f867e7edda..448de077c96d4c17719dd0d808056d7f036ca8e0 100644 --- a/src/common/bitstring.h +++ b/src/common/bitstring.h @@ -19,7 +19,7 @@ * any later version. * * In addition, as a special exception, the copyright holders give permission - * to link the code of portions of this program with the OpenSSL library under + * to link the code of portions of this program with the OpenSSL library under * certain conditions as described in each individual source file, and * distribute linked combinations including the two. You must obey the GNU * General Public License in all respects for all of the code used other than @@ -182,6 +182,7 @@ int bit_unfmt_binmask(bitstr_t *b, const char *str); bitoff_t bit_fls(bitstr_t *b); void bit_fill_gaps(bitstr_t *b); int bit_super_set(bitstr_t *b1, bitstr_t *b2); +int bit_overlap(bitstr_t *b1, bitstr_t *b2); int bit_equal(bitstr_t *b1, bitstr_t *b2); void bit_copybits(bitstr_t *dest, bitstr_t *src); bitstr_t *bit_copy(bitstr_t *b); diff --git a/src/common/node_select.c b/src/common/node_select.c index c117a1efed85960ec709eb43f7c2311faa7ad395..2998083cb264b530f0faac5cc8d524e98fe57bbb 100644 --- a/src/common/node_select.c +++ b/src/common/node_select.c @@ -99,6 +99,8 @@ typedef struct slurm_select_ops { int (*update_nodeinfo) (struct job_record *job_ptr); int (*update_block) (update_part_msg_t *part_desc_ptr); + int (*update_sub_node) (update_part_msg_t + *part_desc_ptr); int (*get_info_from_plugin)(enum select_data_info cr_info, void *data); int (*update_node_state) (int index, uint16_t state); @@ -175,6 +177,7 @@ static slurm_select_ops_t * _select_get_ops(slurm_select_context_t *c) "select_p_get_select_nodeinfo", "select_p_update_nodeinfo", "select_p_update_block", + "select_p_update_sub_node", "select_p_get_info_from_plugin", "select_p_update_node_state", "select_p_alter_node_cnt" @@ -426,7 +429,6 @@ extern int select_g_update_nodeinfo (struct job_record *job_ptr) /* * Update specific block (usually something has gone wrong) - * IN cr_info - type of data to update for a given job record * IN part_desc_ptr - information about the block */ extern int select_g_update_block (update_part_msg_t *part_desc_ptr) @@ -437,6 +439,18 @@ extern int select_g_update_block (update_part_msg_t *part_desc_ptr) return (*(g_select_context->ops.update_block))(part_desc_ptr); } +/* + * Update specific sub nodes (usually something has gone wrong) + * IN part_desc_ptr - information about the block + */ +extern int select_g_update_sub_node (update_part_msg_t *part_desc_ptr) +{ + if (slurm_select_init() < 0) + return SLURM_ERROR; + + return (*(g_select_context->ops.update_sub_node))(part_desc_ptr); +} + /* * Get select data from a plugin * IN node_pts - current node record @@ -946,6 +960,7 @@ extern int select_g_free_jobinfo (select_jobinfo_t *jobinfo) } else { (*jobinfo)->magic = 0; xfree((*jobinfo)->bg_block_id); + xfree((*jobinfo)->ionodes); xfree((*jobinfo)->blrtsimage); xfree((*jobinfo)->linuximage); xfree((*jobinfo)->mloaderimage); diff --git a/src/common/node_select.h b/src/common/node_select.h index ae06864f87720fd8cc5ed97bd3ee20c2dad8e353..eefd7bfcc7160fe914123e5fa0e45a453e014819 100644 --- a/src/common/node_select.h +++ b/src/common/node_select.h @@ -98,11 +98,16 @@ extern int select_g_update_nodeinfo (struct job_record *job_ptr); /* * Update specific block (usually something has gone wrong) - * IN cr_info - type of data to update for a given job record * IN part_desc_ptr - information about the block */ extern int select_g_update_block (update_part_msg_t *part_desc_ptr); +/* + * Update specific sub nodes (usually something has gone wrong) + * IN part_desc_ptr - information about the block + */ +extern int select_g_update_sub_node (update_part_msg_t *part_desc_ptr); + /* * Get select data from a plugin * IN node_pts - current node record diff --git a/src/plugins/select/bluegene/plugin/bg_job_run.c b/src/plugins/select/bluegene/plugin/bg_job_run.c index a6875792a4e18a996db7a69e7c529642e9f1392d..38af06963dba9c5b208701015ad58aff996d6ab3 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_run.c +++ b/src/plugins/select/bluegene/plugin/bg_job_run.c @@ -284,8 +284,6 @@ static void _start_agent(bg_update_t *bg_update_ptr) slurm_mutex_unlock(&block_state_mutex); - num_block_to_free = 0; - num_block_freed = 0; delete_list = list_create(NULL); slurm_mutex_lock(&block_state_mutex); itr = list_iterator_create(bg_list); @@ -578,8 +576,8 @@ static void _term_agent(bg_update_t *bg_update_ptr) } slurm_mutex_lock(&block_state_mutex); - - bg_record->job_running = NO_JOB_RUNNING; + if(bg_record->job_running > NO_JOB_RUNNING) + bg_record->job_running = NO_JOB_RUNNING; /* remove user from list */ diff --git a/src/plugins/select/bluegene/plugin/bluegene.c b/src/plugins/select/bluegene/plugin/bluegene.c index c5691aece1a0a71d36a6ca7926822eee6577cead..7f75f3d23b302cfbc022c0141e7df3de8429b997 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.c +++ b/src/plugins/select/bluegene/plugin/bluegene.c @@ -114,7 +114,6 @@ static int _split_block(bg_record_t *bg_record, int procs); static int _breakup_blocks(ba_request_t *request, List my_block_list); static bg_record_t *_create_small_record(bg_record_t *bg_record, uint16_t quarter, uint16_t nodecard); -static int _add_bg_record(List records, List used_nodes, blockreq_t *blockreq); static int _reopen_bridge_log(void); /* Initialize all plugin variables */ @@ -681,19 +680,10 @@ end_it: extern int format_node_name(bg_record_t *bg_record, char *buf, int buf_size) { - if(bg_record->quarter != (uint16_t)NO_VAL) { - if(bg_record->nodecard != (uint16_t)NO_VAL) { - snprintf(buf, buf_size, - "%s.%d.%d", - bg_record->nodes, - bg_record->quarter, - bg_record->nodecard); - } else { - snprintf(buf, buf_size, - "%s.%d", - bg_record->nodes, - bg_record->quarter); - } + if(bg_record->ionodes) { + snprintf(buf, buf_size, "%s[%s]", + bg_record->nodes, + bg_record->ionodes); } else { snprintf(buf, buf_size, "%s", bg_record->nodes); } @@ -1334,7 +1324,7 @@ no_list: blockreq.nodecards = num_nodecard; blockreq.quarters = num_quarter; - _add_bg_record(requests, results, &blockreq); + add_bg_record(requests, results, &blockreq); while((bg_record = (bg_record_t *) list_pop(requests)) != NULL) { if(block_exist_in_list(bg_list, bg_record)) @@ -1449,7 +1439,7 @@ extern int create_full_system_block() blockreq.conn_type = SELECT_TORUS; blockreq.nodecards = 0; blockreq.quarters = 0; - _add_bg_record(records, NULL, &blockreq); + add_bg_record(records, NULL, &blockreq); xfree(name); bg_record = (bg_record_t *) list_pop(records); @@ -2080,7 +2070,7 @@ extern int read_bg_conf(void) } for (i = 0; i < count; i++) { - _add_bg_record(bg_list, NULL, blockreq_array[i]); + add_bg_record(bg_list, NULL, blockreq_array[i]); } } //#if 0 @@ -2155,6 +2145,173 @@ extern int set_ionodes(bg_record_t *bg_record) return SLURM_SUCCESS; } +extern int add_bg_record(List records, List used_nodes, blockreq_t *blockreq) +{ + bg_record_t *bg_record = NULL; + bg_record_t *found_record = NULL; + ba_node_t *ba_node = NULL; + ListIterator itr; + struct passwd *pw_ent = NULL; + int i, len; + int small_size = 0; + int small_count = 0; + uint16_t quarter = 0; + uint16_t nodecard = 0; + int node_cnt = 0; + + if(!records) { + fatal("add_bg_record: no records list given"); + } + bg_record = (bg_record_t*) xmalloc(sizeof(bg_record_t)); + + slurm_conf_lock(); + bg_record->user_name = + xstrdup(slurmctld_conf.slurm_user_name); + bg_record->target_name = + xstrdup(slurmctld_conf.slurm_user_name); + slurm_conf_unlock(); + if((pw_ent = getpwnam(bg_record->user_name)) == NULL) { + error("getpwnam(%s): %m", bg_record->user_name); + } else { + bg_record->user_uid = pw_ent->pw_uid; + } + + bg_record->bg_block_list = list_create(destroy_ba_node); + if(used_nodes) { + if(copy_node_path(used_nodes, bg_record->bg_block_list) + == SLURM_ERROR) + error("couldn't copy the path for the allocation"); + bg_record->bp_count = list_count(used_nodes); + } + bg_record->quarter = (uint16_t)NO_VAL; + bg_record->nodecard = (uint16_t)NO_VAL; + if(set_ionodes(bg_record) == SLURM_ERROR) { + error("add_bg_record: problem creating ionodes"); + } + /* bg_record->boot_state = 0; Implicit */ + /* bg_record->state = 0; Implicit */ + debug2("asking for %s %d %d %s", + blockreq->block, blockreq->quarters, blockreq->nodecards, + convert_conn_type(blockreq->conn_type)); + len = strlen(blockreq->block); + i=0; + while((blockreq->block[i] != '[' + && (blockreq->block[i] > 57 || blockreq->block[i] < 48)) + && (i<len)) + i++; + + if(i<len) { + len -= i; + slurm_conf_lock(); + len += strlen(slurmctld_conf.node_prefix)+1; + bg_record->nodes = xmalloc(len); + snprintf(bg_record->nodes, len, "%s%s", + slurmctld_conf.node_prefix, blockreq->block+i); + slurm_conf_unlock(); + + } else + fatal("BPs=%s is in a weird format", blockreq->block); + + process_nodes(bg_record); + + bg_record->node_use = SELECT_COPROCESSOR_MODE; + bg_record->conn_type = blockreq->conn_type; + bg_record->cpus_per_bp = procs_per_node; + bg_record->node_cnt = bluegene_bp_node_cnt * bg_record->bp_count; + bg_record->job_running = NO_JOB_RUNNING; + + if(blockreq->blrtsimage) + bg_record->blrtsimage = xstrdup(blockreq->blrtsimage); + else + bg_record->blrtsimage = xstrdup(default_blrtsimage); + + if(blockreq->linuximage) + bg_record->linuximage = xstrdup(blockreq->linuximage); + else + bg_record->linuximage = xstrdup(default_linuximage); + + if(blockreq->mloaderimage) + bg_record->mloaderimage = xstrdup(blockreq->mloaderimage); + else + bg_record->mloaderimage = xstrdup(default_mloaderimage); + + if(blockreq->ramdiskimage) + bg_record->ramdiskimage = xstrdup(blockreq->ramdiskimage); + else + bg_record->ramdiskimage = xstrdup(default_ramdiskimage); + + if(bg_record->conn_type != SELECT_SMALL) { + /* this needs to be an append so we keep things in the + order we got them, they will be sorted later */ + list_append(records, bg_record); + /* this isn't a correct list so we need to set it later for + now we just used it to be the bp number */ + if(!used_nodes) { + debug4("we didn't get a request list so we are " + "destroying this bp list"); + list_destroy(bg_record->bg_block_list); + bg_record->bg_block_list = NULL; + } + } else { + debug("adding a small block"); + if(blockreq->nodecards==0 && blockreq->quarters==0) { + info("No specs given for this small block, " + "I am spliting this block into 4 quarters"); + blockreq->quarters=4; + } + i = (blockreq->nodecards*bluegene_nodecard_node_cnt) + + (blockreq->quarters*bluegene_quarter_node_cnt); + if(i != bluegene_bp_node_cnt) + fatal("There is an error in your bluegene.conf file.\n" + "I am unable to request %d nodes in one " + "base partition with %d nodes.", + i, bluegene_bp_node_cnt); + small_count = blockreq->nodecards+blockreq->quarters; + + /* Automatically create 4-way split if + * conn_type == SELECT_SMALL in bluegene.conf + * Here we go through each node listed and do the same thing + * for each node. + */ + itr = list_iterator_create(bg_record->bg_block_list); + while ((ba_node = list_next(itr)) != NULL) { + /* break base partition up into 16 parts */ + small_size = 16; + node_cnt = 0; + quarter = 0; + nodecard = 0; + for(i=0; i<small_count; i++) { + if(i == blockreq->nodecards) { + /* break base partition + up into 4 parts */ + small_size = 4; + } + + if(small_size == 4) + nodecard = (uint16_t)NO_VAL; + else + nodecard = i%4; + found_record = _create_small_record(bg_record, + quarter, + nodecard); + + /* this needs to be an append so we + keep things in the order we got + them, they will be sorted later */ + list_append(records, found_record); + node_cnt += bluegene_bp_node_cnt/small_size; + if(node_cnt == 128) { + node_cnt = 0; + quarter++; + } + } + } + list_iterator_destroy(itr); + destroy_bg_record(bg_record); + } + return SLURM_SUCCESS; +} + #ifdef HAVE_BG static int _addto_node_list(bg_record_t *bg_record, int *start, int *end) { @@ -2445,9 +2602,6 @@ static int _delete_old_blocks(void) int retries; List bg_destroy_list = list_create(NULL); - num_block_to_free = 0; - num_block_freed = 0; - info("removing unspecified blocks"); if(!bg_recover) { if(bg_curr_block_list) { @@ -2547,7 +2701,7 @@ static int _delete_old_blocks(void) list_destroy(bg_destroy_list); retries=30; - while(num_block_to_free != num_block_freed) { + while(num_block_to_free > num_block_freed) { update_freeing_block_list(); if(retries==30) { info("Waiting for old blocks to be " @@ -2925,170 +3079,6 @@ static bg_record_t *_create_small_record(bg_record_t *bg_record, return found_record; } -static int _add_bg_record(List records, List used_nodes, blockreq_t *blockreq) -{ - bg_record_t *bg_record = NULL; - bg_record_t *found_record = NULL; - ba_node_t *ba_node = NULL; - ListIterator itr; - struct passwd *pw_ent = NULL; - int i, len; - int small_size = 0; - int small_count = 0; - uint16_t quarter = 0; - uint16_t nodecard = 0; - int node_cnt = 0; - - bg_record = (bg_record_t*) xmalloc(sizeof(bg_record_t)); - - slurm_conf_lock(); - bg_record->user_name = - xstrdup(slurmctld_conf.slurm_user_name); - bg_record->target_name = - xstrdup(slurmctld_conf.slurm_user_name); - slurm_conf_unlock(); - if((pw_ent = getpwnam(bg_record->user_name)) == NULL) { - error("getpwnam(%s): %m", bg_record->user_name); - } else { - bg_record->user_uid = pw_ent->pw_uid; - } - - bg_record->bg_block_list = list_create(destroy_ba_node); - if(used_nodes) { - if(copy_node_path(used_nodes, bg_record->bg_block_list) - == SLURM_ERROR) - error("couldn't copy the path for the allocation"); - bg_record->bp_count = list_count(used_nodes); - } - bg_record->quarter = (uint16_t)NO_VAL; - bg_record->nodecard = (uint16_t)NO_VAL; - if(set_ionodes(bg_record) == SLURM_ERROR) { - error("_add_bg_record: problem creating ionodes"); - } - /* bg_record->boot_state = 0; Implicit */ - /* bg_record->state = 0; Implicit */ - debug2("asking for %s %d %d %s", - blockreq->block, blockreq->quarters, blockreq->nodecards, - convert_conn_type(blockreq->conn_type)); - len = strlen(blockreq->block); - i=0; - while((blockreq->block[i] != '[' - && (blockreq->block[i] > 57 || blockreq->block[i] < 48)) - && (i<len)) - i++; - - if(i<len) { - len -= i; - slurm_conf_lock(); - len += strlen(slurmctld_conf.node_prefix)+1; - bg_record->nodes = xmalloc(len); - snprintf(bg_record->nodes, len, "%s%s", - slurmctld_conf.node_prefix, blockreq->block+i); - slurm_conf_unlock(); - - } else - fatal("BPs=%s is in a weird format", blockreq->block); - - process_nodes(bg_record); - - bg_record->node_use = SELECT_COPROCESSOR_MODE; - bg_record->conn_type = blockreq->conn_type; - bg_record->cpus_per_bp = procs_per_node; - bg_record->node_cnt = bluegene_bp_node_cnt * bg_record->bp_count; - bg_record->job_running = NO_JOB_RUNNING; - - if(blockreq->blrtsimage) - bg_record->blrtsimage = xstrdup(blockreq->blrtsimage); - else - bg_record->blrtsimage = xstrdup(default_blrtsimage); - - if(blockreq->linuximage) - bg_record->linuximage = xstrdup(blockreq->linuximage); - else - bg_record->linuximage = xstrdup(default_linuximage); - - if(blockreq->mloaderimage) - bg_record->mloaderimage = xstrdup(blockreq->mloaderimage); - else - bg_record->mloaderimage = xstrdup(default_mloaderimage); - - if(blockreq->ramdiskimage) - bg_record->ramdiskimage = xstrdup(blockreq->ramdiskimage); - else - bg_record->ramdiskimage = xstrdup(default_ramdiskimage); - - if(bg_record->conn_type != SELECT_SMALL) { - /* this needs to be an append so we keep things in the - order we got them, they will be sorted later */ - list_append(records, bg_record); - /* this isn't a correct list so we need to set it later for - now we just used it to be the bp number */ - if(!used_nodes) { - debug4("we didn't get a request list so we are " - "destroying this bp list"); - list_destroy(bg_record->bg_block_list); - bg_record->bg_block_list = NULL; - } - } else { - debug("adding a small block"); - if(blockreq->nodecards==0 && blockreq->quarters==0) { - info("No specs given for this small block, " - "I am spliting this block into 4 quarters"); - blockreq->quarters=4; - } - i = (blockreq->nodecards*bluegene_nodecard_node_cnt) + - (blockreq->quarters*bluegene_quarter_node_cnt); - if(i != bluegene_bp_node_cnt) - fatal("There is an error in your bluegene.conf file.\n" - "I am unable to request %d nodes in one " - "base partition with %d nodes.", - i, bluegene_bp_node_cnt); - small_count = blockreq->nodecards+blockreq->quarters; - - /* Automatically create 4-way split if - * conn_type == SELECT_SMALL in bluegene.conf - * Here we go through each node listed and do the same thing - * for each node. - */ - itr = list_iterator_create(bg_record->bg_block_list); - while ((ba_node = list_next(itr)) != NULL) { - /* break base partition up into 16 parts */ - small_size = 16; - node_cnt = 0; - quarter = 0; - nodecard = 0; - for(i=0; i<small_count; i++) { - if(i == blockreq->nodecards) { - /* break base partition - up into 4 parts */ - small_size = 4; - } - - if(small_size == 4) - nodecard = (uint16_t)NO_VAL; - else - nodecard = i%4; - found_record = _create_small_record(bg_record, - quarter, - nodecard); - - /* this needs to be an append so we - keep things in the order we got - them, they will be sorted later */ - list_append(records, found_record); - node_cnt += bluegene_bp_node_cnt/small_size; - if(node_cnt == 128) { - node_cnt = 0; - quarter++; - } - } - } - list_iterator_destroy(itr); - destroy_bg_record(bg_record); - } - return SLURM_SUCCESS; -} - static int _reopen_bridge_log(void) { int rc = SLURM_SUCCESS; diff --git a/src/plugins/select/bluegene/plugin/bluegene.h b/src/plugins/select/bluegene/plugin/bluegene.h index 98fbc6b3137dc771237067ac9f860cd870979eb7..9ec00824e19a4f2847b2d9f1493bf1d678d10f87 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.h +++ b/src/plugins/select/bluegene/plugin/bluegene.h @@ -224,6 +224,7 @@ extern void *mult_destroy_block(void *args); extern int free_block_list(List delete_list); extern int read_bg_conf(void); extern int set_ionodes(bg_record_t *bg_record); +extern int add_bg_record(List records, List used_nodes, blockreq_t *blockreq); /* block_sys.c */ /*****************************************************/ diff --git a/src/plugins/select/bluegene/plugin/select_bluegene.c b/src/plugins/select/bluegene/plugin/select_bluegene.c index 4409c0f84b212fafcfdc058b1046affc2af1147b..701df6b56f3c20b58f58a69b540e66f3c26eb450 100644 --- a/src/plugins/select/bluegene/plugin/select_bluegene.c +++ b/src/plugins/select/bluegene/plugin/select_bluegene.c @@ -414,6 +414,266 @@ extern int select_p_update_block (update_part_msg_t *part_desc_ptr) return rc; } +extern int select_p_update_sub_node (update_part_msg_t *part_desc_ptr) +{ + int rc = SLURM_SUCCESS; + bg_record_t *bg_record = NULL, *found_record = NULL; + time_t now; + char reason[128], tmp[64], time_str[32]; + blockreq_t blockreq; + int i = 0, j = 0; + char coord[BA_SYSTEM_DIMENSIONS]; + char ionodes[128]; + int set = 0; + int set_error = 0; + bitstr_t *ionode_bitmap = NULL; + List requests = NULL; + List delete_list = NULL; + ListIterator itr; + + if(bluegene_layout_mode != LAYOUT_DYNAMIC) { + info("You can't use this call unless you are on a Dynamically " + "allocated system. Please use update BlockName instead"); + rc = SLURM_ERROR; + goto end_it; + } + + memset(coord, -1, BA_SYSTEM_DIMENSIONS); + memset(ionodes, 0, 128); + if(!part_desc_ptr->name) { + error("update_sub_node: No name specified"); + rc = SLURM_ERROR; + goto end_it; + + } + + now = time(NULL); + slurm_make_time_str(&now, time_str, sizeof(time_str)); + snprintf(tmp, sizeof(tmp), "[SLURM@%s]", time_str); + + while (part_desc_ptr->name[j] != '\0') { + if (part_desc_ptr->name[j] == '[') { + if(set<1) { + rc = SLURM_ERROR; + goto end_it; + } + i = j++; + if((part_desc_ptr->name[j] >= 58 + && part_desc_ptr->name[j] <= 47)) { + error("update_sub_node: sub part is empty"); + rc = SLURM_ERROR; + goto end_it; + } + while(part_desc_ptr->name[i] != '\0') { + if(part_desc_ptr->name[i] == ']') + break; + i++; + } + if(part_desc_ptr->name[i] != ']') { + error("update_sub_node: " + "No close (']') on sub part"); + rc = SLURM_ERROR; + goto end_it; + } + + strncpy(ionodes, part_desc_ptr->name+j, i-j); + set++; + break; + } else if((part_desc_ptr->name[j] < 58 + && part_desc_ptr->name[j] > 47)) { + if(set) { + rc = SLURM_ERROR; + goto end_it; + } + for(i = 0; i < BA_SYSTEM_DIMENSIONS; i++) { + if((part_desc_ptr->name[i] >= 58) + || (part_desc_ptr->name[i] <= 47)) { + error("update_sub_node: " + "misformatted name given %s", + part_desc_ptr->name); + rc = SLURM_ERROR; + goto end_it; + } + } + strncpy(coord, part_desc_ptr->name+j, + BA_SYSTEM_DIMENSIONS); + + j += BA_SYSTEM_DIMENSIONS-1; + set++; + } + j++; + } + + if(set != 2) { + error("update_sub_node: " + "I didn't get the base partition and the sub part."); + rc = SLURM_ERROR; + goto end_it; + } + ionode_bitmap = bit_alloc(bluegene_numpsets); + bit_unfmt(ionode_bitmap, ionodes); + + requests = list_create(destroy_bg_record); + + blockreq.block = coord; + blockreq.blrtsimage = NULL; + blockreq.linuximage = NULL; + blockreq.mloaderimage = NULL; + blockreq.ramdiskimage = NULL; + blockreq.conn_type = SELECT_SMALL; + blockreq.nodecards = 16; + blockreq.quarters = 0; + add_bg_record(requests, NULL, &blockreq); + + delete_list = list_create(NULL); + while((bg_record = list_pop(requests))) { + set_error = 0; + if(bit_overlap(bg_record->ionode_bitmap, ionode_bitmap)) + set_error = 1; + + slurm_mutex_lock(&block_state_mutex); + itr = list_iterator_create(bg_list); + while((found_record = list_next(itr))) { + if(!found_record || (bg_record == found_record)) + continue; + if(bit_equal(bg_record->bitmap, found_record->bitmap) + && bit_equal(bg_record->ionode_bitmap, + found_record->ionode_bitmap)) { + debug2("block %s[%s] already there", + found_record->nodes, + found_record->ionodes); + /* we don't need to set this error, it + doesn't overlap + */ + if(!set_error) + break; + + snprintf(reason, sizeof(reason), + "update_sub_node: " + "Admin set block %s state to %s %s", + found_record->bg_block_id, + _block_state_str( + part_desc_ptr->state_up), + tmp); + info("%s",reason); + if(found_record->job_running + > NO_JOB_RUNNING) { + slurm_fail_job( + found_record->job_running); + } + + if(!part_desc_ptr->state_up) { + found_record->job_running = + BLOCK_ERROR_STATE; + found_record->state = + RM_PARTITION_ERROR; + } else if(part_desc_ptr->state_up){ + found_record->job_running = + NO_JOB_RUNNING; + found_record->state = + RM_PARTITION_FREE; + } else { + error("update_sub_node: " + "Unknown state %d given", + part_desc_ptr->state_up); + rc = SLURM_ERROR; + break; + } + break; + } else if(!set_error + && bit_equal(bg_record->bitmap, + found_record->bitmap) + && bit_overlap( + bg_record->ionode_bitmap, + found_record->ionode_bitmap)) { + break; + } + + } + list_iterator_destroy(itr); + slurm_mutex_unlock(&block_state_mutex); + /* we already found an existing record */ + if(found_record) { + destroy_bg_record(bg_record); + continue; + } + /* we need to add this record since it doesn't exist */ + if(configure_block(bg_record) == SLURM_ERROR) { + destroy_bg_record(bg_record); + error("update_sub_node: " + "unable to configure block in api"); + } + debug2("adding block %s to fill in small blocks " + "around bad blocks", + bg_record->bg_block_id); + print_bg_record(bg_record); + slurm_mutex_lock(&block_state_mutex); + list_append(bg_list, bg_record); + slurm_mutex_unlock(&block_state_mutex); + + /* We are just adding the block not deleting any or + setting this one to an error state. + */ + if(!set_error) + continue; + + if(!part_desc_ptr->state_up) { + bg_record->job_running = BLOCK_ERROR_STATE; + bg_record->state = RM_PARTITION_ERROR; + } else if(part_desc_ptr->state_up){ + bg_record->job_running = NO_JOB_RUNNING; + bg_record->state = RM_PARTITION_FREE; + } else { + error("update_sub_node: Unknown state %d given", + part_desc_ptr->state_up); + rc = SLURM_ERROR; + continue; + } + snprintf(reason, sizeof(reason), + "update_sub_node: " + "Admin set block %s state to %s %s", + bg_record->bg_block_id, + _block_state_str(part_desc_ptr->state_up), + tmp); + info("%s",reason); + + /* remove overlapping blocks */ + slurm_mutex_lock(&block_state_mutex); + itr = list_iterator_create(bg_list); + while((found_record = list_next(itr))) { + if ((!found_record) || (bg_record == found_record)) + continue; + if(!blocks_overlap(bg_record, found_record)) { + debug2("block %s isn't part of %s", + found_record->bg_block_id, + bg_record->bg_block_id); + continue; + } + debug2("removing block %s because there is something " + "wrong with part of the base partition", + found_record->bg_block_id); + if(found_record->job_running > NO_JOB_RUNNING) { + slurm_fail_job(found_record->job_running); + } + list_push(delete_list, found_record); + list_remove(itr); + num_block_to_free++; + } + list_iterator_destroy(itr); + free_block_list(delete_list); + slurm_mutex_unlock(&block_state_mutex); + } + list_destroy(delete_list); + bit_free(ionode_bitmap); + + /* This only works for the error state, not free */ + + last_bg_update = time(NULL); + +end_it: + return rc; +} + extern int select_p_get_extra_jobinfo (struct node_record *node_ptr, struct job_record *job_ptr, enum select_data_info info, diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c index 13b429daaa6ae23ae3e539a9ed588a2ad269875e..07b04767e6568eda274a077d62f79b87c1e1089a 100644 --- a/src/plugins/select/cons_res/select_cons_res.c +++ b/src/plugins/select/cons_res/select_cons_res.c @@ -2294,6 +2294,11 @@ extern int select_p_update_block (update_part_msg_t *part_desc_ptr) return SLURM_SUCCESS; } +extern int select_p_update_sub_node (update_part_msg_t *part_desc_ptr) +{ + return SLURM_SUCCESS; +} + extern int select_p_get_info_from_plugin(enum select_data_info info, void *data) { diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c index 4485cfcf5486b548771ec542e03c507eeb6aa0b4..45f56f76dcaa9d119799e59aa6d95654cfe652b1 100644 --- a/src/plugins/select/linear/select_linear.c +++ b/src/plugins/select/linear/select_linear.c @@ -715,6 +715,10 @@ extern int select_p_update_block (update_part_msg_t *part_desc_ptr) return SLURM_SUCCESS; } +extern int select_p_update_sub_node (update_part_msg_t *part_desc_ptr) +{ + return SLURM_SUCCESS; +} extern int select_p_get_extra_jobinfo (struct node_record *node_ptr, struct job_record *job_ptr, enum select_data_info info, diff --git a/src/scontrol/scontrol.c b/src/scontrol/scontrol.c index 992fa004b57e1a1ab0959a380967750185e4e6b4..d6e91d1db90e802a53868a7a1109b917bf602525 100644 --- a/src/scontrol/scontrol.c +++ b/src/scontrol/scontrol.c @@ -58,6 +58,7 @@ static void _print_version( void ); static int _process_command (int argc, char *argv[]); static void _update_it (int argc, char *argv[]); static int _update_bluegene_block (int argc, char *argv[]); +static int _update_bluegene_subbp (int argc, char *argv[]); static void _usage (); int @@ -781,6 +782,9 @@ _update_it (int argc, char *argv[]) } else if (strncasecmp (argv[i], "BlockName=", 10) == 0) { error_code = _update_bluegene_block (argc, argv); break; + } else if (strncasecmp (argv[i], "SubBPName=", 10) == 0) { + error_code = _update_bluegene_subbp (argc, argv); + break; } } @@ -790,7 +794,8 @@ _update_it (int argc, char *argv[]) fprintf(stderr, "No valid entity in update command\n"); fprintf(stderr, "Input line must include \"NodeName\", "); #ifdef HAVE_BG - fprintf(stderr, "\"BlockName\", "); + fprintf(stderr, "\"BlockName\", \"SubBPName\" " + "(i.e. bgl000[0-3]),"); #endif fprintf(stderr, "\"PartitionName\", or \"JobId\"\n"); } @@ -816,7 +821,7 @@ _update_bluegene_block (int argc, char *argv[]) update_part_msg_t part_msg; slurm_init_part_desc_msg ( &part_msg ); - /* means this is for bluegene */ + /* means this is for bluegene and altering a block */ part_msg.hidden = (uint16_t)INFINITE; for (i=0; i<argc; i++) { @@ -838,6 +843,63 @@ _update_bluegene_block (int argc, char *argv[]) update_cnt++; } } + if(!part_msg.name) { + error("You didn't supply a name."); + return 0; + } + if (slurm_update_partition(&part_msg)) { + exit_code = 1; + return slurm_get_errno (); + } else + return 0; +#else + printf("This only works on a bluegene system.\n"); + return 0; +#endif +} + +/* + * _update_bluegene_subbp - update the bluegene nodecards per the + * supplied arguments + * IN argc - count of arguments + * IN argv - list of arguments + * RET 0 if no slurm error, errno otherwise. parsing error prints + * error message and returns 0 + */ +static int +_update_bluegene_subbp (int argc, char *argv[]) +{ +#ifdef HAVE_BG + int i, update_cnt = 0; + update_part_msg_t part_msg; + + slurm_init_part_desc_msg ( &part_msg ); + /* means this is for bluegene and altering a sub node */ + part_msg.root_only = (uint16_t)INFINITE; + + for (i=0; i<argc; i++) { + if (strncasecmp(argv[i], "SubBPName=", 10) == 0) + part_msg.name = &argv[i][10]; + else if (strncasecmp(argv[i], "State=", 6) == 0) { + if (strcasecmp(&argv[i][6], "ERROR") == 0) + part_msg.state_up = 0; + else if (strcasecmp(&argv[i][6], "FREE") == 0) + part_msg.state_up = 1; + else { + exit_code = 1; + fprintf (stderr, "Invalid input: %s\n", + argv[i]); + fprintf (stderr, "Acceptable State values " + "are FREE and ERROR\n"); + return 0; + } + update_cnt++; + } + } + if(!part_msg.name) { + error("You didn't supply a name."); + return 0; + } if (slurm_update_partition(&part_msg)) { exit_code = 1; return slurm_get_errno (); @@ -893,13 +955,13 @@ scontrol [<OPTION>] [<COMMAND>] \n\ suspend <job_id> susend specified job \n\ resume <job_id> resume previously suspended job \n\ update <SPECIFICATIONS> update job, node, partition, or bluegene \n\ - block configuration \n\ + block/subbp configuration \n\ verbose enable detailed logging. \n\ version display tool version number. \n\ !! Repeat the last command entered. \n\ \n\ <ENTITY> may be \"config\", \"daemons\", \"job\", \"node\", \"partition\"\n\ - \"block\" or \"step\". \n\ + \"block\", \"subbp\" or \"step\". \n\ \n\ <ID> may be a configuration parameter name , job id, node name, partition\n\ name or job step id. \n\ @@ -910,8 +972,9 @@ scontrol [<OPTION>] [<COMMAND>] \n\ \n\ <SPECIFICATIONS> are specified in the same format as the configuration \n\ file. You may wish to use the \"show\" keyword then use its output as \n\ - input for the update keyword, editing as needed. Bluegene blocks are \n\ - only able to be set to an error or free state. (Bluegene systems only) \n\ + input for the update keyword, editing as needed. Bluegene blocks/subbps \n\ + are only able to be set to an error or free state. \n\ + (Bluegene systems only) \n\ \n\ <CH_OP> identify checkpoint operations and may be \"able\", \"disable\", \n\ \"enable\", \"create\", \"vacate\", \"restart\", or \"error\". \n\ diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index db2bd669f6b29ffafc2d06f2df1cc80ff89c6298..ae12006880101fffc39b2622abbaf9014458395f 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -651,7 +651,7 @@ static void _slurm_rpc_dump_nodes(slurm_msg_t * msg) slurm_send_rc_msg(msg, SLURM_NO_CHANGE_IN_DATA); } else { pack_all_node(&dump, &dump_size, node_req_msg->show_flags, - g_slurm_auth_get_uid(msg->auth_cred)); + g_slurm_auth_get_uid(msg->auth_cred)); unlock_slurmctld(node_read_lock); END_TIMER; debug2("_slurm_rpc_dump_nodes, size=%d %s", @@ -1914,6 +1914,8 @@ static void _slurm_rpc_update_partition(slurm_msg_t * msg) /* do RPC call */ if(part_desc_ptr->hidden == (uint16_t)INFINITE) error_code = select_g_update_block(part_desc_ptr); + else if(part_desc_ptr->root_only == (uint16_t)INFINITE) + error_code = select_g_update_sub_node(part_desc_ptr); else { lock_slurmctld(part_write_lock); error_code = update_part(part_desc_ptr);