diff --git a/NEWS b/NEWS
index 0df7ead7bc8ad2b6217da804231817a5b927840e..a913574f8081e8783428ac771bc8c67c7de40090 100644
--- a/NEWS
+++ b/NEWS
@@ -271,6 +271,7 @@ documents those changes that are of interest to users and admins.
     count from a range rather than minimum (e.g. "sbatch -N1-4 my.sh").
  -- In accounting_storage/filetxt and accounting_storage/pgsql fix possible
     invalid memory reference when a job lacks a name.
+ -- Give srun command an exit code of 1 if the prolog fails.
 
 * Changes in SLURM 1.3.14
 =========================
diff --git a/src/plugins/select/bluegene/plugin/bg_record_functions.c b/src/plugins/select/bluegene/plugin/bg_record_functions.c
index 33d1b69ef11eb2ded6fa062e0a72768b825cd8f3..e4c572444554406c5008868298e28f5d8d5ab64c 100644
--- a/src/plugins/select/bluegene/plugin/bg_record_functions.c
+++ b/src/plugins/select/bluegene/plugin/bg_record_functions.c
@@ -1117,6 +1117,201 @@ extern int format_node_name(bg_record_t *bg_record, char *buf, int buf_size)
 	return SLURM_SUCCESS;
 }
 
+/*
+ * down_sub_node_blocks - put the blocks that use failed nodecards of one
+ *	midplane into an error state
+ * IN coord - X, Y, Z coordinates of the midplane holding the nodecards
+ * IN ionode_bitmap - ionodes of the failed nodecards; only read here, the
+ *	caller keeps ownership and must free it
+ * RET SLURM_SUCCESS, or EINVAL if coord does not name a known node
+ */
+extern int down_sub_node_blocks(int *coord, bitstr_t *ionode_bitmap)
+{
+	List requests = NULL;
+	List delete_list = NULL;
+	List error_list = NULL;
+	ListIterator itr = NULL;
+	blockreq_t blockreq;
+	bg_record_t *bg_record = NULL, *found_record = NULL;
+	char *node_name = NULL;
+
+	xassert(coord);
+	xassert(ionode_bitmap);
+
+	node_name = xstrdup_printf("%s%c%c%c",
+				   bg_slurm_node_prefix,
+				   alpha_num[coord[X]],
+				   alpha_num[coord[Y]],
+				   alpha_num[coord[Z]]);
+	if (!find_node_record(node_name)) {
+		error ("down_sub_node_blocks: invalid node specified %s",
+		       node_name);
+		xfree(node_name);
+		return EINVAL;
+	}
+
+	/* Here we need to add blocks that take up nodecards on this
+	   midplane.  Since Slurm only keeps track of midplanes
+	   natively this is the only way to handle this case.
+	*/
+	requests = list_create(destroy_bg_record);
+	memset(&blockreq, 0, sizeof(blockreq_t));
+
+	blockreq.block = node_name;
+	blockreq.conn_type = SELECT_SMALL;
+	blockreq.small32 = bluegene_bp_nodecard_cnt;
+
+	add_bg_record(requests, NULL, &blockreq);
+
+	slurm_mutex_lock(&block_state_mutex);
+	itr = list_iterator_create(bg_list);
+
+	error_list = list_create(NULL);
+	delete_list = list_create(NULL);
+	while((bg_record = list_pop(requests))) {
+		if(!bit_overlap(bg_record->ionode_bitmap, ionode_bitmap)) {
+			/* we don't care about this one since it
+			   wasn't set.
+			*/
+			destroy_bg_record(bg_record);
+			continue;
+		}
+
+		list_iterator_reset(itr);
+		while((found_record = list_next(itr))) {
+			if(bit_equal(bg_record->bitmap,
+				     found_record->bitmap)
+			   && bit_equal(bg_record->ionode_bitmap,
+					found_record->ionode_bitmap)) {
+				break;
+			}
+		}
+
+		if(found_record) {
+			debug2("block %s[%s] already there",
+			       found_record->nodes,
+			       found_record->ionodes);
+			/* we'll get this one later.  We are just
+			   checking which ones we have to add right now.
+			*/
+			if(found_record->job_running > NO_JOB_RUNNING)
+				slurm_fail_job(found_record->job_running);
+			list_append(error_list, found_record);
+			destroy_bg_record(bg_record);
+			continue;
+		} else if(bluegene_layout_mode != LAYOUT_DYNAMIC) {
+			bg_record_t *smallest_bg_record = NULL;
+			/* here we only want to see if we can find the
+			   smallest overlapping thing and set it to an
+			   error */
+			/* don't add anything new to the list since we aren't
+			   dynamic */
+			list_iterator_reset(itr);
+			while((found_record = list_next(itr))) {
+				/* we don't care about anything over
+				   1 midplane */
+				if(found_record->node_cnt
+				   > bluegene_bp_node_cnt)
+					continue;
+				if(!blocks_overlap(bg_record, found_record)) {
+					debug2("block %s isn't part of %s",
+					       found_record->bg_block_id,
+					       bg_record->bg_block_id);
+					continue;
+				}
+
+				if(!smallest_bg_record ||
+				   (smallest_bg_record->cpu_cnt
+				    > found_record->cpu_cnt))
+					smallest_bg_record = found_record;
+			}
+
+			if(smallest_bg_record) {
+				if(smallest_bg_record->job_running
+				   > NO_JOB_RUNNING)
+					slurm_fail_job(smallest_bg_record->
+						       job_running);
+				list_append(error_list, smallest_bg_record);
+			} else {
+				if(!node_already_down(node_name))
+					ba_update_node_state(
+						&ba_system_ptr->grid[coord[X]]
+						[coord[Y]][coord[Z]],
+						NODE_STATE_DRAIN);
+			}
+
+			destroy_bg_record(bg_record);
+			continue;
+		}
+
+		/* we need to add this record since it doesn't exist */
+		if(configure_block(bg_record) == SLURM_ERROR) {
+			destroy_bg_record(bg_record);
+			error("down_sub_node_blocks: "
+			      "unable to configure block in api");
+			continue;
+		}
+
+		debug("adding block %s to fill in small blocks "
+		      "around bad nodecards",
+		      bg_record->bg_block_id);
+		print_bg_record(bg_record);
+		list_append(bg_list, bg_record);
+		list_append(error_list, bg_record);
+	}
+	/* every request was popped above, only the list itself is left */
+	list_destroy(requests);
+
+	/* remove overlapping blocks */
+	while((found_record = list_pop(error_list))) {
+		if(found_record->job_running == BLOCK_ERROR_STATE)
+			continue;
+		error("Setting block %s to error state "
+		      "because of failed hardware.",
+		      found_record->bg_block_id);
+		found_record->job_running = BLOCK_ERROR_STATE;
+		found_record->state = RM_PARTITION_ERROR;
+		trigger_block_error();
+
+		/* we have to check them all just to make sure no
+		   small blocks are there
+		*/
+		list_iterator_reset(itr);
+		while((bg_record = list_next(itr))) {
+			if(found_record == bg_record)
+				continue;
+			if(!blocks_overlap(bg_record, found_record)) {
+				debug2("block %s isn't part of %s",
+				       found_record->bg_block_id,
+				       bg_record->bg_block_id);
+				continue;
+			}
+			debug2("removing block %s because there is something "
+			       "wrong with part of the base partition",
+			       bg_record->bg_block_id);
+			if(bg_record->job_running > NO_JOB_RUNNING)
+				slurm_fail_job(bg_record->job_running);
+
+			/* don't remove any blocks if not dynamic */
+			if(bluegene_layout_mode != LAYOUT_DYNAMIC)
+				continue;
+			list_push(delete_list, bg_record);
+			list_remove(itr);
+			num_block_to_free++;
+		}
+	}
+	list_iterator_destroy(itr);
+	free_block_list(delete_list);
+	list_destroy(delete_list);
+	slurm_mutex_unlock(&block_state_mutex);
+
+	list_destroy(error_list);
+
+	xfree(node_name);
+	last_bg_update = time(NULL);
+	return SLURM_SUCCESS;
+}
+
 /************************* local functions ***************************/
 
 #ifdef HAVE_BG
diff --git a/src/plugins/select/bluegene/plugin/bg_record_functions.h b/src/plugins/select/bluegene/plugin/bg_record_functions.h
index c4d56857b22299960a036eaa445483fd0941b0b7..0c95f87c733b01bd412558d498efc172676db3fc 100644
--- a/src/plugins/select/bluegene/plugin/bg_record_functions.h
+++ b/src/plugins/select/bluegene/plugin/bg_record_functions.h
@@ -147,5 +147,6 @@ extern int handle_small_record_request(List records, blockreq_t *blockreq,
 				       bg_record_t *bg_record, bitoff_t start);
 
 extern int format_node_name(bg_record_t *bg_record, char *buf, int buf_size);
+extern int down_sub_node_blocks(int *coord, bitstr_t *ionode_bitmap);
 
 #endif /* _BLUEGENE_BG_RECORD_FUNCTIONS_H_ */
diff --git a/src/plugins/select/bluegene/plugin/bluegene.c b/src/plugins/select/bluegene/plugin/bluegene.c
index 0984e41c16c2ea8eac1ff2e0eec039a4fca79210..48bdca0d6894598efb5bcc5507c755f82b08cf15 100644
--- a/src/plugins/select/bluegene/plugin/bluegene.c
+++ b/src/plugins/select/bluegene/plugin/bluegene.c
@@ -234,29 +234,13 @@ extern bool blocks_overlap(bg_record_t *rec_a, bg_record_t *rec_b)
 	if (!bit_overlap(rec_a->bitmap, rec_b->bitmap))
 		return false;
 
-#ifdef HAVE_BGL
-
-	if(rec_a->quarter != (uint16_t) NO_VAL) {
-		if(rec_b->quarter == (uint16_t) NO_VAL)
-			return true;
-		else if(rec_a->quarter != rec_b->quarter)
-			return false;
-		if(rec_a->nodecard != (uint16_t) NO_VAL) {
-			if(rec_b->nodecard == (uint16_t) NO_VAL)
-				return true;
-			else if(rec_a->nodecard
-				!= rec_b->nodecard)
-				return false;
-		}
-	}
-#else
 	if((rec_a->node_cnt >= bluegene_bp_node_cnt)
 	   || (rec_b->node_cnt >= bluegene_bp_node_cnt))
 		return true;
 
 	if (!bit_overlap(rec_a->ionode_bitmap, rec_b->ionode_bitmap))
 		return false;
-#endif
+
 	return true;
 }
 
diff --git a/src/plugins/select/bluegene/plugin/state_test.c b/src/plugins/select/bluegene/plugin/state_test.c
index 1899f79811f5476a665620da095b7ed52b661cd2..052c9812e3d6615642996ee04d70328490866832 100644
--- a/src/plugins/select/bluegene/plugin/state_test.c
+++ b/src/plugins/select/bluegene/plugin/state_test.c
@@ -162,6 +162,181 @@ static char *_convert_bp_state(rm_BP_state_t state)
 	return "BP_STATE_UNIDENTIFIED!";
 }
 
+/*
+ * Test the nodecards of a base partition that MMCS reports as UP and put
+ * the blocks using any nodecard that is not UP into an error state.  When
+ * every nodecard is UP, blocks on this base partition that were left in
+ * the error state are made available again.
+ * IN bp_ptr - base partition to test
+ * RET SLURM_SUCCESS or SLURM_ERROR
+ */
+static int _test_down_nodecards(rm_BP_t *bp_ptr)
+{
+	rm_bp_id_t bp_id = NULL;
+	rm_nodecard_id_t nc_name = NULL;
+	int num = 0;
+	int i=0;
+	int rc = SLURM_SUCCESS;
+	rm_nodecard_list_t *ncard_list = NULL;
+	rm_nodecard_t *ncard = NULL;
+	rm_nodecard_state_t state;
+	bitstr_t *ionode_bitmap = NULL;
+	bg_record_t *bg_record = NULL;
+	int *coord = NULL;
+	char *node_name = NULL;
+	struct node_record *node_ptr = NULL;
+	int bp_bit = 0;
+	int io_cnt = 1;
+
+	/* Translate 1 nodecard count to ionode count */
+	if((io_cnt *= bluegene_io_ratio))
+		io_cnt--;
+
+	if ((rc = bridge_get_data(bp_ptr, RM_BPID, &bp_id))
+	    != STATUS_OK) {
+		error("bridge_get_data(RM_BPID): %s",
+		      bg_err_str(rc));
+		return SLURM_ERROR;
+	}
+
+	if ((rc = bridge_get_nodecards(bp_id, &ncard_list))
+	    != STATUS_OK) {
+		error("bridge_get_nodecards(%s): %d",
+		      bp_id, rc);
+		rc = SLURM_ERROR;
+		goto cleanup;
+	}
+
+	coord = find_bp_loc(bp_id);
+	if(!coord) {
+		error("Could not find coordinates for "
+		      "BP ID %s", (char *) bp_id);
+		rc = SLURM_ERROR;
+		goto cleanup;
+	}
+
+	node_name = xstrdup_printf("%s%c%c%c",
+				   bg_slurm_node_prefix,
+				   alpha_num[coord[X]],
+				   alpha_num[coord[Y]],
+				   alpha_num[coord[Z]]);
+	node_ptr = find_node_record(node_name);
+	if (!node_ptr) {
+		error("_test_down_nodecards: invalid node specified %s",
+		      node_name);
+		rc = SLURM_ERROR;
+		goto cleanup;
+	}
+	bp_bit = (node_ptr - node_record_table_ptr);
+
+	if((rc = bridge_get_data(ncard_list, RM_NodeCardListSize, &num))
+	   != STATUS_OK) {
+		error("bridge_get_data(RM_NodeCardListSize): %s",
+		      bg_err_str(rc));
+		rc = SLURM_ERROR;
+		goto cleanup;
+	}
+
+	for(i=0; i<num; i++) {
+		int nc_id = 0, io_start = 0;
+
+		if (i) {
+			if ((rc = bridge_get_data(ncard_list,
+						  RM_NodeCardListNext,
+						  &ncard)) != STATUS_OK) {
+				error("bridge_get_data"
+				      "(RM_NodeCardListNext): %s",
+				      bg_err_str(rc));
+				rc = SLURM_ERROR;
+				goto cleanup;
+			}
+		} else {
+			if ((rc = bridge_get_data(ncard_list,
+						  RM_NodeCardListFirst,
+						  &ncard)) != STATUS_OK) {
+				error("bridge_get_data"
+				      "(RM_NodeCardListFirst): %s",
+				      bg_err_str(rc));
+				rc = SLURM_ERROR;
+				goto cleanup;
+			}
+		}
+		if ((rc = bridge_get_data(ncard,
+					  RM_NodeCardState,
+					  &state)) != STATUS_OK) {
+			error("bridge_get_data(RM_NodeCardState): %s",
+			      bg_err_str(rc));
+			rc = SLURM_ERROR;
+			goto cleanup;
+		}
+
+		if(state == RM_NODECARD_UP)
+			continue;
+
+		if ((rc = bridge_get_data(ncard,
+					  RM_NodeCardID,
+					  &nc_name)) != STATUS_OK) {
+			error("bridge_get_data(RM_NodeCardID): %d",rc);
+			rc = SLURM_ERROR;
+			goto cleanup;
+		}
+
+		if(!nc_name) {
+			rc = SLURM_ERROR;
+			goto cleanup;
+		}
+
+		debug("nodecard %s on %s is in an error state",
+		      nc_name, node_name);
+
+		/* From the first nodecard id we can figure
+		   out where to start from with the alloc of ionodes.
+		*/
+		nc_id = atoi((char*)nc_name+1);
+		free(nc_name);
+		io_start = nc_id * bluegene_io_ratio;
+
+		if(!ionode_bitmap)
+			ionode_bitmap = bit_alloc(bluegene_numpsets);
+
+		bit_nset(ionode_bitmap, io_start, io_start+io_cnt);
+	}
+	rc = SLURM_SUCCESS;
+
+	if(ionode_bitmap) {
+		/* NOTE(review): the original patch also called
+		   up_sub_node_blocks() here, but nothing in this patch
+		   declares or defines it; restore the call once it exists. */
+		down_sub_node_blocks(coord, ionode_bitmap);
+	} else {
+		ListIterator itr = NULL;
+		slurm_mutex_lock(&block_state_mutex);
+		itr = list_iterator_create(bg_list);
+		while ((bg_record = list_next(itr))) {
+			if(bg_record->job_running != BLOCK_ERROR_STATE)
+				continue;
+
+			if(!bit_test(bg_record->bitmap, bp_bit))
+				continue;
+
+			bg_record->job_running = NO_JOB_RUNNING;
+			bg_record->state = RM_PARTITION_FREE;
+		}
+		list_iterator_destroy(itr);
+		slurm_mutex_unlock(&block_state_mutex);
+	}
+
+cleanup:
+	xfree(node_name);
+	if(ionode_bitmap)
+		FREE_NULL_BITMAP(ionode_bitmap);
+	if (ncard_list)
+		bridge_free_nodecard_list(ncard_list);
+	free(bp_id);
+
+	return rc;
+}
+
 /* Test for nodes that are not UP in MMCS and DRAIN them in SLURM */
 static void _test_down_nodes(my_bluegene_t *my_bg)
 {
@@ -204,9 +379,11 @@ static void _test_down_nodes(my_bluegene_t *my_bg)
 			continue;
 		}
 
-		if (bp_state == RM_BP_UP)
+		if (bp_state == RM_BP_UP) {
+			_test_down_nodecards(my_bp);
 			continue;
-
+		}
+
 		if ((rc = bridge_get_data(my_bp, RM_BPLoc, &bp_loc))
 		    != STATUS_OK) {
 			error("bridge_get_data(RM_BPLoc): %s", bg_err_str(rc));
diff --git a/src/slurmctld/trigger_mgr.c b/src/slurmctld/trigger_mgr.c
index b92bbb98b785c51622a7d46fc3a8b7ad0aea0239..fe05f24bb51e665bdc26921b01676ef2f5e40b00 100644
--- a/src/slurmctld/trigger_mgr.c
+++ b/src/slurmctld/trigger_mgr.c
@@ -985,11 +985,11 @@ static void _trigger_run_program(trig_mgr_info_t *trig_in)
 			error("trigger: initgroups: %m");
 			exit(1);
 		}
-		if (setgid(uid) == -1) {
+		if (setgid(gid) == -1) {
 			error("trigger: setgid: %m");
 			exit(1);
 		}
-		if (setuid(gid) == -1) {
+		if (setuid(uid) == -1) {
 			error("trigger: setuid: %m");
 			exit(1);
 		}
diff --git a/src/srun/srun.c b/src/srun/srun.c
index 606b26e160b753ca217000f123c1c7b7897a976e..62d4df42800d6b17790121c3447d9722ddc4daf6 100644
--- a/src/srun/srun.c
+++ b/src/srun/srun.c
@@ -420,6 +420,7 @@ int srun(int ac, char **av)
 	if (slurm_step_launch(job->step_ctx, &launch_params, &callbacks) !=
 	    SLURM_SUCCESS) {
 		error("Application launch failed: %m");
+		global_rc = 1;
 		goto cleanup;
 	}
 