diff --git a/NEWS b/NEWS index 82b5a23da8dc292ddb044782da7dc7ebb3c0106e..a6aa56dc93594f8aeca2a0cbfa035a0ce649ad51 100644 --- a/NEWS +++ b/NEWS @@ -223,6 +223,9 @@ documents those changes that are of interest to users and admins. -- when using -j option in sacct no user restriction will applied unless specified with the -u option. -- significant speed up for association based reports in sreport + -- BLUEGENE - fix for checking if job can run with downed nodes. Previously + sbatch etc. would tell you node configuration not available; now jobs are + accepted but held until nodes are back up. * Changes in SLURM 1.3.13 ========================= diff --git a/src/plugins/select/bluegene/plugin/bg_job_place.c b/src/plugins/select/bluegene/plugin/bg_job_place.c index 824a6cce291c693f28209f106f882d1ec877a102..cbce871360591bd0d4812760a7034bf032994d2c 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_place.c +++ b/src/plugins/select/bluegene/plugin/bg_job_place.c @@ -724,7 +724,8 @@ static int _dynamically_request(List block_list, int *blocks_added, */ debug("trying with %d", create_try); if((new_blocks = create_dynamic_block(block_list, - request, temp_list))) { + request, temp_list, + true))) { bg_record_t *bg_record = NULL; while((bg_record = list_pop(new_blocks))) { if(block_exist_in_list(block_list, bg_record)) @@ -1086,6 +1087,7 @@ static int _find_best_block_match(List block_list, slurm_mutex_unlock(&block_state_mutex); list_sort(job_list, (ListCmpF)_bg_record_sort_aval_inc); while(1) { + bool track_down_nodes = true; /* this gets altered in * create_dynamic_block so we reset it */ for(i=0; i<BA_SYSTEM_DIMENSIONS; i++) @@ -1099,8 +1101,17 @@ static int _find_best_block_match(List block_list, bg_record->bg_block_id, bg_record->job_ptr->start_time, bg_record->job_ptr->end_time); + else + /* This means we didn't have + any jobs to take off + anymore so we are making + sure we can look at every + node on the system. 
+ */ + track_down_nodes = false; if(!(new_blocks = create_dynamic_block( - block_list, &request, job_list))) { + block_list, &request, job_list, + track_down_nodes))) { destroy_bg_record(bg_record); if(errno == ESLURM_INTERCONNECT_FAILURE || !list_count(job_list)) { diff --git a/src/plugins/select/bluegene/plugin/bg_job_run.c b/src/plugins/select/bluegene/plugin/bg_job_run.c index 9d5f0477f465128bba681c33e289fada991ee6df..36c3a46e3b1da9d3e44afeb45593bdf96cb306f2 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_run.c +++ b/src/plugins/select/bluegene/plugin/bg_job_run.c @@ -265,11 +265,11 @@ static void _sync_agent(bg_update_t *bg_update_ptr) bg_record->job_running = bg_update_ptr->job_ptr->job_id; bg_record->job_ptr = bg_update_ptr->job_ptr; - if(!block_exist_in_list(bg_job_block_list, bg_record)) { + if(!block_ptr_exist_in_list(bg_job_block_list, bg_record)) { list_push(bg_job_block_list, bg_record); num_unused_cpus -= bg_record->cpu_cnt; } - if(!block_exist_in_list(bg_booted_block_list, bg_record)) + if(!block_ptr_exist_in_list(bg_booted_block_list, bg_record)) list_push(bg_booted_block_list, bg_record); slurm_mutex_unlock(&block_state_mutex); @@ -318,8 +318,7 @@ static void _start_agent(bg_update_t *bg_update_ptr) slurm_mutex_lock(&job_start_mutex); - bg_record = - find_bg_record_in_list(bg_list, bg_update_ptr->bg_block_id); + bg_record = find_bg_record_in_list(bg_list, bg_update_ptr->bg_block_id); if(!bg_record) { error("block %s not found in bg_list", @@ -354,6 +353,9 @@ static void _start_agent(bg_update_t *bg_update_ptr) slurm_mutex_unlock(&block_state_mutex); debug("Block is in Deallocating state, waiting for free."); bg_free_block(bg_record); + /* no reason to reboot here since we are already + deallocating */ + bg_update_ptr->reboot = 0; } else slurm_mutex_unlock(&block_state_mutex); @@ -587,12 +589,17 @@ static void _start_agent(bg_update_t *bg_update_ptr) slurm_mutex_lock(&block_state_mutex); bg_record->modifying = 0; 
slurm_mutex_unlock(&block_state_mutex); - } else if(bg_update_ptr->reboot) -#ifdef HAVE_BGL + } else if(bg_update_ptr->reboot) { + slurm_mutex_lock(&block_state_mutex); + bg_record->modifying = 1; + slurm_mutex_unlock(&block_state_mutex); + bg_free_block(bg_record); -#else - bg_reboot_block(bg_record); -#endif + + slurm_mutex_lock(&block_state_mutex); + bg_record->modifying = 0; + slurm_mutex_unlock(&block_state_mutex); + } if(bg_record->state == RM_PARTITION_FREE) { if((rc = boot_block(bg_record)) != SLURM_SUCCESS) { @@ -1073,11 +1080,11 @@ extern int start_job(struct job_record *job_ptr) job_ptr->total_procs = job_ptr->num_procs; bg_record->job_running = bg_update_ptr->job_ptr->job_id; bg_record->job_ptr = bg_update_ptr->job_ptr; - if(!block_exist_in_list(bg_job_block_list, bg_record)) { + if(!block_ptr_exist_in_list(bg_job_block_list, bg_record)) { list_push(bg_job_block_list, bg_record); num_unused_cpus -= bg_record->cpu_cnt; } - if(!block_exist_in_list(bg_booted_block_list, bg_record)) + if(!block_ptr_exist_in_list(bg_booted_block_list, bg_record)) list_push(bg_booted_block_list, bg_record); slurm_mutex_unlock(&block_state_mutex); } else { @@ -1279,7 +1286,7 @@ extern int boot_block(bg_record_t *bg_record) } slurm_mutex_lock(&block_state_mutex); - if(!block_exist_in_list(bg_booted_block_list, bg_record)) + if(!block_ptr_exist_in_list(bg_booted_block_list, bg_record)) list_push(bg_booted_block_list, bg_record); slurm_mutex_unlock(&block_state_mutex); @@ -1305,7 +1312,7 @@ extern int boot_block(bg_record_t *bg_record) slurm_mutex_unlock(&block_state_mutex); #else slurm_mutex_lock(&block_state_mutex); - if(!block_exist_in_list(bg_booted_block_list, bg_record)) + if(!block_ptr_exist_in_list(bg_booted_block_list, bg_record)) list_push(bg_booted_block_list, bg_record); bg_record->state = RM_PARTITION_READY; last_bg_update = time(NULL); diff --git a/src/plugins/select/bluegene/plugin/bg_record_functions.c b/src/plugins/select/bluegene/plugin/bg_record_functions.c 
index aa25e3acd4756cd283c6ada87eb98d9065f347e0..b8924377094589c289b03454936a204dc4fe9cf0 100644 --- a/src/plugins/select/bluegene/plugin/bg_record_functions.c +++ b/src/plugins/select/bluegene/plugin/bg_record_functions.c @@ -113,13 +113,14 @@ extern void destroy_bg_record(void *object) } } +/* see if a record with like bitmaps already exists in a list */ extern int block_exist_in_list(List my_list, bg_record_t *bg_record) { ListIterator itr = list_iterator_create(my_list); bg_record_t *found_record = NULL; int rc = 0; - while ((found_record = (bg_record_t *) list_next(itr)) != NULL) { + while ((found_record = list_next(itr))) { /* check for full node bitmap compare */ if(bit_equal(bg_record->bitmap, found_record->bitmap) && bit_equal(bg_record->ionode_bitmap, @@ -144,6 +145,23 @@ extern int block_exist_in_list(List my_list, bg_record_t *bg_record) return rc; } +/* see if the exact record already exists in a list */ +extern int block_ptr_exist_in_list(List my_list, bg_record_t *bg_record) +{ + ListIterator itr = list_iterator_create(my_list); + bg_record_t *found_record = NULL; + int rc = 0; + + while ((found_record = list_next(itr))) { + if(bg_record == found_record) { + rc = 1; + break; + } + } + list_iterator_destroy(itr); + return rc; +} + extern void process_nodes(bg_record_t *bg_record, bool startup) { #ifdef HAVE_BG @@ -587,7 +605,6 @@ extern void drain_as_needed(bg_record_t *bg_record, char *reason) bool needed = true; hostlist_t hl; char *host = NULL; - char bg_down_node[128]; if(bg_record->job_running > NO_JOB_RUNNING) { int rc; @@ -623,7 +640,7 @@ extern void drain_as_needed(bg_record_t *bg_record, char *reason) return; } while ((host = hostlist_shift(hl))) { - if (node_already_down(bg_down_node)) { + if (node_already_down(host)) { needed = false; free(host); break; diff --git a/src/plugins/select/bluegene/plugin/bg_record_functions.h b/src/plugins/select/bluegene/plugin/bg_record_functions.h index 
16f139d8217ca550ca480b87e30876441ca2998e..c4d56857b22299960a036eaa445483fd0941b0b7 100644 --- a/src/plugins/select/bluegene/plugin/bg_record_functions.h +++ b/src/plugins/select/bluegene/plugin/bg_record_functions.h @@ -121,6 +121,7 @@ typedef struct bg_record { extern void print_bg_record(bg_record_t *record); extern void destroy_bg_record(void *object); extern int block_exist_in_list(List my_list, bg_record_t *bg_record); +extern int block_ptr_exist_in_list(List my_list, bg_record_t *bg_record); extern void process_nodes(bg_record_t *bg_reord, bool startup); extern List copy_bg_list(List in_list); extern void copy_bg_record(bg_record_t *fir_record, bg_record_t *sec_record); diff --git a/src/plugins/select/bluegene/plugin/bluegene.c b/src/plugins/select/bluegene/plugin/bluegene.c index 34e2df1768de5651d1664dd0552979c84d766f07..0df8e1697193de8a372fe8c921dd1bc733c01389 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.c +++ b/src/plugins/select/bluegene/plugin/bluegene.c @@ -616,74 +616,6 @@ extern int bg_free_block(bg_record_t *bg_record) return SLURM_SUCCESS; } -#ifndef HAVE_BGL -/* This function not available in bgl land */ -extern int bg_reboot_block(bg_record_t *bg_record) -{ -#ifdef HAVE_BG_FILES - int rc; -#endif - if(!bg_record) { - error("bg_reboot_block: there was no bg_record"); - return SLURM_ERROR; - } - - while (1) { - if(!bg_record) { - error("bg_reboot_block: there was no bg_record"); - return SLURM_ERROR; - } - - slurm_mutex_lock(&block_state_mutex); - if (bg_record->state != NO_VAL - && bg_record->state != RM_PARTITION_REBOOTING) { -#ifdef HAVE_BG_FILES - debug2("bridge_reboot %s", bg_record->bg_block_id); - - rc = bridge_reboot_block(bg_record->bg_block_id); - if (rc != STATUS_OK) { - if(rc == PARTITION_NOT_FOUND) { - debug("block %s is not found", - bg_record->bg_block_id); - break; - } else if(rc == INCOMPATIBLE_STATE) { - debug2("bridge_reboot_partition" - "(%s): %s State = %d", - bg_record->bg_block_id, - bg_err_str(rc), - 
bg_record->state); - } else { - error("bridge_reboot_partition" - "(%s): %s State = %d", - bg_record->bg_block_id, - bg_err_str(rc), - bg_record->state); - } - } -#else - bg_record->state = RM_PARTITION_READY; - break; -#endif - } - - if (bg_record->state == RM_PARTITION_CONFIGURING) { - if(!block_exist_in_list(bg_booted_block_list, - bg_record)) - list_push(bg_booted_block_list, bg_record); - break; - } else if (bg_record->state == RM_PARTITION_ERROR) { - remove_from_bg_list(bg_booted_block_list, bg_record); - break; - } - slurm_mutex_unlock(&block_state_mutex); - sleep(3); - } - slurm_mutex_unlock(&block_state_mutex); - - return SLURM_SUCCESS; -} -#endif - /* Free multiple blocks in parallel */ extern void *mult_free_block(void *args) { @@ -868,7 +800,7 @@ extern int free_block_list(List delete_list) while ((found_record = (bg_record_t*)list_pop(delete_list)) != NULL) { /* push job onto queue in a FIFO */ debug3("adding %s to be freed", found_record->bg_block_id); - if(!block_exist_in_list(*block_list, found_record)) { + if(!block_ptr_exist_in_list(*block_list, found_record)) { if (list_push(*block_list, found_record) == NULL) fatal("malloc failure in _block_op/list_push"); } else { @@ -1544,8 +1476,8 @@ static int _validate_config_nodes(List *bg_found_block_list, char *dir) convert_conn_type(bg_record->conn_type)); if(((bg_record->state == RM_PARTITION_READY) || (bg_record->state == RM_PARTITION_CONFIGURING)) - && !block_exist_in_list(bg_booted_block_list, - bg_record)) + && !block_ptr_exist_in_list(bg_booted_block_list, + bg_record)) list_push(bg_booted_block_list, bg_record); } } @@ -1569,7 +1501,7 @@ static int _validate_config_nodes(List *bg_found_block_list, char *dir) if(((bg_record->state == RM_PARTITION_READY) || (bg_record->state == RM_PARTITION_CONFIGURING)) - && !block_exist_in_list( + && !block_ptr_exist_in_list( bg_booted_block_list, bg_record)) list_push(bg_booted_block_list, bg_record); diff --git a/src/plugins/select/bluegene/plugin/bluegene.h 
b/src/plugins/select/bluegene/plugin/bluegene.h index df27c21af5ab64cd51cacd97cbbc953e80d0972f..f617d4ddabe5d8dea1a38473c1bcd0483cb36a08 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.h +++ b/src/plugins/select/bluegene/plugin/bluegene.h @@ -150,10 +150,6 @@ extern void *bluegene_agent(void *args); extern int bg_free_block(bg_record_t *bg_record); -#ifndef HAVE_BGL -extern int bg_reboot_block(bg_record_t *bg_record); -#endif - extern int remove_from_bg_list(List my_bg_list, bg_record_t *bg_record); extern bg_record_t *find_and_remove_org_from_bg_list(List my_list, bg_record_t *bg_record); diff --git a/src/plugins/select/bluegene/plugin/dynamic_block.c b/src/plugins/select/bluegene/plugin/dynamic_block.c index 04f13b6cda6a32bf80669bc4e48401ed58d10d89..8131c206d5fae49544b3e3145b2a5ea2fb7795a5 100644 --- a/src/plugins/select/bluegene/plugin/dynamic_block.c +++ b/src/plugins/select/bluegene/plugin/dynamic_block.c @@ -55,7 +55,8 @@ static int _breakup_blocks(List block_list, List new_blocks, * RET - a list of created block(s) or NULL on failure errno is set. 
*/ extern List create_dynamic_block(List block_list, - ba_request_t *request, List my_block_list) + ba_request_t *request, List my_block_list, + bool track_down_nodes) { int rc = SLURM_SUCCESS; @@ -80,7 +81,7 @@ extern List create_dynamic_block(List block_list, slurm_mutex_lock(&block_state_mutex); if(my_block_list) { - reset_ba_system(true); + reset_ba_system(track_down_nodes); itr = list_iterator_create(my_block_list); while ((bg_record = list_next(itr))) { if(!my_bitmap) { diff --git a/src/plugins/select/bluegene/plugin/dynamic_block.h b/src/plugins/select/bluegene/plugin/dynamic_block.h index 7ee4d1c953c353b641b77f90bf6ec57464d4d6ab..1555f8f8d980ed28907d31d6f4a743b0f701cf92 100644 --- a/src/plugins/select/bluegene/plugin/dynamic_block.h +++ b/src/plugins/select/bluegene/plugin/dynamic_block.h @@ -43,7 +43,8 @@ #include "bluegene.h" extern List create_dynamic_block(List block_list, - ba_request_t *request, List my_block_list); + ba_request_t *request, List my_block_list, + bool track_down_nodes); #ifdef HAVE_BGL extern bg_record_t *create_small_record(bg_record_t *bg_record, diff --git a/src/plugins/select/bluegene/plugin/slurm_prolog.c b/src/plugins/select/bluegene/plugin/slurm_prolog.c index 65f87d8fe1d6ee94151e3564b4379f17cfc9467b..63fc3f735a84a7f86361c29b6b612fc3836624ff 100644 --- a/src/plugins/select/bluegene/plugin/slurm_prolog.c +++ b/src/plugins/select/bluegene/plugin/slurm_prolog.c @@ -126,8 +126,11 @@ static int _wait_part_ready(uint32_t job_id) break; /* fatal error */ if (rc == READY_JOB_ERROR) /* error */ continue; /* retry */ - if ((rc & READY_JOB_STATE) == 0) /* job killed */ + if ((rc & READY_JOB_STATE) == 0) { /* job killed */ + /* return 1 so we don't get a prolog error */ + is_ready = 1; break; + } if (rc & READY_NODE_STATE) { /* job and node ready */ is_ready = 1; break; diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index 300c366bd11d3a8d648aa8049e11bd06640c8346..e4c48f2d936d3056099d3ab26757d259643710b2 100644 --- 
a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -230,7 +230,8 @@ int main(int argc, char *argv[]) info("Granted job allocation %d", alloc->job_id); #ifdef HAVE_BG if (!_wait_bluegene_block_ready(alloc)) { - error("Something is wrong with the boot of the block."); + if(!allocation_interrupted) + error("Something is wrong with the boot of the block."); goto relinquish; } #endif @@ -681,11 +682,14 @@ static int _wait_bluegene_block_ready(resource_allocation_response_msg_t *alloc) break; } } - if (is_ready) info("Block %s is ready for job", block_id); - else + else if(!allocation_interrupted) error("Block %s still not ready", block_id); + else /* this should never happen, but if allocation_interrupted + send back not ready */ + is_ready = 0; + xfree(block_id); pending_job_id = 0;