From 583e6782d656d52468cc7a182dbb482ab5a07f7a Mon Sep 17 00:00:00 2001 From: Danny Auble <da@llnl.gov> Date: Thu, 9 Mar 2006 23:00:52 +0000 Subject: [PATCH] fixed some issues with dynamic bgl --- .../select/bluegene/plugin/bg_block_info.c | 23 +- .../select/bluegene/plugin/bg_job_place.c | 24 +- .../select/bluegene/plugin/bg_job_run.c | 41 +- .../select/bluegene/plugin/block_sys.c | 4 +- src/plugins/select/bluegene/plugin/bluegene.c | 698 +++++++++--------- src/plugins/select/bluegene/plugin/bluegene.h | 6 +- 6 files changed, 433 insertions(+), 363 deletions(-) diff --git a/src/plugins/select/bluegene/plugin/bg_block_info.c b/src/plugins/select/bluegene/plugin/bg_block_info.c index 74c7121766b..864b2a30cf1 100644 --- a/src/plugins/select/bluegene/plugin/bg_block_info.c +++ b/src/plugins/select/bluegene/plugin/bg_block_info.c @@ -161,15 +161,14 @@ static void _drain_as_needed(char *node_list, char *reason) extern int block_ready(struct job_record *job_ptr) { int rc = 1; -#ifdef HAVE_BG_FILES char *block_id = NULL; bg_record_t *bg_record = NULL; rc = select_g_get_jobinfo(job_ptr->select_jobinfo, SELECT_DATA_BLOCK_ID, &block_id); if (rc == SLURM_SUCCESS) { + bg_record = find_bg_record_in_list(bg_list, block_id); slurm_mutex_lock(&block_state_mutex); - bg_record = find_bg_record(block_id); if(bg_record) { if ((bg_record->user_uid == job_ptr->user_id) @@ -188,7 +187,6 @@ extern int block_ready(struct job_record *job_ptr) xfree(block_id); } else rc = READY_JOB_ERROR; -#endif return rc; } @@ -273,10 +271,13 @@ extern int update_block_list() free(name); continue; } - bg_record = find_bg_record(name); + bg_record = find_bg_record_in_list(bg_list, name); if(bg_record == NULL) { - error("Block %s not found in bg_list " + if(find_bg_record_in_list(bg_freeing_list, name)) { + break; + } + debug("Block %s not found in bg_list " "removing from database", name); term_jobs_on_block(name); if ((rc = rm_get_data(block_ptr, @@ -311,9 +312,15 @@ extern int update_block_list() || (state == RM_PARTITION_ERROR)) { rc = rm_remove_partition(name); if (rc != STATUS_OK) { - error("rm_remove_partition(%s): %s", - name, - bg_err_str(rc)); + if(rc == PARTITION_NOT_FOUND) { + debug("1 block %s not found", + name); + } else { + error("1 rm_remove_partition" + "(%s): %s", + name, + bg_err_str(rc)); + } } else debug("done\n"); } diff --git a/src/plugins/select/bluegene/plugin/bg_job_place.c b/src/plugins/select/bluegene/plugin/bg_job_place.c index e8aed9f80c8..f62a58e5aba 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_place.c +++ b/src/plugins/select/bluegene/plugin/bg_job_place.c @@ -64,6 +64,8 @@ static void _rotate_geo(uint16_t *req_geometry, int rot_cnt) } } +pthread_mutex_t create_dynamic_mutex = PTHREAD_MUTEX_INITIALIZER; + /* * finds the best match for a given job request * @@ -126,6 +128,7 @@ static int _find_best_block_match(struct job_record* job_ptr, *found_bg_record = NULL; try_again: + debug("got here"); slurm_mutex_lock(&block_state_mutex); debug("number of blocks to check: %d state %d", list_count(bg_list), @@ -135,7 +138,8 @@ try_again: /* If test_only we want to fall through to tell the scheduler that it is runnable just not right now. */ - debug3("job_running = %d", record->job_running); + debug3("%s job_running = %d", + record->bg_block_id, record->job_running); /*partition is being destroyed, ignore it*/ if(record->job_running == -2) continue; @@ -228,12 +232,8 @@ try_again: LAYOUT_DYNAMIC) { temp_list = list_create(NULL); list_push(temp_list, record); - num_block_to_free++; - slurm_mutex_unlock( - &block_state_mutex); free_block_list(temp_list); - slurm_mutex_lock( - &block_state_mutex); + num_block_to_free++; list_destroy(temp_list); } break; @@ -422,6 +422,8 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_block_bitmap, buf, min_nodes, max_nodes); + if(bluegene_layout_mode == LAYOUT_DYNAMIC) + slurm_mutex_lock(&create_dynamic_mutex); rc = _find_best_block_match(job_ptr, slurm_block_bitmap, min_nodes, max_nodes, spec, &record, test_only); @@ -432,8 +434,12 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_block_bitmap, select_g_set_jobinfo(job_ptr->select_jobinfo, SELECT_DATA_BLOCK_ID, "unassigned"); - /*FIX ME: isn't correct for small blocks */ - min_nodes *= bluegene_bp_node_cnt; + if(job_ptr->num_procs < bluegene_bp_node_cnt) { + i = procs_per_node/job_ptr->num_procs; + info("divide by %d",i); + } else + i = 1; + min_nodes *= bluegene_bp_node_cnt/i; select_g_set_jobinfo(job_ptr->select_jobinfo, SELECT_DATA_NODE_CNT, &min_nodes); @@ -472,6 +478,8 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_block_bitmap, "unassigned"); } } + if(bluegene_layout_mode == LAYOUT_DYNAMIC) + slurm_mutex_unlock(&create_dynamic_mutex); return rc; } diff --git a/src/plugins/select/bluegene/plugin/bg_job_run.c b/src/plugins/select/bluegene/plugin/bg_job_run.c index b448c5926b6..bb68e17f1e4 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_run.c +++ b/src/plugins/select/bluegene/plugin/bg_job_run.c @@ -186,14 +186,16 @@ static void _sync_agent(bg_update_t *bg_update_ptr) { bg_record_t * bg_record = NULL; - bg_record = find_bg_record(bg_update_ptr->bg_block_id); + bg_record = + find_bg_record_in_list(bg_list, bg_update_ptr->bg_block_id); if(!bg_record) { error("No block %s", bg_update_ptr->bg_block_id); return; } slurm_mutex_lock(&block_state_mutex); bg_record->job_running = bg_update_ptr->job_id; - list_push(bg_job_block_list, bg_record); + if(!block_exist_in_list(bg_job_block_list, bg_record)) + list_push(bg_job_block_list, bg_record); slurm_mutex_unlock(&block_state_mutex); if(bg_record->state == RM_PARTITION_READY) { @@ -235,7 +237,8 @@ static void _start_agent(bg_update_t *bg_update_ptr) slurm_mutex_lock(&job_start_mutex); - bg_record = find_bg_record(bg_update_ptr->bg_block_id); + bg_record = + find_bg_record_in_list(bg_list, bg_update_ptr->bg_block_id); if(!bg_record) { error("block %s not found in bg_list", bg_update_ptr->bg_block_id); @@ -282,6 +285,9 @@ static void _start_agent(bg_update_t *bg_update_ptr) /* wait for all necessary blocks to be freed */ while(num_block_to_free != num_block_freed) { sleep(1); + debug("got %d of %d freed", + num_block_freed, + num_block_to_free); } if(bg_record->job_running == -1) { @@ -420,7 +426,8 @@ static void _term_agent(bg_update_t *bg_update_ptr) } #endif /* remove the block's users */ - bg_record = find_bg_record(bg_update_ptr->bg_block_id); + bg_record = + find_bg_record_in_list(bg_list, bg_update_ptr->bg_block_id); if(bg_record) { debug("got the record %s user is %s", bg_record->bg_block_id, @@ -442,7 +449,8 @@ static void _term_agent(bg_update_t *bg_update_ptr) } slurm_mutex_lock(&block_state_mutex); - bg_record->job_running = -1; + if(bg_record->job_running != -2) + bg_record->job_running = -1; /*remove user from list */ if(bg_record->target_name) { @@ -667,14 +675,23 @@ extern int start_job(struct job_record *job_ptr) SELECT_DATA_BLOCK_ID, &(bg_update_ptr->bg_block_id)); select_g_get_jobinfo(job_ptr->select_jobinfo, SELECT_DATA_NODE_USE, &(bg_update_ptr->node_use)); - bg_record = find_bg_record(bg_update_ptr->bg_block_id); + bg_record = + find_bg_record_in_list(bg_list, bg_update_ptr->bg_block_id); if (bg_record) { job_ptr->num_procs = (bg_record->cpus_per_bp * bg_record->bp_count); slurm_mutex_lock(&block_state_mutex); bg_record->job_running = bg_update_ptr->job_id; - list_push(bg_job_block_list, bg_record); + if(!block_exist_in_list(bg_job_block_list, bg_record)) + list_push(bg_job_block_list, bg_record); + if(!block_exist_in_list(bg_booted_block_list, bg_record)) + list_push(bg_booted_block_list, bg_record); slurm_mutex_unlock(&block_state_mutex); + } else { + error("bg_record %s does exist, requested for job (%d)", + bg_update_ptr->bg_block_id, job_ptr->job_id); + _bg_list_del(bg_update_ptr); + return SLURM_ERROR; } info("Queue start of job %u in BG block %s", job_ptr->job_id, @@ -839,8 +856,11 @@ extern int boot_block(bg_record_t *bg_record) return SLURM_ERROR; } slurm_mutex_unlock(&api_file_mutex); - list_push(bg_booted_block_list, bg_record); - + slurm_mutex_lock(&block_state_mutex); + if(!block_exist_in_list(bg_booted_block_list, bg_record)) + list_push(bg_booted_block_list, bg_record); + slurm_mutex_unlock(&block_state_mutex); + rc = 0; while(rc < 10) { if(bg_record->state == RM_PARTITION_CONFIGURING) { @@ -862,8 +882,9 @@ extern int boot_block(bg_record_t *bg_record) last_bg_update = time(NULL); slurm_mutex_unlock(&block_state_mutex); #else - list_push(bg_booted_block_list, bg_record); slurm_mutex_lock(&block_state_mutex); + if(!block_exist_in_list(bg_booted_block_list, bg_record)) + list_push(bg_booted_block_list, bg_record); bg_record->state = RM_PARTITION_READY; last_bg_update = time(NULL); slurm_mutex_unlock(&block_state_mutex); diff --git a/src/plugins/select/bluegene/plugin/block_sys.c b/src/plugins/select/bluegene/plugin/block_sys.c index b996beff124..e385785e17e 100755 --- a/src/plugins/select/bluegene/plugin/block_sys.c +++ b/src/plugins/select/bluegene/plugin/block_sys.c @@ -145,7 +145,7 @@ static int _post_allocate(bg_record_t *bg_record) pm_partition_id_t block_id; struct passwd *pw_ent = NULL; /* Add partition record to the DB */ - debug("adding partition\n"); + debug2("adding partition\n"); slurm_mutex_lock(&api_file_mutex); for(i=0;i<MAX_ADD_RETRY; i++) { @@ -168,7 +168,7 @@ static int _post_allocate(bg_record_t *bg_record) } slurm_mutex_unlock(&api_file_mutex); - debug("done adding\n"); + debug2("done adding\n"); /* Get back the new partition id */ if ((rc = rm_get_data(bg_record->bg_block, RM_PartitionID, &block_id)) diff --git a/src/plugins/select/bluegene/plugin/bluegene.c b/src/plugins/select/bluegene/plugin/bluegene.c index 629788c3ac8..84e48bf0ec3 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.c +++ b/src/plugins/select/bluegene/plugin/bluegene.c @@ -29,7 +29,7 @@ #include <stdio.h> #define BUFSIZE 4096 -#define BITSZE 128 +#define BITSIZE 128 #define MMCS_POLL_TIME 120 /* poll MMCS for down switches and nodes * every 120 secs */ #define BG_POLL_TIME 0 /* poll bg blocks every 3 secs */ @@ -46,6 +46,7 @@ List bg_curr_block_list = NULL; /* current bg blocks in bluegene.conf*/ List bg_found_block_list = NULL; /* found bg blocks already on system */ List bg_job_block_list = NULL; /* jobs running in these blocks */ List bg_booted_block_list = NULL; /* blocks that are booted */ +List bg_freeing_list = NULL; /* blocks that being freed */ char *bluegene_blrts = NULL, *bluegene_linux = NULL, *bluegene_mloader = NULL; char *bluegene_ramdisk = NULL, *bridge_api_file = NULL; @@ -243,6 +244,30 @@ extern void destroy_bg_record(void *object) } } +extern int block_exist_in_list(List my_list, bg_record_t *bg_record) +{ + ListIterator itr = list_iterator_create(my_list); + bg_record_t *found_record = NULL; + int rc = 0; + + while ((found_record = (bg_record_t *) list_next(itr)) != NULL) { + if(bit_equal(bg_record->bitmap, found_record->bitmap) + && (bg_record->quarter == found_record->quarter) + && (bg_record->segment == found_record->segment)){ + debug("This partition %s %d %d" + "already exists here %s", + bg_record->nodes, + bg_record->quarter, + bg_record->segment, + found_record->bg_block_id); + rc = 1; + break; + } + } + list_iterator_destroy(itr); + return rc; +} + extern void process_nodes(bg_record_t *bg_record) { #ifdef HAVE_BG @@ -398,7 +423,7 @@ extern void copy_bg_record(bg_record_t *fir_record, bg_record_t *sec_record) sec_record->segment = fir_record->segment; } -extern bg_record_t *find_bg_record(char *bg_block_id) +extern bg_record_t *find_bg_record_in_list(List my_list, char *bg_block_id) { ListIterator itr; bg_record_t *bg_record = NULL; @@ -406,8 +431,9 @@ extern bg_record_t *find_bg_record(char *bg_block_id) if(!bg_block_id) return NULL; - if(bg_list) { - itr = list_iterator_create(bg_list); + if(my_list) { + slurm_mutex_lock(&block_state_mutex); + itr = list_iterator_create(my_list); while ((bg_record = (bg_record_t *) list_next(itr)) != NULL) { if(bg_record->bg_block_id) if (!strcmp(bg_record->bg_block_id, @@ -415,12 +441,13 @@ extern bg_record_t *find_bg_record(char *bg_block_id) break; } list_iterator_destroy(itr); + slurm_mutex_unlock(&block_state_mutex); if(bg_record) return bg_record; else return NULL; } else { - error("find_bg_record: no bg_list"); + error("find_bg_record_in_list: no list"); return NULL; } @@ -443,10 +470,7 @@ extern int update_block_user(bg_record_t *bg_record, int set) if((rc = remove_all_users(bg_record->bg_block_id, bg_record->target_name)) == REMOVE_USER_ERR) { - if(rc == INCONSISTENT_DATA - && bluegene_layout_mode == LAYOUT_DYNAMIC) - return 0; - error("Something happened removing " + error("1 Something happened removing " "users from block %s", bg_record->bg_block_id); return -1; @@ -545,6 +569,10 @@ extern int remove_all_users(char *bg_block_id, char *user_name) slurm_mutex_lock(&api_file_mutex); if ((rc = rm_get_partition(bg_block_id, &block_ptr)) != STATUS_OK) { slurm_mutex_unlock(&api_file_mutex); + if(rc == INCONSISTENT_DATA + && bluegene_layout_mode == LAYOUT_DYNAMIC) + return REMOVE_USER_FOUND; + error("rm_get_partition(%s): %s", bg_block_id, bg_err_str(rc)); @@ -601,9 +629,7 @@ extern int remove_all_users(char *bg_block_id, char *user_name) } } - info("Removing user %s from Block %s", - user, - bg_block_id); + info("Removing user %s from Block %s", user, bg_block_id); if ((rc = rm_remove_part_user(bg_block_id, user)) != STATUS_OK) { debug("user %s isn't on block %s", @@ -905,13 +931,12 @@ extern int create_defined_blocks(bg_layout_t overlapped) convert_node_use(bg_record->node_use)); } list_iterator_destroy(itr); + slurm_mutex_unlock(&block_state_mutex); } else { error("create_defined_blocks: no bg_list 4"); - slurm_mutex_unlock(&block_state_mutex); return SLURM_ERROR; } - slurm_mutex_unlock(&block_state_mutex); #endif /* not have HAVE_BG_FILES */ @@ -956,7 +981,7 @@ extern int create_dynamic_block(ba_request_t *request, List my_block_list) bitstr_t *my_bitmap = NULL; int geo[BA_SYSTEM_DIMENSIONS]; int i; - + slurm_mutex_lock(&block_state_mutex); reset_ba_system(); @@ -967,13 +992,14 @@ extern int create_dynamic_block(ba_request_t *request, List my_block_list) my_bitmap = bit_alloc(bit_size(bg_record->bitmap)); } - if(bg_record->bp_count>0 - && !bit_super_set(bg_record->bitmap, - my_bitmap)) { - bit_and(my_bitmap, bg_record->bitmap); + + if(bg_record->job_running != -2 + && !bit_super_set(bg_record->bitmap, my_bitmap)) { + bit_or(my_bitmap, bg_record->bitmap); + for(i=0; i<BA_SYSTEM_DIMENSIONS; i++) geo[i] = bg_record->geo[i]; - debug("adding %s %d%d%d %d%d%d", + debug2("adding %s %d%d%d %d%d%d", bg_record->nodes, bg_record->start[X], bg_record->start[Y], @@ -988,7 +1014,7 @@ extern int create_dynamic_block(ba_request_t *request, List my_block_list) if(!name) { debug("I was unable to make the " "requested block."); - FREE_NULL_BITMAP(my_bitmap); + bit_free(my_bitmap); slurm_mutex_unlock(&block_state_mutex); return SLURM_ERROR; } @@ -997,7 +1023,7 @@ extern int create_dynamic_block(ba_request_t *request, List my_block_list) } list_iterator_destroy(itr); if(my_bitmap) - FREE_NULL_BITMAP(my_bitmap); + bit_free(my_bitmap); } else { debug("No list was given"); } @@ -1027,29 +1053,41 @@ extern int create_dynamic_block(ba_request_t *request, List my_block_list) rc = SLURM_ERROR; goto finished; } - + if(!list_count(bg_list) || !my_block_list) { bg_record = NULL; goto no_list; } + /*Try to put block starting in the smallest of the exisiting blocks*/ itr = list_iterator_create(bg_list); while ((bg_record = (bg_record_t *) list_next(itr)) != NULL) { request->rotate_count = 0; request->elongate_count = 1; - - if((bg_record->job_running == -1) - && (bg_record->cpus_per_bp == procs_per_node + + if(!my_bitmap) { + my_bitmap = bit_alloc(bit_size(bg_record->bitmap)); + } + + if(bg_record->job_running == -1 + && (bg_record->quarter == (uint16_t) NO_VAL || (bg_record->quarter == 0 - && (bg_record->segment == 0 - || bg_record->segment == (uint16_t)NO_VAL)))) { + && (bg_record->segment == (uint16_t) NO_VAL + || bg_record->segment == 0)))) { + debug2("allocating %d%d%d %d", + bg_record->nodes, + request->start[X], + request->start[Y], + request->start[Z], + request->size); + for(i=0; i<BA_SYSTEM_DIMENSIONS; i++) request->start[i] = bg_record->start[i]; request->start_req = 1; rc = SLURM_SUCCESS; if (!allocate_block(request, results)){ debug2("allocate failure for size %d " - "midplanes", + "base partitions", request->size); rc = SLURM_ERROR; } else @@ -1063,7 +1101,7 @@ no_list: request->start_req = 0; rc = SLURM_SUCCESS; if (!allocate_block(request, results)){ - debug("allocate failure for size %d midplanes", + debug("allocate failure for size %d base partitions", request->size); rc = SLURM_ERROR; } @@ -1079,28 +1117,9 @@ no_list: request->conn_type, num_segment, num_quarter); while((bg_record = (bg_record_t *) list_pop(results)) != NULL) { - itr = list_iterator_create(bg_list); - while ((found_record = - (bg_record_t *) list_next(itr)) != NULL) { - if(bit_equal(bg_record->bitmap, - found_record->bitmap) - && (bg_record->quarter - == found_record->quarter) - && (bg_record->segment - == found_record->segment)){ - debug("This partition %s %d %d" - "already exists here %s", - bg_record->nodes, - bg_record->quarter, - bg_record->segment, - found_record->bg_block_id); - destroy_bg_record(bg_record); - break; - } - } - list_iterator_destroy(itr); - - if(!found_record) { + if(block_exist_in_list(bg_list, bg_record)) + destroy_bg_record(bg_record); + else { if(_add_block_db(bg_record, &block_inx) == SLURM_ERROR) goto finished; list_push(bg_list, bg_record); @@ -1205,6 +1224,7 @@ extern int create_full_system_block() error("I was unable to make the " "requested block."); rc = SLURM_ERROR; + destroy_bg_record(bg_record); goto no_total; } xfree(name); @@ -1212,6 +1232,7 @@ extern int create_full_system_block() #ifdef HAVE_BG_FILES if((rc = configure_block(bg_record)) == SLURM_ERROR) { error("unable to configure block in api"); + destroy_bg_record(bg_record); goto no_total; } #endif /* HAVE_BG_FILES */ @@ -1256,19 +1277,23 @@ extern int bg_free_block(bg_record_t *bg_record) } while (1) { + if(!bg_record) { + error("bg_free_block: there was no bg_record"); + return SLURM_ERROR; + } if (bg_record->state != NO_VAL && bg_record->state != RM_PARTITION_FREE && bg_record->state != RM_PARTITION_DEALLOCATING) { #ifdef HAVE_BG_FILES - debug("pm_destroy %s",bg_record->bg_block_id); + debug2("pm_destroy %s",bg_record->bg_block_id); slurm_mutex_lock(&api_file_mutex); rc = pm_destroy_partition(bg_record->bg_block_id); + slurm_mutex_unlock(&api_file_mutex); if (rc != STATUS_OK) { if(rc == PARTITION_NOT_FOUND) { debug("block %s is not found", bg_record->bg_block_id); - slurm_mutex_unlock(&api_file_mutex); break; } else if(rc == INCOMPATIBLE_STATE) { debug2("pm_destroy_partition(%s): %s " @@ -1276,7 +1301,6 @@ extern int bg_free_block(bg_record_t *bg_record) bg_record->bg_block_id, bg_err_str(rc), bg_record->state); - continue; } error("pm_destroy_partition(%s): %s " @@ -1284,7 +1308,6 @@ extern int bg_free_block(bg_record_t *bg_record) bg_record->bg_block_id, bg_err_str(rc), bg_record->state); } - slurm_mutex_unlock(&api_file_mutex); #else bg_record->state = RM_PARTITION_FREE; #endif @@ -1306,6 +1329,12 @@ extern void *mult_free_block(void *args) { bg_record_t *bg_record = NULL; + slurm_mutex_lock(&freed_cnt_mutex); + if ((bg_freeing_list == NULL) + && ((bg_freeing_list = list_create(destroy_bg_record)) == NULL)) + fatal("malloc failure in bg_freeing_list"); + slurm_mutex_unlock(&freed_cnt_mutex); + /* * Don't just exit when there is no work left. Creating * pthreads from within a dynamically linked object (plugin) @@ -1329,6 +1358,8 @@ extern void *mult_free_block(void *args) } slurm_mutex_lock(&freed_cnt_mutex); free_cnt--; + if(bg_freeing_list) + list_destroy(bg_freeing_list); slurm_mutex_unlock(&freed_cnt_mutex); return NULL; } @@ -1337,8 +1368,15 @@ extern void *mult_free_block(void *args) extern void *mult_destroy_block(void *args) { bg_record_t *bg_record = NULL; + bg_record_t *found_record = NULL; int rc; - + char *temp_name = NULL; + slurm_mutex_lock(&freed_cnt_mutex); + if ((bg_freeing_list == NULL) + && ((bg_freeing_list = list_create(destroy_bg_record)) == NULL)) + fatal("malloc failure in bg_freeing_list"); + slurm_mutex_unlock(&freed_cnt_mutex); + /* * Don't just exit when there is no work left. Creating * pthreads from within a dynamically linked object (plugin) @@ -1354,40 +1392,62 @@ extern void *mult_destroy_block(void *args) continue; } slurm_mutex_lock(&block_state_mutex); - if(bg_record->job_running == -2) + if(bg_record->job_running == -2) { + slurm_mutex_unlock(&block_state_mutex); goto already_here; + } bg_record->job_running = -2; slurm_mutex_unlock(&block_state_mutex); + + slurm_mutex_lock(&freed_cnt_mutex); + if(find_bg_record_in_list(bg_freeing_list, + bg_record->bg_block_id)) { + slurm_mutex_unlock(&freed_cnt_mutex); + goto already_here; + } + + found_record = xmalloc(sizeof(bg_record_t)); + found_record->bg_block_id = xstrdup(bg_record->bg_block_id); + list_push(bg_freeing_list, found_record); + slurm_mutex_unlock(&freed_cnt_mutex); + debug("removing the jobs on block %s\n", bg_record->bg_block_id); term_jobs_on_block(bg_record->bg_block_id); debug2("destroying %s", (char *)bg_record->bg_block_id); - bg_free_block(bg_record); - remove_from_bg_list(bg_list, bg_record); - if(!bg_record->bg_block_id) { - error("This one didn't have anything"); + if(bg_free_block(bg_record) == SLURM_ERROR) goto already_here; - } + remove_from_bg_list(bg_list, bg_record); + #ifdef HAVE_BG_FILES debug("removing from database %s", - (char *)bg_record->bg_block_id); + (char *)found_record->bg_block_id); slurm_mutex_lock(&api_file_mutex); - rc = rm_remove_partition(bg_record->bg_block_id); + rc = rm_remove_partition(found_record->bg_block_id); if (rc != STATUS_OK) { - error("rm_remove_partition(%s): %s", - bg_record->bg_block_id, - bg_err_str(rc)); + if(rc == PARTITION_NOT_FOUND) { + debug("block %s is not found", + found_record->bg_block_id); + } else { + error("rm_remove_partition(%s): %s", + found_record->bg_block_id, + bg_err_str(rc)); + } } else debug("done\n"); - slurm_mutex_unlock(&api_file_mutex); - + slurm_mutex_unlock(&api_file_mutex); #endif slurm_mutex_lock(&block_state_mutex); if(blocks_are_created) destroy_bg_record(bg_record); + destroy_bg_record(found_record); slurm_mutex_unlock(&block_state_mutex); + slurm_mutex_lock(&freed_cnt_mutex); + remove_from_bg_list(bg_freeing_list, found_record); + slurm_mutex_unlock(&freed_cnt_mutex); + already_here: slurm_mutex_lock(&freed_cnt_mutex); num_block_freed++; @@ -1395,6 +1455,8 @@ extern void *mult_destroy_block(void *args) } slurm_mutex_lock(&freed_cnt_mutex); destroy_cnt--; + if(bg_freeing_list) + list_destroy(bg_freeing_list); slurm_mutex_unlock(&freed_cnt_mutex); return NULL; } @@ -1408,7 +1470,6 @@ extern int free_block_list(List delete_list) pthread_attr_t attr_agent; pthread_t thread_agent; - slurm_mutex_lock(&freed_cnt_mutex); /* set up which list to push onto */ if(bluegene_layout_mode == LAYOUT_DYNAMIC) { block_list = &bg_destroy_block_list; @@ -1417,6 +1478,7 @@ extern int free_block_list(List delete_list) block_list = &bg_free_block_list; count = &free_cnt; } + slurm_mutex_lock(&freed_cnt_mutex); if ((*block_list == NULL) && ((*block_list = list_create(NULL)) == NULL)) fatal("malloc failure in free_block_list"); @@ -1811,124 +1873,104 @@ static int _validate_config_nodes(void) if(!bg_recover) return SLURM_ERROR; - if(bg_list) { - itr_conf = list_iterator_create(bg_list); - while ((bg_record = (bg_record_t*) list_next(itr_conf))) { - /* translate hostlist to ranged - string for consistent format - search here - */ - node_use = SELECT_COPROCESSOR_MODE; - if(bg_curr_block_list) { - itr_curr = list_iterator_create( - bg_curr_block_list); - while ((init_bg_record = (bg_record_t*) - list_next(itr_curr)) - != NULL) { - if (strcasecmp(bg_record->nodes, - init_bg_record->nodes)) - continue; /* wrong nodes */ - if (bg_record->conn_type - != init_bg_record->conn_type) - continue; /* wrong conn_type */ - if(bg_record->quarter != - init_bg_record->quarter) - continue; /* wrong quart */ - if(bg_record->segment != - init_bg_record->segment) - continue; /* wrong segment */ - copy_bg_record(init_bg_record, - bg_record); - break; - } - list_iterator_destroy(itr_curr); - } else { - error("_validate_config_nodes: " - "no bg_curr_block_list"); - } - if (!bg_record->bg_block_id) { - format_node_name(bg_record, tmp_char); - - info("Block found in bluegene.conf to be " - "created: Nodes:%s", - tmp_char); - rc = SLURM_ERROR; - } else { - list_append(bg_found_block_list, bg_record); - format_node_name(bg_record, tmp_char); + itr_conf = list_iterator_create(bg_list); + while ((bg_record = (bg_record_t*) list_next(itr_conf))) { + /* translate hostlist to ranged + string for consistent format + search here + */ + node_use = SELECT_COPROCESSOR_MODE; + itr_curr = list_iterator_create(bg_curr_block_list); + while ((init_bg_record = (bg_record_t*) + list_next(itr_curr)) + != NULL) { + if (strcasecmp(bg_record->nodes, + init_bg_record->nodes)) + continue; /* wrong nodes */ + if (bg_record->conn_type + != init_bg_record->conn_type) + continue; /* wrong conn_type */ + if(bg_record->quarter != + init_bg_record->quarter) + continue; /* wrong quart */ + if(bg_record->segment != + init_bg_record->segment) + continue; /* wrong segment */ + copy_bg_record(init_bg_record, + bg_record); + break; + } + list_iterator_destroy(itr_curr); + + if (!bg_record->bg_block_id) { + format_node_name(bg_record, tmp_char); - info("Found existing BG BlockID:%s " - "Nodes:%s Conn:%s", - bg_record->bg_block_id, - tmp_char, - convert_conn_type(bg_record->conn_type)); - if((bg_record->state == RM_PARTITION_READY) - || (bg_record->state - == RM_PARTITION_CONFIGURING)) - list_push(bg_booted_block_list, - bg_record); - } - } - list_iterator_destroy(itr_conf); - if(bluegene_layout_mode == LAYOUT_DYNAMIC) - goto finished; - if(bg_curr_block_list) { - itr_curr = list_iterator_create(bg_curr_block_list); - while ((init_bg_record = (bg_record_t*) - list_next(itr_curr)) - != NULL) { - debug3("%s %d %d%d%d %d%d%d", - init_bg_record->bg_block_id, - init_bg_record->bp_count, - init_bg_record->geo[X], - init_bg_record->geo[Y], - init_bg_record->geo[Z], - DIM_SIZE[X], - DIM_SIZE[Y], - DIM_SIZE[Z]); - if ((init_bg_record->geo[X] == DIM_SIZE[X]) - && (init_bg_record->geo[Y] == DIM_SIZE[Y]) - && (init_bg_record->geo[Z] == DIM_SIZE[Z])) - { - bg_record = (bg_record_t*) - xmalloc(sizeof(bg_record_t)); - list_append(bg_list, bg_record); - list_append(bg_found_block_list, - bg_record); - copy_bg_record(init_bg_record, - bg_record); - bg_record->full_block = 1; - debug("full system %s", - bg_record->bg_block_id); - format_node_name(bg_record, tmp_char); - info("Found existing BG " - "BlockID:%s " - "Nodes:%s Conn:%s", - bg_record->bg_block_id, - tmp_char, - convert_conn_type( - bg_record->conn_type)); - if((bg_record->state - == RM_PARTITION_READY) - || (bg_record->state - == RM_PARTITION_CONFIGURING)) - list_push(bg_booted_block_list, - bg_record); - break; - } - } - list_iterator_destroy(itr_curr); + info("Block found in bluegene.conf to be " + "created: Nodes:%s", + tmp_char); + rc = SLURM_ERROR; } else { - error("_validate_config_nodes: " - "no bg_curr_block_list 2"); + list_push(bg_found_block_list, bg_record); + format_node_name(bg_record, tmp_char); + + info("Found existing BG BlockID:%s Nodes:%s Conn:%s", + bg_record->bg_block_id, + tmp_char, + convert_conn_type(bg_record->conn_type)); + if(((bg_record->state == RM_PARTITION_READY) + || (bg_record->state == RM_PARTITION_CONFIGURING)) + && !block_exist_in_list(bg_booted_block_list, + bg_record)) + list_push(bg_booted_block_list, bg_record); + } + } + list_iterator_destroy(itr_conf); + if(bluegene_layout_mode == LAYOUT_DYNAMIC) + goto finished; + + itr_curr = list_iterator_create(bg_curr_block_list); + while ((init_bg_record = (bg_record_t*) list_next(itr_curr)) + != NULL) { + debug3("%s %d %d%d%d %d%d%d", + init_bg_record->bg_block_id, + init_bg_record->bp_count, + init_bg_record->geo[X], + init_bg_record->geo[Y], + init_bg_record->geo[Z], + DIM_SIZE[X], + DIM_SIZE[Y], + DIM_SIZE[Z]); + if ((init_bg_record->geo[X] == DIM_SIZE[X]) + && (init_bg_record->geo[Y] == DIM_SIZE[Y]) + && (init_bg_record->geo[Z] == DIM_SIZE[Z])) + { + bg_record = (bg_record_t*) + xmalloc(sizeof(bg_record_t)); + list_push(bg_list, bg_record); + list_push(bg_found_block_list, bg_record); + copy_bg_record(init_bg_record, bg_record); + bg_record->full_block = 1; + debug("full system %s", + bg_record->bg_block_id); + format_node_name(bg_record, tmp_char); + info("Found existing BG BlockID:%s Nodes:%s Conn:%s", + bg_record->bg_block_id, + tmp_char, + convert_conn_type(bg_record->conn_type)); + if(((bg_record->state == RM_PARTITION_READY) + || (bg_record->state == RM_PARTITION_CONFIGURING)) + && !block_exist_in_list(bg_booted_block_list, + bg_record)) + list_push(bg_booted_block_list, bg_record); + break; } - finished: - if(list_count(bg_list) == list_count(bg_curr_block_list)) - rc = SLURM_SUCCESS; - } else { - error("_validate_config_nodes: no bg_list"); - rc = SLURM_ERROR; } + list_iterator_destroy(itr_curr); + +finished: + if(list_count(bg_list) == list_count(bg_curr_block_list)) + rc = SLURM_SUCCESS; + #endif return rc; @@ -2037,7 +2079,7 @@ static int _delete_old_blocks(void) if ((bg_destroy_block_list == NULL) && ((bg_destroy_block_list = list_create(NULL)) == NULL)) fatal("malloc failure in block_list"); - + itr_curr = list_iterator_create(bg_destroy_list); while ((init_record = (bg_record_t*) list_next(itr_curr))) { list_push(bg_destroy_block_list, init_record); @@ -2069,7 +2111,7 @@ static int _delete_old_blocks(void) } list_iterator_destroy(itr_curr); slurm_mutex_unlock(&freed_cnt_mutex); - + retries=30; while(num_block_to_free != num_block_freed) { _update_bg_record_state(bg_destroy_list); @@ -2166,7 +2208,7 @@ static int _split_block(bg_record_t *bg_record, int procs, int *block_inx) error("you asked for something that was already this size"); return SLURM_ERROR; } - debug("asking for %d 32s from a %d block", + debug2("asking for %d 32s from a %d block", num_segment, bg_record->node_cnt); small_count = num_segment+num_quarter; @@ -2191,11 +2233,15 @@ static int _split_block(bg_record_t *bg_record, int procs, int *block_inx) found_record = _create_small_record(bg_record, quarter, segment); - if(_add_block_db(found_record, block_inx) == SLURM_ERROR) - return SLURM_ERROR; - - list_push(bg_list, found_record); - print_bg_record(found_record); + if(block_exist_in_list(bg_list, found_record)) { + destroy_bg_record(found_record); + } else { + if(_add_block_db(found_record, block_inx) + == SLURM_ERROR) + return SLURM_ERROR; + list_push(bg_list, found_record); + print_bg_record(found_record); + } node_cnt += bluegene_bp_node_cnt/small_size; if(node_cnt == 128) { node_cnt = 0; @@ -2217,174 +2263,160 @@ static int _breakup_blocks(ba_request_t *request, List my_block_list, uint16_t last_quarter = (uint16_t) NO_VAL; char tmp_char[256]; - debug("proc count = %d size = %d", + debug2("proc count = %d size = %d", request->procs, request->size); - if(bg_list) { - itr = list_iterator_create(bg_list); - - while ((bg_record = (bg_record_t *) list_next(itr)) - != NULL) { - if(bg_record->job_running > -1) - continue; - if(bg_record->state != RM_PARTITION_FREE) - continue; - proc_cnt = bg_record->bp_count * - bg_record->cpus_per_bp; - if(proc_cnt == request->procs) { - debug2("found it here %s, %s", - bg_record->bg_block_id, - bg_record->nodes); - list_iterator_destroy(itr); - request->save_name = - xmalloc(sizeof(char) * 4); - sprintf(request->save_name, - "%d%d%d\0", + itr = list_iterator_create(bg_list); + while ((bg_record = (bg_record_t *) list_next(itr)) != NULL) { + if(bg_record->job_running != -1) + continue; + if(bg_record->state != RM_PARTITION_FREE) + continue; + proc_cnt = bg_record->bp_count * + bg_record->cpus_per_bp; + if(proc_cnt == request->procs) { + debug2("found it here %s, %s", + bg_record->bg_block_id, + bg_record->nodes); + request->save_name = xmalloc(sizeof(char) * 4); + sprintf(request->save_name, "%d%d%d\0", + bg_record->start[X], + bg_record->start[Y], + bg_record->start[Z]); + rc = SLURM_SUCCESS; + goto finished; + } + if(bg_record->node_cnt > bluegene_bp_node_cnt) + continue; + if(proc_cnt < request->procs) { + if(last_quarter != bg_record->quarter){ + last_quarter = bg_record->quarter; + total_proc_cnt = proc_cnt; + } else { + total_proc_cnt += proc_cnt; + } + debug2("1 got %d on quarter %d", + total_proc_cnt, last_quarter); + if(total_proc_cnt == request->procs) { + request->save_name = xmalloc(sizeof(char) * 4); + sprintf(request->save_name, "%d%d%d\0", bg_record->start[X], bg_record->start[Y], bg_record->start[Z]); - rc = SLURM_SUCCESS; - goto finished; - } - if(bg_record->node_cnt > bluegene_bp_node_cnt) - continue; - if(proc_cnt < request->procs) { - if(last_quarter != bg_record->quarter){ - last_quarter = - bg_record->quarter; - total_proc_cnt = proc_cnt; - } else { - total_proc_cnt += proc_cnt; + if(!my_block_list) { + rc = SLURM_SUCCESS; + goto finished; } - debug2("1 got %d on quarter %d", - total_proc_cnt, last_quarter); - if(total_proc_cnt == request->procs) { - request->save_name = - xmalloc(sizeof(char) * 4); - sprintf(request->save_name, - "%d%d%d\0", - bg_record->start[X], - bg_record->start[Y], - bg_record->start[Z]); - list_iterator_destroy(itr); - if(!my_block_list) { - rc = SLURM_SUCCESS; - goto finished; - } - bg_record = _create_small_record( - bg_record, - last_quarter, - (uint16_t) NO_VAL); + bg_record = _create_small_record( + bg_record, + last_quarter, + (uint16_t) NO_VAL); + if(block_exist_in_list(bg_list, bg_record)) + destroy_bg_record(bg_record); + else { if(_add_block_db(bg_record, block_inx) == SLURM_ERROR) return SLURM_ERROR; - list_push(bg_list, bg_record); print_bg_record(bg_record); - rc = SLURM_SUCCESS; - goto finished; } - continue; + rc = SLURM_SUCCESS; + goto finished; } - break; - } - if(bg_record) { - debug("got one on the first pass"); - goto found_one; + continue; } - list_iterator_reset(itr); - last_quarter = (uint16_t) NO_VAL; - while ((bg_record = (bg_record_t *) list_next(itr)) - != NULL) { - if(bg_record->job_running > -1) - continue; - proc_cnt = bg_record->bp_count * - bg_record->cpus_per_bp; - if(proc_cnt == request->procs) { - debug2("found it here %s, %s", - bg_record->bg_block_id, - bg_record->nodes); - list_iterator_destroy(itr); - request->save_name = - xmalloc(sizeof(char) * 4); - sprintf(request->save_name, - "%d%d%d\0", + break; + } + if(bg_record) { + debug2("got one on the first pass"); + goto found_one; + } + list_iterator_reset(itr); + last_quarter = (uint16_t) NO_VAL; + while ((bg_record = (bg_record_t *) list_next(itr)) + != NULL) { + if(bg_record->job_running != -1) + continue; + proc_cnt = bg_record->bp_count * bg_record->cpus_per_bp; + if(proc_cnt == request->procs) { + debug2("found it here %s, %s", + bg_record->bg_block_id, + bg_record->nodes); + request->save_name = xmalloc(sizeof(char) * 4); + sprintf(request->save_name, "%d%d%d\0", + bg_record->start[X], + bg_record->start[Y], + bg_record->start[Z]); + rc = SLURM_SUCCESS; + goto finished; + } + + if(bg_record->node_cnt > bluegene_bp_node_cnt) + continue; + if(proc_cnt < request->procs) { + if(last_quarter != bg_record->quarter){ + last_quarter = bg_record->quarter; + total_proc_cnt = proc_cnt; + } else { + total_proc_cnt += proc_cnt; + } + debug2("got %d on quarter %d", + total_proc_cnt, last_quarter); + if(total_proc_cnt == request->procs) { + request->save_name = xmalloc(sizeof(char) * 4); + sprintf(request->save_name, "%d%d%d\0", bg_record->start[X], bg_record->start[Y], bg_record->start[Z]); - rc = SLURM_SUCCESS; - goto finished; - } - - if(bg_record->node_cnt > bluegene_bp_node_cnt) - continue; - if(proc_cnt < request->procs) { - if(last_quarter != bg_record->quarter){ - last_quarter = - bg_record->quarter; - total_proc_cnt = proc_cnt; - } else { - total_proc_cnt += proc_cnt; + if(!my_block_list) { + rc = SLURM_SUCCESS; + goto finished; } - debug2("got %d on quarter %d", - total_proc_cnt, last_quarter); - if(total_proc_cnt == request->procs) { - request->save_name = - xmalloc(sizeof(char) * 4); - sprintf(request->save_name, - "%d%d%d\0", - bg_record->start[X], - bg_record->start[Y], - bg_record->start[Z]); - list_iterator_destroy(itr); - if(!my_block_list) { - rc = SLURM_SUCCESS; - goto finished; - } - bg_record = _create_small_record( - bg_record, - last_quarter, - (uint16_t) NO_VAL); + bg_record = _create_small_record( + bg_record, + last_quarter, + (uint16_t) NO_VAL); + if(block_exist_in_list(bg_list, bg_record)) + destroy_bg_record(bg_record); + else { if(_add_block_db(bg_record, block_inx) == SLURM_ERROR) return SLURM_ERROR; - list_push(bg_list, bg_record); print_bg_record(bg_record); - rc = SLURM_SUCCESS; - goto finished; } - continue; + rc = SLURM_SUCCESS; + goto finished; } - - break; - } - found_one: - if(bg_record) { - format_node_name(bg_record, tmp_char); + continue; + } + break; + } +found_one: + if(bg_record) { + format_node_name(bg_record, tmp_char); - debug("going to split %s, %s", - bg_record->bg_block_id, - tmp_char); - if(_split_block(bg_record, request->procs, - block_inx) - == SLURM_SUCCESS) { - request->save_name = - xmalloc(sizeof(char) * 4); - sprintf(request->save_name, - "%d%d%d\0", - bg_record->start[X], - bg_record->start[Y], - bg_record->start[Z]); - } - list_iterator_destroy(itr); + debug2("going to split %s, %s", + bg_record->bg_block_id, + tmp_char); + request->save_name = xmalloc(sizeof(char) * 4); + sprintf(request->save_name, "%d%d%d\0", + bg_record->start[X], + bg_record->start[Y], + bg_record->start[Z]); + if(!my_block_list) { rc = SLURM_SUCCESS; - goto finished; + goto finished; } - list_iterator_destroy(itr); + _split_block(bg_record, request->procs, block_inx); + rc = SLURM_SUCCESS; + goto finished; } + finished: + list_iterator_destroy(itr); + return rc; } @@ -2451,7 +2483,7 @@ static int _add_bg_record(List records, char *nodes, bg_record->segment = (uint16_t)NO_VAL; /* bg_record->boot_state = 0; Implicit */ /* bg_record->state = 0; Implicit */ - debug("asking for %s %d %d",nodes, num_quarter, num_segment); + debug2("asking for %s %d %d", nodes, num_quarter, num_segment); len = strlen(nodes); i=0; while((nodes[i] != '[' && (nodes[i] > 57 || nodes[i] < 48)) @@ -2479,7 +2511,7 @@ static int _add_bg_record(List records, char *nodes, bg_record->job_running = -1; if(bg_record->conn_type != SELECT_SMALL) - list_append(records, bg_record); + list_push(records, bg_record); else { if(num_segment==0 && num_quarter==0) { info("No specs given for this small block, " @@ -2526,7 +2558,7 @@ static int _add_bg_record(List records, char *nodes, quarter, segment); - list_append(records, found_record); + list_push(records, found_record); node_cnt += bluegene_bp_node_cnt/small_size; if(node_cnt == 128) { node_cnt = 0; diff --git a/src/plugins/select/bluegene/plugin/bluegene.h b/src/plugins/select/bluegene/plugin/bluegene.h index 73fd4fbd592..df01c7787ae 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.h +++ b/src/plugins/select/bluegene/plugin/bluegene.h @@ -130,6 +130,7 @@ extern List bg_curr_block_list; /* Initial bg block state */ extern List bg_list; /* List of configured BG blocks */ extern List bg_job_block_list; /* jobs running in these blocks */ extern List bg_booted_block_list; /* blocks that are booted */ +extern List bg_freeing_list; /* blocks that being freed */ extern bool agent_fini; extern pthread_mutex_t block_state_mutex; @@ -158,11 +159,12 @@ extern void fini_bg(void); /* Log a bg_record's contents */ extern void print_bg_record(bg_record_t *record); extern void destroy_bg_record(void *object); +extern int block_exist_in_list(List my_list, bg_record_t *bg_record); extern void process_nodes(bg_record_t *bg_record); extern void copy_bg_record(bg_record_t *fir_record, bg_record_t *sec_record); -/* return bg_record from bg_list */ -extern bg_record_t *find_bg_record(char *bg_block_id); +/* return bg_record from a bg_list */ +extern bg_record_t *find_bg_record_in_list(List my_list, char *bg_block_id); /* change username of a block bg_record_t target_name needs to be updated before call of function. -- GitLab