From f0527ca74ba0bdb72b30c66fbd8fa78b77632bc3 Mon Sep 17 00:00:00 2001 From: Danny Auble <da@llnl.gov> Date: Tue, 23 May 2006 23:07:29 +0000 Subject: [PATCH] mods to sfree to make it more robust --- src/plugins/select/bluegene/plugin/bluegene.c | 9 ++-- src/plugins/select/bluegene/plugin/opts.c | 20 ++++--- src/plugins/select/bluegene/plugin/sfree.c | 53 +++++++++++-------- src/plugins/select/bluegene/plugin/sfree.h | 1 + 4 files changed, 51 insertions(+), 32 deletions(-) diff --git a/src/plugins/select/bluegene/plugin/bluegene.c b/src/plugins/select/bluegene/plugin/bluegene.c index c38351512e5..35d9131b5ce 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.c +++ b/src/plugins/select/bluegene/plugin/bluegene.c @@ -842,11 +842,12 @@ extern int create_defined_blocks(bg_layout_t overlapped) #ifdef HAVE_BG_FILES bg_record_t *found_record = NULL; - char *name = NULL; int geo[BA_SYSTEM_DIMENSIONS]; int i; ListIterator itr_found; init_wires(); +#else + char *name = NULL; #endif slurm_mutex_lock(&block_state_mutex); reset_ba_system(); @@ -1016,12 +1017,14 @@ extern int create_dynamic_block(ba_request_t *request, List my_block_list) bg_record_t *bg_record = NULL; List results = NULL; uint16_t num_quarter=0, num_nodecard=0; - char *name = NULL; bitstr_t *my_bitmap = NULL; int geo[BA_SYSTEM_DIMENSIONS]; int i; blockreq_t blockreq; - +#ifndef HAVE_BG_FILES + char *name = NULL; +#endif + slurm_mutex_lock(&block_state_mutex); reset_ba_system(); diff --git a/src/plugins/select/bluegene/plugin/opts.c b/src/plugins/select/bluegene/plugin/opts.c index 75c7f7ed50b..300e27578f6 100644 --- a/src/plugins/select/bluegene/plugin/opts.c +++ b/src/plugins/select/bluegene/plugin/opts.c @@ -44,6 +44,7 @@ void parse_command_line(int argc, char *argv[]) {"all", no_argument, 0, 'a'}, {"bgblock", required_argument, 0, 'b'}, {"partition", required_argument, 0, 'p'}, + {"wait", no_argument, 0, 'w'}, {"version", no_argument, 0, 'V'}, {"help", no_argument, 0, 'h'}, {"usage", no_argument, 0, 'u'}, @@ -51,7 +52,7 @@ void parse_command_line(int argc, char *argv[]) }; while ((opt_char = - getopt_long(argc, argv, "ab:hup:V", + getopt_long(argc, argv, "ab:hup:wV", long_options, &option_index)) != -1) { switch (opt_char) { case (int) '?': @@ -69,6 +70,9 @@ void parse_command_line(int argc, char *argv[]) case (int) 'p': bg_block_id = optarg; break; + case (int) 'w': + wait_full = true; + break; case (int) 'h': case (int) OPT_LONG_HELP: _help(); @@ -114,7 +118,7 @@ static void _print_version(void) static void _usage(void) { - printf("Usage: sfree [-huVa] [-b]\n"); + printf("Usage: sfree [-huwVa] [-b]\n"); } static void _help(void) @@ -125,9 +129,13 @@ static void _help(void) printf("\ Usage: sfree [OPTIONS]\n\ -b, --bgblock free specific bgblock named\n\ - -a, --all free all bgblocks\n\ - -V, --version output version information and exit\n\ + -a, --all free all bgblocks\n\ + -w, --wait wait to make sure all blocks have been freed\n\ + (Otherwise sfree will start the free and once\n\ + sure the block(s) have started to free will\n\ + exit)\n\ + -V, --version output version information and exit\n\ \nHelp options:\n\ - --help show this help message\n\ - --usage display brief usage message\n"); + --help show this help message\n\ + --usage display brief usage message\n"); } diff --git a/src/plugins/select/bluegene/plugin/sfree.c b/src/plugins/select/bluegene/plugin/sfree.c index 310e24be4d5..6052dece6c1 100644 --- a/src/plugins/select/bluegene/plugin/sfree.c +++ b/src/plugins/select/bluegene/plugin/sfree.c @@ -36,6 +36,7 @@ int all_blocks = 0; char *bg_block_id = NULL; +bool wait_full = false; #ifdef HAVE_BG_FILES @@ -222,7 +223,15 @@ int main(int argc, char *argv[]) free(bg_block_id); - delete_record->state = NO_VAL; + if ((rc = rm_get_data(block_ptr, + RM_PartitionState, + &delete_record->state)) + != STATUS_OK) { + error("rm_get_data" + "(RM_PartitionState): %s", + _bg_err_str(rc)); + } + list_push(delete_record_list, delete_record); slurm_attr_init(&attr_agent); @@ -272,7 +281,7 @@ static int _free_block(delete_record_t *delete_record) info("freeing bgblock %s", delete_record->bg_block_id); _term_jobs_on_block(delete_record->bg_block_id); while (1) { - if (delete_record->state != NO_VAL + if (delete_record->state != (rm_partition_state_t)NO_VAL && delete_record->state != RM_PARTITION_FREE && delete_record->state != RM_PARTITION_DEALLOCATING) { info("pm_destroy %s",delete_record->bg_block_id); @@ -289,10 +298,13 @@ static int _free_block(delete_record_t *delete_record) } } - if(i>5) - delete_record->state = RM_PARTITION_FREE; - i++; - + if(!wait_full) { + if(i>5) + delete_record->state = RM_PARTITION_FREE; + + i++; + } + if ((delete_record->state == RM_PARTITION_FREE) || (delete_record->state == RM_PARTITION_ERROR)) break; @@ -312,7 +324,8 @@ static int _update_bg_record_state() rm_partition_t *block_ptr = NULL; delete_record_t *delete_record = NULL; ListIterator itr; - + int found = 0; + if ((rc = rm_get_partitions_info(block_state, &block_list)) != STATUS_OK) { error("rm_get_partitions_info(): %s", _bg_err_str(rc)); @@ -322,7 +335,7 @@ static int _update_bg_record_state() if ((rc = rm_get_data(block_list, RM_PartListSize, &num_blocks)) != STATUS_OK) { error("rm_get_data(RM_PartListSize): %s", _bg_err_str(rc)); - state = NO_VAL; + state = -1; num_blocks = 0; } @@ -334,7 +347,7 @@ static int _update_bg_record_state() != STATUS_OK) { error("rm_get_data(RM_PartListNextPart): %s", _bg_err_str(rc)); - state = NO_VAL; + state = -1; break; } } else { @@ -344,7 +357,7 @@ static int _update_bg_record_state() != STATUS_OK) { error("rm_get_data(RM_PartListFirstPart: %s", _bg_err_str(rc)); - state = NO_VAL; + state = -1; break; } } @@ -354,7 +367,7 @@ static int _update_bg_record_state() != STATUS_OK) { error("rm_get_data(RM_PartitionID): %s", _bg_err_str(rc)); - state = NO_VAL; + state = -1; break; } @@ -362,7 +375,6 @@ static int _update_bg_record_state() error("No Partition ID was returned from database"); continue; } - itr = list_iterator_create(delete_record_list); while ((delete_record = (delete_record_t*) list_next(itr))) { @@ -371,17 +383,8 @@ static int _update_bg_record_state() if(strcmp(delete_record->bg_block_id, name)) { continue; } + state = 1; - if(state == NO_VAL) - goto clean_up; - else if(j>=num_blocks) { - error("This bgblock, %s, " - "doesn't exist in MMCS", - bg_block_id); - state = NO_VAL; - goto clean_up; - } - if ((rc = rm_get_data(block_ptr, RM_PartitionState, &delete_record->state)) @@ -395,7 +398,11 @@ static int _update_bg_record_state() list_iterator_destroy(itr); free(name); } -clean_up: + if(state != 1) { + error("The requested block %s was not found in system.", + bg_block_id); + num_block_to_free = num_block_freed; + } if ((rc = rm_free_partition_list(block_list)) != STATUS_OK) { error("rm_free_partition_list(): %s", _bg_err_str(rc)); } diff --git a/src/plugins/select/bluegene/plugin/sfree.h b/src/plugins/select/bluegene/plugin/sfree.h index dd49a418547..46ce804a37e 100644 --- a/src/plugins/select/bluegene/plugin/sfree.h +++ b/src/plugins/select/bluegene/plugin/sfree.h @@ -45,6 +45,7 @@ typedef void (*sighandler_t) (int); extern char *bg_block_id; extern int all_blocks; +extern bool wait_full; extern void parse_command_line(int argc, char *argv[]); -- GitLab