From aa19c74cc0ef114daa9ce0f3b2a3fb9fe35b4bbb Mon Sep 17 00:00:00 2001
From: Danny Auble <da@llnl.gov>
Date: Mon, 17 Oct 2005 21:38:49 +0000
Subject: [PATCH] fixes for small partitions; now works correctly on ubgl

---
 src/plugins/select/bluegene/bgl_job_place.c |  39 +++++---
 src/plugins/select/bluegene/bgl_job_run.c   |   8 +-
 src/plugins/select/bluegene/bgl_part_info.c |   8 +-
 src/plugins/select/bluegene/bluegene.c      |   6 +-
 src/plugins/select/bluegene/partition_sys.c |   1 -
 src/slurmctld/node_scheduler.c              | 103 +++++++++++++-------
 src/slurmctld/slurmctld.h                   |   3 +
 7 files changed, 107 insertions(+), 61 deletions(-)

diff --git a/src/plugins/select/bluegene/bgl_job_place.c b/src/plugins/select/bluegene/bgl_job_place.c
index dc32e36c7df..4c98e6028cd 100644
--- a/src/plugins/select/bluegene/bgl_job_place.c
+++ b/src/plugins/select/bluegene/bgl_job_place.c
@@ -103,6 +103,18 @@ static int _find_best_partition_match(struct job_record* job_ptr,
 		error("_find_best_partition_match: There is no bgl_list");
 		return SLURM_ERROR;
 	}
+	/* Check job_ptr->checked to see which pass the node scheduler
+	   is making to decide whether the job is runnable.  If checked
+	   >= 2, fall through to tell the scheduler that the job is
+	   runnable, just not right now.
+	*/
+	if(full_system_partition->job_running && job_ptr->checked<2) {
+		job_ptr->checked++;
+		debug("_find_best_partition_match none found "
+		      "full system running on partition %s.",
+		      full_system_partition->bgl_part_id);
+		return SLURM_ERROR;
+	}
 	select_g_get_jobinfo(job_ptr->select_jobinfo,
 			     SELECT_DATA_CONN_TYPE, &conn_type);
@@ -119,20 +131,19 @@ static int _find_best_partition_match(struct job_record* job_ptr,
 
 	/* this is where we should have the control flow depending on
 	 * the spec argument */
-	itr = list_iterator_create(bgl_list);
 	*found_bgl_record = NULL;
-	if(full_system_partition->job_running) {
-		debug("_find_best_partition_match none found");
-		return SLURM_ERROR;
-	}
-	
 	debug("number of partitions to check: %d", list_count(bgl_list));
 	itr = list_iterator_create(bgl_list);
 	while ((record = (bgl_record_t*) list_next(itr))) {
 		/* Check processor count */
-		if(record->job_running) {
-			job_running = 1;
+		/* Check job_ptr->checked to see which pass the node
+		   scheduler is making to decide whether the job is
+		   runnable.  If checked >= 2, fall through to tell the
+		   scheduler that the job is runnable, just not right now.
+		*/
+		if(record->job_running && job_ptr->checked<2) {
+			job_running++;
 			debug("partition %s in use by %s",
 			      record->bgl_part_id,
 			      record->user_name);
@@ -163,8 +174,8 @@ static int _find_best_partition_match(struct job_record* job_ptr,
 		 * check that the number of nodes is suitable
 		 */
 		if ((record->bp_count < min_nodes)
-		    || (max_nodes != 0 && record->bp_count > max_nodes)
-		    || (record->bp_count < target_size)) {
+		   || (max_nodes != 0 && record->bp_count > max_nodes)
+		   || (record->bp_count < target_size)) {
 			debug("partition %s node count not suitable",
 			      record->bgl_part_id);
 			continue;
@@ -230,16 +241,12 @@ static int _find_best_partition_match(struct job_record* job_ptr,
 			continue;	/* Not usable */
 		}
 		
-		/* mark as in use */
-		slurm_mutex_lock(&part_state_mutex);
-		record->job_running = 1;
-		slurm_mutex_unlock(&part_state_mutex);
-		
 		*found_bgl_record = record;
 		break;
 	}
 	list_iterator_destroy(itr);
-	
+	job_ptr->checked++;
+	
 	/* set the bitmap and do other allocation activities */
 	if (*found_bgl_record) {
 		debug("_find_best_partition_match %s <%s>",
diff --git a/src/plugins/select/bluegene/bgl_job_run.c b/src/plugins/select/bluegene/bgl_job_run.c
index f86cfb915cc..5ef788d237e 100644
--- a/src/plugins/select/bluegene/bgl_job_run.c
+++ b/src/plugins/select/bluegene/bgl_job_run.c
@@ -186,7 +186,7 @@ static void _sync_agent(bgl_update_t *bgl_update_ptr)
 		error("No partition %s", bgl_update_ptr->bgl_part_id);
 		return;
 	}
-	
+	bgl_record->job_running = 1;
 	if(bgl_record->state==RM_PARTITION_READY) {
 		if(bgl_record->user_uid != bgl_update_ptr->uid) {
 			slurm_mutex_lock(&part_state_mutex);
@@ -228,7 +228,6 @@ static void _start_agent(bgl_update_t *bgl_update_ptr)
 	int retries;
 	
 	bgl_record = find_bgl_record(bgl_update_ptr->bgl_part_id);
-	
 	if(!bgl_record) {
 		error("partition %s not found in bgl_list",
 		      bgl_update_ptr->bgl_part_id);
@@ -238,7 +237,7 @@ static void _start_agent(bgl_update_t *bgl_update_ptr)
 	slurm_mutex_lock(&part_state_mutex);
 	bgl_record->job_running = 1;
 	slurm_mutex_unlock(&part_state_mutex);
-	
+
 	if(bgl_record->state == RM_PARTITION_DEALLOCATING) {
 		debug("Partition is in Deallocating state, waiting for free.");
 		bgl_free_partition(bgl_record);
@@ -751,6 +750,9 @@ int term_job(struct job_record *job_ptr)
 		return rc;
 	}
 	bgl_record = find_bgl_record(part_id);
+	info("Finished job %u in BGL partition %s",
+	     job_ptr->job_id,
+	     bgl_record->bgl_part_id);
 	bgl_record->state = RM_PARTITION_FREE;
 	bgl_record->job_running = 0;
 	last_bgl_update = time(NULL);
diff --git a/src/plugins/select/bluegene/bgl_part_info.c b/src/plugins/select/bluegene/bgl_part_info.c
index f1bde2d7cf7..3cff059732c 100644
--- a/src/plugins/select/bluegene/bgl_part_info.c
+++ b/src/plugins/select/bluegene/bgl_part_info.c
@@ -305,7 +305,9 @@ extern int update_partition_list()
 			break;
 		} else if(bgl_record->node_use != node_use) {
 			debug("node_use of Partition %s was %d and now is %d",
-			      name, bgl_record->node_use, node_use);
+			      bgl_record->bgl_part_id,
+			      bgl_record->node_use,
+			      node_use);
 			bgl_record->node_use = node_use;
 			updated = 1;
 		}
@@ -319,7 +321,9 @@ extern int update_partition_list()
 			break;
 		} else if(bgl_record->state != state) {
 			debug("state of Partition %s was %d and now is %d",
-			      name, bgl_record->state, state);
+			      bgl_record->bgl_part_id,
+			      bgl_record->state,
+			      state);
 			/*
 			   check to make sure partition went
 			   through freeing correctly
diff --git a/src/plugins/select/bluegene/bluegene.c b/src/plugins/select/bluegene/bluegene.c
index b24c833ed3a..6f18248d995 100644
--- a/src/plugins/select/bluegene/bluegene.c
+++ b/src/plugins/select/bluegene/bluegene.c
@@ -621,7 +621,6 @@ extern int create_static_partitions(List part_list)
 
 	bgl_record = (bgl_record_t*) xmalloc(sizeof(bgl_record_t));
 	bgl_record->nodes = xmalloc(sizeof(char)*13);
-	full_system_partition = bgl_record;
 #ifdef HAVE_BGL_FILES
 	bgl_record->geo[X] = DIM_SIZE[X] - 1;
 	bgl_record->geo[Y] = DIM_SIZE[Y] - 1;
@@ -1246,8 +1245,9 @@ static int _validate_config_nodes(void)
 				record = (bgl_record_t*)
 					xmalloc(sizeof(bgl_record_t));
 				list_append(bgl_list, record);
-				
-				full_system_partition = record;
+				debug("full system %s",
+				      init_record->bgl_part_id);
+				full_system_partition = init_record;
 				record->full_partition = 1;
 				record->bgl_part_id = xstrdup(
 					init_record->bgl_part_id);
diff --git a/src/plugins/select/bluegene/partition_sys.c b/src/plugins/select/bluegene/partition_sys.c
index 11b8dae75aa..4c7ac659e53 100755
--- a/src/plugins/select/bluegene/partition_sys.c
+++ b/src/plugins/select/bluegene/partition_sys.c
@@ -528,7 +528,6 @@ int read_bgl_partitions()
 		else
 			bgl_record->cnodes_per_bp = procs_per_node;
 		
-		printf("got %d\n",bgl_record->cnodes_per_bp);
 		bgl_record->part_lifecycle = STATIC;
 		
 clean_up:
 		if (bgl_recover
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index 09dfd5c2771..b7388813ec2 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -408,7 +408,8 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 	}
 
 	/* Is Consumable Resources enabled? */
-	error_code = select_g_get_info_from_plugin (SELECT_CR_PLUGIN, &cr_enabled);
+	error_code = select_g_get_info_from_plugin (SELECT_CR_PLUGIN,
+						    &cr_enabled);
 	if (error_code != SLURM_SUCCESS)
 		return error_code;
 
@@ -417,7 +418,9 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 		job_ptr->cr_enabled = cr_enabled; /* CR enabled for this job */
 
 		debug3(" Is this Job %u in exclusive mode? %d cr_enabled %d",
%d cr_enabled %d", - job_ptr->job_id, job_ptr->details->exclusive, cr_enabled); + job_ptr->job_id, + job_ptr->details->exclusive, + cr_enabled); if (job_ptr->details->exclusive) { partially_idle_node_bitmap = bit_copy(idle_node_bitmap); @@ -438,10 +441,12 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, /* we have already confirmed that all of these nodes have a * usable configuration and are in the proper partition */ if (min_nodes != 0) - total_nodes = bit_set_count(job_ptr->details->req_node_bitmap); + total_nodes = bit_set_count( + job_ptr->details->req_node_bitmap); if (job_ptr->num_procs != 0) { if (cr_enabled) { - error_code = select_g_get_extra_jobinfo (NULL, job_ptr, + error_code = select_g_get_extra_jobinfo (NULL, + job_ptr, SELECT_CR_CPU_COUNT, &total_cpus); if (error_code != SLURM_SUCCESS) { @@ -449,7 +454,8 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, return error_code; } } else - total_cpus = count_cpus(job_ptr->details->req_node_bitmap); + total_cpus = count_cpus( + job_ptr->details->req_node_bitmap); } if ((max_nodes != 0) && (total_nodes > max_nodes)) { @@ -465,32 +471,39 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, return ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE; } if ((min_nodes <= total_nodes) && - (max_nodes <= min_nodes ) && + (max_nodes <= min_nodes) && (job_ptr->num_procs <= total_cpus )) { if (!bit_super_set(job_ptr->details->req_node_bitmap, avail_node_bitmap)) { if (cr_enabled) - FREE_NULL_BITMAP(partially_idle_node_bitmap); + FREE_NULL_BITMAP( + partially_idle_node_bitmap); return ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE; } if (shared) { - if (!bit_super_set(job_ptr->details->req_node_bitmap, + if (!bit_super_set(job_ptr->details-> + req_node_bitmap, share_node_bitmap)) { if (cr_enabled) - FREE_NULL_BITMAP(partially_idle_node_bitmap); + FREE_NULL_BITMAP( + partially_idle_node_bitmap); return ESLURM_NODES_BUSY; } } else if (cr_enabled) { - if (!bit_super_set(job_ptr->details->req_node_bitmap, + if (!bit_super_set(job_ptr->details-> + req_node_bitmap, partially_idle_node_bitmap)) { - FREE_NULL_BITMAP(partially_idle_node_bitmap); + FREE_NULL_BITMAP( + partially_idle_node_bitmap); return ESLURM_NODES_BUSY; } } else { - if (!bit_super_set(job_ptr->details->req_node_bitmap, + if (!bit_super_set(job_ptr->details-> + req_node_bitmap, idle_node_bitmap)) { if (cr_enabled) - FREE_NULL_BITMAP(partially_idle_node_bitmap); + FREE_NULL_BITMAP( + partially_idle_node_bitmap); return ESLURM_NODES_BUSY; } } @@ -516,13 +529,15 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, continue; if (!runable_ever) { int cr_disabled = 0; - error_code = _add_node_set_info(&node_set_ptr[i], - &total_bitmap, - &total_nodes, &total_cpus, - cr_disabled); + error_code = _add_node_set_info( + &node_set_ptr[i], + &total_bitmap, + &total_nodes, &total_cpus, + cr_disabled); if (error_code != SLURM_SUCCESS) { if (cr_enabled) - FREE_NULL_BITMAP(partially_idle_node_bitmap); + FREE_NULL_BITMAP( + partially_idle_node_bitmap); return error_code; } } @@ -538,12 +553,15 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, idle_node_bitmap); node_set_ptr[i].nodes = bit_set_count(node_set_ptr[i].my_bitmap); - error_code = _add_node_set_info(&node_set_ptr[i], &avail_bitmap, - &avail_nodes, &avail_cpus, + error_code = _add_node_set_info(&node_set_ptr[i], + &avail_bitmap, + &avail_nodes, + &avail_cpus, cr_enabled); if (error_code != SLURM_SUCCESS) { if (cr_enabled) - 
-				FREE_NULL_BITMAP(partially_idle_node_bitmap);
+				FREE_NULL_BITMAP(
+					partially_idle_node_bitmap);
 			return error_code;
 		}
 		if ((job_ptr->details->req_node_bitmap) &&
@@ -560,12 +578,16 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 
 #ifndef HAVE_BGL
 		if (shared) {
-			pick_code = _pick_best_load(job_ptr, avail_bitmap,
-						    min_nodes, max_nodes);
+			pick_code = _pick_best_load(job_ptr,
+						    avail_bitmap,
+						    min_nodes,
+						    max_nodes);
 		} else
 #else
-			pick_code = select_g_job_test(job_ptr, avail_bitmap,
-						      min_nodes, max_nodes);
+			pick_code = select_g_job_test(job_ptr,
+						      avail_bitmap,
+						      min_nodes,
+						      max_nodes);
 #endif
 
 		if (pick_code == SLURM_SUCCESS) {
@@ -577,7 +599,8 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 			}
 			FREE_NULL_BITMAP(total_bitmap);
 			if (cr_enabled)
-				FREE_NULL_BITMAP(partially_idle_node_bitmap);
+				FREE_NULL_BITMAP(
+					partially_idle_node_bitmap);
 			*select_bitmap = avail_bitmap;
 			return SLURM_SUCCESS;
 		}
@@ -618,8 +641,10 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 			bit_and(avail_bitmap, avail_node_bitmap);
 			if (cr_enabled)
 				job_ptr->cr_enabled = 0;
-			pick_code = select_g_job_test(job_ptr, avail_bitmap,
-						      min_nodes, max_nodes);
+			pick_code = select_g_job_test(job_ptr,
+						      avail_bitmap,
+						      min_nodes,
+						      max_nodes);
 			if (cr_enabled)
 				job_ptr->cr_enabled = 1;
 			if (pick_code == SLURM_SUCCESS) {
@@ -633,8 +658,10 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 		if (!runable_ever) {
 			if (cr_enabled)
 				job_ptr->cr_enabled = 0;
-			pick_code = select_g_job_test(job_ptr, total_bitmap,
-						      min_nodes, max_nodes);
+			pick_code = select_g_job_test(job_ptr,
+						      total_bitmap,
+						      min_nodes,
+						      max_nodes);
 			if (cr_enabled)
 				job_ptr->cr_enabled = 1;
 			if (pick_code == SLURM_SUCCESS)
@@ -658,7 +685,7 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 		error_code = ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE;
 		info("_pick_best_nodes: job never runnable");
 	}
-	
+	job_ptr->checked = 0;
 	if (error_code == SLURM_SUCCESS)
 		error_code = ESLURM_NODES_BUSY;
 	return error_code;
@@ -699,9 +726,10 @@ _add_node_set_info(struct node_set *node_set_ptr,
 			if (bit_test (node_set_ptr->my_bitmap, i) == 0)
 				continue;
 			allocated_cpus = 0;
-			error_code = select_g_get_select_nodeinfo(&node_record_table_ptr[i],
-								  SELECT_CR_USED_CPUS,
-								  &allocated_cpus);
+			error_code = select_g_get_select_nodeinfo(
+				&node_record_table_ptr[i],
+				SELECT_CR_USED_CPUS,
+				&allocated_cpus);
 			if (error_code != SLURM_SUCCESS) {
 				error(" cons_res: Invalid Node reference",
 				      node_record_table_ptr[i]);
@@ -709,9 +737,11 @@ _add_node_set_info(struct node_set *node_set_ptr,
 			}
 
 			*node_cnt += 1;
-			*cpu_cnt += node_record_table_ptr[i].cpus - allocated_cpus;
+			*cpu_cnt +=
+				node_record_table_ptr[i].cpus - allocated_cpus;
 		}
-		debug3(" cons_res: _add_set_info node_cnt %d cpu_cnt %d ", *node_cnt, *cpu_cnt);
+		debug3(" cons_res: _add_set_info node_cnt %d cpu_cnt %d ",
+		       *node_cnt, *cpu_cnt);
 	}
 	return error_code;
 }
@@ -845,6 +875,7 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only)
 		error_code = SLURM_SUCCESS;
 		goto cleanup;
 	}
+	info("starting job %u", job_ptr->job_id);
 	if (select_g_job_begin(job_ptr) != SLURM_SUCCESS) {
 		/* Leave job queued, something is hosed */
 		error("select_g_job_begin(%u): %m", job_ptr->job_id);
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index 7e17d20e5b5..e31678f81bc 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -334,6 +334,9 @@ struct job_record {
 					 * each of the ntask_cnt hosts */
 	uint16_t mail_type;		/* see MAIL_JOB_* in slurm.h */
 	char *mail_user;		/* user to get e-mail notification */
+	uint32_t checked;	/* for bgl to tell the plugin it already
+				 * checked and all partitions were full;
+				 * now looking for the best choice */
};

struct step_record {
-- 
GitLab
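
Note on the scheduling change: the new job_ptr->checked counter turns
_find_best_partition_match into a multi-pass test. On the first two
passes a busy partition makes the function return SLURM_ERROR, so the
job simply stays queued; from the third pass on it falls through, so
_pick_best_nodes can report the job as runnable "just not right now"
rather than never runnable, and _pick_best_nodes resets the counter at
the end of the cycle. The standalone sketch below illustrates only that
control flow; it is not SLURM source, the struct and function names are
simplified stand-ins for bgl_record_t / struct job_record, and "RMP0"
is a hypothetical partition id.

	#include <stdio.h>

	struct job {
		unsigned int checked;	/* scheduler passes seen so far */
	};

	struct partition {
		int job_running;	/* non-zero while a job occupies it */
		const char *part_id;
	};

	/* Mirrors the patched logic: defer on the first two passes, then
	 * fall through so the job counts as runnable, just not right now. */
	static int find_partition(struct job *job, struct partition *full_sys)
	{
		if (full_sys->job_running && job->checked < 2) {
			job->checked++;
			printf("full system busy on %s, deferring\n",
			       full_sys->part_id);
			return -1;	/* stands in for SLURM_ERROR */
		}
		job->checked++;		/* fall through: runnable later */
		return 0;
	}

	int main(void)
	{
		struct partition full = { 1, "RMP0" };
		struct job job = { 0 };
		int i;

		/* Two deferrals, then the fall-through on pass three. */
		for (i = 0; i < 3; i++)
			printf("pass %d -> %d (checked=%u)\n", i,
			       find_partition(&job, &full), job.checked);
		job.checked = 0;	/* as _pick_best_nodes does at the end */
		return 0;
	}

Without the reset at the end of _pick_best_nodes the counter would stay
at or above two forever, and a job refused on its first scheduling cycle
could never again be deferred in favor of the full-system partition.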