From 9ca53d1150a85a9cd7e82f160270a31d2839dbd7 Mon Sep 17 00:00:00 2001 From: Danny Auble <da@llnl.gov> Date: Fri, 14 Oct 2005 23:05:38 +0000 Subject: [PATCH] update for bgl small partition manager --- src/partition_allocator/partition_allocator.c | 2 +- src/plugins/select/bluegene/bgl_job_place.c | 34 ++++++++++-- src/plugins/select/bluegene/bgl_job_run.c | 53 ++++++++++++++----- src/plugins/select/bluegene/bluegene.c | 8 +-- src/plugins/select/bluegene/bluegene.h | 47 ++++++++-------- src/slurmctld/job_mgr.c | 2 +- src/slurmctld/node_scheduler.c | 11 ++++ 7 files changed, 113 insertions(+), 44 deletions(-) diff --git a/src/partition_allocator/partition_allocator.c b/src/partition_allocator/partition_allocator.c index 766ca7818c3..8ea8db90b0a 100644 --- a/src/partition_allocator/partition_allocator.c +++ b/src/partition_allocator/partition_allocator.c @@ -635,7 +635,7 @@ extern void pa_init(node_info_msg_t *node_info_ptr) #endif pa_system_ptr->num_of_proc = node_info_ptr->record_count; } - + #ifdef HAVE_BGL_FILES if (have_db2 && (DIM_SIZE[X]==0) && (DIM_SIZE[X]==0) && (DIM_SIZE[X]==0)) { diff --git a/src/plugins/select/bluegene/bgl_job_place.c b/src/plugins/select/bluegene/bgl_job_place.c index 9558baf6632..dc32e36c7df 100644 --- a/src/plugins/select/bluegene/bgl_job_place.c +++ b/src/plugins/select/bluegene/bgl_job_place.c @@ -97,7 +97,8 @@ static int _find_best_partition_match(struct job_record* job_ptr, uint32_t req_procs = job_ptr->num_procs; int rot_cnt = 0; uint32_t proc_cnt; - + int job_running = 0; + if(!bgl_list) { error("_find_best_partition_match: There is no bgl_list"); return SLURM_ERROR; @@ -120,11 +121,29 @@ static int _find_best_partition_match(struct job_record* job_ptr, itr = list_iterator_create(bgl_list); *found_bgl_record = NULL; - + + if(full_system_partition->job_running) { + debug("_find_best_partition_match none found"); + return SLURM_ERROR; + } + debug("number of partitions to check: %d", list_count(bgl_list)); + itr = 
list_iterator_create(bgl_list); while ((record = (bgl_record_t*) list_next(itr))) { /* Check processor count */ - printf("%d\n",req_procs); + if(record->job_running) { + job_running = 1; + debug("partition %s in use by %s", + record->bgl_part_id, + record->user_name); + continue; + } + if(record->full_partition && job_running) { + debug("Can't run on full system partition " + "another partition has a job running."); + continue; + } + if (req_procs > record->cnodes_per_bp) { /* We use the c-node count here. Job could start * twice this count if VIRTUAL_NODE_MODE, but this @@ -210,12 +229,17 @@ static int _find_best_partition_match(struct job_record* job_ptr, if (!match) continue; /* Not usable */ } - + + /* mark as in use */ + slurm_mutex_lock(&part_state_mutex); + record->job_running = 1; + slurm_mutex_unlock(&part_state_mutex); + *found_bgl_record = record; break; } list_iterator_destroy(itr); - + /* set the bitmap and do other allocation activities */ if (*found_bgl_record) { debug("_find_best_partition_match %s <%s>", diff --git a/src/plugins/select/bluegene/bgl_job_run.c b/src/plugins/select/bluegene/bgl_job_run.c index 45b4137e8d3..f86cfb915cc 100644 --- a/src/plugins/select/bluegene/bgl_job_run.c +++ b/src/plugins/select/bluegene/bgl_job_run.c @@ -428,7 +428,7 @@ static void _term_agent(bgl_update_t *bgl_update_ptr) /* remove the partition's users */ bgl_record = find_bgl_record(bgl_update_ptr->bgl_part_id); if(bgl_record) { - debug2("got the record %s user is %s", + debug("got the record %s user is %s", bgl_record->bgl_part_id, bgl_record->user_name); @@ -674,25 +674,34 @@ extern int start_job(struct job_record *job_ptr) _part_op(bgl_update_ptr); #else - if (bgl_list) { - ListIterator itr; - bgl_record_t *bgl_record; - char *part_id = NULL; - uint16_t node_use; + ListIterator itr; + bgl_record_t *bgl_record = NULL; + bgl_record_t *found_record = NULL; + char *part_id = NULL; + uint16_t node_use; + if (bgl_list) { + 
select_g_get_jobinfo(job_ptr->select_jobinfo, SELECT_DATA_PART_ID, &part_id); select_g_get_jobinfo(job_ptr->select_jobinfo, SELECT_DATA_NODE_USE, &node_use); + if(!part_id) { + error("NO part_id"); + return rc; + } + bgl_record = find_bgl_record(part_id); itr = list_iterator_create(bgl_list); - while ((bgl_record = (bgl_record_t *) list_next(itr))) { - if (!part_id) - break; - if ((!bgl_record->bgl_part_id) - || (strcmp(part_id, bgl_record->bgl_part_id))) + while ((found_record = (bgl_record_t *) list_next(itr))) { + if (bgl_record->full_partition) + found_record->state = RM_PARTITION_FREE; + else if(found_record->full_partition) + found_record->state = RM_PARTITION_FREE; + if ((!found_record->bgl_part_id) + || (strcmp(part_id, found_record->bgl_part_id))) continue; - bgl_record->node_use = node_use; - bgl_record->state = RM_PARTITION_READY; + found_record->node_use = node_use; + found_record->state = RM_PARTITION_READY; last_bgl_update = time(NULL); break; } @@ -729,6 +738,24 @@ int term_job(struct job_record *job_ptr) info("Queue termination of job %u in BGL partition %s", job_ptr->job_id, bgl_update_ptr->bgl_part_id); _part_op(bgl_update_ptr); +#else + bgl_record_t *bgl_record; + char *part_id = NULL; + + if (bgl_list) { + + select_g_get_jobinfo(job_ptr->select_jobinfo, + SELECT_DATA_PART_ID, &part_id); + if(!part_id) { + error("NO part_id"); + return rc; + } + bgl_record = find_bgl_record(part_id); + bgl_record->state = RM_PARTITION_FREE; + bgl_record->job_running = 0; + last_bgl_update = time(NULL); + xfree(part_id); + } #endif return rc; } diff --git a/src/plugins/select/bluegene/bluegene.c b/src/plugins/select/bluegene/bluegene.c index b7262719d67..b24c833ed3a 100644 --- a/src/plugins/select/bluegene/bluegene.c +++ b/src/plugins/select/bluegene/bluegene.c @@ -54,6 +54,7 @@ pthread_mutex_t part_state_mutex = PTHREAD_MUTEX_INITIALIZER; int num_part_to_free = 0; int num_part_freed = 0; int partitions_are_created = 0; +bgl_record_t *full_system_partition = NULL; 
#ifdef HAVE_BGL_FILES static pthread_mutex_t freed_cnt_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -620,7 +621,7 @@ extern int create_static_partitions(List part_list) bgl_record = (bgl_record_t*) xmalloc(sizeof(bgl_record_t)); bgl_record->nodes = xmalloc(sizeof(char)*13); - + full_system_partition = bgl_record; #ifdef HAVE_BGL_FILES bgl_record->geo[X] = DIM_SIZE[X] - 1; bgl_record->geo[Y] = DIM_SIZE[Y] - 1; @@ -638,7 +639,7 @@ extern int create_static_partitions(List part_list) bgl_record->geo[X], bgl_record->geo[Y], bgl_record->geo[Z]); bgl_record->quarter = -1; - + bgl_record->full_partition = 1; if(bgl_found_part_list) { itr = list_iterator_create(bgl_found_part_list); while ((found_record = (bgl_record_t *) list_next(itr)) @@ -742,7 +743,7 @@ no_total: debug("full partiton = %s.", bgl_record->bgl_part_id); bgl_record->full_partition = 1; - + full_system_partition = bgl_record; break; } } @@ -1246,6 +1247,7 @@ static int _validate_config_nodes(void) xmalloc(sizeof(bgl_record_t)); list_append(bgl_list, record); + full_system_partition = record; record->full_partition = 1; record->bgl_part_id = xstrdup( init_record->bgl_part_id); diff --git a/src/plugins/select/bluegene/bluegene.h b/src/plugins/select/bluegene/bluegene.h index cf4d4c64bf8..54151c38656 100644 --- a/src/plugins/select/bluegene/bluegene.h +++ b/src/plugins/select/bluegene/bluegene.h @@ -44,26 +44,8 @@ #include "src/partition_allocator/partition_allocator.h" #include "src/plugins/select/bluegene/wrap_rm_api.h" -/* Global variables */ -extern rm_BGL_t *bgl; -extern char *bluegene_blrts; -extern char *bluegene_linux; -extern char *bluegene_mloader; -extern char *bluegene_ramdisk; -extern char *bridge_api_file; -extern int numpsets; -extern pa_system_t *pa_system_ptr; -extern time_t last_bgl_update; -extern List bgl_curr_part_list; /* Initial bgl partition state */ -extern List bgl_list; /* List of configured BGL blocks */ -extern bool agent_fini; -extern pthread_mutex_t part_state_mutex; -extern int 
num_part_to_free; -extern int num_part_freed; -extern int partitions_are_created; -extern int procs_per_node; - typedef int lifecycle_type_t; + enum part_lifecycle {DYNAMIC, STATIC}; typedef struct bgl_record { @@ -96,8 +78,9 @@ typedef struct bgl_record { partition */ int job_running; /* signal if there is a job running on the partition */ - int cnodes_per_bp; - int quarter; + int cnodes_per_bp; /* count of cnodes per Base part */ + int quarter; /* used for small partitions + determine quarter of BP */ } bgl_record_t; typedef struct { @@ -116,6 +99,28 @@ typedef struct { List switch_list; } bgl_bp_t; + +/* Global variables */ +extern rm_BGL_t *bgl; +extern char *bluegene_blrts; +extern char *bluegene_linux; +extern char *bluegene_mloader; +extern char *bluegene_ramdisk; +extern char *bridge_api_file; +extern int numpsets; +extern pa_system_t *pa_system_ptr; +extern time_t last_bgl_update; +extern List bgl_curr_part_list; /* Initial bgl partition state */ +extern List bgl_list; /* List of configured BGL blocks */ +extern bool agent_fini; +extern pthread_mutex_t part_state_mutex; +extern int num_part_to_free; +extern int num_part_freed; +extern int partitions_are_created; +extern int procs_per_node; +extern bgl_record_t *full_system_partition; + + #define MAX_PTHREAD_RETRIES 1 #include "bgl_part_info.h" diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 2d92a96df29..7e72e6ba761 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -1600,7 +1600,7 @@ extern int job_complete(uint32_t job_id, uid_t uid, bool requeue, requeue = 0; if (job_return_code == 0) job_return_code = 1; - info("Batch job launch failure, JobId=%u", job_ptr->job_id); + info("Batch job launch failure, JobId=%u", job_ptr->job_id); } if (requeue && job_ptr->details && job_ptr->batch_flag) { diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 56bc7214ab8..d1b387fafd2 100644 --- a/src/slurmctld/node_scheduler.c +++ 
b/src/slurmctld/node_scheduler.c @@ -286,8 +286,19 @@ _pick_best_load(struct job_record *job_ptr, bitstr_t * bitmap, * to this job */ if (job_ptr->details && job_ptr->details->req_node_bitmap) bit_or(bitmap, job_ptr->details->req_node_bitmap); + +#ifdef HAVE_BGL + /* here to reset the bitmap for small partitions the + * BGL plugin will do the sched. + */ + bit_or(bitmap, light_load_bit); +#endif error_code = select_g_job_test(job_ptr, bitmap, min_nodes, max_nodes); +#ifdef HAVE_BGL + FREE_NULL_BITMAP(light_load_bit); + return error_code; +#endif /* now try to use idle and lightly loaded nodes */ if (error_code) { -- GitLab