From f45c8f094cd1c9f23c6998e76f084b7bf187a2d9 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Fri, 17 Dec 2004 02:45:02 +0000 Subject: [PATCH] Restore logic to support proper job placement using common data structure. --- src/plugins/select/bluegene/bgl_job_place.c | 236 ++++++++++++------ src/plugins/select/bluegene/bluegene.c | 36 +-- src/plugins/select/bluegene/bluegene.h | 10 +- src/plugins/select/bluegene/partition_sys.c | 2 - src/plugins/select/bluegene/select_bluegene.c | 4 +- 5 files changed, 185 insertions(+), 103 deletions(-) diff --git a/src/plugins/select/bluegene/bgl_job_place.c b/src/plugins/select/bluegene/bgl_job_place.c index 367c326e5c2..c67f96d165e 100644 --- a/src/plugins/select/bluegene/bgl_job_place.c +++ b/src/plugins/select/bluegene/bgl_job_place.c @@ -24,48 +24,69 @@ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. \*****************************************************************************/ +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <slurm/slurm.h> + +#include "src/common/bitstring.h" +#include "src/common/list.h" +#include "src/common/log.h" +#include "src/common/macros.h" + +#include "src/slurmctld/proc_req.h" #include "src/common/node_select.h" #include "bluegene.h" #define BUFSIZE 4096 #define BITSIZE 128 -#define DEFAULT_BLUEGENE_SERIAL "BGL" #define _DEBUG 0 +#define SWAP(a,b,t) \ +_STMT_START { \ + (t) = (a); \ + (a) = (b); \ + (b) = (t); \ +} _STMT_END -static int _rotate_geo(int *geometry); -static bgl_record_t *_find_best_partition_match(struct job_record* job_ptr, +static int _find_best_partition_match(struct job_record* job_ptr, bitstr_t* slurm_part_bitmap, int min_nodes, int max_nodes, - int spec); + int spec, bgl_record_t** found_bgl_record); +static void _rotate_geo(uint16_t *req_geometry, int rot_cnt); /* Rotate a 3-D geometry array through its six permutations */ -static int _rotate_geo(int *geometry) +static void 
_rotate_geo(uint16_t *req_geometry, int rot_cnt) { - static int rotate_count = 0; - int temp; - - if (rotate_count==(PA_SYSTEM_DIMENSIONS-1)) { - //printf("Special!\n"); - temp=geometry[X]; - geometry[X]=geometry[Z]; - geometry[Z]=temp; - rotate_count++; - return 1; - - } else if(rotate_count<(PA_SYSTEM_DIMENSIONS*2)) { - temp=geometry[X]; - geometry[X]=geometry[Y]; - geometry[Y]=geometry[Z]; - geometry[Z]=temp; - rotate_count++; - return 1; - } else { - rotate_count = 0; - return 0; + uint16_t tmp; + + switch (rot_cnt) { + case 0: /* ABC -> ACB */ + SWAP(req_geometry[1], req_geometry[2], tmp); + break; + case 1: /* ACB -> CAB */ + SWAP(req_geometry[0], req_geometry[1], tmp); + break; + case 2: /* CAB -> CBA */ + SWAP(req_geometry[1], req_geometry[2], tmp); + break; + case 3: /* CBA -> BCA */ + SWAP(req_geometry[0], req_geometry[1], tmp); + break; + case 4: /* BCA -> BAC */ + SWAP(req_geometry[1], req_geometry[2], tmp); + break; + case 5: /* BAC -> ABC */ + SWAP(req_geometry[0], req_geometry[1], tmp); + break; } } + /* * finds the best match for a given job request * @@ -77,15 +98,15 @@ static int _rotate_geo(int *geometry) * returns 1 for error (no match) * */ -static bgl_record_t *_find_best_partition_match(struct job_record* job_ptr, +static int _find_best_partition_match(struct job_record* job_ptr, bitstr_t* slurm_part_bitmap, int min_nodes, int max_nodes, - int spec) + int spec, bgl_record_t** found_bgl_record) { ListIterator itr; - bgl_record_t *record; + bgl_record_t* record; int i; - int req_geometry[SYSTEM_DIMENSIONS]; - int conn_type, node_use, rotate, target_size = 1; + uint16_t req_geometry[SYSTEM_DIMENSIONS]; + uint16_t conn_type, node_use, rotate, target_size = 1; sort_bgl_record_inc_size(bgl_list); @@ -105,51 +126,114 @@ static bgl_record_t *_find_best_partition_match(struct job_record* job_ptr, /* this is where we should have the control flow depending on * the spec arguement */ itr = list_iterator_create(bgl_list); - + *found_bgl_record = NULL; + + 
/* + * FIXME: NEED TO PUT THIS LOGIC IN: + * if RM_NAV, then the partition with both the TORUS and the + * dims should be favored over the MESH and the dims, but + * foremost is the correct num of dims. + */ debug("number of partitions to check: %d", list_count(bgl_list)); - while ((record = (bgl_record_t*) list_next(itr)) != NULL) { + while ((record = (bgl_record_t*) list_next(itr))) { /* * check that the number of nodes is suitable */ - if ((record->state == RM_PARTITION_FREE) - && ((conn_type == record->conn_type) - || (conn_type != SELECT_NAV)) - && ((node_use == record->node_use) - || (node_use != SELECT_NAV)) - && (record->bp_count > min_nodes) - && (record->bp_count < max_nodes) - && (record->bp_count < target_size) - ) { - - /*****************************************/ - /* match up geometry as "best" possible */ - /*****************************************/ - if (req_geometry[0]) { - /* match requested geometry */ - - while(1) { - if ((record->coord[X] == req_geometry[X]) - && (record->coord[Y] == req_geometry[Y]) - && (record->coord[Z] == req_geometry[Z])) - break; - else if(!_rotate_geo(req_geometry)) - break; + if ((record->bp_count < min_nodes) + || (max_nodes != 0 && record->bp_count > max_nodes) + || (record->bp_count < target_size)) { + debug("partition %s node count not suitable", + record->bgl_part); + continue; + } + + /* + * Next we check that this partition's bitmap is within + * the set of nodes which the job can use. + * Nodes not available for the job could be down, + * drained, allocated to some other job, or in some + * SLURM partition not available to this job. 
+ */ + if (!bit_super_set(record->bitmap, slurm_part_bitmap)) { + debug("bgl partition %s has nodes not usable by this " + "job", record->nodes); + continue; + } + + /* + * Insure that any required nodes are in this BGL partition + */ + if (job_ptr->details->req_node_bitmap + && (!bit_super_set(job_ptr->details->req_node_bitmap, + record->bitmap))) { + info("bgl partition %s lacks required nodes", + record->nodes); + continue; + } + + /***********************************************/ + /* check the connection type specified matches */ + /***********************************************/ + if ((conn_type != record->conn_type) + && (conn_type != SELECT_NAV)) { + debug("bgl partition %s conn-type not usable", + record->nodes); + continue; + } + + /***********************************************/ + /* check the node_use specified matches */ + /***********************************************/ + if ((node_use != record->node_use) + && (node_use != SELECT_NAV)) { + debug("bgl partition %s node-use not usable", + record->nodes); + continue; + } + + /*****************************************/ + /* match up geometry as "best" possible */ + /*****************************************/ + if (req_geometry[0] == 0) + ; /* Geometry not specified */ + else { /* match requested geometry */ + bool match = false; + int rot_cnt; /* attempt six rotations */ + + for (rot_cnt=0; rot_cnt<6; rot_cnt++) { + if ((record->coord[X] >= req_geometry[X]) + && (record->coord[Y] >= req_geometry[Y]) + && (record->coord[Z] >= req_geometry[Z])) { + match = true; + break; } - - } else if (record->bp_count == target_size) - break; - + _rotate_geo(req_geometry, rot_cnt); + } + + if (!match) + continue; /* Not usable */ } - - /* set the bitmap and do other allocation activities */ - if (record) { - - return record; + + if ((*found_bgl_record == NULL) + || (record->bp_count < (*found_bgl_record)->bp_count)) { + *found_bgl_record = record; + if (record->bp_count == target_size) + break; } } + 
list_iterator_destroy(itr); + + /* set the bitmap and do other allocation activities */ + if (*found_bgl_record) { + debug("_find_best_partition_match %s <%s>", + (*found_bgl_record)->bgl_part, + (*found_bgl_record)->nodes); + bit_and(slurm_part_bitmap, (*found_bgl_record)->bitmap); + return SLURM_SUCCESS; + } debug("_find_best_partition_match none found"); - return NULL; + return SLURM_ERROR; } /* @@ -172,24 +256,20 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_part_bitmap, SELECT_PRINT_MIXED); debug("bluegene:submit_job: %s nodes=%d-%d", buf, min_nodes, max_nodes); - if((record = _find_best_partition_match(job_ptr, - slurm_part_bitmap, - min_nodes, - max_nodes, - spec)) == NULL) { + if (_find_best_partition_match(job_ptr, slurm_part_bitmap, min_nodes, + max_nodes, spec, &record)) { return SLURM_ERROR; } else { /* now we place the part_id into the env of the script to run */ - + char bgl_part_id[BITSIZE]; #ifdef HAVE_BGL_FILES - select_g_set_jobinfo(job_ptr->select_jobinfo, - SELECT_DATA_PART_ID, record->bgl_part_id); + snprintf(bgl_part_id, BITSIZE, "%s", record->bgl_part_id); #else - select_g_set_jobinfo(job_ptr->select_jobinfo, - SELECT_DATA_PART_ID, "UNDEFINED"); + snprintf(bgl_part_id, BITSIZE, "UNDEFINED"); #endif - + select_g_set_jobinfo(job_ptr->select_jobinfo, + SELECT_DATA_PART_ID, bgl_part_id); } - + return SLURM_SUCCESS; } diff --git a/src/plugins/select/bluegene/bluegene.c b/src/plugins/select/bluegene/bluegene.c index c0b58b01c63..1f95b31342a 100644 --- a/src/plugins/select/bluegene/bluegene.c +++ b/src/plugins/select/bluegene/bluegene.c @@ -51,7 +51,8 @@ static bgl_record_t* _find_config_by_nodes(char* nodes); static int _listfindf_part_record(bgl_record_t* record, char *nodes); static int _parse_bgl_spec(char *in_line); //static int _parse_request(char* request_string, partition_t** request); -//static void _process_config(void); +static void _process_config(void); +static int _read_bgl_conf(void); //static int 
_sync_partitions(void); static int _validate_config_nodes(void); @@ -208,18 +209,24 @@ int create_static_partitions(List part_list) j=0; /* printf("creating list %d%d%dx%d%d%d\n", */ /* start[X],start[Y],start[Z],end[X],end[Y],end[Z]); */ - for (x = start[X]; x <= end[X]; x++) - for (y = start[Y]; y <= end[Y]; y++) + for (x = start[X]; x <= end[X]; x++) { + for (y = start[Y]; y <= end[Y]; y++) { for (z = start[Z]; z <= end[Z]; z++) { list_append(bgl_record->bgl_part_list, &pa_system_ptr->grid[x][y][z]); j++; } + } + } bgl_record->bp_count = j; set_bgl_part(bgl_record->bgl_part_list, bgl_record->bp_count, bgl_record->conn_type); - + if (node_name2bitmap(bgl_record->nodes, false, + &(bgl_record->bitmap))) { + error("Unable to convert nodes %s to bitmap", + bgl_record->nodes); + } } list_iterator_destroy(itr); @@ -437,7 +444,7 @@ static int _copy_slurm_partition_list(List slurm_part_list) * Read and process the bluegene.conf configuration file so to interpret what * partitions are static/dynamic, torus/mesh, etc. 
*/ -extern int read_bgl_conf(void) +static int _read_bgl_conf(void) { DEF_TIMERS; FILE *bgl_spec_file; /* pointer to input data file */ @@ -465,7 +472,7 @@ extern int read_bgl_conf(void) /* bgl_conf defined in bgl_node_alloc.h */ bgl_spec_file = fopen(bgl_conf, "r"); if (bgl_spec_file == NULL) - fatal("read_bgl_conf error opening file %s, %m", + fatal("_read_bgl_conf error opening file %s, %m", bgl_conf); /* empty the old list before reading new data */ @@ -482,7 +489,7 @@ extern int read_bgl_conf(void) while (fgets(in_line, BUFSIZE, bgl_spec_file) != NULL) { line_num++; if (strlen(in_line) >= (BUFSIZE - 1)) { - error("read_bgl_config line %d, of input file %s " + error("_read_bgl_config line %d, of input file %s " "too long", line_num, bgl_conf); fclose(bgl_spec_file); return E2BIG; @@ -525,7 +532,7 @@ extern int read_bgl_conf(void) if (!bluegene_ramdisk) fatal("RamDiskImage not configured in bluegene.conf"); END_TIMER; - debug("read_bgl_conf: finished loading configuration %s", TIME_STR); + debug("_read_bgl_conf: finished loading configuration %s", TIME_STR); return error_code; } @@ -631,13 +638,12 @@ static void _destroy_bgl_record(void* object) bgl_record_t* this_record = (bgl_record_t*) object; if (this_record) { - if(this_record->nodes) - xfree(this_record->nodes); - if(this_record->owner_name) - xfree(this_record->owner_name); - if(this_record->bgl_part_list) + xfree(this_record->nodes); + xfree(this_record->owner_name); + if (this_record->bgl_part_list) list_destroy(this_record->bgl_part_list); - + if (this_record->bitmap) + bit_free(this_record->bitmap); //xfree(this_record->bgl_part_id); xfree(this_record); } @@ -759,7 +765,7 @@ extern int init_bgl(void) rm_size3D_t bp_size; #endif - read_bgl_conf(); + _read_bgl_conf(); #ifdef HAVE_BGL_FILES if ((rc = rm_set_serial(BGL_SERIAL)) != STATUS_OK) { diff --git a/src/plugins/select/bluegene/bluegene.h b/src/plugins/select/bluegene/bluegene.h index 49f69c10ec7..62515bc6bee 100644 --- 
a/src/plugins/select/bluegene/bluegene.h +++ b/src/plugins/select/bluegene/bluegene.h @@ -95,6 +95,7 @@ typedef struct bgl_record { List bgl_part_list; int bp_count; int switch_count; + bitstr_t *bitmap; } bgl_record_t; typedef struct { @@ -128,17 +129,12 @@ typedef struct { /* bluegene.c */ /**********************************************/ -/* - * Read and process the bluegene.conf configuration file so to interpret what - * partitions are static/dynamic, torus/mesh, etc. - */ -int read_bgl_conf(void); /* Initialize all plugin variables */ -int init_bgl(void); +extern int init_bgl(void); /* Purge all plugin variables */ -void fini_bgl(void); +extern void fini_bgl(void); /* * create_static_partitions - create the static partitions that will be used diff --git a/src/plugins/select/bluegene/partition_sys.c b/src/plugins/select/bluegene/partition_sys.c index bc0a198ab1f..4b9abf93c76 100755 --- a/src/plugins/select/bluegene/partition_sys.c +++ b/src/plugins/select/bluegene/partition_sys.c @@ -213,8 +213,6 @@ static int _post_allocate(bgl_record_t *bgl_record) system(command); rm_get_data(bgl_record->bgl_part, RM_PartitionID, &bgl_record->bgl_part_id); - bgl_record->nodes = xstrdup(bgl_record->nodes); - bgl_record->node_use = bgl_record->node_use; bgl_record->conn_type = bgl_record->conn_type; list_push(bgl_list, bgl_record); diff --git a/src/plugins/select/bluegene/select_bluegene.c b/src/plugins/select/bluegene/select_bluegene.c index 8f244611fa0..be08dacb12d 100644 --- a/src/plugins/select/bluegene/select_bluegene.c +++ b/src/plugins/select/bluegene/select_bluegene.c @@ -227,7 +227,9 @@ extern int select_p_node_init(struct node_record *node_ptr, int node_cnt) /* * select_p_job_test - Given a specification of scheduling requirements, - * identify the nodes which "best" satify the request. + * identify the nodes which "best" satisfy the request. The specified + * nodes may be DOWN or BUSY at the time of this test, as it may be used + * to determine if a job could ever run.
* IN job_ptr - pointer to job being scheduled * IN/OUT bitmap - usable nodes are set on input, nodes not required to * satisfy the request are cleared, other left set -- GitLab