From a568756be22389e45f53a0b96be9b7e56a8bf171 Mon Sep 17 00:00:00 2001 From: Danny Auble <da@llnl.gov> Date: Thu, 13 Oct 2005 21:39:53 +0000 Subject: [PATCH] small partition ci for bgl --- NEWS | 3 + slurm/slurm.h.in | 3 +- src/api/node_select_info.h | 2 + src/common/node_select.c | 5 + src/partition_allocator/partition_allocator.h | 6 +- src/plugins/select/bluegene/bgl_job_place.c | 12 +- src/plugins/select/bluegene/bgl_part_info.c | 2 + .../select/bluegene/bgl_switch_connections.c | 188 ++++++- src/plugins/select/bluegene/bluegene.c | 527 ++++++++++-------- src/plugins/select/bluegene/bluegene.h | 12 +- src/plugins/select/bluegene/partition_sys.c | 139 +++-- src/plugins/select/bluegene/select_bluegene.c | 7 + src/smap/partition_functions.c | 131 ++--- 13 files changed, 649 insertions(+), 388 deletions(-) diff --git a/NEWS b/NEWS index 4d06240d445..06be6c15eba 100644 --- a/NEWS +++ b/NEWS @@ -11,6 +11,9 @@ documents those changes that are of interest to users and admins. -- Added task plugin and use task prolog/epilog(s). -- New slurmd_step functionality added. Fork exec instead of using shared memory. Not completely tested. + -- BGL small partition logic in place in plugin and smap. Scheduler needs + to be rewritten to handle multiple partitions on a single node. No + documentation written on process yet. * Changes in SLURM 0.7.0-pre1 ============================= diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 7c38be6181a..ec875b9b559 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -171,7 +171,8 @@ enum job_wait_reason { enum connection_type { SELECT_MESH, /* nodes wired in mesh */ SELECT_TORUS, /* nodes wired in torus */ - SELECT_NAV /* nodes wired in torus else mesh */ + SELECT_NAV, /* nodes wired in torus else mesh */ + SELECT_SMALL /* nodes in a small partition */ }; enum node_use_type { diff --git a/src/api/node_select_info.h b/src/api/node_select_info.h index d3d9c9aff2c..e8dc94cee21 100644 --- a/src/api/node_select_info.h +++ b/src/api/node_select_info.h @@ -43,6 +43,8 @@ typedef struct { int state; int conn_type; int node_use; + int cnodes_per_bp; + int quarter; } bgl_info_record_t; typedef struct { diff --git a/src/common/node_select.c b/src/common/node_select.c index 3c462c7ff9e..46b5d347815 100644 --- a/src/common/node_select.c +++ b/src/common/node_select.c @@ -754,6 +754,7 @@ extern char *select_g_sprint_jobinfo(select_jobinfo_t jobinfo, static int _unpack_node_info(bgl_info_record_t *bgl_info_record, Buf buffer) { uint16_t uint16_tmp; + uint32_t uint32_tmp; safe_unpackstr_xmalloc(&(bgl_info_record->nodes), &uint16_tmp, buffer); safe_unpackstr_xmalloc(&bgl_info_record->owner_name, &uint16_tmp, buffer); @@ -766,6 +767,10 @@ static int _unpack_node_info(bgl_info_record_t *bgl_info_record, Buf buffer) bgl_info_record->conn_type = (int) uint16_tmp; safe_unpack16(&uint16_tmp, buffer); bgl_info_record->node_use = (int) uint16_tmp; + safe_unpack16(&uint16_tmp, buffer); + bgl_info_record->cnodes_per_bp = (int) uint16_tmp; + safe_unpack32(&uint32_tmp, buffer); + bgl_info_record->quarter = (int) uint32_tmp; return SLURM_SUCCESS; diff --git a/src/partition_allocator/partition_allocator.h b/src/partition_allocator/partition_allocator.h index ac94e2ec151..4fbd7876f65 100644 --- a/src/partition_allocator/partition_allocator.h +++ b/src/partition_allocator/partition_allocator.h @@ -76,7 +76,7 @@ extern bool have_db2; enum {X, Y, Z}; /* */ -enum {MESH, TORUS}; +enum {MESH, TORUS, SMALL}; enum {COPROCESSOR, VIRTUAL}; /* NOTE: Definition of bgl_info_record_t moved to src/api/node_select_info.h */ @@ -106,7 +106,7 @@ typedef struct { * - letter - filled in after the request is fulfilled * - geometry - request size * - size - node count for request - * - conn_type - MESH or TORUS + * - conn_type - MESH or TORUS or SMALL * - rotate_count - when rotating we keep a count so we aren't in an infinate loop. * - elongate_count - when elongating we keep a count so we aren't in an infinate loop. * - rotate - weather to allow rotating or not. @@ -235,7 +235,7 @@ extern void destroy_bgl_info_record(void* object); * IN - elongate: if true, will try to fit different geometries of * same size requests * IN - contig: enforce contiguous regions constraint - * IN - conn_type: connection type of request (TORUS or MESH) + * IN - conn_type: connection type of request (TORUS or MESH or SMALL) * * return success of allocation/validation of params */ diff --git a/src/plugins/select/bluegene/bgl_job_place.c b/src/plugins/select/bluegene/bgl_job_place.c index cfed2c0c7df..9558baf6632 100644 --- a/src/plugins/select/bluegene/bgl_job_place.c +++ b/src/plugins/select/bluegene/bgl_job_place.c @@ -124,18 +124,22 @@ static int _find_best_partition_match(struct job_record* job_ptr, debug("number of partitions to check: %d", list_count(bgl_list)); while ((record = (bgl_record_t*) list_next(itr))) { /* Check processor count */ - if (req_procs > 512) { + printf("%d\n",req_procs); + if (req_procs > record->cnodes_per_bp) { /* We use the c-node count here. Job could start * twice this count if VIRTUAL_NODE_MODE, but this - * is now controlled by mpirun, not SLURM */ - proc_cnt = record->bp_count * 512; + * is now controlled by mpirun, not SLURM + * We now use the number set by the admins in the + * slurm.conf file. This should never happen. + */ + proc_cnt = record->bp_count * record->cnodes_per_bp; if (req_procs > proc_cnt) { debug("partition %s CPU count too low", record->bgl_part_id); continue; } } - + /* * check that the number of nodes is suitable */ diff --git a/src/plugins/select/bluegene/bgl_part_info.c b/src/plugins/select/bluegene/bgl_part_info.c index 52a17195116..f1bde2d7cf7 100644 --- a/src/plugins/select/bluegene/bgl_part_info.c +++ b/src/plugins/select/bluegene/bgl_part_info.c @@ -193,6 +193,8 @@ extern void pack_partition(bgl_record_t *bgl_record, Buf buffer) pack16((uint16_t)bgl_record->state, buffer); pack16((uint16_t)bgl_record->conn_type, buffer); pack16((uint16_t)bgl_record->node_use, buffer); + pack16((uint16_t)bgl_record->cnodes_per_bp, buffer); + pack32(bgl_record->quarter, buffer); } extern int update_partition_list() diff --git a/src/plugins/select/bluegene/bgl_switch_connections.c b/src/plugins/select/bluegene/bgl_switch_connections.c index c0c49c98d03..ae8a89e99b5 100644 --- a/src/plugins/select/bluegene/bgl_switch_connections.c +++ b/src/plugins/select/bluegene/bgl_switch_connections.c @@ -305,6 +305,150 @@ static int _destroy_bgl_bp_list(List bgl_bp_list) return SLURM_SUCCESS; } +extern int configure_small_partition(bgl_record_t *bgl_record) +{ + bool small = true; + ListIterator itr; + pa_node_t* pa_node = NULL; + int rc = SLURM_SUCCESS; + rm_BP_t *curr_bp; + rm_bp_id_t bp_id = NULL; + int num_ncards = 4; + rm_nodecard_t *ncard; + rm_nodecard_list_t *ncard_list; + rm_quarter_t quarter; + int num, i; + + if(bgl_record->bp_count != 1) { + error("Requesting small partition with %d bps, needs to be 1.", + bgl_record->bp_count); + return SLURM_ERROR; + } + + /* set that we are doing a small partition */ + if ((rc = rm_set_data(bgl_record->bgl_part, RM_PartitionSmall, + &small)) != STATUS_OK) { + fatal("rm_set_data(RM_PartitionPsetsPerBP)", bgl_err_str(rc)); + } + + if ((rc = rm_set_data(bgl_record->bgl_part, + RM_PartitionNodeCardNum, + &num_ncards)) + != STATUS_OK) { + fatal("rm_set_data: RM_PartitionBPNum: %s", bgl_err_str(rc)); + } + + itr = list_iterator_create(bgl_record->bgl_part_list); + pa_node = list_next(itr); + list_iterator_destroy(itr); + + if (_get_bp_by_location(bgl, pa_node->coord, &curr_bp) + == SLURM_ERROR) { + fatal("_get_bp_by_location()"); + } + + /* Set the one BP */ + if ((rc = rm_set_data(bgl_record->bgl_part, + RM_PartitionBPNum, + &bgl_record->bp_count)) + != STATUS_OK) { + fatal("rm_set_data: RM_PartitionBPNum: %s", bgl_err_str(rc)); + return SLURM_ERROR; + } + if ((rc = rm_set_data(bgl_record->bgl_part, + RM_PartitionFirstBP, + curr_bp)) + != STATUS_OK) { + fatal("rm_set_data(" + "RM_PartitionFirstBP): %s", + bgl_err_str(rc)); + return SLURM_ERROR; + } + + /* find the bp_id of the bp to get the nodecards */ + if ((rc = rm_get_data(curr_bp, RM_BPID, &bp_id)) + != STATUS_OK) { + error("rm_get_data(): %d", rc); + return SLURM_ERROR; + } + + if ((rc = rm_get_nodecards(bp_id, &ncard_list)) + != STATUS_OK) { + error("rm_get_nodecards(%s): %d", + bp_id, rc); + return SLURM_ERROR; + } + + if((rc = rm_get_data(ncard_list, RM_NodeCardListSize, &num)) + != STATUS_OK) { + error("rm_get_data(RM_NodeCardListSize): %s", bgl_err_str(rc)); + return SLURM_ERROR; + } + num_ncards = 0; + for(i=0; i<num; i++) { + if (i) { + if ((rc = rm_get_data(ncard_list, + RM_NodeCardListNext, + &ncard)) != STATUS_OK) { + error("rm_get_data(RM_NodeCardListNext): %s", + rc); + rc = SLURM_ERROR; + goto cleanup; + } + } else { + if ((rc = rm_get_data(ncard_list, + RM_NodeCardListFirst, + &ncard)) != STATUS_OK) { + error("rm_get_data(RM_NodeCardListFirst: %s", + rc); + rc = SLURM_ERROR; + goto cleanup; + } + } + + if ((rc = rm_get_data(ncard, + RM_NodeCardQuarter, + &quarter)) != STATUS_OK) { + error("rm_get_data(PartitionID): %d",rc); + rc = SLURM_ERROR; + goto cleanup; + } + if(bgl_record->quarter != quarter) + continue; + if (num_ncards) { + if ((rc = rm_set_data(bgl_record->bgl_part, + RM_PartitionNextNodeCard, + ncard)) + != STATUS_OK) { + fatal("rm_set_data(" + "RM_PartitionNextNodeCard): %s", + bgl_err_str(rc)); + + } + } else { + if ((rc = rm_set_data(bgl_record->bgl_part, + RM_PartitionFirstNodeCard, + ncard)) + != STATUS_OK) { + fatal("rm_set_data(" + "RM_PartitionFirstNodeCard): %s", + bgl_err_str(rc)); + } + } + num_ncards++; + if(num_ncards == 4) + break; + } +cleanup: + if ((rc = rm_free_nodecard_list(ncard_list)) != STATUS_OK) { + error("rm_free_nodecard_list(): %s", bgl_err_str(rc)); + return SLURM_ERROR; + } + + debug("making the small partition"); + return rc; +} + /** * connect the given switch up with the given connections */ @@ -427,12 +571,10 @@ extern int configure_partition_switches(bgl_record_t * bgl_record) RM_PartitionFirstBP, curr_bp)) != STATUS_OK) { + list_iterator_destroy(bgl_itr); fatal("rm_set_data(" "RM_PartitionFirstBP): %s", bgl_err_str(rc)); - list_iterator_destroy(bgl_itr); - rc = SLURM_ERROR; - goto cleanup; } first_bp = 0; } else { @@ -440,21 +582,17 @@ extern int configure_partition_switches(bgl_record_t * bgl_record) RM_PartitionNextBP, curr_bp)) != STATUS_OK) { + list_iterator_destroy(bgl_itr); fatal("rm_set_data(RM_PartitionNextBP)" ": %s", bgl_err_str(rc)); - list_iterator_destroy(bgl_itr); - rc = SLURM_ERROR; - goto cleanup; } } } if ((rc = rm_get_data(curr_bp, RM_BPID, &bpid)) != STATUS_OK) { - fatal("rm_get_data: RM_BPID: %s", bgl_err_str(rc)); list_iterator_destroy(bgl_itr); - rc = SLURM_ERROR; - goto cleanup; + fatal("rm_get_data: RM_BPID: %s", bgl_err_str(rc)); } if(!bpid) { @@ -468,31 +606,25 @@ extern int configure_partition_switches(bgl_record_t * bgl_record) if ((rc = rm_get_data(bgl, RM_NextSwitch, &curr_switch)) != STATUS_OK) { + list_iterator_destroy(bgl_itr); fatal("rm_get_data: RM_NextSwitch: %s", bgl_err_str(rc)); - list_iterator_destroy(bgl_itr); - rc = SLURM_ERROR; - goto cleanup; } } else { if ((rc = rm_get_data(bgl, RM_FirstSwitch, &curr_switch)) != STATUS_OK) { + list_iterator_destroy(bgl_itr); fatal("rm_get_data: " "RM_FirstSwitch: %s", bgl_err_str(rc)); - list_iterator_destroy(bgl_itr); - rc = SLURM_ERROR; - goto cleanup; } } if ((rc = rm_get_data(curr_switch, RM_SwitchBPID, &curr_bpid)) != STATUS_OK) { + list_iterator_destroy(bgl_itr); fatal("rm_get_data: RM_SwitchBPID: %s", bgl_err_str(rc)); - list_iterator_destroy(bgl_itr); - rc = SLURM_ERROR; - goto cleanup; } if(!curr_bpid) { @@ -515,7 +647,7 @@ extern int configure_partition_switches(bgl_record_t * bgl_record) if(found_bpid==PA_SYSTEM_DIMENSIONS) { - debug2("adding midplane %d%d%d\n", + debug2("adding midplane %d%d%d", bgl_bp->coord[X], bgl_bp->coord[Y], bgl_bp->coord[Z]); @@ -533,7 +665,7 @@ extern int configure_partition_switches(bgl_record_t * bgl_record) /* rc = SLURM_ERROR; */ /* goto cleanup; */ /* } */ - debug2("adding switch dim %d\n", + debug2("adding switch dim %d", bgl_switch->dim); if (_add_switch_conns(coord_switch @@ -553,15 +685,13 @@ extern int configure_partition_switches(bgl_record_t * bgl_record) coord_switch [bgl_switch->dim])) != STATUS_OK) { + list_iterator_destroy( + switch_itr); + list_iterator_destroy(bgl_itr); fatal("rm_set_data(" "RM_PartitionFirst" "Switch): %s", bgl_err_str(rc)); - list_iterator_destroy( - switch_itr); - list_iterator_destroy(bgl_itr); - rc = SLURM_ERROR; - goto cleanup; } first_switch = 0; @@ -572,15 +702,13 @@ extern int configure_partition_switches(bgl_record_t * bgl_record) coord_switch [bgl_switch->dim])) != STATUS_OK) { + list_iterator_destroy( + switch_itr); + list_iterator_destroy(bgl_itr); fatal("rm_set_data(" "RM_PartitionNext" "Switch:) %s", bgl_err_str(rc)); - list_iterator_destroy( - switch_itr); - list_iterator_destroy(bgl_itr); - rc = SLURM_ERROR; - goto cleanup; } } } diff --git a/src/plugins/select/bluegene/bluegene.c b/src/plugins/select/bluegene/bluegene.c index f627f9fb3bf..d69798f9cf3 100644 --- a/src/plugins/select/bluegene/bluegene.c +++ b/src/plugins/select/bluegene/bluegene.c @@ -63,14 +63,17 @@ static int _update_bgl_record_state(); /* some local functions */ #ifdef HAVE_BGL static int _addto_node_list(bgl_record_t *bgl_record, int *start, int *end); +static int _update_bgl_record_state(List bgl_destroy_list); #endif static void _set_bgl_lists(); static int _validate_config_nodes(void); static int _bgl_record_cmpf_inc(bgl_record_t* rec_a, bgl_record_t* rec_b); +static int _delete_old_partitions(void); +static char *_get_bgl_conf(void); +static void _strip_13_10(char *line); static int _parse_bgl_spec(char *in_line); static void _process_nodes(bgl_record_t *bgl_record); static int _reopen_bridge_log(void); -static void _strip_13_10(char *line); /* Initialize all plugin variables */ extern int init_bgl(void) @@ -530,10 +533,10 @@ extern int create_static_partitions(List part_list) if(bgl_list) { itr = list_iterator_create(bgl_list); while ((bgl_record = (bgl_record_t *) list_next(itr)) - != NULL) { - + != NULL) { if(bgl_record->bp_count>0 - && !bgl_record->full_partition) { + && !bgl_record->full_partition + && bgl_record->cnodes_per_bp == procs_per_node) { debug("adding %s %d%d%d", bgl_record->nodes, bgl_record->start[X], @@ -569,8 +572,16 @@ extern int create_static_partitions(List part_list) bgl_found_part_list); while ((found_record = (bgl_record_t*) list_next(itr_found)) != NULL) { - if (!strcmp(bgl_record->nodes, - found_record->nodes)) { + /*printf("%s %d %s %d\n",*/ +/* bgl_record->nodes, */ +/* bgl_record->quarter, */ +/* found_record->nodes, */ +/* found_record->quarter); */ + + if ((!strcmp(bgl_record->nodes, + found_record->nodes)) + && (bgl_record->quarter == + found_record->quarter)) { /* don't reboot this one */ break; } @@ -617,7 +628,8 @@ extern int create_static_partitions(List part_list) bgl_record->geo[X] = DIM_SIZE[X]-1; bgl_record->geo[Y] = DIM_SIZE[Y]-1; bgl_record->geo[Z] = DIM_SIZE[Z]-1; - + bgl_record->quarter = -1; + if(bgl_found_part_list) { itr = list_iterator_create(bgl_found_part_list); while ((found_record = (bgl_record_t *) list_next(itr)) @@ -681,6 +693,7 @@ extern int create_static_partitions(List part_list) } xfree(name); bgl_record->node_use = SELECT_COPROCESSOR_MODE; + bgl_record->cnodes_per_bp = procs_per_node; #ifdef HAVE_BGL_FILES if((rc = configure_partition(bgl_record)) == SLURM_ERROR) { slurm_mutex_unlock(&part_state_mutex); @@ -832,6 +845,123 @@ extern void *mult_destroy_part(void *args) return NULL; } +/* + * Read and process the bluegene.conf configuration file so to interpret what + * partitions are static/dynamic, torus/mesh, etc. + */ +extern int read_bgl_conf(void) +{ + FILE *bgl_spec_file; /* pointer to input data file */ + int line_num; /* line number in input file */ + char in_line[BUFSIZE]; /* input line */ + int i, j, error_code = SLURM_SUCCESS; + static time_t last_config_update = (time_t) 0; + struct stat config_stat; + + debug("Reading the bluegene.conf file"); + + /* check if config file has changed */ + if (!bgl_conf) + bgl_conf = _get_bgl_conf(); + if (stat(bgl_conf, &config_stat) < 0) + fatal("can't stat bluegene.conf file %s: %m", bgl_conf); + if (last_config_update) { + if(last_config_update == config_stat.st_mtime) + debug("bluegene.conf unchanged"); + else + debug("bluegene.conf changed, doing nothing"); + _reopen_bridge_log(); + last_config_update = config_stat.st_mtime; + return SLURM_SUCCESS; + } + last_config_update = config_stat.st_mtime; + + /* initialization */ + /* bgl_conf defined in bgl_node_alloc.h */ + bgl_spec_file = fopen(bgl_conf, "r"); + if (bgl_spec_file == NULL) + fatal("_read_bgl_conf error opening file %s, %m", + bgl_conf); + + _set_bgl_lists(); + + /* process the data file */ + line_num = 0; + while (fgets(in_line, BUFSIZE, bgl_spec_file) != NULL) { + line_num++; + _strip_13_10(in_line); + if (strlen(in_line) >= (BUFSIZE - 1)) { + error("_read_bgl_config line %d, of input file %s " + "too long", line_num, bgl_conf); + fclose(bgl_spec_file); + xfree(bgl_conf); + return E2BIG; + } + + /* everything after a non-escaped "#" is a comment */ + /* replace comment flag "#" with an end of string (NULL) */ + /* escape sequence "\#" translated to "#" */ + for (i = 0; i < BUFSIZE; i++) { + if (in_line[i] == (char) NULL) + break; + if (in_line[i] != '#') + continue; + if ((i > 0) && (in_line[i - 1] == '\\')) { + for (j = i; j < BUFSIZE; j++) { + in_line[j - 1] = in_line[j]; + } + continue; + } + in_line[i] = (char) NULL; + break; + } + + /* parse what is left, non-comments */ + /* partition configuration parameters */ + error_code = _parse_bgl_spec(in_line); + + /* report any leftover strings on input line */ + report_leftover(in_line, line_num); + } + fclose(bgl_spec_file); + xfree(bgl_conf); + + if (!bluegene_blrts) + fatal("BlrtsImage not configured in bluegene.conf"); + if (!bluegene_linux) + fatal("LinuxImage not configured in bluegene.conf"); + if (!bluegene_mloader) + fatal("MloaderImage not configured in bluegene.conf"); + if (!bluegene_ramdisk) + fatal("RamDiskImage not configured in bluegene.conf"); + if (!bridge_api_file) + info("BridgeAPILogFile not configured in bluegene.conf"); + else + _reopen_bridge_log(); + if (!numpsets) + info("Warning: Numpsets not configured in bluegene.conf"); +//#if 0 + /* Check to see if the configs we have are correct */ + if (_validate_config_nodes() == SLURM_ERROR) { + _delete_old_partitions(); + } +//#endif + /* looking for partitions only I created */ + if (create_static_partitions(NULL) == SLURM_ERROR) { + /* error in creating the static partitions, so + * partitions referenced by submitted jobs won't + * correspond to actual slurm partitions/bgl + * partitions. + */ + fatal("Error, could not create the static partitions"); + return SLURM_ERROR; + } + debug("Partitions have finished being created."); + partitions_are_created = 1; + + return error_code; +} + #ifdef HAVE_BGL static int _addto_node_list(bgl_record_t *bgl_record, int *start, int *end) { @@ -870,7 +1000,105 @@ static int _addto_node_list(bgl_record_t *bgl_record, int *start, int *end) } return node_count; } -#endif + +static int _update_bgl_record_state(List bgl_destroy_list) +{ + rm_partition_state_flag_t part_state = PARTITION_ALL_FLAG; + char *name = NULL; + rm_partition_list_t *part_list = NULL; + int j, rc, func_rc = SLURM_SUCCESS, num_parts = 0; + rm_partition_state_t state; + rm_partition_t *part_ptr = NULL; + ListIterator itr; + bgl_record_t* bgl_record = NULL; + + if(!bgl_destroy_list) { + return SLURM_SUCCESS; + } + + if ((rc = rm_get_partitions_info(part_state, &part_list)) + != STATUS_OK) { + error("rm_get_partitions_info(): %s", bgl_err_str(rc)); + return SLURM_ERROR; + } + + if ((rc = rm_get_data(part_list, RM_PartListSize, &num_parts)) + != STATUS_OK) { + error("rm_get_data(RM_PartListSize): %s", bgl_err_str(rc)); + func_rc = SLURM_ERROR; + num_parts = 0; + } + + for (j=0; j<num_parts; j++) { + if (j) { + if ((rc = rm_get_data(part_list, + RM_PartListNextPart, + &part_ptr)) + != STATUS_OK) { + error("rm_get_data(RM_PartListNextPart): %s", + bgl_err_str(rc)); + func_rc = SLURM_ERROR; + break; + } + } else { + if ((rc = rm_get_data(part_list, + RM_PartListFirstPart, + &part_ptr)) + != STATUS_OK) { + error("rm_get_data(RM_PartListFirstPart: %s", + bgl_err_str(rc)); + func_rc = SLURM_ERROR; + break; + } + } + if ((rc = rm_get_data(part_ptr, + RM_PartitionID, + &name)) + != STATUS_OK) { + error("rm_get_data(RM_PartitionID): %s", + bgl_err_str(rc)); + func_rc = SLURM_ERROR; + break; + } + if (!name) { + error("RM_Partition is NULL"); + continue; + } + + itr = list_iterator_create(bgl_destroy_list); + while ((bgl_record = (bgl_record_t*) list_next(itr))) { + if(!bgl_record->bgl_part_id) + continue; + if(strcmp(bgl_record->bgl_part_id, name)) { + continue; + } + + slurm_mutex_lock(&part_state_mutex); + if ((rc = rm_get_data(part_ptr, + RM_PartitionState, + &state)) + != STATUS_OK) { + error("rm_get_data(RM_PartitionState): %s", + bgl_err_str(rc)); + } else if(bgl_record->state != state) { + debug("state of Partition %s was %d " + "and now is %d", + name, bgl_record->state, state); + bgl_record->state = state; + } + slurm_mutex_unlock(&part_state_mutex); + break; + } + list_iterator_destroy(itr); + free(name); + } + + if ((rc = rm_free_partition_list(part_list)) != STATUS_OK) { + error("rm_free_partition_list(): %s", bgl_err_str(rc)); + } + return func_rc; +} +#endif /* HAVE_BGL_FILES */ static void _set_bgl_lists() { @@ -945,6 +1173,9 @@ static int _validate_config_nodes(void) if (record->conn_type != init_record->conn_type) continue; /* wrong conn_type */ + if(record->quarter != + init_record->quarter) + continue; /* wrong quart */ record->bgl_part_id = xstrdup( init_record->bgl_part_id); record->state = init_record->state; @@ -1000,7 +1231,8 @@ static int _validate_config_nodes(void) if ((init_record->geo[X] == DIM_SIZE[X]) && (init_record->geo[Y] == DIM_SIZE[Y]) && (init_record->geo[Z] == DIM_SIZE[Z])) { - record = (bgl_record_t*) xmalloc(sizeof(bgl_record_t)); + record = (bgl_record_t*) + xmalloc(sizeof(bgl_record_t)); list_append(bgl_list, record); record->full_partition = 1; @@ -1027,6 +1259,10 @@ static int _validate_config_nodes(void) init_record->boot_state; record->switch_count = init_record->switch_count; + record->cnodes_per_bp = + init_record->cnodes_per_bp; + record->quarter = + init_record->quarter; if((record->bitmap = bit_copy(init_record->bitmap)) == NULL) { @@ -1233,123 +1469,6 @@ static char *_get_bgl_conf(void) return rc; } -/* - * Read and process the bluegene.conf configuration file so to interpret what - * partitions are static/dynamic, torus/mesh, etc. - */ -extern int read_bgl_conf(void) -{ - FILE *bgl_spec_file; /* pointer to input data file */ - int line_num; /* line number in input file */ - char in_line[BUFSIZE]; /* input line */ - int i, j, error_code = SLURM_SUCCESS; - static time_t last_config_update = (time_t) 0; - struct stat config_stat; - - debug("Reading the bluegene.conf file"); - - /* check if config file has changed */ - if (!bgl_conf) - bgl_conf = _get_bgl_conf(); - if (stat(bgl_conf, &config_stat) < 0) - fatal("can't stat bluegene.conf file %s: %m", bgl_conf); - if (last_config_update) { - if(last_config_update == config_stat.st_mtime) - debug("bluegene.conf unchanged"); - else - debug("bluegene.conf changed, doing nothing"); - _reopen_bridge_log(); - last_config_update = config_stat.st_mtime; - return SLURM_SUCCESS; - } - last_config_update = config_stat.st_mtime; - - /* initialization */ - /* bgl_conf defined in bgl_node_alloc.h */ - bgl_spec_file = fopen(bgl_conf, "r"); - if (bgl_spec_file == NULL) - fatal("_read_bgl_conf error opening file %s, %m", - bgl_conf); - - _set_bgl_lists(); - - /* process the data file */ - line_num = 0; - while (fgets(in_line, BUFSIZE, bgl_spec_file) != NULL) { - line_num++; - _strip_13_10(in_line); - if (strlen(in_line) >= (BUFSIZE - 1)) { - error("_read_bgl_config line %d, of input file %s " - "too long", line_num, bgl_conf); - fclose(bgl_spec_file); - xfree(bgl_conf); - return E2BIG; - } - - /* everything after a non-escaped "#" is a comment */ - /* replace comment flag "#" with an end of string (NULL) */ - /* escape sequence "\#" translated to "#" */ - for (i = 0; i < BUFSIZE; i++) { - if (in_line[i] == (char) NULL) - break; - if (in_line[i] != '#') - continue; - if ((i > 0) && (in_line[i - 1] == '\\')) { - for (j = i; j < BUFSIZE; j++) { - in_line[j - 1] = in_line[j]; - } - continue; - } - in_line[i] = (char) NULL; - break; - } - - /* parse what is left, non-comments */ - /* partition configuration parameters */ - error_code = _parse_bgl_spec(in_line); - - /* report any leftover strings on input line */ - report_leftover(in_line, line_num); - } - fclose(bgl_spec_file); - xfree(bgl_conf); - - if (!bluegene_blrts) - fatal("BlrtsImage not configured in bluegene.conf"); - if (!bluegene_linux) - fatal("LinuxImage not configured in bluegene.conf"); - if (!bluegene_mloader) - fatal("MloaderImage not configured in bluegene.conf"); - if (!bluegene_ramdisk) - fatal("RamDiskImage not configured in bluegene.conf"); - if (!bridge_api_file) - info("BridgeAPILogFile not configured in bluegene.conf"); - else - _reopen_bridge_log(); - if (!numpsets) - info("Warning: Numpsets not configured in bluegene.conf"); -//#if 0 - /* Check to see if the configs we have are correct */ - if (_validate_config_nodes() == SLURM_ERROR) { - _delete_old_partitions(); - } -//#endif - /* looking for partitions only I created */ - if (create_static_partitions(NULL) == SLURM_ERROR) { - /* error in creating the static partitions, so - * partitions referenced by submitted jobs won't - * correspond to actual slurm partitions/bgl - * partitions. - */ - fatal("Error, could not create the static partitions"); - return SLURM_ERROR; - } - debug("Partitions have finished being created."); - partitions_are_created = 1; - - return error_code; -} - /* Explicitly strip out new-line and carriage-return */ static void _strip_13_10(char *line) { @@ -1378,13 +1497,17 @@ static void _strip_13_10(char *line) static int _parse_bgl_spec(char *in_line) { int error_code = SLURM_SUCCESS; - char *nodes = NULL, *conn_type = NULL, *node_use = NULL; + char *nodes = NULL, *conn_type = NULL; char *blrts_image = NULL, *linux_image = NULL; char *mloader_image = NULL, *ramdisk_image = NULL; char *api_file = NULL; - int pset_num=-1, api_verb=-1; + int pset_num=-1, api_verb=-1, node_split=-1; bgl_record_t *bgl_record = NULL; + bgl_record_t *small_bgl_record = NULL; + pa_node_t *pa_node = NULL; struct passwd *pw_ent = NULL; + ListIterator itr; + int i=0; //info("in_line = %s",in_line); error_code = slurm_parser(in_line, @@ -1397,7 +1520,7 @@ static int _parse_bgl_spec(char *in_line) "Nodes=", 's', &nodes, "RamDiskImage=", 's', &ramdisk_image, "Type=", 's', &conn_type, - "Use=", 's', &node_use, + "Split=", 'd', &node_split, "END"); if (error_code) @@ -1429,7 +1552,6 @@ static int _parse_bgl_spec(char *in_line) bridge_api_file = api_file; api_file = NULL; /* nothing left to xfree */ } - if (pset_num > 0) { numpsets = pset_num; } @@ -1440,8 +1562,8 @@ static int _parse_bgl_spec(char *in_line) /* Process node information */ if (!nodes) return SLURM_SUCCESS; /* not partition line. */ + bgl_record = (bgl_record_t*) xmalloc(sizeof(bgl_record_t)); - list_append(bgl_list, bgl_record); bgl_record->user_name = xstrdup(slurmctld_conf.slurm_user_name); if((pw_ent = getpwnam(bgl_record->user_name)) == NULL) { @@ -1466,7 +1588,51 @@ static int _parse_bgl_spec(char *in_line) xfree(conn_type); bgl_record->node_use = SELECT_COPROCESSOR_MODE; + bgl_record->cnodes_per_bp = procs_per_node; + bgl_record->quarter = -1; + /* number to split 2= 2x256 4= 4x128 6= 4x128, 2x256 */ + if(node_split == 1) { + + itr = list_iterator_create(bgl_record->bgl_part_list); + while ((pa_node = list_next(itr)) != NULL) { + for(i=0; i<4 ; i++) { + small_bgl_record = + (bgl_record_t*) + xmalloc(sizeof(bgl_record_t)); + list_append(bgl_list, small_bgl_record); + + small_bgl_record->user_name = + xstrdup(bgl_record->user_name); + small_bgl_record->user_uid = + bgl_record->user_uid; + small_bgl_record->bgl_part_list = + list_create(NULL); + small_bgl_record->hostlist = + hostlist_create(NULL); + small_bgl_record->nodes = + xstrdup(bgl_record->nodes); + + _process_nodes(small_bgl_record); + + small_bgl_record->conn_type = + SELECT_SMALL; + + small_bgl_record->node_use = + SELECT_COPROCESSOR_MODE; + + small_bgl_record->cnodes_per_bp = + procs_per_node/4; + small_bgl_record->quarter = i; + } + } + list_iterator_destroy(itr); + destroy_bgl_record(bgl_record); + } else + list_append(bgl_list, bgl_record); + + +no_split: #if _DEBUG debug("_parse_bgl_spec: added nodes=%s type=%s use=%s", bgl_record->nodes, @@ -1613,102 +1779,3 @@ static int _reopen_bridge_log(void) return SLURM_SUCCESS; } -#ifdef HAVE_BGL_FILES -static int _update_bgl_record_state(List bgl_destroy_list) -{ - rm_partition_state_flag_t part_state = PARTITION_ALL_FLAG; - char *name = NULL; - rm_partition_list_t *part_list = NULL; - int j, rc, func_rc = SLURM_SUCCESS, num_parts = 0; - rm_partition_state_t state; - rm_partition_t *part_ptr = NULL; - ListIterator itr; - bgl_record_t* bgl_record = NULL; - - if(!bgl_destroy_list) { - return SLURM_SUCCESS; - } - - if ((rc = rm_get_partitions_info(part_state, &part_list)) - != STATUS_OK) { - error("rm_get_partitions_info(): %s", bgl_err_str(rc)); - return SLURM_ERROR; - } - - if ((rc = rm_get_data(part_list, RM_PartListSize, &num_parts)) - != STATUS_OK) { - error("rm_get_data(RM_PartListSize): %s", bgl_err_str(rc)); - func_rc = SLURM_ERROR; - num_parts = 0; - } - - for (j=0; j<num_parts; j++) { - if (j) { - if ((rc = rm_get_data(part_list, - RM_PartListNextPart, - &part_ptr)) - != STATUS_OK) { - error("rm_get_data(RM_PartListNextPart): %s", - bgl_err_str(rc)); - func_rc = SLURM_ERROR; - break; - } - } else { - if ((rc = rm_get_data(part_list, - RM_PartListFirstPart, - &part_ptr)) - != STATUS_OK) { - error("rm_get_data(RM_PartListFirstPart: %s", - bgl_err_str(rc)); - func_rc = SLURM_ERROR; - break; - } - } - if ((rc = rm_get_data(part_ptr, - RM_PartitionID, - &name)) - != STATUS_OK) { - error("rm_get_data(RM_PartitionID): %s", - bgl_err_str(rc)); - func_rc = SLURM_ERROR; - break; - } - if (!name) { - error("RM_Partition is NULL"); - continue; - } - - itr = list_iterator_create(bgl_destroy_list); - while ((bgl_record = (bgl_record_t*) list_next(itr))) { - if(!bgl_record->bgl_part_id) - continue; - if(strcmp(bgl_record->bgl_part_id, name)) { - continue; - } - - slurm_mutex_lock(&part_state_mutex); - if ((rc = rm_get_data(part_ptr, - RM_PartitionState, - &state)) - != STATUS_OK) { - error("rm_get_data(RM_PartitionState): %s", - bgl_err_str(rc)); - } else if(bgl_record->state != state) { - debug("state of Partition %s was %d " - "and now is %d", - name, bgl_record->state, state); - bgl_record->state = state; - } - slurm_mutex_unlock(&part_state_mutex); - break; - } - list_iterator_destroy(itr); - free(name); - } - - if ((rc = rm_free_partition_list(part_list)) != STATUS_OK) { - error("rm_free_partition_list(): %s", bgl_err_str(rc)); - } - return func_rc; -} -#endif /* HAVE_BGL_FILES */ diff --git a/src/plugins/select/bluegene/bluegene.h b/src/plugins/select/bluegene/bluegene.h index efabdf4e11c..cf4d4c64bf8 100644 --- a/src/plugins/select/bluegene/bluegene.h +++ b/src/plugins/select/bluegene/bluegene.h @@ -61,6 +61,7 @@ extern pthread_mutex_t part_state_mutex; extern int num_part_to_free; extern int num_part_freed; extern int partitions_are_created; +extern int procs_per_node; typedef int lifecycle_type_t; enum part_lifecycle {DYNAMIC, STATIC}; @@ -95,6 +96,8 @@ typedef struct bgl_record { partition */ int job_running; /* signal if there is a job running on the partition */ + int cnodes_per_bp; + int quarter; } bgl_record_t; typedef struct { @@ -189,19 +192,20 @@ extern char *bgl_err_str(status_t inx); */ extern int create_static_partitions(List part_list); +extern int bgl_free_partition(bgl_record_t *bgl_record); +extern void *mult_free_part(void *args); +extern void *mult_destroy_part(void *args); extern int read_bgl_conf(void); /* partition_sys.c */ /*****************************************************/ extern int configure_partition(bgl_record_t * bgl_conf_record); -extern int read_bgl_partitions(void); +extern int read_bgl_partitions(); /* bgl_switch_connections.c */ /*****************************************************/ +extern int configure_small_partition(bgl_record_t *bgl_record); extern int configure_partition_switches(bgl_record_t * bgl_conf_record); -extern int bgl_free_partition(bgl_record_t *bgl_record); -extern void *mult_free_part(void *args); -extern void *mult_destroy_part(void *args); #endif /* _BLUEGENE_H_ */ diff --git a/src/plugins/select/bluegene/partition_sys.c b/src/plugins/select/bluegene/partition_sys.c index 1e7a9afa37a..11b8dae75aa 100755 --- a/src/plugins/select/bluegene/partition_sys.c +++ b/src/plugins/select/bluegene/partition_sys.c @@ -51,6 +51,7 @@ List bgl_sys_allocated = NULL; static void _pre_allocate(bgl_record_t *bgl_record); static int _post_allocate(bgl_record_t *bgl_record); static int _post_bgl_init_read(void *object, void *arg); +static int _split_block(bgl_record_t *bgl_record); #define MAX_ADD_RETRY 2 @@ -93,6 +94,7 @@ static void _print_list(List list) static void _pre_allocate(bgl_record_t *bgl_record) { int rc; + int send_psets=numpsets; if ((rc = rm_set_data(bgl_record->bgl_part, RM_PartitionBlrtsImg, bluegene_blrts)) != STATUS_OK) @@ -113,13 +115,12 @@ static void _pre_allocate(bgl_record_t *bgl_record) if ((rc = rm_set_data(bgl_record->bgl_part, RM_PartitionConnection, &bgl_record->conn_type)) != STATUS_OK) error("rm_set_data(RM_PartitionConnection)", bgl_err_str(rc)); - - /* if ((rc = rm_set_data(bgl_record->bgl_part, RM_PartitionMode, */ -/* &bgl_record->node_use)) != STATUS_OK) */ -/* error("rm_set_data(RM_PartitionMode)", bgl_err_str(rc)); */ - + + if(bgl_record->cnodes_per_bp == (procs_per_node/4)) + send_psets = numpsets/4; + if ((rc = rm_set_data(bgl_record->bgl_part, RM_PartitionPsetsPerBP, - &numpsets)) != STATUS_OK) + &send_psets)) != STATUS_OK) error("rm_set_data(RM_PartitionPsetsPerBP)", bgl_err_str(rc)); if ((rc = rm_set_data(bgl_record->bgl_part, RM_PartitionUserName, @@ -144,7 +145,8 @@ static int _post_allocate(bgl_record_t *bgl_record) debug("adding partition\n"); for(i=0;i<MAX_ADD_RETRY; i++) { - if ((rc = rm_add_partition(bgl_record->bgl_part)) != STATUS_OK) { + if ((rc = rm_add_partition(bgl_record->bgl_part)) + != STATUS_OK) { error("rm_add_partition(): %s", bgl_err_str(rc)); rc = SLURM_ERROR; } else { @@ -197,14 +199,37 @@ static int _post_allocate(bgl_record_t *bgl_record) return rc; } +static int _post_bgl_init_read(void *object, void *arg) +{ + bgl_record_t *bgl_record = (bgl_record_t *) object; + int i = 1024; + bgl_record->nodes = xmalloc(i); + while (hostlist_ranged_string(bgl_record->hostlist, i, + bgl_record->nodes) < 0) { + i *= 2; + xrealloc(bgl_record->nodes, i); + } + + if (node_name2bitmap(bgl_record->nodes, + false, + &bgl_record->bitmap)) { + error("Unable to convert nodes %s to bitmap", + bgl_record->nodes); + } + //print_bgl_record(bgl_record); + + return SLURM_SUCCESS; +} extern int configure_partition(bgl_record_t *bgl_record) { /* new partition to be added */ rm_new_partition(&bgl_record->bgl_part); _pre_allocate(bgl_record); - - configure_partition_switches(bgl_record); + if(bgl_record->cnodes_per_bp < procs_per_node) + configure_small_partition(bgl_record); + else + configure_partition_switches(bgl_record); _post_allocate(bgl_record); return 1; @@ -230,7 +255,8 @@ int read_bgl_partitions() char *part_name = NULL; rm_partition_list_t *part_list = NULL; rm_partition_state_flag_t state = PARTITION_ALL_FLAG; - + rm_nodecard_t *ncard = NULL; + bool small = false; if ((rc = rm_set_serial(BGL_SERIAL)) != STATUS_OK) { error("rm_set_serial(): %s\n", bgl_err_str(rc)); @@ -303,18 +329,53 @@ int read_bgl_partitions() free(part_name); bgl_record->state = -1; - if ((rc = rm_get_data(part_ptr, RM_PartitionBPNum, &bp_cnt)) + bgl_record->quarter = -1; + + if ((rc = rm_get_data(part_ptr, + RM_PartitionBPNum, + &bp_cnt)) != STATUS_OK) { - error("rm_get_data(RM_BPNum): %s", bgl_err_str(rc)); + error("rm_get_data(RM_BPNum): %s", + bgl_err_str(rc)); bp_cnt = 0; } - + if(bp_cnt==0) goto clean_up; + if ((rc = rm_get_data(part_ptr, RM_PartitionSmall, &small)) + != STATUS_OK) { + error("rm_get_data(RM_BPNum): %s", bgl_err_str(rc)); + bp_cnt = 0; + } + if(small) { + if((rc = rm_get_data(part_ptr, + RM_PartitionFirstNodeCard, + &ncard)) + != STATUS_OK) { + error("rm_get_data(RM_FirstCard): %s", + bgl_err_str(rc)); + bp_cnt = 0; + } + if ((rc = rm_get_data(ncard, + RM_NodeCardQuarter, + &bgl_record->quarter)) != STATUS_OK) { + error("rm_get_data(CardQuarter): %d",rc); + bp_cnt = 0; + } + debug("%s is in quarter %d", + bgl_record->bgl_part_id, + bgl_record->quarter); + } + bgl_record->bgl_part_list = list_create(NULL); bgl_record->hostlist = hostlist_create(NULL); - + + /* this needs to be changed for small partitions, + we just don't know what they are suppose to look + like just yet. + */ + for (i=0; i<bp_cnt; i++) { if(i) { if ((rc = rm_get_data(part_ptr, @@ -351,7 +412,7 @@ int read_bgl_partitions() error("No BP ID was returned from database"); continue; } - + coord = find_bp_loc(bpid); free(bpid); @@ -368,13 +429,17 @@ int read_bgl_partitions() // need to get the 000x000 range for nodes // also need to get coords - - if ((rc = rm_get_data(part_ptr, RM_PartitionConnection, - &bgl_record->conn_type)) - != STATUS_OK) { - error("rm_get_data(RM_PartitionConnection): %s", - bgl_err_str(rc)); - } + if(small) + bgl_record->conn_type = SELECT_SMALL; + else + if ((rc = rm_get_data(part_ptr, + RM_PartitionConnection, + &bgl_record->conn_type)) + != STATUS_OK) { + error("rm_get_data" + "(RM_PartitionConnection): %s", + bgl_err_str(rc)); + } if ((rc = rm_get_data(part_ptr, RM_PartitionMode, &bgl_record->node_use)) != STATUS_OK) { @@ -457,7 +522,13 @@ int read_bgl_partitions() error("rm_get_data(RM_PartitionSwitchNum): %s", bgl_err_str(rc)); } - + + if(small) + bgl_record->cnodes_per_bp = procs_per_node/4; + else + bgl_record->cnodes_per_bp = procs_per_node; + + printf("got %d\n",bgl_record->cnodes_per_bp); bgl_record->part_lifecycle = STATIC; clean_up: if (bgl_recover @@ -473,27 +544,5 @@ clean_up: if (bgl_recover return rc; } -static int _post_bgl_init_read(void *object, void *arg) -{ - bgl_record_t *bgl_record = (bgl_record_t *) object; - int i = 1024; - bgl_record->nodes = xmalloc(i); - while (hostlist_ranged_string(bgl_record->hostlist, i, - bgl_record->nodes) < 0) { - i *= 2; - xrealloc(bgl_record->nodes, i); - } - - if (node_name2bitmap(bgl_record->nodes, - false, - &bgl_record->bitmap)) { - error("Unable to convert nodes %s to bitmap", - bgl_record->nodes); - } - //print_bgl_record(bgl_record); - - return SLURM_SUCCESS; -} - #endif diff --git a/src/plugins/select/bluegene/select_bluegene.c b/src/plugins/select/bluegene/select_bluegene.c index 71241cda26a..cdaec1c5ac1 100644 --- a/src/plugins/select/bluegene/select_bluegene.c +++ b/src/plugins/select/bluegene/select_bluegene.c @@ -30,6 +30,10 @@ #define HUGE_BUF_SIZE (1024*16) + +/* global */ +int procs_per_node = 512; + /* * These variables are required by the generic plugin interface. If they * are not found in the plugin, the plugin loader will ignore it. @@ -210,6 +214,9 @@ extern int select_p_job_init(List job_list) /* All initialization is performed by select_p_part_init() */ extern int select_p_node_init(struct node_record *node_ptr, int node_cnt) { + if(node_cnt>0) + if(node_ptr->cpus > 0) + procs_per_node = node_ptr->cpus; return SLURM_SUCCESS; } diff --git a/src/smap/partition_functions.c b/src/smap/partition_functions.c index 279394999aa..2fc3c727a05 100644 --- a/src/smap/partition_functions.c +++ b/src/smap/partition_functions.c @@ -42,10 +42,10 @@ typedef struct { enum node_use_type bgl_node_use; rm_partition_state_t state; int letter_num; - int start[PA_SYSTEM_DIMENSIONS]; - int end[PA_SYSTEM_DIMENSIONS]; List nodelist; int size; + int cnodes_per_bp; + int quarter; bool printed; } db2_block_info_t; @@ -62,7 +62,6 @@ static char *_part_state_str(rm_partition_state_t state); static int _print_text_part(partition_info_t *part_ptr, db2_block_info_t *db2_info_ptr); #ifdef HAVE_BGL -static int _set_start_finish(db2_block_info_t *db2_info_ptr); static void _block_list_del(void *object); static void _nodelist_del(void *object); static int _list_match_all(void *object, void *key); @@ -156,7 +155,7 @@ extern void get_slurm_part() extern void get_bgl_part() { #ifdef HAVE_BGL - int error_code, i, j, recs=0, count = 0; + int error_code, i, j, recs=0, count = 0, last_count = -1; static partition_info_msg_t *part_info_ptr = NULL; static partition_info_msg_t *new_part_ptr = NULL; static node_select_info_msg_t *bgl_info_ptr = NULL; @@ -259,8 +258,19 @@ extern void get_bgl_part() = new_bgl_ptr->bgl_info_array[i].conn_type; block_ptr->bgl_node_use = new_bgl_ptr->bgl_info_array[i].node_use; - _marknodes(block_ptr, i); - _set_start_finish(block_ptr); + block_ptr->cnodes_per_bp + = new_bgl_ptr->bgl_info_array[i].cnodes_per_bp; + block_ptr->quarter + = new_bgl_ptr->bgl_info_array[i].quarter; + if(block_ptr->quarter == -1 || last_count == -1) { + last_count++; + _marknodes(block_ptr, last_count); + } else + block_ptr->letter_num = last_count; + + if(block_ptr->bgl_conn_type == SELECT_SMALL) + block_ptr->size = 0; + } if (!params.no_header) @@ -494,8 +504,19 @@ static int _print_text_part(partition_info_t *part_ptr, char *nodes = NULL, time_buf[20]; if(!params.commandline) { - mvwprintw(pa_system_ptr->text_win, pa_system_ptr->ycord, - pa_system_ptr->xcord, "%c", part_ptr->root_only); + if((params.display == BGLPART) + && db2_info_ptr->quarter != -1) { + mvwprintw(pa_system_ptr->text_win, + pa_system_ptr->ycord, + pa_system_ptr->xcord, "%c.%d", + part_ptr->root_only, + db2_info_ptr->quarter); + } else { + mvwprintw(pa_system_ptr->text_win, + pa_system_ptr->ycord, + pa_system_ptr->xcord, "%c", + part_ptr->root_only); + } pa_system_ptr->xcord += 4; if (part_ptr->name) { @@ -594,9 +615,15 @@ static int _print_text_part(partition_info_t *part_ptr, pa_system_ptr->xcord += 10; } } - - mvwprintw(pa_system_ptr->text_win, pa_system_ptr->ycord, - pa_system_ptr->xcord, "%d", part_ptr->total_nodes); + if(part_ptr->total_nodes == 0) + mvwprintw(pa_system_ptr->text_win, + pa_system_ptr->ycord, + pa_system_ptr->xcord, ".25"); + else + mvwprintw(pa_system_ptr->text_win, + pa_system_ptr->ycord, + pa_system_ptr->xcord, "%d", + part_ptr->total_nodes); pa_system_ptr->xcord += 7; tempxcord = pa_system_ptr->xcord; @@ -633,7 +660,14 @@ static int _print_text_part(partition_info_t *part_ptr, i++; } - + if((params.display == BGLPART) + && (db2_info_ptr->quarter != -1)) { + mvwprintw(pa_system_ptr->text_win, + pa_system_ptr->ycord, + pa_system_ptr->xcord, ".%d", + db2_info_ptr->quarter); + } + pa_system_ptr->xcord = 1; pa_system_ptr->ycord++; } else { @@ -657,8 +691,7 @@ static int _print_text_part(partition_info_t *part_ptr, } width = strlen(time_buf); - printf("%9.9s ", time_buf); - + printf("%9.9s ", time_buf); } } @@ -675,7 +708,6 @@ static int _print_text_part(partition_info_t *part_ptr, db2_info_ptr->bgl_conn_type)); printf("%9.9s ", _convert_node_use( db2_info_ptr->bgl_node_use)); - } } @@ -687,7 +719,12 @@ static int _print_text_part(partition_info_t *part_ptr, nodes = part_ptr->allow_groups; else nodes = part_ptr->nodes; - printf("%s\n",nodes); + + if((params.display == BGLPART) + && (db2_info_ptr->quarter != -1)) + printf("%s.%d\n", nodes, db2_info_ptr->quarter); + else + printf("%s\n",nodes); } return printed; } @@ -698,14 +735,10 @@ static void _block_list_del(void *object) db2_block_info_t *block_ptr = (db2_block_info_t *)object; if (block_ptr) { - if(block_ptr->bgl_user_name) - xfree(block_ptr->bgl_user_name); - if(block_ptr->bgl_block_name) - xfree(block_ptr->bgl_block_name); - if(block_ptr->slurm_part_name) - xfree(block_ptr->slurm_part_name); - if(block_ptr->nodes) - xfree(block_ptr->nodes); + xfree(block_ptr->bgl_user_name); + xfree(block_ptr->bgl_block_name); + xfree(block_ptr->slurm_part_name); + xfree(block_ptr->nodes); if(block_ptr->nodelist) list_destroy(block_ptr->nodelist); @@ -727,50 +760,6 @@ static int _list_match_all(void *object, void *key) return 1; } - -static int _set_start_finish(db2_block_info_t *db2_info_ptr) -{ - int number; - int j=0; - - while (db2_info_ptr->nodes[j] != '\0') { - if ((db2_info_ptr->nodes[j] == '[') - && (db2_info_ptr->nodes[j+8] == ']') - && ((db2_info_ptr->nodes[j+4] == 'x') - || (db2_info_ptr->nodes[j+4] == '-'))) { - j++; - number = atoi(db2_info_ptr->nodes + j); - db2_info_ptr->start[X] = number / 100; - db2_info_ptr->start[Y] = (number % 100) / 10; - db2_info_ptr->start[Z] = (number % 10); - j += 4; - number = atoi(db2_info_ptr->nodes + j); - db2_info_ptr->end[X] = number / 100; - db2_info_ptr->end[Y] = (number % 100) / 10; - db2_info_ptr->end[Z] = (number % 10); - j += 5; - if(db2_info_ptr->nodes[j] != ',') - break; - } else if((db2_info_ptr->nodes[j] < 58 - && db2_info_ptr->nodes[j] > 47) - && db2_info_ptr->nodes[j-1] != '[') { - number = atoi(db2_info_ptr->nodes + j); - db2_info_ptr->start[X] = db2_info_ptr->end[X] - = number / 100; - db2_info_ptr->start[Y] = db2_info_ptr->end[Y] - = (number % 100) / 10; - db2_info_ptr->start[Z] = db2_info_ptr->end[Z] - = (number % 10); - j+=3; - if(db2_info_ptr->nodes[j] != ',') - break; - } - - j++; - } - return 1; -} - static int _in_slurm_partition(List slurm_nodes, List bgl_nodes) { ListIterator slurm_itr; @@ -790,9 +779,7 @@ static int _in_slurm_partition(List slurm_nodes, List bgl_nodes) && (coord[Z] == slurm_coord[Z])) { found=1; break; - } - - + } } if(!found) { break; @@ -917,6 +904,8 @@ static char* _convert_conn_type(enum connection_type conn_type) return "MESH"; case (SELECT_TORUS): return "TORUS"; + case (SELECT_SMALL): + return "SMALL"; case (SELECT_NAV): return "NAV"; } -- GitLab