From 8cc1197be83ada5ae211d2ef7789831bad5d2a9c Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Fri, 1 Oct 2004 01:10:29 +0000 Subject: [PATCH] Major cleanup of select/bluegene, fixed a couple of memory leaks. Still looking for reason the plugin adds 15MB to slurmctld's size (compared with select/linear). --- src/plugins/select/bluegene/Makefile.am | 2 +- src/plugins/select/bluegene/bluegene.c | 409 +++++------------- src/plugins/select/bluegene/bluegene.h | 23 +- src/plugins/select/bluegene/partition_sys.c | 43 +- src/plugins/select/bluegene/select_bluegene.c | 14 +- 5 files changed, 157 insertions(+), 334 deletions(-) diff --git a/src/plugins/select/bluegene/Makefile.am b/src/plugins/select/bluegene/Makefile.am index 1f3bf2a25da..71ce93a09ca 100644 --- a/src/plugins/select/bluegene/Makefile.am +++ b/src/plugins/select/bluegene/Makefile.am @@ -4,7 +4,7 @@ CPPFLAGS = -DBLUEGENE_CONFIG_FILE=\"$(sysconfdir)/bluegene.conf\" AUTOMAKE_OPTIONS = foreign -PLUGIN_FLAGS = -module -avoid-version --export-dynamic -lm +PLUGIN_FLAGS = -module -avoid-version --export-dynamic -lm INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common diff --git a/src/plugins/select/bluegene/bluegene.c b/src/plugins/select/bluegene/bluegene.c index 8a6b12acc13..00b37849643 100644 --- a/src/plugins/select/bluegene/bluegene.c +++ b/src/plugins/select/bluegene/bluegene.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * bluegene.c - bgl node allocation plugin. ***************************************************************************** - * Copyright (C) 2003 The Regents of the University of California. + * Copyright (C) 2004 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Dan Phung <phung4@llnl.gov> * @@ -24,6 +24,7 @@ \*****************************************************************************/ #include <stdlib.h> + #include "src/slurmctld/proc_req.h" #include "src/common/list.h" #include "src/common/macros.h" @@ -35,13 +36,16 @@ #include "partition_sys.h" #include "src/common/hostlist.h" -#define RANGE_MAX 8192 #define BUFSIZE 4096 #define BITSIZE 128 -#define SLEEP_TIME 3 /* BLUEGENE_PTHREAD checks every 3 secs */ +#define SLEEP_TIME 60 /* BLUEGENE_PTHREAD checks every 60 secs */ char* bgl_conf = BLUEGENE_CONFIG_FILE; +/* Global variables */ +List bgl_list; /* list of bgl_record entries */ +List bgl_conf_list; /* list of bgl_conf_record entries */ + #define SWAP(a,b,t) \ _STMT_START { \ (t) = (a); \ @@ -50,48 +54,22 @@ _STMT_START { \ } _STMT_END /** some local functions */ - -/** */ +static int _copy_slurm_partition_list(List slurm_part_list); static int _find_best_partition_match(struct job_record* job_ptr, bitstr_t* slurm_part_bitmap, int min_nodes, int max_nodes, int spec, bgl_record_t** found_bgl_record); -/** */ static int _parse_request(char* request_string, partition_t** request); -/** */ -static int _get_request_dimensions(int* bl, int* tr, uint16_t** dim); -/** */ -static int _extract_range(char* request, char** result); -/** */ static int _wire_bgl_partitions(); -/** */ static int _bgl_record_cmpf_inc(bgl_record_t* rec_a, bgl_record_t* rec_b); -/** */ static int _bgl_record_cmpf_dec(bgl_record_t* rec_a, bgl_record_t* rec_b); -/** - * to be used by list object to destroy the array elements - */ static void _destroy_bgl_record(void* object); -/** */ static void _destroy_bgl_conf_record(void* object); - -/** */ static void _print_bitmap(bitstr_t* bitmap); - -/** */ static void _process_config(); -/** */ static int _parse_bgl_spec(char *in_line); -/** */ -static int _copy_slurm_partition_list(); -/** */ static void _find_part_type(char* nodes, rm_partition_t** return_part_type); -/** */ static int _listfindf_conf_part_record(bgl_conf_record_t* record, char *nodes); -/** */ -static int _compute_part_size(char* nodes); -/** */ static void _update_bgl_node_bitmap(); -/** */ static void _diff_tv_str(struct timeval *tv1,struct timeval *tv2, char *tv_str, int len_tv_str); @@ -105,7 +83,7 @@ static void _rotate_geo(uint16_t *req_geometry, int rot_cnt); * OUT - (global, to slurmctld): Table of partitionIDs to geometries * RET - success of fitting all configurations */ -extern int create_static_partitions() +extern int create_static_partitions(List part_list) { /** purge the old list. Later on, it may be more efficient just to amend the list */ if (bgl_list){ @@ -114,7 +92,7 @@ extern int create_static_partitions() bgl_list = list_create(_destroy_bgl_record); /** copy the slurm conf partition info, this will fill in bgl_list */ - if (_copy_slurm_partition_list()){ + if (_copy_slurm_partition_list(part_list)){ return SLURM_ERROR; } @@ -172,9 +150,10 @@ static void _process_config() * bl_coord, tr_coord, dimensions, and size */ request_result = NULL; - if (_parse_request(bgl_part->nodes, &request_result) || request_result == NULL){ + if (_parse_request(bgl_part->nodes, &request_result) + || (request_result == NULL)) error("_process_config: error parsing request %s", bgl_part->nodes); - } else { + else { /** * bgl_part->part_type should have been extracted in * copy_slurm_partition_list @@ -191,7 +170,7 @@ static void _process_config() * that we can maintain our own separate table of bgl_part_id to * slurm_part_id. */ -static int _copy_slurm_partition_list() +static int _copy_slurm_partition_list(List slurm_part_list) { struct part_record* slurm_part; bgl_record_t* bgl_record; @@ -199,10 +178,6 @@ static int _copy_slurm_partition_list() char* cur_nodes, *delimiter=",", *nodes_tmp, *next_ptr, *orig_ptr; int err, rc; - if (!slurm_part_list){ - error("_copy_slurm_partition_list: slurm_part_list is not initialized yet"); - return SLURM_ERROR; - } itr = list_iterator_create(slurm_part_list); /** * try to find the corresponding bgl_conf_record for the @@ -238,9 +213,8 @@ static int _copy_slurm_partition_list() bgl_record->part_type = (rm_partition_t*) xmalloc(sizeof(rm_partition_t)); _find_part_type(cur_nodes, &bgl_record->part_type); - bgl_record->hostlist = (hostlist_t *) xmalloc(sizeof(hostlist_t)); - *(bgl_record->hostlist) = hostlist_create(cur_nodes); - bgl_record->size = hostlist_count(*(bgl_record->hostlist)); + bgl_record->hostlist = hostlist_create(cur_nodes); + bgl_record->size = hostlist_count(bgl_record->hostlist); if (node_name2bitmap(cur_nodes, false, &(bgl_record->bitmap))){ error("unable to convert nodes %s to bitmap", cur_nodes); } @@ -263,10 +237,7 @@ static int _copy_slurm_partition_list() } /* end while(cur_nodes) */ cleanup: - if (orig_ptr){ - xfree(orig_ptr); - orig_ptr = NULL; - } + xfree(orig_ptr); } /* end while(slurm_part) */ list_iterator_destroy(itr); @@ -325,7 +296,6 @@ extern int read_bgl_conf() /* partition configuration parameters */ if ((error_code = _parse_bgl_spec(in_line))) { error("_parse_bgl_spec error, skipping this line"); - } /* report any leftover strings on input line */ @@ -370,8 +340,6 @@ static int _parse_bgl_spec(char *in_line) if (error_code || !nodes || !part_type){ xfree(nodes); xfree(part_type); - nodes = NULL; - part_type = NULL; return error_code; } @@ -379,24 +347,23 @@ static int _parse_bgl_spec(char *in_line) // debug("partition type %s", part_type); new_record = (bgl_conf_record_t*) xmalloc(sizeof(bgl_conf_record_t)); - new_record->nodes = xstrdup(nodes); + new_record->nodes = nodes; new_record->part_type = xmalloc(sizeof(rm_partition_t)); - if (strcasecmp(part_type, "TORUS") == 0){ - // error("warning, TORUS specified, but I can't handle those yet! Defaulting to mesh"); - /** FIXME */ + if (strcasecmp(part_type, "TORUS") == 0) *(new_record->part_type) = RM_TORUS; - // new_record->part_type = RM_MESH; - } else if (strcasecmp(part_type, "MESH") == 0) { + else if (strcasecmp(part_type, "MESH") == 0) *(new_record->part_type) = RM_MESH; - } else { + else { error("_parse_bgl_spec: partition type %s invalid for nodes %s", part_type, nodes); error("defaulting to type: MESH"); - /* error("defaulting to type: PREFER_TORUS"); */ *(new_record->part_type) = RM_MESH; } list_push(bgl_conf_list, new_record); + + xfree(part_type); + /* xfree(nodes); Don't do this as we moved value into new record type */ return SLURM_SUCCESS; } @@ -404,55 +371,29 @@ static int _parse_bgl_spec(char *in_line) static void _destroy_bgl_record(void* object) { bgl_record_t* this_record = (bgl_record_t*) object; + if (this_record){ - if (this_record->slurm_part_id){ - xfree(this_record->slurm_part_id); - this_record->slurm_part_id = NULL; - } - if (this_record->nodes){ - xfree(this_record->nodes); - this_record->nodes = NULL; - } - if (this_record->part_type){ - xfree(this_record->part_type); - this_record->part_type = NULL; - } - if (this_record->hostlist){ - hostlist_destroy(*(this_record->hostlist)); - *(this_record->hostlist) = NULL; - } - if (this_record->bitmap){ + xfree(this_record->nodes); + xfree(this_record->slurm_part_id); + xfree(this_record->part_type); + hostlist_destroy(this_record->hostlist); + if (this_record->bitmap) bit_free(this_record->bitmap); - this_record->bitmap = NULL; - } - #ifdef _RM_API_H__ - if (this_record->bgl_part_id){ - xfree(this_record->bgl_part_id); - this_record->bgl_part_id = NULL; - } + xfree(this_record->bgl_part_id); #endif - + xfree(this_record); } - xfree(this_record); - this_record = NULL; } static void _destroy_bgl_conf_record(void* object) { bgl_conf_record_t* this_record = (bgl_conf_record_t*) object; if (this_record){ - if (this_record->nodes){ - xfree(this_record->nodes); - this_record->nodes = NULL; - } - if (this_record->part_type){ - xfree(this_record->part_type); - this_record->part_type = NULL; - } + xfree(this_record->nodes); + xfree(this_record->part_type); + xfree(this_record); } - xfree(this_record); - this_record = NULL; } /** @@ -468,6 +409,7 @@ static void _find_part_type(char* nodes, rm_partition_t** return_part_type) nodes); *return_part_type = (rm_partition_t*) xmalloc(sizeof(rm_partition_t)); + if (record != NULL && record->part_type != NULL){ **return_part_type = *(record->part_type); } else { @@ -482,161 +424,79 @@ static int _listfindf_conf_part_record(bgl_conf_record_t* record, char *nodes) return (!strcasecmp(record->nodes, nodes)); } -static int _compute_part_size(char* nodes) -{ - int size; - /* nhosts is stored as int, hopefully unsigned 32-bit */ - hostlist_t hosts = hostlist_create(nodes); - size = (int) hostlist_count(hosts); - hostlist_destroy(hosts); - // debug("compute_part_size for %s = %d", nodes, size); - return size; -} - /** - * converts a request of form ABCxXYZ to two int*'s - * of bl[ABC] and tr[XYZ]. + * parses the request_string */ -static int _char2intptr(char* request, int** bl, int** tr) +static int _char2num(char in) { - int i, rc; - char *request_tmp, *delimit = ",x", *next_ptr, *orig_ptr; - char zero = '0'; - char* token; - - rc = SLURM_ERROR; - request_tmp = xstrdup(request); - (*bl) = (int*) xmalloc(sizeof(int) * SYSTEM_DIMENSIONS); - (*tr) = (int*) xmalloc(sizeof(int) * SYSTEM_DIMENSIONS); - - orig_ptr = request_tmp; - token = strtok_r(request_tmp, delimit, &next_ptr); - if (token == NULL) - goto cleanup; - - for (i=0; i<SYSTEM_DIMENSIONS; i++){ - (*bl)[i] = (int)(token[i]-zero); - } - - request_tmp = next_ptr; - token = strtok_r(request_tmp, delimit, &next_ptr); - if (token == NULL) - goto cleanup; - - for (i=0; i<SYSTEM_DIMENSIONS; i++){ - (*tr)[i] = (int)(token[i]-zero); - } - - rc = SLURM_SUCCESS; - - cleanup: - if (rc == SLURM_ERROR){ - xfree(bl); - xfree(tr); - bl = NULL; tr = NULL; - } - - if (orig_ptr != NULL){ - xfree(orig_ptr); - orig_ptr = NULL; - } - - return rc; + int i = in - '0'; + if ((i < 0) || (i > 9)) + return -1; + return i; } -/** - * parses the request_string - */ static int _parse_request(char* request_string, partition_t** request_result) { - char* range; - int *bl=NULL, *tr=NULL; - uint16_t *dim=NULL; - int i, rc; + int loc = 0, i,j, rc = SLURM_ERROR; - rc = SLURM_ERROR; if (!request_string) - goto cleanup; + return SLURM_ERROR; debug("incoming request %s", request_string); *request_result = (partition_t*) xmalloc(sizeof(partition_t)); - - /** token needs to be of the form 000x000 */ - if(_extract_range(request_string, &range)){ - goto cleanup; + + for (i=0; ; i++) { + if (request_string[i] == '\0') + break; + if (loc == 0) { + if (request_string[i] == '[') + loc++; + } else if (loc == 1) { + for (j=0; j<SYSTEM_DIMENSIONS; j++) + (*request_result)->bl_coord[j] = _char2num(request_string[i+j]); + i += (SYSTEM_DIMENSIONS - 1); + loc++; + } else if (loc == 2) { + if (request_string[i] != 'x') + break; + loc++; + } else if (loc == 3) { + for (j=0; j<SYSTEM_DIMENSIONS; j++) + (*request_result)->tr_coord[j] = _char2num(request_string[i+j]); + i += (SYSTEM_DIMENSIONS - 1); + loc++; + } else if (loc == 4) { + if (request_string[i] != ']') + break; + loc++; + break; + } } - - if (_char2intptr(range, &bl, &tr) || bl == NULL || tr == NULL || - _get_request_dimensions(bl, tr, &dim)){ + if (loc != 5) goto cleanup; - } - /** place all the correct values into the request */ - for (i=0; i<SYSTEM_DIMENSIONS; i++){ - (*request_result)->bl_coord[i] = bl[i]; - (*request_result)->tr_coord[i] = tr[i]; - (*request_result)->dimensions[i] = dim[i]; + (*request_result)->size = 1; + for (i=0; i<SYSTEM_DIMENSIONS; i++) { + if (((*request_result)->bl_coord[i] < 0) + || ((*request_result)->tr_coord[i] < 0)) + goto cleanup; + /* count self */ + (*request_result)->dimensions[i] = + (*request_result)->tr_coord[i] - + (*request_result)->bl_coord[i] + 1; + (*request_result)->size *= + (*request_result)->dimensions[i]; } - - (*request_result)->size = int_array_size(dim); rc = SLURM_SUCCESS; cleanup: - if (bl) { - xfree(bl); - bl = NULL; - } - if (tr) { - xfree(tr); - tr = NULL; - } - if (dim) { - xfree(dim); - dim = NULL; - } - if (range){ - xfree(range); - range = NULL; - } - if (rc == SLURM_ERROR){ - xfree(request_result); - request_result = NULL; - } + if (rc == SLURM_ERROR) + xfree(*request_result); return rc; } -static int _get_request_dimensions(int* bl, int* tr, uint16_t** dim) -{ - int i; - /* - debug("get request dimensions dim: bl[%d %d %d] tr[%d %d %d]", - bl[0], bl[1], bl[2], tr[0], tr[1], tr[2]); - */ - if (bl == NULL || tr == NULL){ - return SLURM_ERROR; - } - - *dim = (uint16_t*) xmalloc(sizeof(uint16_t) * SYSTEM_DIMENSIONS); - for (i=0; i<SYSTEM_DIMENSIONS; i++){ - (*dim)[i] = tr[i] - bl[i] + 1; /* plus one because we're - counting current - number, so 0 to 1 = 2 - */ - if ((*dim)[i] <= 0){ - error("_get_request_dimensions: tr dimension less than bl dimension."); - goto cleanup; - } - } - // debug("dim: [%d %d %d]", (*dim)[0], (*dim)[1], (*dim)[2]); - return SLURM_SUCCESS; - - cleanup: - xfree(*dim); - *dim = NULL; - return SLURM_ERROR; -} - -extern int init_bgl() +/* Initialize all plugin variables */ +extern int init_bgl(void) { #ifdef _RM_API_H__ int rc; @@ -653,50 +513,17 @@ extern int init_bgl() return SLURM_SUCCESS; } -static int _extract_range(char* request, char** result) +/* Purge all plugin variables */ +extern void fini_bgl(void) { - int RANGE_SIZE = 7; /* expecting something of the size: 000x000 = 7 chars */ - int i, request_length; - int start = 0, end = 0; - - if (!request) - return SLURM_ERROR; - *result = (char*) xmalloc(sizeof(RANGE_SIZE) + 1); /* +1 for NULL term */ - - request_length = strlen(request); - for(i=0; i<request_length; i++){ - if (request[i] == ']'){ - xstrcatchar(*result, '\0'); - end = 1; - break; - } - - if (start){ - xstrcatchar(*result, request[i]); - } - - if (request[i] == '[') - start = 1; - } - - if (!start || !end) - goto cleanup; - - return SLURM_SUCCESS; - - cleanup: - if (*result){ - xfree(*result); - *result = NULL; - } - error("_extract_range: could not extract range from node list"); - return SLURM_ERROR; + list_destroy(bgl_conf_list); } extern void print_bgl_record(bgl_record_t* record) { - if (!record){ + if (!record) { error("print_bgl_record, record given is null"); + return; } debug(" bgl_record:"); @@ -709,8 +536,8 @@ extern void print_bgl_record(bgl_record_t* record) debug(" \tpart_type: %s", convert_part_type(record->part_type)); if (record->hostlist){ - char buffer[RANGE_MAX]; - hostlist_ranged_string(*(record->hostlist), RANGE_MAX, buffer); + char buffer[BUFSIZE]; + hostlist_ranged_string(record->hostlist, BUFSIZE, buffer); debug(" \thostlist %s", buffer); } @@ -722,10 +549,9 @@ extern void print_bgl_record(bgl_record_t* record) } if (record->bitmap){ - char* bitstring = (char*) xmalloc(sizeof(char)*BITSIZE); + char bitstring[BITSIZE]; bit_fmt(bitstring, BITSIZE, record->bitmap); debug("\tbitmap: %s", bitstring); - xfree(bitstring); } } @@ -979,12 +805,11 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_part_bitmap, /* since the bgl_part_id is a number, (most likely single digit), * we'll create an LLNL_#, i.e. LLNL_4 = 6 chars + 1 for NULL */ - char *bgl_part_id = NULL; - xstrfmtcat(bgl_part_id, "LLNL_%i", *(record->bgl_part_id)); + char bgl_part_id[BITSIZE]; + snprintf(bgl_part_id, BITSIZE, "LLNL_%i", *(record->bgl_part_id)); debug("found fake bgl_part_id %s", bgl_part_id); select_g_set_jobinfo(job_ptr->select_jobinfo, SELECT_DATA_PART_ID, bgl_part_id); - xfree(bgl_part_id); } /** we should do the BGL stuff here like, init BGL job stuff... */ @@ -998,11 +823,9 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_part_bitmap, */ static void _print_bitmap(bitstr_t* bitmap) { - char* bitstring = (char*) xmalloc(sizeof(char)*BITSIZE); + char bitstring[BITSIZE]; bit_fmt(bitstring, BITSIZE, bitmap); debug("bitmap:\t%s", bitstring); - xfree(bitstring); - bitstring = NULL; } /** @@ -1025,15 +848,8 @@ static void _update_bgl_node_bitmap() // rm_size3D_t bp_size,size_in_bp,m_size; // rm_size3D_t bp_size,size_in_bp,m_size; char* reason = NULL; - char* down_node_list = NULL; - char* bgl_down_node = NULL; - char* slurm_down_node = NULL; - - bgl_down_node = xmalloc(sizeof(char) * BUFSIZE); - slurm_down_node = xmalloc(sizeof(char) * BUFSIZE); - /** FIXME: dunno if we have to do this, but i'm reserving mem just to be safe */ - down_node_list = xmalloc(sizeof(char) * BUFSIZE); - down_node_list[0] = '\0'; + char down_node_list[BUFSIZE]; + char bgl_down_node[128]; if (!bgl){ error("error, BGL is not initialized"); @@ -1060,7 +876,8 @@ static void _update_bgl_node_bitmap() /* from here we either update the node or bitmap entry */ /** translate the location to the "node name" */ - xstrfmtcat(bgl_down_node, "bgl%d%d%d", BPLoc.X, BPLoc.Y, BPLoc.Z); + snprintf(bgl_down_node, sizeof[bgl_down_node], "bgl%d%d%d", + BPLoc.X, BPLoc.Y, BPLoc.Z); debug("update bgl node bitmap: %s loc(%s) is in state %s", BPID, bgl_down_node, @@ -1073,9 +890,12 @@ static void _update_bgl_node_bitmap() * that slurm knows about = comma separated * node list */ - if (done_node_list[0] != '\0') - xstrcat(down_node_list,","); - xstrcat(down_node_list, bgl_down_node); + if (strlen(down_node_list) + strlen(bgl_down_node) + 2) < BUFSIZE) { + if (done_node_list[0] != '\0') + strcat(down_node_list,","); + strcat(down_node_list, bgl_down_node); + } else + error("down_node_list overflow"); } } if (!down_node_list){ @@ -1087,12 +907,6 @@ static void _update_bgl_node_bitmap() time_ptr); slurm_drain_nodes(down_node_list, reason); } - - cleanup: - xfree(bgl_down_node); - xfree(slurm_down_node); - xfree(down_node_list); - #endif } @@ -1124,7 +938,7 @@ extern void set_bp_node_state(rm_BP_state_t state, node_record node){ debug("RM_BP_DOWN"); break; case RM_BP_NAV: - debug("BGL state update returned UNKNOWN state"); + debug("RM_BP_NAV"); break; defalt: debug("BGL state update returned UNKNOWN state"); @@ -1145,17 +959,16 @@ bluegene_agent(void *args) { struct timeval tv1, tv2; char tv_str[20]; - while (1) { - sleep(SLEEP_TIME); /* don't run continuously */ + while (1) { gettimeofday(&tv1, NULL); _update_bgl_node_bitmap(); - gettimeofday(&tv2, NULL); _diff_tv_str(&tv1, &tv2, tv_str, 20); #if DEBUG info("Bluegene status update: completed, %s", tv_str); #endif + sleep(SLEEP_TIME); /* don't run continuously */ } } diff --git a/src/plugins/select/bluegene/bluegene.h b/src/plugins/select/bluegene/bluegene.h index 6df8eb6427f..e5c2eadd412 100644 --- a/src/plugins/select/bluegene/bluegene.h +++ b/src/plugins/select/bluegene/bluegene.h @@ -42,9 +42,6 @@ rm_BGL_t *bgl; #endif -List slurm_part_list; /* cached copy of slurm's part_list */ -List bgl_list; /* list of bgl_record entries */ -List bgl_conf_list; /* list of bgl_conf_record entries */ typedef int lifecycle_type_t; enum part_lifecycle {DYNAMIC, STATIC}; @@ -53,7 +50,7 @@ typedef struct bgl_record { pm_partition_id_t* bgl_part_id; /* ID returned from CMCS */ char* nodes; /* String of nodes in partition */ lifecycle_type_t part_lifecycle;/* either STATIC or DYNAMIC */ - hostlist_t* hostlist; /* expanded form of hosts */ + hostlist_t hostlist; /* expanded form of hosts */ bitstr_t *bitmap; /* bitmap of nodes for this partition */ struct partition* alloc_part; /* the allocated partition */ int size; /* node count for the partitions */ @@ -75,10 +72,22 @@ typedef struct bgl_conf_record{ * */ extern int read_bgl_conf(); -/** */ -extern int init_bgl(); -extern int create_static_partitions(); +/* Initialize all plugin variables */ +extern int init_bgl(void); + +/* Purge all plugin variables */ +extern void fini_bgl(void); + +/** + * create_static_partitions - create the static partitions that will be used + * for scheduling. + * IN - part_list: slurmctld's list of partition configurations + * OUT - (global, to slurmctld): Table of partitionIDs to geometries + * RET - success of fitting all configurations + */ +extern int create_static_partitions(List part_list); + /** */ extern int submit_job(struct job_record *job_ptr, bitstr_t *bitmap, int min_nodes, int max_nodes); diff --git a/src/plugins/select/bluegene/partition_sys.c b/src/plugins/select/bluegene/partition_sys.c index cc91d3c0858..1cdf8dce69b 100755 --- a/src/plugins/select/bluegene/partition_sys.c +++ b/src/plugins/select/bluegene/partition_sys.c @@ -31,10 +31,10 @@ /** need this to have it compile with the BGL header*/ // typedef int MPIR_PROCDESC; +#include <math.h> #include <stdlib.h> #include <stdio.h> #include <unistd.h> -#include <math.h> #include "src/common/list.h" #include "src/common/xmalloc.h" #include "partition_sys.h" @@ -70,8 +70,8 @@ List bgl_sys_free = NULL; List bgl_sys_allocated = NULL; void _init_sys(partition_t*); -int _is_not_equals_all_coord(uint16_t* rec_a, uint16_t* rec_b); -int _is_not_equals_some_coord(uint16_t* rec_A, uint16_t* rec_b); +static int _is_not_equals_all_coord(uint16_t* rec_a, uint16_t* rec_b); +static int _is_not_equals_some_coord(uint16_t* rec_a, uint16_t* rec_b); #ifdef _RM_API_H__ /** @@ -218,7 +218,7 @@ int _create_bgl_partitions(List requests) * we assume that the partitioning done before hand * */ -int _fit_request(List sys, List allocated, uint16_t* request) +static int _fit_request(List sys, List allocated, uint16_t* request) { int i, rc = 1; uint16_t* new_request = NULL; @@ -317,7 +317,8 @@ int _fit_request(List sys, List allocated, uint16_t* request) * odd number sizes, and dimensions will kill this!!! * */ -int _break_up_partition(List sys, partition_t* partition_to_break, int index) +static int _break_up_partition(List sys, partition_t* partition_to_break, + int index) { /* the two new partitions to create */ partition_t *first_part, *second_part; @@ -340,13 +341,15 @@ int _break_up_partition(List sys, partition_t* partition_to_break, int index) first_part->dimensions[index] /= 2; second_part->dimensions[index] /= 2; - diff = partition_to_break->tr_coord[index] - partition_to_break->bl_coord[index]; + diff = partition_to_break->tr_coord[index] - + partition_to_break->bl_coord[index]; first_part->tr_coord[index] = floor(diff/2); second_part->bl_coord[index] = ceil(diff/2); itr = list_iterator_create(sys); while ((next = (partition_t*) list_next(itr))) { - if(!is_not_correct_dimension(next->dimensions, partition_to_break->dimensions)){ + if(!is_not_correct_dimension(next->dimensions, + partition_to_break->dimensions)){ /* next we remove the old partition */ list_remove(itr); @@ -784,7 +787,7 @@ void rotate_part(const uint16_t* config, uint16_t** new_config) xfree(*new_config); (*new_config) = (uint16_t*) xmalloc(SYSTEM_DIMENSIONS * sizeof(uint16_t)); - + if (config[0] > config[1]){ if (config[1] > config[2]){ ; // array already sorted @@ -857,7 +860,7 @@ void _init_sys(partition_t *part) /** * to be used by list object to destroy the array elements */ -void _int_array_destroy(void* object) +static void _int_array_destroy(void* object) { xfree(object); } @@ -932,7 +935,7 @@ int get_switch(int* cur_coord, List switch_list) switch_list = list_create(rm_switch_t_destroy); } - if (get_bp_by_location(cur_coord, bp)){ + if (_get_bp_by_location(cur_coord, bp)){ return SLURM_ERROR; } @@ -987,7 +990,7 @@ void rm_switch_t_destroy(void* object) * this is just stupid. there are some implicit rules for where * "NextBP" goes to, but we don't know, so we have to do this. */ -int get_bp_by_location(int* cur_coord, rm_BP_t* bp) +static int _get_bp_by_location(int* cur_coord, rm_BP_t* bp) { int BP_num; rm_location_t* loc; @@ -1002,7 +1005,7 @@ int get_bp_by_location(int* cur_coord, rm_BP_t* bp) rm_get_data(bgl, RM_NextBP, &bp); } - error("get_bp_by_location: could not find specified bp."); + error("_get_bp_by_location: could not find specified bp."); return SLURM_ERROR; } #endif @@ -1012,7 +1015,7 @@ int get_bp_by_location(int* cur_coord, rm_BP_t* bp) * * returns 0 if equals, 1 if not equals */ -int _is_not_equals_some_coord(uint16_t* rec_a, uint16_t* rec_b) +static int _is_not_equals_some_coord(uint16_t* rec_a, uint16_t* rec_b) { int i; for (i=0; i<SYSTEM_DIMENSIONS; i++){ @@ -1027,7 +1030,7 @@ int _is_not_equals_some_coord(uint16_t* rec_a, uint16_t* rec_b) * * returns 0 if equals, 1 if not equals */ -int _is_not_equals_all_coord(uint16_t* rec_a, uint16_t* rec_b) +static int _is_not_equals_all_coord(uint16_t* rec_a, uint16_t* rec_b) { int i; for (i=0; i<SYSTEM_DIMENSIONS; i++){ @@ -1040,7 +1043,7 @@ int _is_not_equals_all_coord(uint16_t* rec_a, uint16_t* rec_b) /** * sort the partitions by increasing size */ -void sort_partitions_by_inc_size(List parts){ +static void _sort_partitions_by_inc_size(List parts){ if (parts == NULL) return; list_sort(parts, (ListCmpF) _partition_cmpf_inc); @@ -1049,7 +1052,7 @@ void sort_partitions_by_inc_size(List parts){ /** * sort the partitions by decreasing size */ -void sort_partitions_by_dec_size(List parts){ +static void _sort_partitions_by_dec_size(List parts){ if (parts == NULL) return; list_sort(parts, (ListCmpF) _partition_cmpf_dec); @@ -1062,7 +1065,7 @@ void sort_partitions_by_dec_size(List parts){ * returns: -1: rec_a < rec_b 0: rec_a == rec_b 1: rec_a > rec_b * */ -int _partition_cmpf_inc(struct partition* rec_a, struct partition* rec_b) +static int _partition_cmpf_inc(struct partition* rec_a, struct partition* rec_b) { if (rec_a->size < rec_b->size) return -1; @@ -1078,7 +1081,7 @@ int _partition_cmpf_inc(struct partition* rec_a, struct partition* rec_b) * returns: -1: rec_a > rec_b 0: rec_a == rec_b 1: rec_a < rec_b * */ -int _partition_cmpf_dec(struct partition* rec_a, struct partition* rec_b) +static int _partition_cmpf_dec(struct partition* rec_a, struct partition* rec_b) { if (rec_a->size > rec_b->size) return -1; @@ -1089,13 +1092,13 @@ int _partition_cmpf_dec(struct partition* rec_a, struct partition* rec_b) } /** */ -List _get_bgl_sys_allocated() +static List _get_bgl_sys_allocated() { return bgl_sys_allocated; } /** */ -List _get_bgl_sys_free() +static List _get_bgl_sys_free() { return bgl_sys_free; } diff --git a/src/plugins/select/bluegene/select_bluegene.c b/src/plugins/select/bluegene/select_bluegene.c index 256bf14e0d9..1f38d8c70ad 100644 --- a/src/plugins/select/bluegene/select_bluegene.c +++ b/src/plugins/select/bluegene/select_bluegene.c @@ -87,7 +87,7 @@ static bool thread_running = false; static pthread_mutex_t thread_flag_mutex = PTHREAD_MUTEX_INITIALIZER; /** initialize the status pthread */ -int _init_status_pthread(); +static int _init_status_pthread(); /* * init() is called when the plugin is loaded, before any other functions @@ -114,7 +114,7 @@ int init ( void ) return SLURM_SUCCESS; } -int _init_status_pthread() +static int _init_status_pthread() { pthread_attr_t attr; @@ -146,6 +146,8 @@ int fini ( void ) } pthread_mutex_unlock( &thread_flag_mutex ); + fini_bgl(); + return SLURM_SUCCESS; } @@ -162,21 +164,19 @@ int fini ( void ) extern int select_p_part_init(List part_list) { debug("select_p_part_init"); - /** isn't the part_list already accessible to me? */ - slurm_part_list = part_list; if (read_bgl_conf()) return SLURM_ERROR; /* create_static_partitions */ - if (create_static_partitions()){ + if (create_static_partitions(part_list)){ /* error in creating the static partitions, so * partitions referenced by submitted jobs won't * correspond to actual slurm partitions/bgl * partitions. */ fatal("Error, could not create the static partitions"); - return 1; + return SLURM_ERROR; } return SLURM_SUCCESS; @@ -207,11 +207,9 @@ extern int select_p_node_init(struct node_record *node_ptr, int node_cnt) return SLURM_ERROR; } - // error("select/bluegene plugin not yet functional"); debug("select_p_node_init should be doing a system wide status " "check on all the nodes to updated the system bitmap, " "along with killing old jobs, etc"); - // return SLURM_ERROR; return SLURM_SUCCESS; } -- GitLab