From 8cc1197be83ada5ae211d2ef7789831bad5d2a9c Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Fri, 1 Oct 2004 01:10:29 +0000
Subject: [PATCH] Major cleanup of select/bluegene, fixed a couple of memory
 leaks. Still looking for reason the plugin adds 15MB to slurmctld's size
 (compared with select/linear).

---
 src/plugins/select/bluegene/Makefile.am       |   2 +-
 src/plugins/select/bluegene/bluegene.c        | 409 +++++-------------
 src/plugins/select/bluegene/bluegene.h        |  23 +-
 src/plugins/select/bluegene/partition_sys.c   |  43 +-
 src/plugins/select/bluegene/select_bluegene.c |  14 +-
 5 files changed, 157 insertions(+), 334 deletions(-)

diff --git a/src/plugins/select/bluegene/Makefile.am b/src/plugins/select/bluegene/Makefile.am
index 1f3bf2a25da..71ce93a09ca 100644
--- a/src/plugins/select/bluegene/Makefile.am
+++ b/src/plugins/select/bluegene/Makefile.am
@@ -4,7 +4,7 @@
 CPPFLAGS =  -DBLUEGENE_CONFIG_FILE=\"$(sysconfdir)/bluegene.conf\"
 AUTOMAKE_OPTIONS = foreign
 
-PLUGIN_FLAGS = -module -avoid-version --export-dynamic  -lm
+PLUGIN_FLAGS = -module -avoid-version --export-dynamic -lm
 
 INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
 
diff --git a/src/plugins/select/bluegene/bluegene.c b/src/plugins/select/bluegene/bluegene.c
index 8a6b12acc13..00b37849643 100644
--- a/src/plugins/select/bluegene/bluegene.c
+++ b/src/plugins/select/bluegene/bluegene.c
@@ -1,7 +1,7 @@
 /*****************************************************************************\
  *  bluegene.c - bgl node allocation plugin. 
  *****************************************************************************
- *  Copyright (C) 2003 The Regents of the University of California.
+ *  Copyright (C) 2004 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  *  Written by Dan Phung <phung4@llnl.gov>
  *  
@@ -24,6 +24,7 @@
 \*****************************************************************************/
 
 #include <stdlib.h>
+
 #include "src/slurmctld/proc_req.h"
 #include "src/common/list.h"
 #include "src/common/macros.h"
@@ -35,13 +36,16 @@
 #include "partition_sys.h"
 #include "src/common/hostlist.h"
 
-#define RANGE_MAX 8192
 #define BUFSIZE 4096
 #define BITSIZE 128
-#define SLEEP_TIME 3 /* BLUEGENE_PTHREAD checks every 3 secs */
+#define SLEEP_TIME 60 /* BLUEGENE_PTHREAD checks every 60 secs */
 
 char* bgl_conf = BLUEGENE_CONFIG_FILE;
 
+/* Global variables */
+List bgl_list;				/* list of bgl_record entries */
+List bgl_conf_list;			/* list of bgl_conf_record entries */
+
 #define SWAP(a,b,t)	\
 _STMT_START {	\
 	(t) = (a);	\
@@ -50,48 +54,22 @@ _STMT_START {	\
 } _STMT_END
 
  /** some local functions */
-
-/** */
+static int  _copy_slurm_partition_list(List slurm_part_list);
 static int _find_best_partition_match(struct job_record* job_ptr, bitstr_t* slurm_part_bitmap,
 			       int min_nodes, int max_nodes,
 			       int spec, bgl_record_t** found_bgl_record);
-/** */
 static int _parse_request(char* request_string, partition_t** request);
-/** */
-static int _get_request_dimensions(int* bl, int* tr, uint16_t** dim);
-/** */
-static int _extract_range(char* request, char** result);
-/** */
 static int _wire_bgl_partitions();
-/** */
 static int _bgl_record_cmpf_inc(bgl_record_t* rec_a, bgl_record_t* rec_b);
-/** */
 static int _bgl_record_cmpf_dec(bgl_record_t* rec_a, bgl_record_t* rec_b);
-/** 
- * to be used by list object to destroy the array elements
- */
 static void _destroy_bgl_record(void* object);
-/** */
 static void _destroy_bgl_conf_record(void* object);
-
-/** */
 static void _print_bitmap(bitstr_t* bitmap);
-
-/** */
 static void _process_config();
-/** */
 static int _parse_bgl_spec(char *in_line);
-/** */
-static int _copy_slurm_partition_list();
-/** */
 static void _find_part_type(char* nodes, rm_partition_t** return_part_type);
-/** */
 static int _listfindf_conf_part_record(bgl_conf_record_t* record, char *nodes);
-/** */
-static int _compute_part_size(char* nodes);
-/** */
 static void _update_bgl_node_bitmap();
-/** */
 static void _diff_tv_str(struct timeval *tv1,struct timeval *tv2,
 		char *tv_str, int len_tv_str);
 
@@ -105,7 +83,7 @@ static void _rotate_geo(uint16_t *req_geometry, int rot_cnt);
  * OUT - (global, to slurmctld): Table of partitionIDs to geometries
  * RET - success of fitting all configurations
  */
-extern int create_static_partitions()
+extern int create_static_partitions(List part_list)
 {
 	/** purge the old list.  Later on, it may be more efficient just to amend the list */
 	if (bgl_list){
@@ -114,7 +92,7 @@ extern int create_static_partitions()
 	bgl_list = list_create(_destroy_bgl_record);
 
 	/** copy the slurm conf partition info, this will fill in bgl_list */
-	if (_copy_slurm_partition_list()){
+	if (_copy_slurm_partition_list(part_list)){
 		return SLURM_ERROR;
 	}
 
@@ -172,9 +150,10 @@ static void _process_config()
 		 * bl_coord, tr_coord, dimensions, and size
 		 */
 		request_result = NULL;
-		if (_parse_request(bgl_part->nodes, &request_result) || request_result == NULL){
+		if (_parse_request(bgl_part->nodes, &request_result)
+		|| (request_result == NULL))
 			error("_process_config: error parsing request %s", bgl_part->nodes);
-		} else {
+		else {
 			/** 
 			 * bgl_part->part_type should have been extracted in
 			 * copy_slurm_partition_list
@@ -191,7 +170,7 @@ static void _process_config()
  * that we can maintain our own separate table of bgl_part_id to
  * slurm_part_id.
  */
-static int _copy_slurm_partition_list()
+static int _copy_slurm_partition_list(List slurm_part_list)
 {
 	struct part_record* slurm_part;
 	bgl_record_t* bgl_record;
@@ -199,10 +178,6 @@ static int _copy_slurm_partition_list()
 	char* cur_nodes, *delimiter=",", *nodes_tmp, *next_ptr, *orig_ptr;
 	int err, rc;
 
-	if (!slurm_part_list){
-		error("_copy_slurm_partition_list: slurm_part_list is not initialized yet");
-		return SLURM_ERROR;
-	}
 	itr = list_iterator_create(slurm_part_list);
 	/** 
 	 * try to find the corresponding bgl_conf_record for the
@@ -238,9 +213,8 @@ static int _copy_slurm_partition_list()
 			bgl_record->part_type = (rm_partition_t*) xmalloc(sizeof(rm_partition_t));
 
 			_find_part_type(cur_nodes, &bgl_record->part_type);
-			bgl_record->hostlist = (hostlist_t *) xmalloc(sizeof(hostlist_t));
-			*(bgl_record->hostlist) = hostlist_create(cur_nodes);
-			bgl_record->size = hostlist_count(*(bgl_record->hostlist));
+			bgl_record->hostlist = hostlist_create(cur_nodes);
+			bgl_record->size = hostlist_count(bgl_record->hostlist);
 			if (node_name2bitmap(cur_nodes, false, &(bgl_record->bitmap))){
 				error("unable to convert nodes %s to bitmap", cur_nodes);
 			}
@@ -263,10 +237,7 @@ static int _copy_slurm_partition_list()
 		} /* end while(cur_nodes) */
 
 	cleanup:
-		if (orig_ptr){
-			xfree(orig_ptr);
-			orig_ptr = NULL;
-		}
+		xfree(orig_ptr);
 			
 	} /* end while(slurm_part) */
 	list_iterator_destroy(itr);
@@ -325,7 +296,6 @@ extern int read_bgl_conf()
 		/* partition configuration parameters */
 		if ((error_code = _parse_bgl_spec(in_line))) {
 			error("_parse_bgl_spec error, skipping this line");
-
 		}
 		
 		/* report any leftover strings on input line */
@@ -370,8 +340,6 @@ static int _parse_bgl_spec(char *in_line)
 	if (error_code || !nodes || !part_type){
 		xfree(nodes);
 		xfree(part_type);
-		nodes = NULL;
-		part_type = NULL;
 		return error_code;
 	}
 
@@ -379,24 +347,23 @@ static int _parse_bgl_spec(char *in_line)
 	// debug("partition type %s", part_type);
 
 	new_record = (bgl_conf_record_t*) xmalloc(sizeof(bgl_conf_record_t));
-	new_record->nodes = xstrdup(nodes);
+	new_record->nodes = nodes;
 	new_record->part_type = xmalloc(sizeof(rm_partition_t));
 
-	if (strcasecmp(part_type, "TORUS") == 0){
-		// error("warning, TORUS specified, but I can't handle those yet!  Defaulting to mesh");
-		/** FIXME */
+	if (strcasecmp(part_type, "TORUS") == 0)
 		*(new_record->part_type) = RM_TORUS;
-		// new_record->part_type = RM_MESH;
-	} else if (strcasecmp(part_type, "MESH") == 0) {
+	else if (strcasecmp(part_type, "MESH") == 0)
 		*(new_record->part_type) = RM_MESH;
-	} else {
+	else {
 		error("_parse_bgl_spec: partition type %s invalid for nodes %s",
 		      part_type, nodes);
 		error("defaulting to type: MESH");
-		/* error("defaulting to type: PREFER_TORUS"); */
 		*(new_record->part_type) = RM_MESH;
 	}
 	list_push(bgl_conf_list, new_record);
+
+	xfree(part_type);
+	/* xfree(nodes);	Don't do this as we moved value into new record type */
 	
 	return SLURM_SUCCESS;
 }
@@ -404,55 +371,29 @@ static int _parse_bgl_spec(char *in_line)
 static void _destroy_bgl_record(void* object)
 {
 	bgl_record_t* this_record = (bgl_record_t*) object;
+
 	if (this_record){
-		if (this_record->slurm_part_id){
-			xfree(this_record->slurm_part_id);
-			this_record->slurm_part_id = NULL;
-		}
-		if (this_record->nodes){
-			xfree(this_record->nodes);
-			this_record->nodes = NULL;
-		}
-		if (this_record->part_type){
-			xfree(this_record->part_type);
-			this_record->part_type = NULL;
-		}
-		if (this_record->hostlist){
-			hostlist_destroy(*(this_record->hostlist));
-			*(this_record->hostlist) = NULL;
-		}
-		if (this_record->bitmap){
+		xfree(this_record->nodes);
+		xfree(this_record->slurm_part_id);
+		xfree(this_record->part_type);
+		hostlist_destroy(this_record->hostlist);
+		if (this_record->bitmap)
 			bit_free(this_record->bitmap);
-			this_record->bitmap = NULL;
-		}
-
 #ifdef _RM_API_H__
-		if (this_record->bgl_part_id){
-			xfree(this_record->bgl_part_id);
-			this_record->bgl_part_id = NULL;
-		}
+		xfree(this_record->bgl_part_id);
 #endif
-
+		xfree(this_record);
 	}
-	xfree(this_record);
-	this_record = NULL;
 }
 
 static void _destroy_bgl_conf_record(void* object)
 {
 	bgl_conf_record_t* this_record = (bgl_conf_record_t*) object;
 	if (this_record){
-		if (this_record->nodes){
-			xfree(this_record->nodes);
-			this_record->nodes = NULL;
-		}
-		if (this_record->part_type){
-			xfree(this_record->part_type);
-			this_record->part_type = NULL;
-		}
+		xfree(this_record->nodes);
+		xfree(this_record->part_type);
+		xfree(this_record);
 	}
-	xfree(this_record);
-	this_record = NULL;
 }
 
 /** 
@@ -468,6 +409,7 @@ static void _find_part_type(char* nodes, rm_partition_t** return_part_type)
 						      nodes);
 
 	*return_part_type = (rm_partition_t*) xmalloc(sizeof(rm_partition_t));
+
 	if (record != NULL && record->part_type != NULL){
 		**return_part_type = *(record->part_type);
 	} else {
@@ -482,161 +424,79 @@ static int _listfindf_conf_part_record(bgl_conf_record_t* record, char *nodes)
 	return (!strcasecmp(record->nodes, nodes));
 }
 
-static int _compute_part_size(char* nodes) 
-{
-	int size;
-	/* nhosts is stored as int, hopefully unsigned 32-bit */
-	hostlist_t hosts = hostlist_create(nodes);
-	size = (int) hostlist_count(hosts);
-	hostlist_destroy(hosts);
-	// debug("compute_part_size for %s = %d", nodes, size);	
-	return size;
-}
-
 /** 
- * converts a request of form ABCxXYZ to two int*'s
- * of bl[ABC] and tr[XYZ].
+ * parses the request_string
  */
-static int _char2intptr(char* request, int** bl, int** tr)
+static int _char2num(char in)
 {
-	int i, rc;
-	char *request_tmp, *delimit = ",x", *next_ptr, *orig_ptr;
-	char zero = '0';
-	char* token;
-
-	rc = SLURM_ERROR;
-	request_tmp = xstrdup(request);
-	(*bl) = (int*) xmalloc(sizeof(int) * SYSTEM_DIMENSIONS);
-	(*tr) = (int*) xmalloc(sizeof(int) * SYSTEM_DIMENSIONS);
-
-	orig_ptr = request_tmp;
-	token = strtok_r(request_tmp, delimit, &next_ptr);
-	if (token == NULL)
-		goto cleanup;
-
-	for (i=0; i<SYSTEM_DIMENSIONS; i++){
-		(*bl)[i] = (int)(token[i]-zero);
-	}
-	
-	request_tmp = next_ptr;
-	token = strtok_r(request_tmp, delimit, &next_ptr);
-	if (token == NULL)
-		goto cleanup;
-
-	for (i=0; i<SYSTEM_DIMENSIONS; i++){
-		(*tr)[i] = (int)(token[i]-zero);
-	}
-
-	rc = SLURM_SUCCESS;
-
- cleanup:
-	if (rc == SLURM_ERROR){
-		xfree(bl);
-		xfree(tr);
-		bl = NULL; tr = NULL;
-	}
-
-	if (orig_ptr != NULL){
-		xfree(orig_ptr);
-		orig_ptr = NULL;
-	}
-
-	return rc;
+	int i = in - '0';
+	if ((i < 0) || (i > 9))
+		return -1;
+	return i;
 }
 
-/** 
- * parses the request_string
- */
 static int _parse_request(char* request_string, partition_t** request_result)
 {
-	char* range;
-	int *bl=NULL, *tr=NULL;
-	uint16_t *dim=NULL;
-	int i, rc;
+	int loc = 0, i,j, rc = SLURM_ERROR;
 
-	rc = SLURM_ERROR;
 	if (!request_string)
-		goto cleanup;
+		return SLURM_ERROR;
 	
 	debug("incoming request %s", request_string);
 	*request_result = (partition_t*) xmalloc(sizeof(partition_t));
-	
-	/** token needs to be of the form 000x000 */
-	if(_extract_range(request_string, &range)){
-		goto cleanup;
+
+	for (i=0; ; i++) {
+		if (request_string[i] == '\0')
+			break;
+		if (loc == 0) {
+			if (request_string[i] == '[')
+				loc++;
+		} else if (loc == 1) {
+			for (j=0; j<SYSTEM_DIMENSIONS; j++)
+				(*request_result)->bl_coord[j] = _char2num(request_string[i+j]);
+			i += (SYSTEM_DIMENSIONS - 1);
+			loc++;
+		} else if (loc == 2) {
+			if (request_string[i] != 'x')
+				break;
+			loc++;
+		} else if (loc == 3) {
+			for (j=0; j<SYSTEM_DIMENSIONS; j++)
+				(*request_result)->tr_coord[j] = _char2num(request_string[i+j]);
+			i += (SYSTEM_DIMENSIONS - 1);
+			loc++;
+		} else if (loc == 4) {
+			if (request_string[i] != ']')
+				break;
+			loc++;
+			break;
+		}
 	}
-	
-	if (_char2intptr(range, &bl, &tr) || bl == NULL || tr == NULL ||
-	    _get_request_dimensions(bl, tr, &dim)){
+	if (loc != 5)
 		goto cleanup;
-	}
 
-	/** place all the correct values into the request */
-	for (i=0; i<SYSTEM_DIMENSIONS; i++){
-		(*request_result)->bl_coord[i] = bl[i];
-		(*request_result)->tr_coord[i] = tr[i];
-		(*request_result)->dimensions[i] = dim[i];
+	(*request_result)->size = 1;
+	for (i=0; i<SYSTEM_DIMENSIONS; i++) {
+		if (((*request_result)->bl_coord[i] < 0)
+		||  ((*request_result)->tr_coord[i] < 0))
+			goto cleanup;
+		/* count self */
+		(*request_result)->dimensions[i] =
+			(*request_result)->tr_coord[i] -
+			(*request_result)->bl_coord[i] + 1;
+		(*request_result)->size *=
+			(*request_result)->dimensions[i];
 	}
-
-	(*request_result)->size = int_array_size(dim);
 	rc = SLURM_SUCCESS;
 	
  cleanup: 
-	if (bl) {
-		xfree(bl);
-		bl = NULL;
-	}
-	if (tr) {
-		xfree(tr);
-		tr = NULL; 
-	}
-	if (dim) {
-		xfree(dim);
-		dim = NULL;
-	}
-	if (range){
-		xfree(range);	
-		range = NULL;
-	}
-	if (rc == SLURM_ERROR){
-		xfree(request_result);
-		request_result = NULL;
-	}
+	if (rc == SLURM_ERROR)
+		xfree(*request_result);
 	return rc;
 }
 
-static int _get_request_dimensions(int* bl, int* tr, uint16_t** dim)
-{
-	int i;
-	/*
-	  debug("get request dimensions dim: bl[%d %d %d] tr[%d %d %d]", 
-	  bl[0], bl[1], bl[2], tr[0], tr[1], tr[2]);
-	*/
-	if (bl == NULL || tr == NULL){
-		return SLURM_ERROR;
-	}
-		
-	*dim = (uint16_t*) xmalloc(sizeof(uint16_t) * SYSTEM_DIMENSIONS);
-	for (i=0; i<SYSTEM_DIMENSIONS; i++){
-		(*dim)[i] = tr[i] - bl[i] + 1; /* plus one because we're
-						  counting current
-						  number, so 0 to 1 = 2
-					       */
-		if ((*dim)[i] <= 0){
-			error("_get_request_dimensions: tr dimension less than bl dimension.");
-			goto cleanup;
-		}
-	}
-	// debug("dim: [%d %d %d]", (*dim)[0], (*dim)[1], (*dim)[2]);
-	return SLURM_SUCCESS;
-
- cleanup: 
-	xfree(*dim);
-	*dim = NULL;
-	return SLURM_ERROR;
-}
-
-extern int init_bgl()
+/* Initialize all plugin variables */
+extern int init_bgl(void)
 {
 #ifdef _RM_API_H__
 	int rc;
@@ -653,50 +513,17 @@ extern int init_bgl()
 	return SLURM_SUCCESS;
 }
 
-static int _extract_range(char* request, char** result)
+/* Purge all plugin variables */
+extern void fini_bgl(void)
 {
-	int RANGE_SIZE = 7; /* expecting something of the size: 000x000 = 7 chars */
-	int i, request_length;
-	int start = 0, end = 0;
-
-	if (!request)
-		return SLURM_ERROR;
-	*result = (char*) xmalloc(sizeof(RANGE_SIZE) + 1); /* +1 for NULL term */
-
-	request_length = strlen(request);
-	for(i=0; i<request_length; i++){
-		if (request[i] == ']'){
-			xstrcatchar(*result, '\0');
-			end = 1;
-			break;
-		}
-
-		if (start){
-			xstrcatchar(*result, request[i]);
-		}
-		
-		if (request[i] == '[')
-			start = 1;
-	}
-
-	if (!start || !end)
-		goto cleanup;
-
-	return SLURM_SUCCESS;
-
- cleanup:
-	if (*result){
-		xfree(*result);
-		*result = NULL;
-	}
-	error("_extract_range: could not extract range from node list");
-	return SLURM_ERROR;
+	list_destroy(bgl_conf_list);
 }
 
 extern void print_bgl_record(bgl_record_t* record)
 {
-	if (!record){
+	if (!record) {
 		error("print_bgl_record, record given is null");
+		return;
 	}
 
 	debug(" bgl_record:");
@@ -709,8 +536,8 @@ extern void print_bgl_record(bgl_record_t* record)
 	debug(" \tpart_type: %s", convert_part_type(record->part_type));
 
 	if (record->hostlist){
-		char buffer[RANGE_MAX];
-		hostlist_ranged_string(*(record->hostlist), RANGE_MAX, buffer);
+		char buffer[BUFSIZE];
+		hostlist_ranged_string(record->hostlist, BUFSIZE, buffer);
 		debug(" \thostlist %s", buffer);
 	}
 
@@ -722,10 +549,9 @@ extern void print_bgl_record(bgl_record_t* record)
 	}
 
 	if (record->bitmap){
-		char* bitstring = (char*) xmalloc(sizeof(char)*BITSIZE);
+		char bitstring[BITSIZE];
 		bit_fmt(bitstring, BITSIZE, record->bitmap);
 		debug("\tbitmap: %s", bitstring);
-		xfree(bitstring);
 	}
 }
 
@@ -979,12 +805,11 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_part_bitmap,
 		/* since the bgl_part_id is a number, (most likely single digit), 
 		 * we'll create an LLNL_#, i.e. LLNL_4 = 6 chars + 1 for NULL
 		 */
-		char *bgl_part_id = NULL;
-		xstrfmtcat(bgl_part_id, "LLNL_%i", *(record->bgl_part_id));
+		char bgl_part_id[BITSIZE];
+		snprintf(bgl_part_id, BITSIZE, "LLNL_%i", *(record->bgl_part_id));
 		debug("found fake bgl_part_id %s", bgl_part_id);
 		select_g_set_jobinfo(job_ptr->select_jobinfo,
 			SELECT_DATA_PART_ID, bgl_part_id);
-		xfree(bgl_part_id);
 	}
 
 	/** we should do the BGL stuff here like, init BGL job stuff... */
@@ -998,11 +823,9 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_part_bitmap,
  */
 static void _print_bitmap(bitstr_t* bitmap)
 {
-	char* bitstring = (char*) xmalloc(sizeof(char)*BITSIZE);
+	char bitstring[BITSIZE];
 	bit_fmt(bitstring, BITSIZE, bitmap);
 	debug("bitmap:\t%s", bitstring);
-	xfree(bitstring);
-	bitstring = NULL;
 }
 
 /** 
@@ -1025,15 +848,8 @@ static void _update_bgl_node_bitmap()
 	// rm_size3D_t bp_size,size_in_bp,m_size;
 	// rm_size3D_t bp_size,size_in_bp,m_size;
 	char* reason = NULL;
-	char* down_node_list = NULL;
-	char* bgl_down_node = NULL;
-	char* slurm_down_node = NULL;
-
-	bgl_down_node = xmalloc(sizeof(char) * BUFSIZE);
-	slurm_down_node = xmalloc(sizeof(char) * BUFSIZE);
-	/** FIXME: dunno if we have to do this, but i'm reserving mem just to be safe */
-	down_node_list = xmalloc(sizeof(char) * BUFSIZE);
-	down_node_list[0] = '\0';
+	char down_node_list[BUFSIZE];
+	char bgl_down_node[128];
 
 	if (!bgl){
 		error("error, BGL is not initialized");
@@ -1060,7 +876,8 @@ static void _update_bgl_node_bitmap()
 		/* from here we either update the node or bitmap
 		   entry */
 		/** translate the location to the "node name" */
-		xstrfmtcat(bgl_down_node, "bgl%d%d%d", BPLoc.X, BPLoc.Y, BPLoc.Z);
+		snprintf(bgl_down_node, sizeof[bgl_down_node], "bgl%d%d%d", 
+			BPLoc.X, BPLoc.Y, BPLoc.Z);
 		debug("update bgl node bitmap: %s loc(%s) is in state %s", 
 		      BPID, 
 		      bgl_down_node,
@@ -1073,9 +890,12 @@ static void _update_bgl_node_bitmap()
 			 * that slurm knows about = comma separated
 			 * node list
 			 */
-			if (done_node_list[0] != '\0')
-				xstrcat(down_node_list,",");
-			xstrcat(down_node_list, bgl_down_node);
+			if (strlen(down_node_list) + strlen(bgl_down_node) + 2) < BUFSIZE) {
+				if (done_node_list[0] != '\0')
+					strcat(down_node_list,",");
+				strcat(down_node_list, bgl_down_node);
+			} else
+				error("down_node_list overflow");
 		}
 	}
 	if (!down_node_list){
@@ -1087,12 +907,6 @@ static void _update_bgl_node_bitmap()
 			time_ptr);
 		slurm_drain_nodes(down_node_list, reason);
 	}
-
- cleanup:
-	xfree(bgl_down_node);
-	xfree(slurm_down_node);
-	xfree(down_node_list);
-	  
 #endif
 }
 
@@ -1124,7 +938,7 @@ extern void set_bp_node_state(rm_BP_state_t state, node_record node){
 		debug("RM_BP_DOWN");
 		break;
 	case RM_BP_NAV:
-		debug("BGL state update returned UNKNOWN state");
+		debug("RM_BP_NAV");
 		break;
 	defalt:
 		debug("BGL state update returned UNKNOWN state");
@@ -1145,17 +959,16 @@ bluegene_agent(void *args)
 {
 	struct timeval tv1, tv2;
 	char tv_str[20];
-	while (1) {
-		sleep(SLEEP_TIME);      /* don't run continuously */
 
+	while (1) {
 		gettimeofday(&tv1, NULL);
 		_update_bgl_node_bitmap();
-
 		gettimeofday(&tv2, NULL);
 		_diff_tv_str(&tv1, &tv2, tv_str, 20);
 #if DEBUG
 		info("Bluegene status update: completed, %s", tv_str);
 #endif
+		sleep(SLEEP_TIME);      /* don't run continuously */
 	}
 }
 
diff --git a/src/plugins/select/bluegene/bluegene.h b/src/plugins/select/bluegene/bluegene.h
index 6df8eb6427f..e5c2eadd412 100644
--- a/src/plugins/select/bluegene/bluegene.h
+++ b/src/plugins/select/bluegene/bluegene.h
@@ -42,9 +42,6 @@
   rm_BGL_t *bgl;
 #endif
 
-List slurm_part_list;			/* cached copy of slurm's part_list */
-List bgl_list;				/* list of bgl_record entries */
-List bgl_conf_list;			/* list of bgl_conf_record entries */
 typedef int lifecycle_type_t;
 enum part_lifecycle {DYNAMIC, STATIC};
 
@@ -53,7 +50,7 @@ typedef struct bgl_record {
 	pm_partition_id_t* bgl_part_id;	/* ID returned from CMCS	*/
 	char* nodes;			/* String of nodes in partition */
 	lifecycle_type_t part_lifecycle;/* either STATIC or DYNAMIC	*/
-	hostlist_t* hostlist;		/* expanded form of hosts */
+	hostlist_t hostlist;		/* expanded form of hosts */
 	bitstr_t *bitmap;		/* bitmap of nodes for this partition */
 	struct partition* alloc_part;       /* the allocated partition   */
 	int size;			/* node count for the partitions */
@@ -75,10 +72,22 @@ typedef struct bgl_conf_record{
  * 
  */
 extern int read_bgl_conf();
-/** */
-extern int init_bgl();
 
-extern int create_static_partitions();
+/* Initialize all plugin variables */
+extern int init_bgl(void);
+
+/* Purge all plugin variables */
+extern void fini_bgl(void);
+
+/**
+ * create_static_partitions - create the static partitions that will be used
+ * for scheduling.  
+ * IN - part_list: slurmctld's list of partition configurations 
+ * OUT - (global, to slurmctld): Table of partitionIDs to geometries
+ * RET - success of fitting all configurations
+ */
+extern int create_static_partitions(List part_list);
+
 /** */
 extern int submit_job(struct job_record *job_ptr, bitstr_t *bitmap,
 	       int min_nodes, int max_nodes);
diff --git a/src/plugins/select/bluegene/partition_sys.c b/src/plugins/select/bluegene/partition_sys.c
index cc91d3c0858..1cdf8dce69b 100755
--- a/src/plugins/select/bluegene/partition_sys.c
+++ b/src/plugins/select/bluegene/partition_sys.c
@@ -31,10 +31,10 @@
 /** need this to have it compile with the BGL header*/
 // typedef int MPIR_PROCDESC;
 
+#include <math.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <unistd.h>
-#include <math.h>
 #include "src/common/list.h"
 #include "src/common/xmalloc.h"
 #include "partition_sys.h"
@@ -70,8 +70,8 @@ List bgl_sys_free = NULL;
 List bgl_sys_allocated = NULL;
 
 void _init_sys(partition_t*);
-int _is_not_equals_all_coord(uint16_t* rec_a, uint16_t* rec_b);
-int _is_not_equals_some_coord(uint16_t* rec_A, uint16_t* rec_b);
+static int _is_not_equals_all_coord(uint16_t* rec_a, uint16_t* rec_b);
+static int _is_not_equals_some_coord(uint16_t* rec_a, uint16_t* rec_b);
 
 #ifdef _RM_API_H__
 /** 
@@ -218,7 +218,7 @@ int _create_bgl_partitions(List requests)
  * we assume that the partitioning done before hand
  * 
  */
-int _fit_request(List sys, List allocated, uint16_t* request)
+static int _fit_request(List sys, List allocated, uint16_t* request)
 {
 	int i, rc = 1;
 	uint16_t* new_request = NULL;
@@ -317,7 +317,8 @@ int _fit_request(List sys, List allocated, uint16_t* request)
  * odd number sizes, and dimensions will kill this!!!
  * 
  */
-int _break_up_partition(List sys, partition_t* partition_to_break, int index)
+static int _break_up_partition(List sys, partition_t* partition_to_break, 
+		int index)
 {
 	/* the two new partitions to create */
 	partition_t *first_part, *second_part;
@@ -340,13 +341,15 @@ int _break_up_partition(List sys, partition_t* partition_to_break, int index)
 	first_part->dimensions[index] /= 2;
 	second_part->dimensions[index] /= 2;
 
-	diff = partition_to_break->tr_coord[index] - partition_to_break->bl_coord[index];
+	diff = partition_to_break->tr_coord[index] - 
+		partition_to_break->bl_coord[index];
 	first_part->tr_coord[index] = floor(diff/2);
 	second_part->bl_coord[index] = ceil(diff/2);
 
 	itr = list_iterator_create(sys);
 	while ((next = (partition_t*) list_next(itr))) {
-		if(!is_not_correct_dimension(next->dimensions, partition_to_break->dimensions)){
+		if(!is_not_correct_dimension(next->dimensions, 
+				partition_to_break->dimensions)){
 			/* next we remove the old partition */
 			list_remove(itr);
 
@@ -784,7 +787,7 @@ void rotate_part(const uint16_t* config, uint16_t** new_config)
 	xfree(*new_config);
 	(*new_config) = (uint16_t*) xmalloc(SYSTEM_DIMENSIONS * 
 		sizeof(uint16_t));
-	
+
 	if (config[0] > config[1]){
 		if (config[1] > config[2]){
 			; // array already sorted
@@ -857,7 +860,7 @@ void _init_sys(partition_t *part)
 /** 
  * to be used by list object to destroy the array elements
  */
-void _int_array_destroy(void* object)
+static void _int_array_destroy(void* object)
 {
 	xfree(object);
 }
@@ -932,7 +935,7 @@ int get_switch(int* cur_coord, List switch_list)
 		switch_list = list_create(rm_switch_t_destroy);
 	}
 
-	if (get_bp_by_location(cur_coord, bp)){
+	if (_get_bp_by_location(cur_coord, bp)){
 		return SLURM_ERROR;
 	}
 	
@@ -987,7 +990,7 @@ void rm_switch_t_destroy(void* object)
  * this is just stupid.  there are some implicit rules for where
  * "NextBP" goes to, but we don't know, so we have to do this.
  */
-int get_bp_by_location(int* cur_coord, rm_BP_t* bp)
+static int _get_bp_by_location(int* cur_coord, rm_BP_t* bp)
 {
 	int BP_num;
 	rm_location_t* loc;
@@ -1002,7 +1005,7 @@ int get_bp_by_location(int* cur_coord, rm_BP_t* bp)
 		rm_get_data(bgl, RM_NextBP, &bp);	
 	}
 
-	error("get_bp_by_location: could not find specified bp.");
+	error("_get_bp_by_location: could not find specified bp.");
 	return SLURM_ERROR;
 }
 #endif
@@ -1012,7 +1015,7 @@ int get_bp_by_location(int* cur_coord, rm_BP_t* bp)
  * 
  * returns 0 if equals, 1 if not equals
  */
-int _is_not_equals_some_coord(uint16_t* rec_a, uint16_t* rec_b)
+static int _is_not_equals_some_coord(uint16_t* rec_a, uint16_t* rec_b)
 {
 	int i;
 	for (i=0; i<SYSTEM_DIMENSIONS; i++){
@@ -1027,7 +1030,7 @@ int _is_not_equals_some_coord(uint16_t* rec_a, uint16_t* rec_b)
  * 
  * returns 0 if equals, 1 if not equals
  */
-int _is_not_equals_all_coord(uint16_t* rec_a, uint16_t* rec_b)
+static int _is_not_equals_all_coord(uint16_t* rec_a, uint16_t* rec_b)
 {
 	int i;
 	for (i=0; i<SYSTEM_DIMENSIONS; i++){
@@ -1040,7 +1043,7 @@ int _is_not_equals_all_coord(uint16_t* rec_a, uint16_t* rec_b)
 /** 
  * sort the partitions by increasing size
  */
-void sort_partitions_by_inc_size(List parts){
+static void _sort_partitions_by_inc_size(List parts){
 	if (parts == NULL)
 		return;
 	list_sort(parts, (ListCmpF) _partition_cmpf_inc);
@@ -1049,7 +1052,7 @@ void sort_partitions_by_inc_size(List parts){
 /** 
  * sort the partitions by decreasing size
  */
-void sort_partitions_by_dec_size(List parts){
+static void _sort_partitions_by_dec_size(List parts){
 	if (parts == NULL)
 		return;
 	list_sort(parts, (ListCmpF) _partition_cmpf_dec);
@@ -1062,7 +1065,7 @@ void sort_partitions_by_dec_size(List parts){
  * returns: -1: rec_a  < rec_b    0: rec_a == rec_b   1: rec_a > rec_b
  * 
  */
-int _partition_cmpf_inc(struct partition* rec_a, struct partition* rec_b)
+static int _partition_cmpf_inc(struct partition* rec_a, struct partition* rec_b)
 {
 	if (rec_a->size < rec_b->size)
 		return -1;
@@ -1078,7 +1081,7 @@ int _partition_cmpf_inc(struct partition* rec_a, struct partition* rec_b)
  * returns: -1: rec_a > rec_b    0: rec_a == rec_b   1: rec_a < rec_b
  * 
  */
-int _partition_cmpf_dec(struct partition* rec_a, struct partition* rec_b)
+static int _partition_cmpf_dec(struct partition* rec_a, struct partition* rec_b)
 {
 	if (rec_a->size > rec_b->size)
 		return -1;
@@ -1089,13 +1092,13 @@ int _partition_cmpf_dec(struct partition* rec_a, struct partition* rec_b)
 }
 
 /** */
-List _get_bgl_sys_allocated()
+static List _get_bgl_sys_allocated()
 {
 	return bgl_sys_allocated;	
 }
 
 /** */
-List _get_bgl_sys_free()
+static List _get_bgl_sys_free()
 {
 	return bgl_sys_free;
 }
diff --git a/src/plugins/select/bluegene/select_bluegene.c b/src/plugins/select/bluegene/select_bluegene.c
index 256bf14e0d9..1f38d8c70ad 100644
--- a/src/plugins/select/bluegene/select_bluegene.c
+++ b/src/plugins/select/bluegene/select_bluegene.c
@@ -87,7 +87,7 @@ static bool thread_running = false;
 static pthread_mutex_t thread_flag_mutex = PTHREAD_MUTEX_INITIALIZER;
 
 /** initialize the status pthread */
-int _init_status_pthread();
+static int _init_status_pthread();
 
 /*
  * init() is called when the plugin is loaded, before any other functions
@@ -114,7 +114,7 @@ int init ( void )
 	return SLURM_SUCCESS;
 }
 
-int _init_status_pthread()
+static int _init_status_pthread()
 {
 	pthread_attr_t attr;
 
@@ -146,6 +146,8 @@ int fini ( void )
 	}
 	pthread_mutex_unlock( &thread_flag_mutex );
 
+	fini_bgl();
+
 	return SLURM_SUCCESS;
 }
 
@@ -162,21 +164,19 @@ int fini ( void )
 extern int select_p_part_init(List part_list)
 { 
 	debug("select_p_part_init");
-	/** isn't the part_list already accessible to me? */
-	slurm_part_list = part_list;
 
 	if (read_bgl_conf())
 		return SLURM_ERROR;
 
 	/* create_static_partitions */
-	if (create_static_partitions()){
+	if (create_static_partitions(part_list)){
 		/* error in creating the static partitions, so
 		 * partitions referenced by submitted jobs won't
 		 * correspond to actual slurm partitions/bgl
 		 * partitions.
 		 */
 		fatal("Error, could not create the static partitions");
-		return 1;
+		return SLURM_ERROR;
 	}
 
 	return SLURM_SUCCESS; 
@@ -207,11 +207,9 @@ extern int select_p_node_init(struct node_record *node_ptr, int node_cnt)
 		return SLURM_ERROR;
 	}
 
-	// error("select/bluegene plugin not yet functional");
 	debug("select_p_node_init should be doing a system wide status "
 	      "check on all the nodes to updated the system bitmap, "
 	      "along with killing old jobs, etc");
-	// return SLURM_ERROR;
 	return SLURM_SUCCESS;
 }
 
-- 
GitLab