From 9ca53d1150a85a9cd7e82f160270a31d2839dbd7 Mon Sep 17 00:00:00 2001
From: Danny Auble <da@llnl.gov>
Date: Fri, 14 Oct 2005 23:05:38 +0000
Subject: [PATCH] update for bgl small partition manager

---
 src/partition_allocator/partition_allocator.c |  2 +-
 src/plugins/select/bluegene/bgl_job_place.c   | 34 ++++++++++--
 src/plugins/select/bluegene/bgl_job_run.c     | 53 ++++++++++++++-----
 src/plugins/select/bluegene/bluegene.c        |  8 +--
 src/plugins/select/bluegene/bluegene.h        | 47 ++++++++--------
 src/slurmctld/job_mgr.c                       |  2 +-
 src/slurmctld/node_scheduler.c                | 11 ++++
 7 files changed, 113 insertions(+), 44 deletions(-)

diff --git a/src/partition_allocator/partition_allocator.c b/src/partition_allocator/partition_allocator.c
index 766ca7818c3..8ea8db90b0a 100644
--- a/src/partition_allocator/partition_allocator.c
+++ b/src/partition_allocator/partition_allocator.c
@@ -635,7 +635,7 @@ extern void pa_init(node_info_msg_t *node_info_ptr)
 #endif
 		pa_system_ptr->num_of_proc = node_info_ptr->record_count;
         } 
-
+	
 #ifdef HAVE_BGL_FILES
 	if (have_db2
 	&&  (DIM_SIZE[X]==0) && (DIM_SIZE[X]==0) && (DIM_SIZE[X]==0)) {
diff --git a/src/plugins/select/bluegene/bgl_job_place.c b/src/plugins/select/bluegene/bgl_job_place.c
index 9558baf6632..dc32e36c7df 100644
--- a/src/plugins/select/bluegene/bgl_job_place.c
+++ b/src/plugins/select/bluegene/bgl_job_place.c
@@ -97,7 +97,8 @@ static int _find_best_partition_match(struct job_record* job_ptr,
 	uint32_t req_procs = job_ptr->num_procs;
 	int rot_cnt = 0;
 	uint32_t proc_cnt;
-			
+	int job_running = 0;
+
 	if(!bgl_list) {
 		error("_find_best_partition_match: There is no bgl_list");
 		return SLURM_ERROR;
@@ -120,11 +121,29 @@ static int _find_best_partition_match(struct job_record* job_ptr,
 
 	itr = list_iterator_create(bgl_list);
 	*found_bgl_record = NULL;
-
+	
+	if(full_system_partition->job_running) {
+		debug("_find_best_partition_match none found");
+		return SLURM_ERROR;
+	}
+	
 	debug("number of partitions to check: %d", list_count(bgl_list));
+     	itr = list_iterator_create(bgl_list);
 	while ((record = (bgl_record_t*) list_next(itr))) {
 		/* Check processor count */
-		printf("%d\n",req_procs);
+		if(record->job_running) {
+			job_running = 1;
+			debug("partition %s in use by %s", 
+			      record->bgl_part_id,
+			      record->user_name);
+			continue;
+		}
+		if(record->full_partition && job_running) {
+			debug("Can't run on full system partition "
+			      "another partition has a job running.");
+			continue;
+		}
+			
 		if (req_procs > record->cnodes_per_bp) {
 			/* We use the c-node count here. Job could start
 			 * twice this count if VIRTUAL_NODE_MODE, but this
@@ -210,12 +229,17 @@ static int _find_best_partition_match(struct job_record* job_ptr,
 			if (!match) 
 				continue;	/* Not usable */
 		}
-
+		
+		/* mark as in use */ 
+		slurm_mutex_lock(&part_state_mutex);
+		record->job_running = 1;
+		slurm_mutex_unlock(&part_state_mutex);
+		
 		*found_bgl_record = record;
 		break;
 	}
 	list_iterator_destroy(itr);
-	
+
 	/* set the bitmap and do other allocation activities */
 	if (*found_bgl_record) {
 		debug("_find_best_partition_match %s <%s>", 
diff --git a/src/plugins/select/bluegene/bgl_job_run.c b/src/plugins/select/bluegene/bgl_job_run.c
index 45b4137e8d3..f86cfb915cc 100644
--- a/src/plugins/select/bluegene/bgl_job_run.c
+++ b/src/plugins/select/bluegene/bgl_job_run.c
@@ -428,7 +428,7 @@ static void _term_agent(bgl_update_t *bgl_update_ptr)
 	/* remove the partition's users */
 	bgl_record = find_bgl_record(bgl_update_ptr->bgl_part_id);
 	if(bgl_record) {
-		debug2("got the record %s user is %s",
+		debug("got the record %s user is %s",
 		      bgl_record->bgl_part_id,
 		      bgl_record->user_name);
 
@@ -674,25 +674,34 @@ extern int start_job(struct job_record *job_ptr)
 
 	_part_op(bgl_update_ptr);
 #else
-	if (bgl_list) {
-		ListIterator itr;
-		bgl_record_t *bgl_record;
-		char *part_id = NULL;
-		uint16_t node_use;
+	ListIterator itr;
+	bgl_record_t *bgl_record = NULL;
+	bgl_record_t *found_record = NULL;
+	char *part_id = NULL;
+	uint16_t node_use;
 
+	if (bgl_list) {
+		
 		select_g_get_jobinfo(job_ptr->select_jobinfo,
 			SELECT_DATA_PART_ID, &part_id);
 		select_g_get_jobinfo(job_ptr->select_jobinfo,
 			SELECT_DATA_NODE_USE, &node_use);
+		if(!part_id) {
+			error("NO part_id");
+			return rc;
+		}
+		bgl_record = find_bgl_record(part_id);
 		itr = list_iterator_create(bgl_list);
-		while ((bgl_record = (bgl_record_t *) list_next(itr))) {
-			if (!part_id)
-				break;
-			if ((!bgl_record->bgl_part_id)
-			||  (strcmp(part_id, bgl_record->bgl_part_id)))
+		while ((found_record = (bgl_record_t *) list_next(itr))) {
+			if (bgl_record->full_partition)
+				found_record->state = RM_PARTITION_FREE;
+			else if(found_record->full_partition)
+				found_record->state = RM_PARTITION_FREE;
+			if ((!found_record->bgl_part_id)
+			    ||  (strcmp(part_id, found_record->bgl_part_id)))
 				continue;
-			bgl_record->node_use = node_use;
-			bgl_record->state = RM_PARTITION_READY;
+			found_record->node_use = node_use;
+			found_record->state = RM_PARTITION_READY;
 			last_bgl_update = time(NULL);
 			break;
 		}
@@ -729,6 +738,24 @@ int term_job(struct job_record *job_ptr)
 	info("Queue termination of job %u in BGL partition %s",
 		job_ptr->job_id, bgl_update_ptr->bgl_part_id);
 	_part_op(bgl_update_ptr);
+#else
+	bgl_record_t *bgl_record;
+	char *part_id = NULL;
+		
+	if (bgl_list) {
+		
+		select_g_get_jobinfo(job_ptr->select_jobinfo,
+			SELECT_DATA_PART_ID, &part_id);
+		if(!part_id) {
+			error("NO part_id");
+			return rc;
+		}
+		bgl_record = find_bgl_record(part_id);
+		bgl_record->state = RM_PARTITION_FREE;
+		bgl_record->job_running = 0;
+		last_bgl_update = time(NULL);		
+		xfree(part_id);
+	}
 #endif
 	return rc;
 }
diff --git a/src/plugins/select/bluegene/bluegene.c b/src/plugins/select/bluegene/bluegene.c
index b7262719d67..b24c833ed3a 100644
--- a/src/plugins/select/bluegene/bluegene.c
+++ b/src/plugins/select/bluegene/bluegene.c
@@ -54,6 +54,7 @@ pthread_mutex_t part_state_mutex = PTHREAD_MUTEX_INITIALIZER;
 int num_part_to_free = 0;
 int num_part_freed = 0;
 int partitions_are_created = 0;
+bgl_record_t *full_system_partition = NULL;
 
 #ifdef HAVE_BGL_FILES
 static pthread_mutex_t freed_cnt_mutex = PTHREAD_MUTEX_INITIALIZER;
@@ -620,7 +621,7 @@ extern int create_static_partitions(List part_list)
 
 	bgl_record = (bgl_record_t*) xmalloc(sizeof(bgl_record_t));
 	bgl_record->nodes = xmalloc(sizeof(char)*13);
-
+	full_system_partition = bgl_record;
 #ifdef HAVE_BGL_FILES
 	bgl_record->geo[X] = DIM_SIZE[X] - 1;
 	bgl_record->geo[Y] = DIM_SIZE[Y] - 1;
@@ -638,7 +639,7 @@ extern int create_static_partitions(List part_list)
 			bgl_record->geo[X], bgl_record->geo[Y], 
 			bgl_record->geo[Z]);
 	bgl_record->quarter = -1;
-
+	bgl_record->full_partition = 1;
        	if(bgl_found_part_list) {
 		itr = list_iterator_create(bgl_found_part_list);
 		while ((found_record = (bgl_record_t *) list_next(itr)) 
@@ -742,7 +743,7 @@ no_total:
 				debug("full partiton = %s.", 
 				      bgl_record->bgl_part_id);
 				bgl_record->full_partition = 1;
-				
+				full_system_partition = bgl_record;
 				break;
 			}
 		}
@@ -1246,6 +1247,7 @@ static int _validate_config_nodes(void)
 						xmalloc(sizeof(bgl_record_t));
 					list_append(bgl_list, record);
 	
+					full_system_partition = record;
 					record->full_partition = 1;
 					record->bgl_part_id = xstrdup(
 						init_record->bgl_part_id);
diff --git a/src/plugins/select/bluegene/bluegene.h b/src/plugins/select/bluegene/bluegene.h
index cf4d4c64bf8..54151c38656 100644
--- a/src/plugins/select/bluegene/bluegene.h
+++ b/src/plugins/select/bluegene/bluegene.h
@@ -44,26 +44,8 @@
 #include "src/partition_allocator/partition_allocator.h"
 #include "src/plugins/select/bluegene/wrap_rm_api.h"
 
-/* Global variables */
-extern rm_BGL_t *bgl;
-extern char *bluegene_blrts;
-extern char *bluegene_linux;
-extern char *bluegene_mloader;
-extern char *bluegene_ramdisk;
-extern char *bridge_api_file;
-extern int numpsets;
-extern pa_system_t *pa_system_ptr;
-extern time_t last_bgl_update;
-extern List bgl_curr_part_list; 	/* Initial bgl partition state */
-extern List bgl_list;			/* List of configured BGL blocks */
-extern bool agent_fini;
-extern pthread_mutex_t part_state_mutex;
-extern int num_part_to_free;
-extern int num_part_freed;
-extern int partitions_are_created;
-extern int procs_per_node;
-
 typedef int lifecycle_type_t;
+
 enum part_lifecycle {DYNAMIC, STATIC};
 
 typedef struct bgl_record {
@@ -96,8 +78,9 @@ typedef struct bgl_record {
 					   partition */
 	int job_running;                /* signal if there is a job running 
 					   on the partition */
-	int cnodes_per_bp;
-	int quarter;
+	int cnodes_per_bp;              /* count of cnodes per Base part */
+	int quarter;                    /* used for small partitions 
+					   to determine quarter of BP */
 } bgl_record_t;
 
 typedef struct {
@@ -116,6 +99,28 @@ typedef struct {
 	List switch_list;
 } bgl_bp_t;
 
+
+/* Global variables */
+extern rm_BGL_t *bgl;
+extern char *bluegene_blrts;
+extern char *bluegene_linux;
+extern char *bluegene_mloader;
+extern char *bluegene_ramdisk;
+extern char *bridge_api_file;
+extern int numpsets;
+extern pa_system_t *pa_system_ptr;
+extern time_t last_bgl_update;
+extern List bgl_curr_part_list; 	/* Initial bgl partition state */
+extern List bgl_list;			/* List of configured BGL blocks */
+extern bool agent_fini;
+extern pthread_mutex_t part_state_mutex;
+extern int num_part_to_free;
+extern int num_part_freed;
+extern int partitions_are_created;
+extern int procs_per_node;
+extern bgl_record_t *full_system_partition;
+
+
 #define MAX_PTHREAD_RETRIES  1
 
 #include "bgl_part_info.h"
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 2d92a96df29..7e72e6ba761 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -1600,7 +1600,7 @@ extern int job_complete(uint32_t job_id, uid_t uid, bool requeue,
 		requeue = 0;
 		if (job_return_code == 0)
 			job_return_code = 1;
-		info("Batch job launch failure, JobId=%u", job_ptr->job_id);	
+		info("Batch job launch failure, JobId=%u", job_ptr->job_id);
 	}
 
 	if (requeue && job_ptr->details && job_ptr->batch_flag) {
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index 56bc7214ab8..d1b387fafd2 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -286,8 +286,19 @@ _pick_best_load(struct job_record *job_ptr, bitstr_t * bitmap,
 	 * to this job */
 	if (job_ptr->details && job_ptr->details->req_node_bitmap)
 		bit_or(bitmap, job_ptr->details->req_node_bitmap);
+	
+#ifdef HAVE_BGL
+	/* Reset the bitmap here for small partitions; the
+	 * BGL plugin will do the scheduling.
+	 */
+	bit_or(bitmap, light_load_bit);
+#endif
 	error_code = select_g_job_test(job_ptr, bitmap, 
 			min_nodes, max_nodes);
+#ifdef HAVE_BGL
+	FREE_NULL_BITMAP(light_load_bit);
+	return error_code;
+#endif
 
 	/* now try to use idle and lightly loaded nodes */
 	if (error_code) {
-- 
GitLab