From aa19c74cc0ef114daa9ce0f3b2a3fb9fe35b4bbb Mon Sep 17 00:00:00 2001
From: Danny Auble <da@llnl.gov>
Date: Mon, 17 Oct 2005 21:38:49 +0000
Subject: [PATCH] fixes so that small partitions work correctly on ubgl
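
The node scheduler in slurmctld may call the select plugin several
times while deciding whether a job is runnable.  Add a
job_ptr->checked counter so the bluegene plugin can tell those passes
apart: while checked < 2 a job is rejected if the full system
partition or the candidate partition already has a job running, and
on later passes the plugin falls through so the scheduler can report
the job as runnable, just not right now.  Marking a partition as in
use moves out of _find_best_partition_match and into the job-run path
(_sync_agent), full_system_partition is now set from init_record in
_validate_config_nodes, and a stray printf is removed from
read_bgl_partitions.

Roughly, the intended flow is (a simplified sketch, not the literal
code from the hunks below):

	/* select plugin, once per scheduling pass */
	if (record->job_running && job_ptr->checked < 2) {
		/* busy: reject for now and count the pass */
		job_ptr->checked++;
		return SLURM_ERROR;
	}
	/* checked >= 2: fall through; runnable, just not right now */

	/* slurmctld, on the way out of _pick_best_nodes */
	job_ptr->checked = 0;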

---
 src/plugins/select/bluegene/bgl_job_place.c |  39 +++++---
 src/plugins/select/bluegene/bgl_job_run.c   |   8 +-
 src/plugins/select/bluegene/bgl_part_info.c |   8 +-
 src/plugins/select/bluegene/bluegene.c      |   6 +-
 src/plugins/select/bluegene/partition_sys.c |   1 -
 src/slurmctld/node_scheduler.c              | 103 +++++++++++++-------
 src/slurmctld/slurmctld.h                   |   3 +
 7 files changed, 107 insertions(+), 61 deletions(-)

diff --git a/src/plugins/select/bluegene/bgl_job_place.c b/src/plugins/select/bluegene/bgl_job_place.c
index dc32e36c7df..4c98e6028cd 100644
--- a/src/plugins/select/bluegene/bgl_job_place.c
+++ b/src/plugins/select/bluegene/bgl_job_place.c
@@ -103,6 +103,18 @@ static int _find_best_partition_match(struct job_record* job_ptr,
 		error("_find_best_partition_match: There is no bgl_list");
 		return SLURM_ERROR;
 	}
+	/* Check job_ptr->checked to see how many times the node
+	   scheduler has already tested whether this job is runnable.
+	   If checked >= 2, fall through to tell the scheduler that the
+	   job is runnable, just not right now.
+	*/
+	if(full_system_partition->job_running && job_ptr->checked<2) {
+		job_ptr->checked++;
+		debug("_find_best_partition_match none found "
+		      "full system running on partition %s.",
+		      full_system_partition->bgl_part_id);
+		return SLURM_ERROR;
+	}
 
 	select_g_get_jobinfo(job_ptr->select_jobinfo,
 		SELECT_DATA_CONN_TYPE, &conn_type);
@@ -119,20 +131,19 @@ static int _find_best_partition_match(struct job_record* job_ptr,
 	/* this is where we should have the control flow depending on
 	 * the spec arguement */
 
-	itr = list_iterator_create(bgl_list);
 	*found_bgl_record = NULL;
 	
-	if(full_system_partition->job_running) {
-		debug("_find_best_partition_match none found");
-		return SLURM_ERROR;
-	}
-	
 	debug("number of partitions to check: %d", list_count(bgl_list));
      	itr = list_iterator_create(bgl_list);
 	while ((record = (bgl_record_t*) list_next(itr))) {
 		/* Check processor count */
-		if(record->job_running) {
-			job_running = 1;
+		/* Check job_ptr->checked to see how many times the node
+		   scheduler has already tested whether this job is runnable.
+		   If checked >= 2, fall through to tell the scheduler that
+		   the job is runnable, just not right now.
+		*/
+		if(record->job_running && job_ptr->checked<2) {
+			job_running++;
 			debug("partition %s in use by %s", 
 			      record->bgl_part_id,
 			      record->user_name);
@@ -163,8 +174,8 @@ static int _find_best_partition_match(struct job_record* job_ptr,
 		 * check that the number of nodes is suitable
 		 */
  		if ((record->bp_count < min_nodes)
-		||  (max_nodes != 0 && record->bp_count > max_nodes)
-		||  (record->bp_count < target_size)) {
+		    ||  (max_nodes != 0 && record->bp_count > max_nodes)
+		    ||  (record->bp_count < target_size)) {
 			debug("partition %s node count not suitable",
 				record->bgl_part_id);
 			continue;
@@ -230,16 +241,12 @@ static int _find_best_partition_match(struct job_record* job_ptr,
 				continue;	/* Not usable */
 		}
 		
-		/* mark as in use */ 
-		slurm_mutex_lock(&part_state_mutex);
-		record->job_running = 1;
-		slurm_mutex_unlock(&part_state_mutex);
-		
 		*found_bgl_record = record;
 		break;
 	}
 	list_iterator_destroy(itr);
-
+	job_ptr->checked++;
+			
 	/* set the bitmap and do other allocation activities */
 	if (*found_bgl_record) {
 		debug("_find_best_partition_match %s <%s>", 
diff --git a/src/plugins/select/bluegene/bgl_job_run.c b/src/plugins/select/bluegene/bgl_job_run.c
index f86cfb915cc..5ef788d237e 100644
--- a/src/plugins/select/bluegene/bgl_job_run.c
+++ b/src/plugins/select/bluegene/bgl_job_run.c
@@ -186,7 +186,7 @@ static void _sync_agent(bgl_update_t *bgl_update_ptr)
 		error("No partition %s", bgl_update_ptr->bgl_part_id);
 		return;
 	}
-		
+	bgl_record->job_running = 1;				
 	if(bgl_record->state==RM_PARTITION_READY) {
 		if(bgl_record->user_uid != bgl_update_ptr->uid) {
 			slurm_mutex_lock(&part_state_mutex);
@@ -228,7 +228,6 @@ static void _start_agent(bgl_update_t *bgl_update_ptr)
 	int retries;
 	
 	bgl_record = find_bgl_record(bgl_update_ptr->bgl_part_id);
-					
 	if(!bgl_record) {
 		error("partition %s not found in bgl_list",
 		      bgl_update_ptr->bgl_part_id);
@@ -238,7 +237,7 @@ static void _start_agent(bgl_update_t *bgl_update_ptr)
 	slurm_mutex_lock(&part_state_mutex);
 	bgl_record->job_running = 1;
 	slurm_mutex_unlock(&part_state_mutex);
-		
+			
 	if(bgl_record->state == RM_PARTITION_DEALLOCATING) {
 		debug("Partition is in Deallocating state, waiting for free.");
 		bgl_free_partition(bgl_record);
@@ -751,6 +750,9 @@ int term_job(struct job_record *job_ptr)
 			return rc;
 		}
 		bgl_record = find_bgl_record(part_id);
+		info("Finished job %u in BGL partition %s",
+		     job_ptr->job_id, 
+		     bgl_record->bgl_part_id);
 		bgl_record->state = RM_PARTITION_FREE;
 		bgl_record->job_running = 0;
 		last_bgl_update = time(NULL);		
diff --git a/src/plugins/select/bluegene/bgl_part_info.c b/src/plugins/select/bluegene/bgl_part_info.c
index f1bde2d7cf7..3cff059732c 100644
--- a/src/plugins/select/bluegene/bgl_part_info.c
+++ b/src/plugins/select/bluegene/bgl_part_info.c
@@ -305,7 +305,9 @@ extern int update_partition_list()
 			break;
 		} else if(bgl_record->node_use != node_use) {
 			debug("node_use of Partition %s was %d and now is %d",
-			      name, bgl_record->node_use, node_use);
+			      bgl_record->bgl_part_id, 
+			      bgl_record->node_use, 
+			      node_use);
 			bgl_record->node_use = node_use;
 			updated = 1;
 		}
@@ -319,7 +321,9 @@ extern int update_partition_list()
 			break;
 		} else if(bgl_record->state != state) {
 			debug("state of Partition %s was %d and now is %d",
-			      name, bgl_record->state, state);
+			      bgl_record->bgl_part_id, 
+			      bgl_record->state, 
+			      state);
 			/* 
 			   check to make sure partition went 
 			   through freeing correctly 
diff --git a/src/plugins/select/bluegene/bluegene.c b/src/plugins/select/bluegene/bluegene.c
index b24c833ed3a..6f18248d995 100644
--- a/src/plugins/select/bluegene/bluegene.c
+++ b/src/plugins/select/bluegene/bluegene.c
@@ -621,7 +621,6 @@ extern int create_static_partitions(List part_list)
 
 	bgl_record = (bgl_record_t*) xmalloc(sizeof(bgl_record_t));
 	bgl_record->nodes = xmalloc(sizeof(char)*13);
-	full_system_partition = bgl_record;
 #ifdef HAVE_BGL_FILES
 	bgl_record->geo[X] = DIM_SIZE[X] - 1;
 	bgl_record->geo[Y] = DIM_SIZE[Y] - 1;
@@ -1246,8 +1245,9 @@ static int _validate_config_nodes(void)
 					record = (bgl_record_t*) 
 						xmalloc(sizeof(bgl_record_t));
 					list_append(bgl_list, record);
-	
-					full_system_partition = record;
+					debug("full system %s",
+					      init_record->bgl_part_id);
+					full_system_partition = init_record;
 					record->full_partition = 1;
 					record->bgl_part_id = xstrdup(
 						init_record->bgl_part_id);
diff --git a/src/plugins/select/bluegene/partition_sys.c b/src/plugins/select/bluegene/partition_sys.c
index 11b8dae75aa..4c7ac659e53 100755
--- a/src/plugins/select/bluegene/partition_sys.c
+++ b/src/plugins/select/bluegene/partition_sys.c
@@ -528,7 +528,6 @@ int read_bgl_partitions()
 		else
 			bgl_record->cnodes_per_bp = procs_per_node;
 		
-		printf("got %d\n",bgl_record->cnodes_per_bp);
 		bgl_record->part_lifecycle = STATIC;
 						
 clean_up:	if (bgl_recover
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index 09dfd5c2771..b7388813ec2 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -408,7 +408,8 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 	}
 
         /* Is Consumable Resources enabled? */
-        error_code = select_g_get_info_from_plugin (SELECT_CR_PLUGIN, &cr_enabled);
+        error_code = select_g_get_info_from_plugin (SELECT_CR_PLUGIN, 
+						    &cr_enabled);
         if (error_code != SLURM_SUCCESS)
                 return error_code;
           
@@ -417,7 +418,9 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
                 job_ptr->cr_enabled = cr_enabled; /* CR enabled for this job */
 
                 debug3(" Is this Job %u in exclusive mode? %d cr_enabled %d", 
-                      job_ptr->job_id, job_ptr->details->exclusive, cr_enabled);
+		       job_ptr->job_id, 
+		       job_ptr->details->exclusive, 
+		       cr_enabled);
 
                 if (job_ptr->details->exclusive) {
                   partially_idle_node_bitmap = bit_copy(idle_node_bitmap);
@@ -438,10 +441,12 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 		/* we have already confirmed that all of these nodes have a
 		 * usable configuration and are in the proper partition */
 		if (min_nodes != 0)
-			total_nodes = bit_set_count(job_ptr->details->req_node_bitmap);
+			total_nodes = bit_set_count(
+				job_ptr->details->req_node_bitmap);
 		if (job_ptr->num_procs != 0) {
                   if (cr_enabled) {
-                        error_code = select_g_get_extra_jobinfo (NULL, job_ptr, 
+                        error_code = select_g_get_extra_jobinfo (NULL, 
+								 job_ptr, 
                                                                  SELECT_CR_CPU_COUNT, 
                                                                  &total_cpus);
                         if (error_code != SLURM_SUCCESS) {
@@ -449,7 +454,8 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
                                 return error_code;
                         }
                   } else 
-                        total_cpus = count_cpus(job_ptr->details->req_node_bitmap);
+                        total_cpus = count_cpus(
+				job_ptr->details->req_node_bitmap);
                 }
 		if ((max_nodes != 0) &&
 		    (total_nodes > max_nodes)) {
@@ -465,32 +471,39 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 			return ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE;
 		}
 		if ((min_nodes <= total_nodes) && 
-		    (max_nodes <= min_nodes  ) &&
+		    (max_nodes <= min_nodes) &&
 		    (job_ptr->num_procs <= total_cpus )) {
 			if (!bit_super_set(job_ptr->details->req_node_bitmap, 
                                         avail_node_bitmap)) {
                                 if (cr_enabled) 
-                                       FREE_NULL_BITMAP(partially_idle_node_bitmap);
+                                       FREE_NULL_BITMAP(
+					       partially_idle_node_bitmap);
 				return ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE;
                         }
 			if (shared) {
-				if (!bit_super_set(job_ptr->details->req_node_bitmap, 
+				if (!bit_super_set(job_ptr->details->
+						   req_node_bitmap, 
                                                    share_node_bitmap)) {
                                         if (cr_enabled) 
-                                          FREE_NULL_BITMAP(partially_idle_node_bitmap);
+                                          FREE_NULL_BITMAP(
+						  partially_idle_node_bitmap);
 					return ESLURM_NODES_BUSY;
                                 }
 			} else if (cr_enabled) {
-				if (!bit_super_set(job_ptr->details->req_node_bitmap, 
+				if (!bit_super_set(job_ptr->details->
+						   req_node_bitmap, 
                                                    partially_idle_node_bitmap)) {
-                                        FREE_NULL_BITMAP(partially_idle_node_bitmap);
+                                        FREE_NULL_BITMAP(
+						partially_idle_node_bitmap);
 					return ESLURM_NODES_BUSY;
                                 }
                         } else {
-				if (!bit_super_set(job_ptr->details->req_node_bitmap, 
+				if (!bit_super_set(job_ptr->details->
+						   req_node_bitmap, 
                                                    idle_node_bitmap)) {
                                         if (cr_enabled) 
-                                          FREE_NULL_BITMAP(partially_idle_node_bitmap);
+						FREE_NULL_BITMAP(
+							partially_idle_node_bitmap);
 					return ESLURM_NODES_BUSY;
                                 }
 			}
@@ -516,13 +529,15 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 				continue;
 			if (!runable_ever) {
                                 int cr_disabled = 0;
-			        error_code = _add_node_set_info(&node_set_ptr[i],
-						   &total_bitmap, 
-						   &total_nodes, &total_cpus,
-                                                   cr_disabled);
+			        error_code = _add_node_set_info(
+					&node_set_ptr[i],
+					&total_bitmap, 
+					&total_nodes, &total_cpus,
+					cr_disabled);
                                 if (error_code != SLURM_SUCCESS) {
                                   if (cr_enabled) 
-                                          FREE_NULL_BITMAP(partially_idle_node_bitmap);
+                                          FREE_NULL_BITMAP(
+						  partially_idle_node_bitmap);
                                   return error_code;
                                 }
                         }
@@ -538,12 +553,15 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 					idle_node_bitmap);
 			node_set_ptr[i].nodes =
 				bit_set_count(node_set_ptr[i].my_bitmap);
-                        error_code = _add_node_set_info(&node_set_ptr[i], &avail_bitmap, 
-                                                        &avail_nodes, &avail_cpus, 
+                        error_code = _add_node_set_info(&node_set_ptr[i], 
+							&avail_bitmap, 
+                                                        &avail_nodes, 
+							&avail_cpus, 
                                                         cr_enabled);
                         if (error_code != SLURM_SUCCESS) {
                                 if (cr_enabled) 
-                                        FREE_NULL_BITMAP(partially_idle_node_bitmap);
+                                        FREE_NULL_BITMAP(
+						partially_idle_node_bitmap);
                                 return error_code;
                         }
 			if ((job_ptr->details->req_node_bitmap) &&
@@ -560,12 +578,16 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 
 #ifndef HAVE_BGL
 			if (shared) {
-				pick_code = _pick_best_load(job_ptr, avail_bitmap, 
-							min_nodes, max_nodes);
+				pick_code = _pick_best_load(job_ptr, 
+							    avail_bitmap, 
+							    min_nodes, 
+							    max_nodes);
 			} else
 #else
-				pick_code = select_g_job_test(job_ptr, avail_bitmap, 
-						min_nodes, max_nodes);
+				pick_code = select_g_job_test(job_ptr, 
+							      avail_bitmap, 
+							      min_nodes, 
+							      max_nodes);
 #endif
 
 			if (pick_code == SLURM_SUCCESS) {
@@ -577,7 +599,8 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 				}
 				FREE_NULL_BITMAP(total_bitmap);
                                 if (cr_enabled) 
- 				         FREE_NULL_BITMAP(partially_idle_node_bitmap);
+ 				         FREE_NULL_BITMAP(
+						 partially_idle_node_bitmap);
 				*select_bitmap = avail_bitmap;
 				return SLURM_SUCCESS;
 			}
@@ -618,8 +641,10 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 				bit_and(avail_bitmap, avail_node_bitmap);
                                 if (cr_enabled)
                                         job_ptr->cr_enabled = 0;
-				pick_code = select_g_job_test(job_ptr, avail_bitmap, 
-						min_nodes, max_nodes);
+				pick_code = select_g_job_test(job_ptr, 
+							      avail_bitmap, 
+							      min_nodes, 
+							      max_nodes);
                                 if (cr_enabled)
                                         job_ptr->cr_enabled = 1;
 				if (pick_code == SLURM_SUCCESS) {
@@ -633,8 +658,10 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 			if (!runable_ever) {
                                 if (cr_enabled)
                                         job_ptr->cr_enabled = 0;
-				pick_code = select_g_job_test(job_ptr, total_bitmap, 
-						min_nodes, max_nodes);
+				pick_code = select_g_job_test(job_ptr, 
+							      total_bitmap, 
+							      min_nodes, 
+							      max_nodes);
                                 if (cr_enabled)
                                         job_ptr->cr_enabled = 1;
 				if (pick_code == SLURM_SUCCESS)
@@ -658,7 +685,7 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
 		error_code = ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE;
 		info("_pick_best_nodes: job never runnable");
 	}
-
+	job_ptr->checked = 0;
 	if (error_code == SLURM_SUCCESS)
 		error_code = ESLURM_NODES_BUSY;
 	return error_code;
@@ -699,9 +726,10 @@ _add_node_set_info(struct node_set *node_set_ptr,
                         if (bit_test (node_set_ptr->my_bitmap, i) == 0)
                         	continue;
                         allocated_cpus = 0;
-                        error_code = select_g_get_select_nodeinfo(&node_record_table_ptr[i], 
-                                                                SELECT_CR_USED_CPUS, 
-                                                                &allocated_cpus);
+                        error_code = select_g_get_select_nodeinfo(
+				&node_record_table_ptr[i], 
+				SELECT_CR_USED_CPUS, 
+				&allocated_cpus);
                         if (error_code != SLURM_SUCCESS) {
                                error(" cons_res: Invalid Node reference", 
                                      node_record_table_ptr[i]);
@@ -709,9 +737,11 @@ _add_node_set_info(struct node_set *node_set_ptr,
                         }
                         
                         *node_cnt += 1;
-                        *cpu_cnt  += node_record_table_ptr[i].cpus - allocated_cpus;
+                        *cpu_cnt  += 
+				node_record_table_ptr[i].cpus - allocated_cpus;
                 }
-                debug3(" cons_res: _add_set_info node_cnt %d cpu_cnt %d ", *node_cnt, *cpu_cnt);
+                debug3(" cons_res: _add_set_info node_cnt %d cpu_cnt %d ", 
+		       *node_cnt, *cpu_cnt);
         }
         return error_code;
 }
@@ -845,6 +875,7 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only)
 		error_code = SLURM_SUCCESS;
 		goto cleanup;
 	}
+	info("starting job %u", job_ptr->job_id);
 	if (select_g_job_begin(job_ptr) != SLURM_SUCCESS) {
 		/* Leave job queued, something is hosed */
 		error("select_g_job_begin(%u): %m", job_ptr->job_id);
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index 7e17d20e5b5..e31678f81bc 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -334,6 +334,9 @@ struct job_record {
                                            each of the ntask_cnt hosts */
 	uint16_t mail_type;		/* see MAIL_JOB_* in slurm.h */
 	char *mail_user;		/* user to get e-mail notification */
+	uint32_t checked;               /* how many times the bgl plugin has
+					   checked this job; all partitions were
+					   full, now looking for best choice */
 };
 
 struct 	step_record {
-- 
GitLab