From 261ee46c292e35f5eded1e7032eee8d8fab5635f Mon Sep 17 00:00:00 2001
From: Danny Auble <da@llnl.gov>
Date: Fri, 4 Mar 2011 22:51:25 +0000
Subject: [PATCH] Fix handling of an extremely overloaded system

---
 .../select/bluegene/ba/block_allocator.c      | 69 +++----------------
 .../select/bluegene/ba_bgq/block_allocator.c  |  4 +-
 .../select/bluegene/bg_dynamic_block.c        |  4 +-
 src/plugins/select/bluegene/bg_job_place.c    | 13 ++--
 .../select/bluegene/bg_record_functions.c     | 15 +---
 .../select/bluegene/bl/bridge_linker.c        | 12 ++--
 .../bluegene/bl/bridge_switch_connections.c   |  3 +
 src/slurmctld/proc_req.c                      |  3 +-
 8 files changed, 33 insertions(+), 90 deletions(-)

diff --git a/src/plugins/select/bluegene/ba/block_allocator.c b/src/plugins/select/bluegene/ba/block_allocator.c
index d29c6d723ca..116b892b674 100644
--- a/src/plugins/select/bluegene/ba/block_allocator.c
+++ b/src/plugins/select/bluegene/ba/block_allocator.c
@@ -781,8 +781,9 @@ extern int remove_block(List nodes, bool is_small)
 		ba_node = &ba_main_grid[curr_ba_node->coord[X]]
 			[curr_ba_node->coord[Y]]
 			[curr_ba_node->coord[Z]];
+		if (curr_ba_node->used)
+			ba_node->used &= (~BA_MP_USED_TRUE);
 
-		ba_node->used = false;
 		/* Small blocks don't use wires, and only have 1 node,
 		   so just break. */
 		if (is_small)
@@ -859,7 +860,12 @@ extern int check_and_set_mp_list(List nodes)
 		}
 
 		if (ba_node->used)
-			curr_ba_node->used = true;
+			curr_ba_node->used = ba_node->used;
+		if (ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP)
+			info("check_and_set_mp_list: "
+			     "%s is used ?= %d %d",
+			     curr_ba_node->coord_str,
+			     curr_ba_node->used, ba_node->used);
 		for(i=0; i<cluster_dims; i++) {
 			ba_switch = &ba_node->axis_switch[i];
 			curr_ba_switch = &curr_ba_node->axis_switch[i];
@@ -1058,65 +1064,6 @@ extern void reset_ba_system(bool track_down_nodes)
 	}
 }
 
-/*
- * IN: hostlist of midplanes we want to be able to use, mark all
- *     others as used.
- * RET: SLURM_SUCCESS on success, or SLURM_ERROR on error
- *
- * Need to call reset_all_removed_mps before starting another
- * allocation attempt if possible use removable_set_mps since it is
- * faster. It does basically the opposite of this function. If you
- * have to come up with this list though it is faster to use this
- * function than if you have to call bitmap2node_name since that is slow.
- */
-extern int set_all_mps_except(char *mps)
-{
-	int x, y, z;
-	hostlist_t hl = hostlist_create(mps);
-	char *host = NULL, *numeric = NULL;
-	int number, coords[HIGHEST_DIMENSIONS];
-
-	memset(coords, 0, sizeof(coords));
-
-	while ((host = hostlist_shift(hl))){
-		numeric = host;
-		number = 0;
-		while (numeric) {
-			if (numeric[0] < '0' || numeric[0] > 'Z'
-			    || (numeric[0] > '9'
-				&& numeric[0] < 'A')) {
-				numeric++;
-				continue;
-			}
-			number = xstrntol(numeric, &p, cluster_dims,
-					  cluster_base);
-			break;
-		}
-		hostlist_parse_int_to_array(
-			number, coords, cluster_dims, cluster_base);
-		ba_main_grid[coords[X]][coords[Y]][coords[Z]].state
-			|= NODE_RESUME;
-		free(host);
-	}
-	hostlist_destroy(hl);
-
-	for (x = 0; x < DIM_SIZE[X]; x++) {
-		for (y = 0; y < DIM_SIZE[Y]; y++)
-			for (z = 0; z < DIM_SIZE[Z]; z++) {
-				if (ba_main_grid[x][y][z].state
-				    & NODE_RESUME) {
-					/* clear the bit and mark as unused */
-					ba_main_grid[x][y][z].state
-						&= ~NODE_RESUME;
-				} else
-					ba_main_grid[x][y][z].used
-						|= BA_MP_USED_TEMP;
-			}
-	}
-
- 	return SLURM_SUCCESS;
-}
-
 /*
  * set values of every grid point (used in smap)
  */
diff --git a/src/plugins/select/bluegene/ba_bgq/block_allocator.c b/src/plugins/select/bluegene/ba_bgq/block_allocator.c
index 3bf5e3e757a..6ee3afe5285 100644
--- a/src/plugins/select/bluegene/ba_bgq/block_allocator.c
+++ b/src/plugins/select/bluegene/ba_bgq/block_allocator.c
@@ -295,8 +295,8 @@ extern int remove_block(List mps, bool is_small)
 			[curr_ba_mp->coord[X]]
 			[curr_ba_mp->coord[Y]]
 			[curr_ba_mp->coord[Z]];
-
-		ba_mp->used &= (~BA_MP_USED_TRUE);
+		if (curr_ba_mp->used)
+			ba_mp->used &= (~BA_MP_USED_TRUE);
 		ba_mp->used &= (~BA_MP_USED_ALTERED_PASS);
 
 		/* Small blocks don't use wires, and only have 1 mp,
diff --git a/src/plugins/select/bluegene/bg_dynamic_block.c b/src/plugins/select/bluegene/bg_dynamic_block.c
index a22c0ac4d20..48926b27931 100644
--- a/src/plugins/select/bluegene/bg_dynamic_block.c
+++ b/src/plugins/select/bluegene/bg_dynamic_block.c
@@ -477,7 +477,7 @@ extern bg_record_t *create_small_record(bg_record_t *bg_record,
 	} else {
 		new_ba_mp = ba_copy_mp(ba_mp);
 		ba_setup_mp(new_ba_mp, false);
-
+		new_ba_mp->used = BA_MP_USED_TRUE;
 		list_append(found_record->ba_mp_list, new_ba_mp);
 		found_record->mp_count = 1;
 		found_record->nodes = xstrdup_printf(
@@ -750,7 +750,7 @@ static int _breakup_blocks(List block_list, List new_blocks,
 	while ((bg_record = list_next(itr))) {
 		if (bg_record->free_cnt) {
 			if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
-				info("%s being free for other job(s), skipping",
+				info("%s being freed by other job(s), skipping",
 				     bg_record->bg_block_id);
 			continue;
 		}
diff --git a/src/plugins/select/bluegene/bg_job_place.c b/src/plugins/select/bluegene/bg_job_place.c
index 66029df0fb8..cc3c25fbee5 100644
--- a/src/plugins/select/bluegene/bg_job_place.c
+++ b/src/plugins/select/bluegene/bg_job_place.c
@@ -592,14 +592,15 @@ static int _check_for_booted_overlapping_blocks(
 				}
 			}
 
-			if (!SELECT_IS_CHECK_FULL_SET(query_mode)
+			if (((bg_conf->layout_mode == LAYOUT_DYNAMIC)
+			     || ((!SELECT_IS_CHECK_FULL_SET(query_mode)
+				  || SELECT_IS_MODE_RUN_NOW(query_mode))
+				 && (bg_conf->layout_mode != LAYOUT_DYNAMIC)))
 			    && ((found_record->job_running != NO_JOB_RUNNING)
-				|| (found_record->state
-				    == BG_BLOCK_ERROR))) {
+				|| (found_record->state == BG_BLOCK_ERROR))) {
 				if ((found_record->job_running
 				     == BLOCK_ERROR_STATE)
-				    || (found_record->state
-					== BG_BLOCK_ERROR))
+				    || (found_record->state == BG_BLOCK_ERROR))
 					error("can't use %s, "
 					      "overlapping block %s "
 					      "is in an error state.",
@@ -1178,7 +1179,7 @@ static int _find_best_block_match(List block_list,
 				new_blocks = create_dynamic_block(
 					block_list, &request, job_list,
 					track_down_nodes);
-				/* this gets altered in
+				/* this could get altered in
 				 * create_dynamic_block so we reset it */
 				memcpy(request.geometry, req_geometry,
 				       sizeof(req_geometry));
diff --git a/src/plugins/select/bluegene/bg_record_functions.c b/src/plugins/select/bluegene/bg_record_functions.c
index 3010a163901..61356c9f765 100644
--- a/src/plugins/select/bluegene/bg_record_functions.c
+++ b/src/plugins/select/bluegene/bg_record_functions.c
@@ -281,19 +281,6 @@ extern void process_nodes(bg_record_t *bg_record, bool startup)
 	} else if (bg_record->node_cnt == bg_conf->mp_node_cnt)
 		bg_record->full_block = 1;
 
-	if(!bg_record->nodes) {
-		info("block %s doesn't appear to have any nodes in it",
-		     bg_record->bg_block_id);
-		itr = list_iterator_create(bg_record->ba_mp_list);
-		while ((ba_mp = list_next(itr))) {
-			if (!ba_mp->used)
-				continue;
-			info("process_nodes: %s is included in this block",
-			     ba_mp->coord_str);
-		}
-		list_iterator_destroy(itr);
-		xassert(0);
-	}
 	if (node_name2bitmap(bg_record->nodes,
 			     false,
 			     &bg_record->bitmap)) {
@@ -734,7 +721,7 @@ extern int add_bg_record(List records, List *used_nodes,
 		bg_record->mloaderimage =
 			xstrdup(bg_conf->default_mloaderimage);
 
-	if (bg_record->conn_type[0] != SELECT_SMALL) {
+	if (bg_record->conn_type[0] < SELECT_SMALL) {
 		/* this needs to be an append so we keep things in the
 		   order we got them, they will be sorted later */
 		list_append(records, bg_record);
diff --git a/src/plugins/select/bluegene/bl/bridge_linker.c b/src/plugins/select/bluegene/bl/bridge_linker.c
index f9c1fc7e61c..a73fe4df334 100644
--- a/src/plugins/select/bluegene/bl/bridge_linker.c
+++ b/src/plugins/select/bluegene/bl/bridge_linker.c
@@ -962,18 +962,22 @@ extern int bridge_setup_system()
 
 extern int bridge_block_create(bg_record_t *bg_record)
 {
+	int rc = SLURM_SUCCESS;
+
 #if defined HAVE_BG_FILES
 	_new_block((rm_partition_t **)&bg_record->bg_block);
 #endif
 	_pre_allocate(bg_record);
 
 	if (bg_record->cpu_cnt < bg_conf->cpus_per_mp)
-		configure_small_block(bg_record);
+		rc = configure_small_block(bg_record);
 	else
-		configure_block_switches(bg_record);
+		rc = configure_block_switches(bg_record);
 
-	_post_allocate(bg_record);
-	return SLURM_SUCCESS;
+	if (rc == SLURM_SUCCESS)
+		rc = _post_allocate(bg_record);
+
+	return rc;
 }
 
 extern int bridge_block_boot(bg_record_t *bg_record)
diff --git a/src/plugins/select/bluegene/bl/bridge_switch_connections.c b/src/plugins/select/bluegene/bl/bridge_switch_connections.c
index f150666656b..f33c3123fab 100644
--- a/src/plugins/select/bluegene/bl/bridge_switch_connections.c
+++ b/src/plugins/select/bluegene/bl/bridge_switch_connections.c
@@ -639,9 +639,12 @@ cleanup:
 		error("bridge_free_nodecard_list(): %s", bridge_err_str(rc));
 		return SLURM_ERROR;
 	}
+
 #endif
 	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_WIRES)
 		info("making the small block");
+	if (rc != SLURM_ERROR)
+		rc = SLURM_SUCCESS;
 	return rc;
 }
 
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index 19c6a80e963..3482cef99b9 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -1470,7 +1470,8 @@ static void _slurm_rpc_complete_batch_script(slurm_msg_t * msg)
 	}
 
 #ifdef HAVE_FRONT_END
-	job_ptr = find_job_record(comp_msg->job_id);
+	if (!job_ptr)
+		job_ptr = find_job_record(comp_msg->job_id);
 	if (job_ptr && job_ptr->front_end_ptr)
 		nodes = job_ptr->front_end_ptr->name;
 	msg_title = "front_end";
-- 
GitLab