From 185705cb3569840f9edd72eb3e6d6a7d2a35a177 Mon Sep 17 00:00:00 2001
From: Danny Auble <da@schedmd.com>
Date: Wed, 12 Sep 2012 17:21:00 -0700
Subject: [PATCH] BGQ - if we are shutting down or the realtime server is
 waiting to start stop the current poll asap.

---
 .../select/bluegene/bl_bgq/bridge_status.cc   | 31 ++++++++++++++-----
 src/plugins/select/bluegene/select_bluegene.c |  2 ++
 2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/src/plugins/select/bluegene/bl_bgq/bridge_status.cc b/src/plugins/select/bluegene/bl_bgq/bridge_status.cc
index 8b30599fad0..28795c9bd55 100644
--- a/src/plugins/select/bluegene/bl_bgq/bridge_status.cc
+++ b/src/plugins/select/bluegene/bl_bgq/bridge_status.cc
@@ -757,6 +757,8 @@ static void _do_block_poll(void)
 				    block_ptr->getStatus().toValue()),
 			    kill_job_list))
 			updated = 1;
+		if (rt_waiting || slurmctld_config.shutdown_time)
+			break;
 	}
 	slurm_mutex_unlock(&block_state_mutex);
 	unlock_slurmctld(job_read_lock);
@@ -765,7 +767,6 @@ static void _do_block_poll(void)
 
 	if (updated == 1)
 		last_bg_update = time(NULL);
-
 }
 
 /* Even though ba_mp should be coming from the main list
@@ -818,6 +819,9 @@ static void _handle_midplane_update(ComputeHardware::ConstPtr bgq,
 				slurm_mutex_unlock(&ba_system_mutex);
 				slurm_mutex_unlock(&block_state_mutex);
 				unlock_slurmctld(job_read_lock);
+				if (rt_waiting
+				    || slurmctld_config.shutdown_time)
+					return;
 			}
 		}
 	}
@@ -836,18 +840,23 @@ static void _handle_midplane_update(ComputeHardware::ConstPtr bgq,
 			_handle_bad_nodeboard(
 				nb_ptr->getLocation().substr(7,3).c_str(),
 				bg_down_node, nb_ptr->getState(), NULL, 0);
+			if (rt_waiting || slurmctld_config.shutdown_time)
+				return;
 		}
 	}
 
 	for (dim=Dimension::A; dim<=Dimension::D; dim++) {
 		Switch::ConstPtr switch_ptr = bridge_get_switch(mp_ptr, dim);
 		if (switch_ptr) {
-			if (switch_ptr->getState() != Hardware::Available)
+			if (switch_ptr->getState() != Hardware::Available) {
 				_handle_bad_switch(dim,
 						   bg_down_node,
 						   switch_ptr->getState(),
 						   1, 0);
-			else {
+				if (rt_waiting
+				    || slurmctld_config.shutdown_time)
+					return;
+			} else {
 				Cable::ConstPtr my_cable =
 					switch_ptr->getCable();
 				/* Dimensions of length 1 do not have a
@@ -866,6 +875,9 @@ static void _handle_midplane_update(ComputeHardware::ConstPtr bgq,
 						delete_list, 0);
 					slurm_mutex_unlock(&ba_system_mutex);
 					slurm_mutex_unlock(&block_state_mutex);
+					if (rt_waiting
+					    || slurmctld_config.shutdown_time)
+						return;
 				}
 			}
 		}
@@ -892,6 +904,8 @@ static void _do_hardware_poll(int level, uint16_t *coords,
 		     coords[level]++) {
 			/* handle the outter dims here */
 			_do_hardware_poll(level+1, coords, bgqsys);
+			if (rt_waiting || slurmctld_config.shutdown_time)
+				return;
 		}
 		return;
 	}
@@ -929,11 +943,10 @@ static void *_poll(void *no_data)
 		}
 		//debug("polling taking over, realtime is dead");
 		curr_time = time(NULL);
-		if (blocks_are_created)
+		if (!rt_waiting && blocks_are_created)
 			_do_block_poll();
-
 		/* only do every 30 seconds */
-		if ((curr_time - 30) >= last_ran) {
+		if (!rt_waiting && ((curr_time - 30) >= last_ran)) {
 			uint16_t coords[SYSTEM_DIMENSIONS];
 			_do_hardware_poll(0, coords,
 					  bridge_get_compute_hardware());
@@ -1367,6 +1380,8 @@ extern int bridge_status_fini(void)
 		return SLURM_ERROR;
 
 	bridge_status_inited = false;
+	rt_waiting = 1;
+
 #if defined HAVE_BG_FILES
 	/* make the rt connection end. */
 	_bridge_status_disconnect();
@@ -1405,7 +1420,9 @@ extern int bridge_status_update_block_list_state(List block_list)
 	while ((bg_record = (bg_record_t *) list_next(itr))) {
 		BlockFilter filter;
 		Block::Ptrs vec;
-		if (bg_record->magic != BLOCK_MAGIC) {
+		if (!bridge_status_inited)
+			break;
+		else if (bg_record->magic != BLOCK_MAGIC) {
 			/* block is gone */
 			list_remove(itr);
 			continue;
diff --git a/src/plugins/select/bluegene/select_bluegene.c b/src/plugins/select/bluegene/select_bluegene.c
index 0d206de22bc..c1940f9f11d 100644
--- a/src/plugins/select/bluegene/select_bluegene.c
+++ b/src/plugins/select/bluegene/select_bluegene.c
@@ -58,6 +58,7 @@
  * overwritten when linking with the slurmctld.
  */
 #if defined (__APPLE__)
+slurmctld_config_t slurmctld_config __attribute__((weak_import));
 slurm_ctl_conf_t slurmctld_conf __attribute__((weak_import));
 struct node_record *node_record_table_ptr  __attribute__((weak_import)) = NULL;
 int bg_recover __attribute__((weak_import)) = NOT_FROM_CONTROLLER;
@@ -71,6 +72,7 @@ void *acct_db_conn  __attribute__((weak_import)) = NULL;
 char *slurmctld_cluster_name  __attribute__((weak_import)) = NULL;
 slurmdb_cluster_rec_t *working_cluster_rec  __attribute__((weak_import)) = NULL;
 #else
+slurmctld_config_t slurmctld_config;
 slurm_ctl_conf_t slurmctld_conf;
 struct node_record *node_record_table_ptr = NULL;
 int bg_recover = NOT_FROM_CONTROLLER;
-- 
GitLab