From 6e0a4c1c76805f0d8b765a31b6ddda470cecfd4f Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Fri, 26 Jun 2009 17:49:50 +0000
Subject: [PATCH] svn merge -r17976:17985
 https://eris.llnl.gov/svn/slurm/branches/slurm-2.0

---
 NEWS                                          |  7 +++++++
 .../multifactor/priority_multifactor.c        | 14 ++++++++++++--
 src/plugins/select/bluegene/plugin/bluegene.c | 19 ++-----------------
 src/sacctmgr/sacctmgr.c                       |  3 +++
 src/salloc/salloc.c                           | 13 +++++++------
 src/srun/allocate.c                           | 13 +++++++------
 6 files changed, 38 insertions(+), 31 deletions(-)

diff --git a/NEWS b/NEWS
index dee3d412526..67e4f5f4247 100644
--- a/NEWS
+++ b/NEWS
@@ -68,6 +68,13 @@ documents those changes that are of interest to users and admins.
  -- Better logging for when job's request bad output file locations.
  -- Fix issue where if user specified non-existant file to write to slurmstepd
     will regain privileges before sending batch script ended to the controller.
+ -- Fix bug when using the priority_multifactor plugin with no associations yet.
+ -- BLUEGENE - we no longer check for the images to sync state.  This was
+    needed long ago when rebooting blocks wasn't a possibility and should
+    have been removed when that functionality was available.
+ -- Added message about no connection with the database for sacctmgr.
+ -- On BlueGene, let srun or salloc exit on SIGINT if slurmctld dies while
+    booting its block.
 
 * Changes in SLURM 2.0.3
 ========================
diff --git a/src/plugins/priority/multifactor/priority_multifactor.c b/src/plugins/priority/multifactor/priority_multifactor.c
index b7940509650..1f5ee0b56cf 100644
--- a/src/plugins/priority/multifactor/priority_multifactor.c
+++ b/src/plugins/priority/multifactor/priority_multifactor.c
@@ -138,6 +138,7 @@ static int _apply_decay(double decay_factor)
 		return SLURM_SUCCESS;
 
 	xassert(assoc_mgr_association_list);
+	xassert(assoc_mgr_qos_list);
 
 	slurm_mutex_lock(&assoc_mgr_association_lock);
 	itr = list_iterator_create(assoc_mgr_association_list);
@@ -938,7 +939,14 @@ int init ( void )
 		      temp);
 		calc_fairshare = 0;
 		weight_fs = 0;
-	} else {
+	} else if(weight_fs) {
+		if(!assoc_mgr_root_assoc)
+			fatal("It appears you don't have any association "
+			      "data from your database.  "
+			      "The priority/multifactor plugin requires "
+			      "this information to run correctly.  Please "
+			      "check your database connection and try again.");
+
 		if(!cluster_procs)
 			fatal("We need to have a cluster cpu count "
 			      "before we can init the priority/multifactor "
@@ -960,7 +968,9 @@ int init ( void )
 			fatal("pthread_create error %m");
 		
 		slurm_attr_destroy(&thread_attr);
-	}
+	} else
+		calc_fairshare = 0;
+
 	xfree(temp);
 
 	verbose("%s loaded", plugin_name);
diff --git a/src/plugins/select/bluegene/plugin/bluegene.c b/src/plugins/select/bluegene/plugin/bluegene.c
index 074805a2111..1eedebf3614 100644
--- a/src/plugins/select/bluegene/plugin/bluegene.c
+++ b/src/plugins/select/bluegene/plugin/bluegene.c
@@ -1416,7 +1416,8 @@ static int _validate_config_nodes(List curr_block_list,
 	while ((bg_record = list_next(itr_conf))) {
 		list_iterator_reset(itr_curr);
 		while ((init_bg_record = list_next(itr_curr))) {
-			if (strcasecmp(bg_record->nodes, init_bg_record->nodes))
+			if (strcasecmp(bg_record->nodes, 
+				       init_bg_record->nodes))
 				continue; /* wrong nodes */
 			if(!bit_equal(bg_record->ionode_bitmap,
 				      init_bg_record->ionode_bitmap))
@@ -1424,28 +1425,12 @@ static int _validate_config_nodes(List curr_block_list,
 #ifdef HAVE_BGL
 			if (bg_record->conn_type != init_bg_record->conn_type)
 				continue; /* wrong conn_type */
-			if(bg_record->blrtsimage &&
-			   strcasecmp(bg_record->blrtsimage,
-				      init_bg_record->blrtsimage)) 
-				continue;
 #else
 			if ((bg_record->conn_type != init_bg_record->conn_type)
 			    && ((bg_record->conn_type < SELECT_SMALL)
 				&& (init_bg_record->conn_type < SELECT_SMALL)))
 				continue; /* wrong conn_type */
 #endif
-			if(bg_record->linuximage &&
-			   strcasecmp(bg_record->linuximage,
-				      init_bg_record->linuximage))
-				continue;
-			if(bg_record->mloaderimage &&
-			   strcasecmp(bg_record->mloaderimage,
-				      init_bg_record->mloaderimage))
-				continue;
-			if(bg_record->ramdiskimage &&
-			   strcasecmp(bg_record->ramdiskimage,
-				      init_bg_record->ramdiskimage))
-				continue;
 		       			
 			copy_bg_record(init_bg_record, bg_record);
 			/* remove from the curr list since we just
diff --git a/src/sacctmgr/sacctmgr.c b/src/sacctmgr/sacctmgr.c
index b62c43033dc..32841d82a7d 100644
--- a/src/sacctmgr/sacctmgr.c
+++ b/src/sacctmgr/sacctmgr.c
@@ -193,11 +193,14 @@ main (int argc, char *argv[])
 	errno = 0;
 	db_conn = acct_storage_g_get_connection(false, 0, 1);
 	if(errno != SLURM_SUCCESS) {
+		int tmp_errno = errno;
 		if((input_field_count == 2) &&
 		   (!strncasecmp(argv[2], "Configuration", strlen(argv[1]))) &&
 		   ((!strncasecmp(argv[1], "list", strlen(argv[0]))) || 
 		    (!strncasecmp(argv[1], "show", strlen(argv[0])))))
 			sacctmgr_list_config(false);
+		errno = tmp_errno;
+		fprintf(stderr, "Problem talking to the database: %m\n");
 		exit(1);
 	}
 	my_uid = getuid();
diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c
index 68fee323429..5f3a77243df 100644
--- a/src/salloc/salloc.c
+++ b/src/salloc/salloc.c
@@ -102,7 +102,7 @@ static void _node_fail_handler(srun_node_fail_msg_t *msg);
 #define POLL_SLEEP 3			/* retry interval in seconds  */
 static int _wait_bluegene_block_ready(
 			resource_allocation_response_msg_t *alloc);
-static int _blocks_dealloc();
+static int _blocks_dealloc(void);
 #endif
 
 #ifdef HAVE_CRAY_XT
@@ -689,7 +689,7 @@ static int _wait_bluegene_block_ready(resource_allocation_response_msg_t *alloc)
 				    &block_id);
 	
 	for (i=0; (cur_delay < max_delay); i++) {
-		if(i == 1)
+		if (i == 1)
 			info("Waiting for block %s to become ready for job",
 			     block_id);
 		if (i) {
@@ -712,13 +712,14 @@ static int _wait_bluegene_block_ready(resource_allocation_response_msg_t *alloc)
 			is_ready = 1;
 			break;
 		}
+		if (allocation_interrupted)
+			break;
 	}
 	if (is_ready)
      		info("Block %s is ready for job", block_id);
-	else if(!allocation_interrupted)
+	else if (!allocation_interrupted)
 		error("Block %s still not ready", block_id);
-	else /* this should never happen, but if allocation_intrrupted
-		send back not ready */
+	else	/* allocation_interrupted and slurmctld not responding */
 		is_ready = 0;
 
 	xfree(block_id);
@@ -734,7 +735,7 @@ static int _wait_bluegene_block_ready(resource_allocation_response_msg_t *alloc)
  *	0:  no deallocate in progress
  *     -1: error occurred
  */
-static int _blocks_dealloc()
+static int _blocks_dealloc(void)
 {
 	static node_select_info_msg_t *bg_info_ptr = NULL, *new_bg_ptr = NULL;
 	int rc = 0, error_code = 0, i;
diff --git a/src/srun/allocate.c b/src/srun/allocate.c
index a52a56b9725..0b57e6fcf3a 100644
--- a/src/srun/allocate.c
+++ b/src/srun/allocate.c
@@ -102,7 +102,7 @@ static void  _intr_handler(int signo);
 #define POLL_SLEEP 3			/* retry interval in seconds  */
 static int _wait_bluegene_block_ready(
 			resource_allocation_response_msg_t *alloc);
-static int _blocks_dealloc();
+static int _blocks_dealloc(void);
 #endif
 
 #ifdef HAVE_CRAY_XT
@@ -237,7 +237,7 @@ static int _wait_bluegene_block_ready(resource_allocation_response_msg_t *alloc)
 				    &block_id);
 
 	for (i=0; (cur_delay < max_delay); i++) {
-		if(i == 1)
+		if (i == 1)
 			debug("Waiting for block %s to become ready for job",
 			     block_id);
 		if (i) {
@@ -260,13 +260,14 @@ static int _wait_bluegene_block_ready(resource_allocation_response_msg_t *alloc)
 			is_ready = 1;
 			break;
 		}
+		if (destroy_job)
+			break;
 	}
 	if (is_ready)
      		debug("Block %s is ready for job", block_id);
-	else if(!destroy_job)
+	else if (!destroy_job)
 		error("Block %s still not ready", block_id);
-	else /* this should never happen, but if destroy_job
-		send back not ready */
+	else	/* destroy_job set and slurmctld not responding */
 		is_ready = 0;
 
 	xfree(block_id);
@@ -282,7 +283,7 @@ static int _wait_bluegene_block_ready(resource_allocation_response_msg_t *alloc)
  *	0:  no deallocate in progress
  *     -1: error occurred
  */
-static int _blocks_dealloc()
+static int _blocks_dealloc(void)
 {
 	static node_select_info_msg_t *bg_info_ptr = NULL, *new_bg_ptr = NULL;
 	int rc = 0, error_code = 0, i;
-- 
GitLab