From c0fbb5b43d31253bb5cde8ea6aac3b2548610278 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Thu, 19 Oct 2006 22:40:00 +0000
Subject: [PATCH] svn merge -r9843:9854
 https://eris.llnl.gov/svn/slurm/branches/slurm-1.1

---
 NEWS                                             |  6 ++++++
 doc/html/slurm.shtml                             | 15 ++++++++-------
 src/common/plugin.c                              | 16 +++++++++-------
 src/plugins/sched/wiki2/msg.c                    |  3 ++-
 .../select/bluegene/plugin/libsched_if64.c       | 13 +++++++++++--
 .../select/bluegene/plugin/slurm_epilog.c        |  2 +-
 .../select/bluegene/plugin/slurm_prolog.c        | 10 ++++++----
 7 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/NEWS b/NEWS
index 259ce99919b..6103472fa80 100644
--- a/NEWS
+++ b/NEWS
@@ -82,6 +82,12 @@ documents those changes that are of interest to users and admins.
 
 * Changes in SLURM 1.1.16
 =========================
+ - BLUEGENE - fix to make the prolog wait 5 minutes longer, ensuring there
+   is enough time to free the overlapping blocks when starting a new job
+   on a block.
+ - BLUEGENE - libsched_if.so now reads the MPIRUN_PARTITION environment
+   variable to determine whether mpirun is running under SLURM or natively.
+ - Plugins are now dlopened RTLD_LAZY instead of RTLD_NOW.
 
 * Changes in SLURM 1.1.15
 =========================
diff --git a/doc/html/slurm.shtml b/doc/html/slurm.shtml
index 3426f75f894..61a422bc219 100644
--- a/doc/html/slurm.shtml
+++ b/doc/html/slurm.shtml
@@ -33,12 +33,13 @@ add functionality.</li>
 <p>SLURM provides resource management on about 1000 computers world-wide including 
 many of the most powerful computers in the world including:
 <ul>
-<li><a href="http://www.llnl.gov/asci/platforms/bluegenel/">BlueGene/L</a> with 65,536
-dual-processor compute nodes</li>
-<li><a href="http://www.llnl.gov/linux/thunder/">Thunder</a> a Linux cluster with 1024 
-nodes, each having four Itanium2 processors</li>
-<li><a href="http://www.llnl.gov/asci/platforms/purple/">ASC Purple</a> an IBM SP/AIX 
-cluster with 1500 nodes, each having eight Power5 processors</li>
+<li><a href="http://www.llnl.gov/asc/computing_resources/bluegenel/bluegene_home.html">BlueGene/L</a>
+with 65,536 dual-processor compute nodes</li>
+<li><a href="http://www.llnl.gov/asc/computing_resources/purple/purple_index.html">ASC Purple</a>
+an IBM SP/AIX cluster with 1532 nodes, each having eight Power5 processors</li>
+<li>Peloton
+with 1152 nodes, each having four sockets with dual-core Opteron processors, and an
+InfiniBand switch</li>
 </ul>
 There are about 150 downloads of SLURM per month from LLNL's FTP server alone.
 As of September 2006, SLURM has been downloaded over 3500 times to over 500 
@@ -46,6 +47,6 @@ distinct sites in 38 countries.
 SLURM is also distributed and supported by <a href="http://www.hp.com">
 Hewlett-Packard</a> as the resource manager in their XC System Software.</p>
 
-<p style="text-align:center;">Last modified 29 September 2006</p>
+<p style="text-align:center;">Last modified 19 October 2006</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/src/common/plugin.c b/src/common/plugin.c
index 19409865377..dc7c6b02e0a 100644
--- a/src/common/plugin.c
+++ b/src/common/plugin.c
@@ -109,14 +109,16 @@ plugin_load_from_file( const char *fq_path )
         int (*init)( void );
         
         /*
-         * Try to open the shared object.  We have a choice of trying to
-         * resolve all the symbols (in both directions) now or when the
-         * symbols are first dereferenced and used.  While it's slower to
-         * do it this way, it's a lot easier to debug.  If you get an
-         * error somewhere down the line, you're likely to think it's
-         * some condition that happened then instead of way back here.
+         * Try to open the shared object.
+         *
+         * Use RTLD_LAZY so that a plugin may reference symbols that are
+         * defined in only one slurm entity (e.g. srun and not slurmd),
+         * provided each such symbol is only used within the entity that
+         * defines it (i.e. srun symbols are only used in the context of
+         * srun, not slurmd).
+         *
          */
-        plug = dlopen( fq_path, RTLD_NOW );
+        plug = dlopen( fq_path, RTLD_LAZY );
         if ( plug == NULL ) {
 		error( "plugin_load_from_file: dlopen(%s): %s",
 			fq_path,
diff --git a/src/plugins/sched/wiki2/msg.c b/src/plugins/sched/wiki2/msg.c
index 1ea7d91aeec..40916cffa53 100644
--- a/src/plugins/sched/wiki2/msg.c
+++ b/src/plugins/sched/wiki2/msg.c
@@ -389,7 +389,8 @@ static int	_parse_msg(char *msg, char **req)
 	if (delta_t > 300) {
 		err_code = -350;
 		err_msg = "TS value too far from NOW";
-		error("wiki: TS delta_t=%u", delta_t);
+		error("wiki: TimeStamp too far from NOW (%u secs)", 
+			delta_t);
 		return -1;
 	}
 
diff --git a/src/plugins/select/bluegene/plugin/libsched_if64.c b/src/plugins/select/bluegene/plugin/libsched_if64.c
index b64740eb288..37c24e5399d 100644
--- a/src/plugins/select/bluegene/plugin/libsched_if64.c
+++ b/src/plugins/select/bluegene/plugin/libsched_if64.c
@@ -37,11 +37,20 @@
  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
 \*****************************************************************************/
 #include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 
 int get_parameters(void *params) 
 {
-	printf("YOU ARE OUTSIDE OF SLURM!!!! NOT RUNNING MPIRUN!\n");
-	return -1;
+	char *partition = getenv("MPIRUN_PARTITION"); /* get MPIRUN env
+						   * var to see if we
+						   * are inside slurm
+						   * or not */
+	if (!partition || (strlen(partition) < 3)) {
+		printf("YOU ARE OUTSIDE OF SLURM!!!! NOT RUNNING MPIRUN!\n");
+		return 1;
+	}
+	return 2;
 }
 
 void mpirun_done(int res)
diff --git a/src/plugins/select/bluegene/plugin/slurm_epilog.c b/src/plugins/select/bluegene/plugin/slurm_epilog.c
index 3e9b168ddb9..b5f5bef4b3b 100644
--- a/src/plugins/select/bluegene/plugin/slurm_epilog.c
+++ b/src/plugins/select/bluegene/plugin/slurm_epilog.c
@@ -19,7 +19,7 @@
  *  any later version.
  *
  *  In addition, as a special exception, the copyright holders give permission 
- *  to link the code of portions of this program with the OpenSSL library under 
+ *  to link the code of portions of this program with the OpenSSL library under
  *  certain conditions as described in each individual source file, and 
  *  distribute linked combinations including the two. You must obey the GNU 
  *  General Public License in all respects for all of the code used other than 
diff --git a/src/plugins/select/bluegene/plugin/slurm_prolog.c b/src/plugins/select/bluegene/plugin/slurm_prolog.c
index 95c932f236c..25ef0fa4e6f 100644
--- a/src/plugins/select/bluegene/plugin/slurm_prolog.c
+++ b/src/plugins/select/bluegene/plugin/slurm_prolog.c
@@ -19,7 +19,7 @@
  *  any later version.
  *
  *  In addition, as a special exception, the copyright holders give permission 
- *  to link the code of portions of this program with the OpenSSL library under 
+ *  to link the code of portions of this program with the OpenSSL library under
  *  certain conditions as described in each individual source file, and 
  *  distribute linked combinations including the two. You must obey the GNU 
  *  General Public License in all respects for all of the code used other than 
@@ -61,17 +61,18 @@
 
 /*
  * Check the bgblock's status every POLL_SLEEP seconds. 
- * Retry for a period of MIN_DELAY + 
+ * Retry for a period of MIN_FREE_PERVIOUS_BLOCK_DELAY + MIN_DELAY + 
  * (INCR_DELAY * POLL_SLEEP * base partition count).
  * For example if MIN_DELAY=300 and INCR_DELAY=20 and POLL_SLEEP=3, 
  * wait up to 1260 seconds.
  * For a 16 base partition bgblock to be ready (300 + (20 * 3 * 16).
  */ 
 #define POLL_SLEEP 3			/* retry interval in seconds  */
+#define MIN_FREE_PERVIOUS_BLOCK_DELAY 300 /* secs to free overlapping blocks */
 #define MIN_DELAY  300			/* time in seconds */
 #define INCR_DELAY 20			/* time in seconds per BP */
 
-int max_delay = MIN_DELAY;
+int max_delay = MIN_DELAY + MIN_FREE_PERVIOUS_BLOCK_DELAY;
 int cur_delay = 0; 
   
 enum rm_partition_state {RM_PARTITION_FREE, 
@@ -114,7 +115,8 @@ static int _wait_part_ready(uint32_t job_id)
 {
 	int is_ready = 0, i, rc;
 	
-	max_delay = MIN_DELAY + (INCR_DELAY * _get_job_size(job_id));
+	max_delay = MIN_DELAY + MIN_FREE_PERVIOUS_BLOCK_DELAY +
+		(INCR_DELAY * _get_job_size(job_id));
 
 #if _DEBUG
 	printf("Waiting for job %u to become ready.", job_id);
-- 
GitLab