From a246d3cf1c74ef7c49d8ad0cc70bcf529730b9c0 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Thu, 29 Sep 2005 22:02:59 +0000
Subject: [PATCH] merge -r 5417:5444 from slurm-0-6-branch

---
 META                                        |   6 +-
 NEWS                                        |  19 +++
 doc/man/man5/slurm.conf.5                   |  13 +-
 etc/slurm.conf.example                      |   3 +
 src/common/read_config.c                    |  19 ++-
 src/plugins/mpi/lam/Makefile.am             |   3 -
 src/plugins/mpi/mpichgm/Makefile.am         |   7 +-
 src/plugins/mpi/mvapich/Makefile.am         |   5 +-
 src/plugins/mpi/mvapich/mvapich.c           |  20 ++-
 src/plugins/mpi/none/Makefile.am            |   3 -
 src/plugins/select/bluegene/bluegene.c      |   2 -
 src/plugins/select/bluegene/partition_sys.c |  11 +-
 src/slurmd/req.c                            |  17 ++-
 src/smap/job_functions.c                    |   4 +-
 src/smap/partition_functions.c              | 157 +++++++++++++++-----
 src/srun/launch.c                           |   6 +-
 src/srun/msg.c                              |  45 ++++--
 17 files changed, 237 insertions(+), 103 deletions(-)

diff --git a/META b/META
index 7db18821c5f..cc54702ecc3 100644
--- a/META
+++ b/META
@@ -9,9 +9,9 @@
   Name:		slurm
   Major:	0
   Minor:	6
-  Micro:        0
-  Version:	0.6.0
-  Release:	0.pre8
+  Micro:        1
+  Version:	0.6.1
+  Release:	1
   API_CURRENT:	7	
   API_AGE:	4
   API_REVISION:	0
diff --git a/NEWS b/NEWS
index b32e7dc249e..a70d66d1210 100644
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,25 @@ documents those changes that are of interest to users and admins.
     REQUEST_KILL_JOB/TASKS changed to REQUEST_SIGNAL_JOB/TASKS.
  -- Add support for e-mail notification on job state changes.
 
+* Changes in SLURM 0.6.2
+========================
+
+* Changes in SLURM 0.6.1
+========================
+ -- Fixed smap -Db to display slurm partitions correctly (take 2).
+ -- Add srun fork() retry logic for very heavily loaded system.
+ -- Fix possible srun hang on task launch failure.
+ -- Add support for mvapich v0.9.4, 0.9.5 and gen2.
+
+* Changes in SLURM 0.6.0
+========================
+ -- Add documentation for ProctrackType=proctrack/rms.
+ -- Make proctrack/rms be the default for switch/elan.
+ -- Do not precede SIGKILL or SIGTERM to job step with (non-requested) SIGCONT.
+ -- Fixed smap -Db to display slurm partitions correctly.  
+ -- Explicitly disallow ProctrackType=proctrack/linuxproc with 
+    SwitchType=switch/elan. They will not work properly together.
+
 * Changes in SLURM 0.6.0-pre8
 =============================
  -- Remove debugging xassert in switch/federation that were accidentally
diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
index 303f19c9d30..b799b3e78c7 100644
--- a/doc/man/man5/slurm.conf.5
+++ b/doc/man/man5/slurm.conf.5
@@ -1,4 +1,4 @@
-.TH "slurm.conf" "5" "August 2005" "slurm.conf 0.6" "Slurm configuration file"
+.TH "slurm.conf" "5" "September 2005" "slurm.conf 0.6" "Slurm configuration file"
 .SH "NAME"
 slurm.conf \- Slurm configuration file 
 .SH "DESCRIPTION"
@@ -235,11 +235,16 @@ The default value is "/usr/local/lib/slurm".
 Identifies the plugin to be used for process tracking. 
 The slurmd daemon uses this mechanism to identify all processes 
 which are children of processes it spawns for a user job. 
-Acceptable values at present include "proctrack/aix" (which 
-is the default for AIX systems) and "proctrack/pgid" (which 
-is the default for all other systems).
+Acceptable values at present include 
+"proctrack/aix" (which uses an AIX kernel extension and is 
+the default for AIX systems),
+"proctrack/linuxproc" (which uses linux process tree),
+"proctrack/rms" (which uses Quadrics kernel patch and is the 
+default if "SwitchType=switch/elan") and 
+"proctrack/pgid" (which is the default for all other systems).
 The slurmd daemon must be restarted for a change in ProctrackType 
 to take effect.
+NOTE: "proctrack/linuxproc" is not compatible with "switch/elan".
 .TP
 \fBProlog\fR
 Fully qualified pathname of a script to execute as user root on every 
diff --git a/etc/slurm.conf.example b/etc/slurm.conf.example
index f2223bd353e..0255017fb88 100644
--- a/etc/slurm.conf.example
+++ b/etc/slurm.conf.example
@@ -231,6 +231,9 @@
 #                             the default value on all other computers
 #     "proctrack/linuxproc" : use parent process ID to establish process
 #                             tree, required for MPICH-GM use
+#     "proctrack/rms"       : use Quadrics kernel infrastructure to track 
+#                             processes, strongly recommended for systems
+#                             with a Quadrics switch
 #
 # ProctrackType=proctrack/pgid
 
diff --git a/src/common/read_config.c b/src/common/read_config.c
index 4da69a83d3f..877515c1fab 100644
--- a/src/common/read_config.c
+++ b/src/common/read_config.c
@@ -1310,8 +1310,20 @@ validate_config (slurm_ctl_conf_t *ctl_conf_ptr)
 	if (ctl_conf_ptr->plugindir == NULL)
 		ctl_conf_ptr->plugindir = xstrdup(SLURM_PLUGIN_PATH);
 
-	if (ctl_conf_ptr->proctrack_type == NULL)
-		ctl_conf_ptr->proctrack_type = xstrdup(DEFAULT_PROCTRACK_TYPE);
+	if (ctl_conf_ptr->switch_type == NULL)
+		ctl_conf_ptr->switch_type = xstrdup(DEFAULT_SWITCH_TYPE);
+
+	if (ctl_conf_ptr->proctrack_type == NULL) {
+		if (!strcmp(ctl_conf_ptr->switch_type,"switch/elan"))
+			ctl_conf_ptr->proctrack_type = 
+					xstrdup("proctrack/rms");
+		else
+			ctl_conf_ptr->proctrack_type = 
+					xstrdup(DEFAULT_PROCTRACK_TYPE);
+	}
+	if ((!strcmp(ctl_conf_ptr->switch_type,   "switch/elan"))
+	&&  (!strcmp(ctl_conf_ptr->proctrack_type,"proctrack/linuxproc")))
+		fatal("proctrack/linuxproc is incompatable with switch/elan");
 
         if (ctl_conf_ptr->propagate_rlimits_except) {
                 if ((parse_rlimits( ctl_conf_ptr->propagate_rlimits_except,
@@ -1381,8 +1393,7 @@ validate_config (slurm_ctl_conf_t *ctl_conf_ptr)
 		ctl_conf_ptr->state_save_location = xstrdup(
 				DEFAULT_SAVE_STATE_LOC);
 
-	if (ctl_conf_ptr->switch_type == NULL)
-		ctl_conf_ptr->switch_type = xstrdup(DEFAULT_SWITCH_TYPE);
+	/* see above for switch_type, order dependent */
 
 	if (ctl_conf_ptr->tmp_fs == NULL)
 		ctl_conf_ptr->tmp_fs = xstrdup(DEFAULT_TMP_FS);
diff --git a/src/plugins/mpi/lam/Makefile.am b/src/plugins/mpi/lam/Makefile.am
index b34a0a559ad..bb1d541f8de 100644
--- a/src/plugins/mpi/lam/Makefile.am
+++ b/src/plugins/mpi/lam/Makefile.am
@@ -11,6 +11,3 @@ pkglib_LTLIBRARIES = mpi_lam.la
 
 mpi_lam_la_SOURCES = mpi_lam.c lam.h
 mpi_lam_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
-mpi_lam_la_LIBADD  =  \
-	$(top_builddir)/src/common/libcommon.la -lpthread \
-	$(top_builddir)/src/api/libslurm.la
diff --git a/src/plugins/mpi/mpichgm/Makefile.am b/src/plugins/mpi/mpichgm/Makefile.am
index abc64f13306..324561f396e 100644
--- a/src/plugins/mpi/mpichgm/Makefile.am
+++ b/src/plugins/mpi/mpichgm/Makefile.am
@@ -10,8 +10,7 @@ INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
 pkglib_LTLIBRARIES = mpi_mpichgm.la
 
 # Null switch plugin.
-mpi_mpichgm_la_SOURCES = mpi_mpichgm.c mpichgm.c
+mpi_mpichgm_la_SOURCES = mpi_mpichgm.c mpichgm.c \
+                         $(top_srcdir)/src/common/global_srun.c \
+			  $(top_srcdir)/src/common/net.c
 mpi_mpichgm_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
-mpi_mpichgm_la_LIBADD  =  \
-	$(top_builddir)/src/common/libcommon.la -lpthread \
-	$(top_builddir)/src/api/libslurm.la
diff --git a/src/plugins/mpi/mvapich/Makefile.am b/src/plugins/mpi/mvapich/Makefile.am
index ca57696a78d..76ef64eda6b 100644
--- a/src/plugins/mpi/mvapich/Makefile.am
+++ b/src/plugins/mpi/mvapich/Makefile.am
@@ -10,8 +10,5 @@ INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
 pkglib_LTLIBRARIES = mpi_mvapich.la
 
 # Null switch plugin.
-mpi_mvapich_la_SOURCES = mpi_mvapich.c mvapich.c
+mpi_mvapich_la_SOURCES = mpi_mvapich.c mvapich.c $(top_srcdir)/src/common/net.c
 mpi_mvapich_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
-mpi_mvapich_la_LIBADD  =  \
-	$(top_builddir)/src/common/libcommon.la -lpthread \
-	$(top_builddir)/src/api/libslurm.la
diff --git a/src/plugins/mpi/mvapich/mvapich.c b/src/plugins/mpi/mvapich/mvapich.c
index 678e9100cb7..59fbfb5126d 100644
--- a/src/plugins/mpi/mvapich/mvapich.c
+++ b/src/plugins/mpi/mvapich/mvapich.c
@@ -60,7 +60,8 @@ struct mvapich_info
 	int fd;             /* fd for socket connection to MPI task  */
 	int version;        /* Version of mvapich startup protocol   */
 	int rank;           /* This process' MPI rank                */
-	int pid;            /* This rank's local pid (V3 only)       */
+	int pidlen;         /* length of pid buffer                  */
+	char *pid;          /* This rank's local pid (V3 only)       */
 	int addrlen;        /* Length of addr array in bytes         */
 
 	int *addr;          /* This process' address array, which for
@@ -115,7 +116,7 @@ static struct mvapich_info * mvapich_info_create (int fd)
 	if (fd_read_n (fd, &mvi->rank, sizeof (int)) < 0)
 		E_RET ("mvapich: Unable to read rank id: %m", mvi->rank);
 
-	if (mvi->version != 2 && mvi->version != 3)
+	if (mvi->version <= 1 || mvi->version > 3)
 		E_RET ("Unsupported version %d from rank %d", mvi->version, mvi->rank);
 
 	if (fd_read_n (fd, &mvi->addrlen, sizeof (int)) < 0)
@@ -127,25 +128,22 @@ static struct mvapich_info * mvapich_info_create (int fd)
 		E_RET ("mvapich: Unable to read addr info for rank %d: %m", mvi->rank);
 
 	if (mvi->version == 3) {
-		int pidlen;
-		if (fd_read_n (fd, &pidlen, sizeof (int)) < 0)
+		if (fd_read_n (fd, &mvi->pidlen, sizeof (int)) < 0)
 			E_RET ("mvapich: Unable to read pidlen for rank %d: %m", mvi->rank);
 
-		if (pidlen != sizeof (mvi->pid)) 
-			E_RET ("mvapich: Confused. Rank %d pidlen of %d not what I expected", 
-					mvi->rank, pidlen);
+		mvi->pid = xmalloc (mvi->pidlen);	/* buffer filled by the read below */
 
-		if (fd_read_n (fd, &mvi->pid, pidlen) < 0)
+		if (fd_read_n (fd, mvi->pid, mvi->pidlen) < 0)
 			E_RET ("mvapich: Unable to read pid for rank %d: %m", mvi->rank);
 	}
 
-
 	return (mvi);
 }
 
 static void mvapich_info_destroy (struct mvapich_info *mvi)
 {
 	xfree (mvi->addr);
+	xfree (mvi->pid);
 	xfree (mvi);
 	return;
 }
@@ -201,7 +199,7 @@ static void mvapich_bcast (void)
 		 */
 		if (protocol_version == 3) {
 			for (j = 0; j < nprocs; j++)
-				fd_write_n (m->fd, &mvarray[j]->pid, sizeof (int));
+				fd_write_n (m->fd, mvarray[j]->pid, mvarray[j]->pidlen);
 		}
 
 	}
@@ -288,7 +286,7 @@ static void *mvapich_thr(void *arg)
 
 	mvarray = xmalloc (nprocs * sizeof (*mvarray));
 
-	debug ("mvapich-0.9.[45]: thread started: %ld", pthread_self ());
+	debug ("mvapich-0.9.[45]/gen2: thread started: %ld", pthread_self ());
 
 	while (i < nprocs) {
 		struct mvapich_info *mvi = NULL;
diff --git a/src/plugins/mpi/none/Makefile.am b/src/plugins/mpi/none/Makefile.am
index 5de265c825d..f1b7ab64f1f 100644
--- a/src/plugins/mpi/none/Makefile.am
+++ b/src/plugins/mpi/none/Makefile.am
@@ -12,6 +12,3 @@ pkglib_LTLIBRARIES = mpi_none.la
 # Null MPI plugin.
 mpi_none_la_SOURCES = mpi_none.c
 mpi_none_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
-mpi_none_la_LIBADD  =  \
-	$(top_builddir)/src/common/libcommon.la -lpthread \
-	$(top_builddir)/src/api/libslurm.la
diff --git a/src/plugins/select/bluegene/bluegene.c b/src/plugins/select/bluegene/bluegene.c
index 705e7e73c40..f627f9fb3bf 100644
--- a/src/plugins/select/bluegene/bluegene.c
+++ b/src/plugins/select/bluegene/bluegene.c
@@ -1482,8 +1482,6 @@ static void _process_nodes(bgl_record_t *bgl_record)
 	int j=0, number;
 	int start[PA_SYSTEM_DIMENSIONS];
 	int end[PA_SYSTEM_DIMENSIONS];
-	char buffer[BUFSIZE];
-	int funky=0;
 	ListIterator itr;
 	pa_node_t* pa_node = NULL;
 	
diff --git a/src/plugins/select/bluegene/partition_sys.c b/src/plugins/select/bluegene/partition_sys.c
index d92826568d2..1e7a9afa37a 100755
--- a/src/plugins/select/bluegene/partition_sys.c
+++ b/src/plugins/select/bluegene/partition_sys.c
@@ -409,10 +409,13 @@ int read_bgl_partitions()
 						slurm_user_name);
 			} else {
 				user_name = NULL;
-				if ((rc = rm_get_data(part_ptr, RM_PartitionFirstUser, 
-						&user_name)) != STATUS_OK) {
-					error("rm_get_data(RM_PartitionFirstUser): %s",
-						bgl_err_str(rc));
+				if ((rc = rm_get_data(part_ptr, 
+						      RM_PartitionFirstUser, 
+						      &user_name)) 
+				    != STATUS_OK) {
+					error("rm_get_data"
+					      "(RM_PartitionFirstUser): %s",
+					      bgl_err_str(rc));
 				}
 				if(!user_name) {
 					error("No user name was "
diff --git a/src/slurmd/req.c b/src/slurmd/req.c
index 64588ea92eb..78a4963f442 100644
--- a/src/slurmd/req.c
+++ b/src/slurmd/req.c
@@ -223,12 +223,17 @@ _fork_new_slurmd(void)
 	 *  to return until signaled by grandchild process that
 	 *  slurmd job manager has been successfully created.
 	 */
-	if (pipe(fds) < 0)
+	if (pipe(fds) < 0) {
 		error("fork_slurmd: pipe: %m");
+		return -1;
+	}
 	
-	if ((pid = fork()) < 0) 
+	if ((pid = fork()) < 0) { 
 		error("fork_slurmd: fork: %m");
-	else if (pid > 0) {
+		close(fds[0]);
+		close(fds[1]);
+		return -1;
+	} else if (pid > 0) {
 		if ((fds[1] >= 0) && (close(fds[1]) < 0))
 			error("Unable to close write-pipe in parent: %m");
 
@@ -817,6 +822,8 @@ _rpc_kill_tasks(slurm_msg_t *msg, slurm_addr *cli_addr)
 		goto done;
 	}
 
+#if 0
+	/* This code was used in an investigation of hung TotalView processes */
 	if ((req->signal == SIGKILL)
 	    || (req->signal == SIGINT)) { /* for proctrack/linuxproc */
 		/*
@@ -826,7 +833,9 @@ _rpc_kill_tasks(slurm_msg_t *msg, slurm_addr *cli_addr)
 		slurm_container_signal(step->cont_id, SIGCONT);
 		if (slurm_container_signal(step->cont_id, req->signal) < 0)
 			rc = errno;
-	} else if (req->signal == 0) {
+	} else 
+#endif
+	if (req->signal == 0) {
 		if (slurm_container_signal(step->cont_id, req->signal) < 0)
 			rc = errno;
 /* SIGMIGRATE and SIGSOUND are used to initiate job checkpoint on AIX.
diff --git a/src/smap/job_functions.c b/src/smap/job_functions.c
index f9f24660e22..b0ba00e9d44 100644
--- a/src/smap/job_functions.c
+++ b/src/smap/job_functions.c
@@ -173,7 +173,7 @@ static void _print_header_job(void)
 		pa_system_ptr->xcord += 3;
 		mvwprintw(pa_system_ptr->text_win, pa_system_ptr->ycord,
 			  pa_system_ptr->xcord, "JOBID");
-		pa_system_ptr->xcord += 6;
+		pa_system_ptr->xcord += 7;
 		mvwprintw(pa_system_ptr->text_win, pa_system_ptr->ycord,
 			  pa_system_ptr->xcord, "PARTITION");
 		pa_system_ptr->xcord += 10;
@@ -232,7 +232,7 @@ static int _print_text_job(job_info_t * job_ptr)
 		pa_system_ptr->xcord += 3;
 		mvwprintw(pa_system_ptr->text_win, pa_system_ptr->ycord,
 			  pa_system_ptr->xcord, "%d", job_ptr->job_id);
-		pa_system_ptr->xcord += 6;
+		pa_system_ptr->xcord += 7;
 		mvwprintw(pa_system_ptr->text_win, pa_system_ptr->ycord,
 			  pa_system_ptr->xcord, "%.10s", job_ptr->partition);
 		pa_system_ptr->xcord += 10;
diff --git a/src/smap/partition_functions.c b/src/smap/partition_functions.c
index 35f4a2a70f4..279394999aa 100644
--- a/src/smap/partition_functions.c
+++ b/src/smap/partition_functions.c
@@ -66,10 +66,10 @@ static int _set_start_finish(db2_block_info_t *db2_info_ptr);
 static void _block_list_del(void *object);
 static void _nodelist_del(void *object);
 static int _list_match_all(void *object, void *key);
-static int _in_slurm_partition(db2_block_info_t *db2_info_ptr, 
-			       int *first, 
-			       int *last);
+static int _in_slurm_partition(List slurm_nodes, List bgl_nodes);
 static int _print_rest(db2_block_info_t *block_ptr);
+static int _addto_nodelist(List nodelist, int *start, int *end);
+static int _make_nodelist(char *nodes, List nodelist);
 #endif
 
 extern void get_slurm_part()
@@ -166,7 +166,8 @@ extern void get_bgl_part()
 	int number, start[PA_SYSTEM_DIMENSIONS], end[PA_SYSTEM_DIMENSIONS];
 	db2_block_info_t *block_ptr = NULL;
 	ListIterator itr;
-	
+	List nodelist = NULL;
+
 	if (part_info_ptr) {
 		error_code = slurm_load_partitions(part_info_ptr->last_update, 
 						   &new_part_ptr, SHOW_ALL);
@@ -247,6 +248,9 @@ extern void get_bgl_part()
 			= xstrdup(new_bgl_ptr->bgl_info_array[i].bgl_part_id);
 		block_ptr->nodes 
 			= xstrdup(new_bgl_ptr->bgl_info_array[i].nodes);
+		block_ptr->nodelist = list_create(_nodelist_del);
+		_make_nodelist(block_ptr->nodes,block_ptr->nodelist);
+		
 		block_ptr->bgl_user_name 
 			= xstrdup(new_bgl_ptr->bgl_info_array[i].owner_name);
 		block_ptr->state 
@@ -273,40 +277,22 @@ extern void get_bgl_part()
 		
 		if (!part.nodes || (part.nodes[0] == '\0'))
 			continue;	/* empty partition */
-		while (part.nodes[j] != '\0') {
-			if ((part.nodes[j]   == '[')
-			    && (part.nodes[j+8] == ']')
-			    && ((part.nodes[j+4] == 'x')
-				|| (part.nodes[j+4] == '-'))) {
-				j++;
-				number = atoi(part.nodes + j);
-				start[X] = number / 100;
-				start[Y] = (number % 100) / 10;
-				start[Z] = (number % 10);
-				j += 4;
-
-				number = atoi(part.nodes + j);
-				end[X] = number / 100;
-				end[Y] = (number % 100) / 10;
-				end[Z] = (number % 10);
-				break;
-			}
-			j++;
-		}
+		nodelist = list_create(_nodelist_del);
+		_make_nodelist(part.nodes,nodelist);	
 		
 		if (block_list) {
 			itr = list_iterator_create(block_list);
 			while ((block_ptr = (db2_block_info_t*) 
 				list_next(itr)) != NULL) {
-				if(_in_slurm_partition(block_ptr, 
-						       start, 
-						       end)) {
+				if(_in_slurm_partition(nodelist,
+						       block_ptr->nodelist)) {
 					block_ptr->slurm_part_name 
 						= xstrdup(part.name);
 				}
 			}
 			list_iterator_destroy(itr);
 		}
+		list_destroy(nodelist);
 	}
 
 	/* Report the BGL Blocks */
@@ -730,6 +716,8 @@ static void _block_list_del(void *object)
 
 static void _nodelist_del(void *object)
 {
+	int *coord = (int *)object;
+	xfree(coord);
 	return;
 }
 
@@ -783,17 +771,39 @@ static int _set_start_finish(db2_block_info_t *db2_info_ptr)
 	return 1;
 }
 
-static int _in_slurm_partition(db2_block_info_t *db2_info_ptr, 
-			       int *first, int *last)
+static int _in_slurm_partition(List slurm_nodes, List bgl_nodes)
 {
-	if((db2_info_ptr->start[X]>=first[X])
-	   && (db2_info_ptr->start[Y]>=first[Y])
-	   && (db2_info_ptr->start[Z]>=first[Z])
-	   && (db2_info_ptr->end[X]<=last[X])
-	   && (db2_info_ptr->end[Y]<=last[Y])
-	   && (db2_info_ptr->end[Z]<=last[Z]))
+	ListIterator slurm_itr;
+	ListIterator bgl_itr;
+	int *coord = NULL;
+	int *slurm_coord = NULL;
+	int found = 0;
+	
+	bgl_itr = list_iterator_create(bgl_nodes);
+	slurm_itr = list_iterator_create(slurm_nodes);
+	while ((coord = list_next(bgl_itr)) != NULL) {
+		list_iterator_reset(slurm_itr);
+		found = 0;
+		while ((slurm_coord = list_next(slurm_itr)) != NULL) {
+			if((coord[X] == slurm_coord[X])
+			   && (coord[Y] == slurm_coord[Y])
+			   && (coord[Z] == slurm_coord[Z])) {
+				found=1;
+				break;
+			}
+			
+			
+		}
+		if(!found) {
+			break;
+		}
+	}
+	list_iterator_destroy(slurm_itr);
+	list_iterator_destroy(bgl_itr);
+			
+	if(found)
 		return 1;
-	else 
+	else
 		return 0;
 	
 }
@@ -823,6 +833,81 @@ static int _print_rest(db2_block_info_t *block_ptr)
 	
 	return SLURM_SUCCESS;
 }
+
+static int _addto_nodelist(List nodelist, int *start, int *end)
+{
+	int *coord = NULL;
+	int x,y,z;
+	/* start/end are the inclusive X,Y,Z corners; both must be on the grid */
+	assert(end[X] < DIM_SIZE[X]);
+	assert(start[X] >= 0);
+	assert(end[Y] < DIM_SIZE[Y]);
+	assert(start[Y] >= 0);
+	assert(end[Z] < DIM_SIZE[Z]);
+	assert(start[Z] >= 0);
+	/* append one xmalloc'd {x,y,z} triple for every node in the box */
+	for (x = start[X]; x <= end[X]; x++) {
+		for (y = start[Y]; y <= end[Y]; y++) {
+			for (z = start[Z]; z <= end[Z]; z++) {
+				coord = xmalloc(sizeof(int)*3);
+				coord[X] = x;
+				coord[Y] = y;
+				coord[Z] = z;
+				list_append(nodelist, coord);
+			}
+		}
+	}
+	return 1;
+}
+
+/* Append to nodelist the XYZ coordinates named by the node expression
+ * nodes: comma-separated three-digit "XYZ" entries and "XYZxXYZ" or
+ * "XYZ-XYZ" ranges inside brackets.  Returns 1, or 0 on a NULL argument. */
+static int _make_nodelist(char *nodes, List nodelist)
+{
+	int j = 0, k, number;
+	int start[PA_SYSTEM_DIMENSIONS], end[PA_SYSTEM_DIMENSIONS];
+
+	/* a list created here could never be handed back to the caller */
+	if(!nodes || !nodelist)
+		return 0;
+	while (nodes[j] != '\0') {
+		/* k = chars left from j (capped at 8); bounds the look-ahead */
+		for (k = 0; k < 8 && nodes[j+k] != '\0'; k++)
+			;
+		if (k == 8 && (nodes[j] == '[' || nodes[j] == ',')
+		    && (nodes[j+8] == ']' || nodes[j+8] == ',')
+		    && (nodes[j+4] == 'x' || nodes[j+4] == '-')) {
+			j++;
+			number = atoi(nodes + j);
+			start[X] = number / 100;
+			start[Y] = (number % 100) / 10;
+			start[Z] = (number % 10);
+			j += 4;
+			number = atoi(nodes + j);
+			end[X] = number / 100;
+			end[Y] = (number % 100) / 10;
+			end[Z] = (number % 10);
+			j += 3;
+			_addto_nodelist(nodelist, start, end);
+			if(nodes[j] != ',')
+				break;
+			j--;	/* offset the j++ below: rescan from this comma */
+		} else if (k >= 3 && nodes[j] >= '0' && nodes[j] <= '9') {
+			number = atoi(nodes + j);
+			start[X] = number / 100;
+			start[Y] = (number % 100) / 10;
+			start[Z] = (number % 10);
+			j+=3;
+			_addto_nodelist(nodelist, start, start);
+			if(nodes[j] != ',')
+				break;
+			j--;	/* rescan the comma so a following range is seen */
+		}
+		j++;
+	}
+	return 1;
+}
+
 #endif
 
 static char* _convert_conn_type(enum connection_type conn_type)
diff --git a/src/srun/launch.c b/src/srun/launch.c
index 99f55c8310e..6cadabc8f38 100644
--- a/src/srun/launch.c
+++ b/src/srun/launch.c
@@ -310,6 +310,9 @@ static void _p_launch(slurm_msg_t *req, srun_job_t *job)
 			continue;
 		}
 
+		if (job->state > SRUN_JOB_LAUNCHING)
+			break;
+
 		pthread_mutex_lock(&active_mutex);
 		while (active >= opt.max_threads || rc < 0) 
 			rc = _wait_on_active(thd, job);
@@ -318,9 +321,6 @@ static void _p_launch(slurm_msg_t *req, srun_job_t *job)
 		active++;
 		pthread_mutex_unlock(&active_mutex);
 
-		if (job->state > SRUN_JOB_LAUNCHING)
-			break;
-
 		thd[i].task.req = &req[i];
 		thd[i].task.job = job;
 
diff --git a/src/srun/msg.c b/src/srun/msg.c
index c0f6cab7ef0..6ff00ed8c89 100644
--- a/src/srun/msg.c
+++ b/src/srun/msg.c
@@ -966,7 +966,9 @@ par_thr(void *arg)
 	return (void *)1;
 }
 
-int 
+/* NOTE: call this before creating any pthreads, to avoid having the forked 
+ * process hang on a localtime_t() mutex locked by a pthread in the parent */
+extern int 
 msg_thr_create(srun_job_t *job)
 {
 	int i;
@@ -990,18 +992,30 @@ msg_thr_create(srun_job_t *job)
 			     job->jaddr[i]).sin_port));
 	}
 
-	if (pipe(job->forked_msg->par_msg->msg_pipe) == -1) 
-		return SLURM_ERROR; // there was an error
-	if (pipe(job->forked_msg->msg_par->msg_pipe) == -1) 
-		return SLURM_ERROR; // there was an error
+	if (pipe(job->forked_msg->par_msg->msg_pipe) == -1) {
+		error("pipe():  %m"); 
+		return SLURM_ERROR;
+	}
+	if (pipe(job->forked_msg->msg_par->msg_pipe) == -1) {
+		error("pipe():  %m"); 
+		return SLURM_ERROR;
+	}
 	debug2("created the pipes for communication");
-	if((job->forked_msg->par_msg->pid = fork()) == -1)   
-		return SLURM_ERROR; // there was an error
-	else if (job->forked_msg->par_msg->pid == 0) 
-	{                       // child:    
-#ifdef DISABLE_LOCALTIME
-		disable_localtime();
-#endif                   
+
+	/* retry fork for super-heavily loaded systems */
+	for (i = 0; ; i++) {
+		if((job->forked_msg->par_msg->pid = fork()) != -1)
+			break;
+		if (i < 3)
+			usleep(1000);
+		else {
+			error("fork(): %m");
+			return SLURM_ERROR;
+		}
+	}
+
+	if (job->forked_msg->par_msg->pid == 0) {
+		/* child */
 		setsid();  
 		message_thread = 1;
 		close(job->forked_msg->
@@ -1027,10 +1041,9 @@ msg_thr_create(srun_job_t *job)
 		xfree(job->forked_msg->msg_par);	
 		xfree(job->forked_msg);	
 		_exit(0);
-	}
-	else 
-	{ // parent:   
-		
+	} else {
+		/* parent */
+
 		slurm_attr_init(&attr);
 		pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
 		if ((errno = pthread_create(&job->jtid, &attr, &par_thr, 
-- 
GitLab