From 1e38eeb4dc1c3ab85ba1baecf559c2db99abaf8d Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Mon, 8 Sep 2008 21:59:42 +0000
Subject: [PATCH] svn merge -r14958:15006
 https://eris.llnl.gov/svn/slurm/branches/slurm-1.3

---
 NEWS                                          |   10 +
 configure                                     |    2 +-
 doc/html/faq.shtml                            |    9 +-
 doc/html/team.shtml                           |    4 +-
 doc/man/man5/slurm.conf.5                     |   16 +-
 etc/init.d.slurmdbd                           |   34 +-
 src/api/allocate.c                            |    2 +-
 src/common/assoc_mgr.c                        |   27 +-
 src/common/assoc_mgr.h                        |    9 +-
 src/common/read_config.c                      |   12 +-
 .../filetxt/accounting_storage_filetxt.c      |   10 +-
 .../mysql/accounting_storage_mysql.c          |  130 +-
 .../mysql/mysql_jobacct_process.c             |   16 +-
 .../pgsql/accounting_storage_pgsql.c          |   39 +-
 src/plugins/sched/wiki/job_modify.c           |    9 +-
 src/plugins/sched/wiki2/job_modify.c          |    8 +-
 .../block_allocator/block_allocator.c         | 1064 ++++++-----------
 .../select/bluegene/plugin/bg_job_place.c     |   11 +-
 src/plugins/select/bluegene/plugin/bluegene.c |    1 -
 .../select/bluegene/plugin/slurm_prolog.c     |    5 +-
 src/sacct/options.c                           |   18 +-
 src/sacctmgr/user_functions.c                 |   19 +
 src/salloc/Makefile.am                        |    2 +-
 src/salloc/Makefile.in                        |    2 +-
 src/salloc/salloc.c                           |  115 ++
 src/slurmctld/controller.c                    |    5 +
 src/slurmctld/job_mgr.c                       |  113 +-
 src/slurmctld/job_scheduler.c                 |    4 +
 src/slurmctld/proc_req.c                      |   13 +-
 src/slurmctld/slurmctld.h                     |   12 +-
 src/slurmctld/step_mgr.c                      |    2 +-
 src/slurmd/slurmd/slurmd.c                    |    6 +-
 src/slurmd/slurmstepd/mgr.c                   |   39 +-
 src/slurmd/slurmstepd/req.c                   |   14 +-
 src/slurmdbd/slurmdbd.c                       |    6 +-
 src/smap/configure_functions.c                |    2 +-
 src/srun/allocate.c                           |    2 +-
 src/srun/srun.c                               |   12 +-
 testsuite/expect/globals                      |   51 +
 testsuite/expect/test21.10                    |  129 +-
 testsuite/expect/test21.11                    |    8 +
 testsuite/expect/test21.12                    |    8 +
 testsuite/expect/test21.13                    |    7 +
 testsuite/expect/test21.14                    |    5 +
 testsuite/expect/test21.15                    |    4 +
 testsuite/expect/test21.16                    |    6 +-
 testsuite/expect/test21.17                    |    5 +
 testsuite/expect/test21.18                    |    5 +
 testsuite/expect/test21.19                    |    5 +
 testsuite/expect/test21.5                     |    5 +
 testsuite/expect/test21.6                     |   71 +-
 testsuite/expect/test21.7                     |   10 +-
 testsuite/expect/test21.8                     |    8 +
 testsuite/expect/test21.9                     |    8 +
 testsuite/expect/test7.3                      |    2 +-
 testsuite/expect/test7.7                      |    2 +-
 testsuite/expect/test7.7.prog.c               |   94 +-
 testsuite/expect/test7.8                      |    2 +-
 58 files changed, 1300 insertions(+), 939 deletions(-)

diff --git a/NEWS b/NEWS
index ad68f0d961c..d4c7f49d00f 100644
--- a/NEWS
+++ b/NEWS
@@ -39,6 +39,13 @@ documents those changes that are of interest to users and admins.
     are created in SLURM tables for future use without a reboot of the SLURM
     daemons, but are not reported by any SLURM commands or APIs.
 
+* Changes in SLURM 1.3.9
+========================
+ -- Fix jobs being cancelled by ctrl-C to have correct cancelled state in 
+    accounting.
+ -- Slurmdbd now caches only user data, for faster start up.
+ -- Improved support for job steps in FRONT_END systems
+
 * Changes in SLURM 1.3.8
 ========================
  -- Added PrivateData flags for Users, Usage, and Accounts to Accounting. 
@@ -56,6 +63,7 @@ documents those changes that are of interest to users and admins.
  -- BLUEGENE - Set MPI environment variables from salloc.
  -- BLUEGENE - Fix threading issue for overlap mode
  -- Reject batch scripts containing DOS linebreaks.
+ -- BLUEGENE - Added wait for block boot to salloc
 
 * Changes in SLURM 1.3.7
 ========================
@@ -512,6 +520,8 @@ documents those changes that are of interest to users and admins.
     configuration or other runtime checks.
  -- Add "include" keywork to SPANK plugstack.conf file to optionally include
     other configuration files or directories of configuration files.
+ -- Srun to wait indefinitely for resource allocation to be made. Used to
+    abort after two minutes.
 
 * Changes in SLURM 1.2.34
 =========================
diff --git a/configure b/configure
index 00ef5f35adf..c052058b64a 100755
--- a/configure
+++ b/configure
@@ -25271,7 +25271,7 @@ echo "$as_me: WARNING: *** pkg-config not found. Cannot probe for libglade-2.0 o
 #    fi
 
 
-### Check for gtk2.8 package
+### Check for gtk2.7.1 package
     if test "$ac_have_gtk" == "yes" ; then
         $HAVEPKGCONFIG --exists gtk+-2.0
         if ! test $? -eq 0 ; then
diff --git a/doc/html/faq.shtml b/doc/html/faq.shtml
index b9711d0b1be..6ca32bfc0aa 100644
--- a/doc/html/faq.shtml
+++ b/doc/html/faq.shtml
@@ -890,7 +890,12 @@ about these options.
 clocks on the cluster?</b></a><br>
 In general, yes. Having inconsistent clocks may cause nodes to 
 be unusable. SLURM log files should contain references to 
-expired credentials.
+expired credentials. For example:
+<pre>
+error: Munge decode failed: Expired credential
+ENCODED: Wed May 12 12:34:56 2008
+DECODED: Wed May 12 12:01:12 2008
+</pre>
 
 <p><a name="cred_invalid"><b>21. Why are &quot;Invalid job credential&quot; 
 errors generated?</b></a><br>
@@ -1001,6 +1006,6 @@ sinfo -t drain -h -o "scontrol update nodename='%N' state=drain reason='%E'"
 
 <p class="footer"><a href="#top">top</a></p>
 
-<p style="text-align:center;">Last modified 2 September 2008</p>
+<p style="text-align:center;">Last modified 3 September 2008</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/doc/html/team.shtml b/doc/html/team.shtml
index f82230ee631..df6f0fdfca3 100644
--- a/doc/html/team.shtml
+++ b/doc/html/team.shtml
@@ -29,6 +29,7 @@ and a host of others.
 <li>Chuck Clouston (Bull)</li>
 <li>Chris Dunlap (LLNL)</li>
 <li>Joey Ekstrom (LLNL/Bringham Young University)</li>
+<li>Josh England (TGSMC)</li>
 <li>Kent Engstr&ouml;m  (National Supercomputer Centre, Sweden)</li>
 <li>Jim Garlick (LLNL)</li>
 <li>Didier Gazen (Laboratoire d'Aerologie, France)</li>
@@ -59,11 +60,12 @@ Networking, Italy)</li>
 <li>Federico Sacerdoti (D.E. Shaw)<li>
 <li>Jeff Squyres (LAM MPI)</li>
 <li>Prashanth Tamraparni (HP, India)</li>
+<li>Adam Todorski (Rensselaer Polytechnic Institute)</li>
 <li>Kevin Tew (LLNL/Bringham Young University)</li>
 <li>Jay Windley (Linux NetworX)</li>
 <li>Anne-Marie Wunderlin (Bull)</li>
 </ul>
 
-<p style="text-align:center;">Last modified 28 July 2008</p>
+<p style="text-align:center;">Last modified 5 September 2008</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
index dce6e249340..1c961e2830b 100644
--- a/doc/man/man5/slurm.conf.5
+++ b/doc/man/man5/slurm.conf.5
@@ -1661,11 +1661,19 @@ is very important.
 \fBSelectTypeParameters\fR should be configured to treat
 memory as a consumable resource and the \fB\-\-mem\fR option
 should be used for job allocations.
-For more information see the following web page:
-\fIhttps://computing.llnl.gov/linux/slurm/cons_res_share.html\fR.
-.na
 Possible values for \fBShared\fR are "EXCLUSIVE", "FORCE", "YES", and "NO".
-.ad
+Sharing of resources is typically useful only when using
+\fBSchedulerType=sched/gang\fR.
+For more information see the following web pages:
+.br
+\fIhttps://computing.llnl.gov/linux/slurm/cons_res.html\fR,
+.br
+\fIhttps://computing.llnl.gov/linux/slurm/cons_res_share.html\fR,
+.br
+\fIhttps://computing.llnl.gov/linux/slurm/gang_scheduling.html\fR, and
+.br
+\fIhttps://computing.llnl.gov/linux/slurm/preempt.html\fR.
+
 .RS
 .TP 12
 \fBEXCLUSIVE\fR
diff --git a/etc/init.d.slurmdbd b/etc/init.d.slurmdbd
index 55473e4383d..45e4471e8e7 100755
--- a/etc/init.d.slurmdbd
+++ b/etc/init.d.slurmdbd
@@ -44,6 +44,12 @@ else
    RETVAL=0
 fi
 
+# We can not use a starter program without losing environment 
+# variables that are critical on Blue Gene systems
+if [ -d /bgl/BlueLight/ppcfloor ]; then
+   STARTPROC=""
+fi
+
 # Source slurm specific configuration
 if [ -f /etc/sysconfig/slurm ] ; then
     . /etc/sysconfig/slurm
@@ -53,18 +59,23 @@ fi
 
 [ -f $CONFDIR/slurmdbd.conf ] || exit 1
 
+# setup library paths for slurm and munge support
+export LD_LIBRARY_PATH="$LIBDIR:$LD_LIBRARY_PATH"
+
 start() {
-    echo -n "starting slurmdbd: " 
+    prog=$1
+    shift
+    echo -n "starting $prog: " 
     unset HOME MAIL USER USERNAME 
-    $STARTPROC $SBINDIR/slurmdbd $SLURMDBD_OPTIONS
+    $STARTPROC $SBINDIR/$prog $SLURMDBD_OPTIONS
     rc_status -v
     echo
     touch /var/lock/subsys/slurmdbd
 }
 
 stop() { 
-    echo -n "stopping slurmdbd: "
-    killproc slurmdbd -TERM
+    echo -n "stopping $1: "
+    killproc $1 -TERM
     rc_status -v
     echo
     rm -f /var/lock/subsys/slurmdbd
@@ -76,7 +87,7 @@ slurmstatus() {
     local rpid
     local pidfile
 
-    pidfile=`grep -i PidFile $CONFDIR/slurmdbd.conf | grep -v '^ *#'`
+    pidfile=`grep -i ${base}pid $CONFDIR/slurmdbd.conf | grep -v '^ *#'`
     if [ $? = 0 ]; then
         pidfile=${pidfile##*=}
         pidfile=${pidfile%#*}
@@ -84,25 +95,26 @@ slurmstatus() {
         pidfile=/var/run/slurmdbd.pid
     fi
 
-    pid=`pidof -o $$ -o $$PPID -o %PPID -x slurmdbd`
+   pid=`pidof -o $$ -o $$PPID -o %PPID -x $1 || \
+         pidof -o $$ -o $$PPID -o %PPID -x ${base}`
 
     if [ -f $pidfile ]; then
         read rpid < $pidfile
         if [ "$rpid" != "" -a "$pid" != "" ]; then
             for i in $pid ; do
                 if [ "$i" = "$rpid" ]; then 
-                    echo $"slurmdbd (pid $pid) is running..."
+                    echo $"${base} (pid $pid) is running..."
                     return 0
                 fi     
             done
         elif [ "$rpid" != "" -a "$pid" = "" ]; then
-            echo $"slurmdbd is stopped"
+            echo $"${base} is stopped"
             return 1
         fi 
 
     fi
      
-    echo $"slurmdbd is stopped"
+    echo $"${base} is stopped"
     
     return 3
 }
@@ -125,8 +137,8 @@ case "$1" in
 	slurmstatus slurmdbd
         ;;
     restart)
-	stop slurmdbd
-	start slurmdbd
+	$0 stop
+	$0 start
         ;;
     condrestart)
         if [ -f /var/lock/subsys/slurm ]; then
diff --git a/src/api/allocate.c b/src/api/allocate.c
index 41f7a96caa3..d87842f5e2a 100644
--- a/src/api/allocate.c
+++ b/src/api/allocate.c
@@ -262,7 +262,7 @@ slurm_allocate_resources_blocking (const job_desc_msg_t *user_req,
 							     timeout);
 			/* If NULL, we didn't get the allocation in 
 			   the time desired, so just free the job id */
-			if (resp == NULL) {
+			if (resp == NULL && errno != ESLURM_ALREADY_DONE) {
 				errnum = errno;
 				slurm_complete_job(job_id, -1);
 			}
diff --git a/src/common/assoc_mgr.c b/src/common/assoc_mgr.c
index 17f09dacf01..3dee13d3113 100644
--- a/src/common/assoc_mgr.c
+++ b/src/common/assoc_mgr.c
@@ -202,7 +202,7 @@ static int _get_local_user_list(void *db_conn, int enforce)
 		while((user = list_next(itr))) {
 			uid_t pw_uid = uid_from_string(user->name);
 			if(pw_uid == (uid_t) -1) {
-				error("couldn't get a uid for user %s",
+				debug("couldn't get a uid for user %s",
 				      user->name);
 				user->uid = (uint32_t)NO_VAL;
 			} else
@@ -218,26 +218,35 @@ static int _get_local_user_list(void *db_conn, int enforce)
 
 extern int assoc_mgr_init(void *db_conn, assoc_init_args_t *args)
 {
-	int enforce = 0;
+	uint16_t enforce = 0;
+	uint16_t refresh = 0;
+	uint16_t cache_level = ASSOC_MGR_CACHE_ALL;
 
 	if(args) {
 		enforce = args->enforce;
 		if(args->remove_assoc_notify)
 			remove_assoc_notify = args->remove_assoc_notify;
+		refresh = args->refresh;
+		cache_level = args->cache_level;
 	}
 
-	if(!local_cluster_name && !slurmdbd_conf)
+	if((!local_cluster_name || refresh) && !slurmdbd_conf) {
+		xfree(local_cluster_name);
 		local_cluster_name = slurm_get_cluster_name();
+	}
 
-	if(!local_association_list) 
+	if((!local_association_list || refresh) 
+	   && (cache_level & ASSOC_MGR_CACHE_ASSOC))
 		if(_get_local_association_list(db_conn, enforce) == SLURM_ERROR)
 			return SLURM_ERROR;
-
-	if(!local_qos_list) 
+	
+	if((!local_qos_list || refresh) 
+	   && (cache_level & ASSOC_MGR_CACHE_QOS)) 
 		if(_get_local_qos_list(db_conn, enforce) == SLURM_ERROR)
 			return SLURM_ERROR;
-
-	if(!local_user_list) 
+	
+	if((!local_user_list || refresh) 
+	   && (cache_level & ASSOC_MGR_CACHE_USER)) 
 		if(_get_local_user_list(db_conn, enforce) == SLURM_ERROR)
 			return SLURM_ERROR;
 
@@ -709,7 +718,7 @@ extern int assoc_mgr_update_local_users(acct_update_object_t *update)
 			}
 			pw_uid = uid_from_string(object->name);
 			if(pw_uid == (uid_t) -1) {
-				error("couldn't get a uid for user %s",
+				debug("couldn't get a uid for user %s",
 				      object->name);
 				object->uid = NO_VAL;
 			} else
diff --git a/src/common/assoc_mgr.h b/src/common/assoc_mgr.h
index df9b26f9b76..2ca1750013b 100644
--- a/src/common/assoc_mgr.h
+++ b/src/common/assoc_mgr.h
@@ -49,8 +49,15 @@
 #include <slurm/slurm.h>
 #include <slurm/slurm_errno.h>
 
+#define ASSOC_MGR_CACHE_ASSOC 0x0001
+#define ASSOC_MGR_CACHE_QOS 0x0002
+#define ASSOC_MGR_CACHE_USER 0x0004
+#define ASSOC_MGR_CACHE_ALL 0xffff
+
 typedef struct {
-	int enforce;
+	uint16_t cache_level;
+	uint16_t enforce;
+       	uint16_t refresh;
 	void (*remove_assoc_notify) (acct_association_rec_t *rec);
 } assoc_init_args_t;
 
diff --git a/src/common/read_config.c b/src/common/read_config.c
index 58ad7042c37..dbe52700f4e 100644
--- a/src/common/read_config.c
+++ b/src/common/read_config.c
@@ -810,6 +810,7 @@ static int _register_conf_node_aliases(slurm_conf_node_t *node_ptr)
 		      "in FRONT_END mode");
 		goto cleanup;
 	}
+
 	hostname = node_ptr->hostnames;
 	address = node_ptr->addresses;
 #else
@@ -825,15 +826,20 @@ static int _register_conf_node_aliases(slurm_conf_node_t *node_ptr)
 #endif
 
 	/* now build the individual node structures */
+#ifdef HAVE_FRONT_END
+	/* we always want the first one in the list to be the one
+	 * returned when looking for localhost
+	 */
+	while ((alias = hostlist_pop(alias_list))) {
+#else
 	while ((alias = hostlist_shift(alias_list))) {
-#ifndef HAVE_FRONT_END
 		hostname = hostlist_shift(hostname_list);
 		address = hostlist_shift(address_list);
 #endif
 
 		_push_to_hashtbls(alias, hostname, address, node_ptr->port,
-					node_ptr->cpus, node_ptr->sockets,
-					node_ptr->cores, node_ptr->threads);
+				  node_ptr->cpus, node_ptr->sockets,
+				  node_ptr->cores, node_ptr->threads);
 
 		free(alias);
 #ifndef HAVE_FRONT_END
diff --git a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c
index a37ad84ee4d..136d9d694f9 100644
--- a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c
+++ b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c
@@ -668,6 +668,7 @@ extern int jobacct_storage_p_step_complete(void *db_conn,
 	float ave_vsize = 0, ave_rss = 0, ave_pages = 0;
 	float ave_cpu = 0, ave_cpu2 = 0;
 	char *account;
+	uint32_t exit_code;
 
 	if(!storage_init) {
 		debug("jobacct init was not called or it failed");
@@ -684,7 +685,12 @@ extern int jobacct_storage_p_step_complete(void *db_conn,
 	
 	if ((elapsed=now-step_ptr->start_time)<0)
 		elapsed=0;	/* For *very* short jobs, if clock is wrong */
-	if (step_ptr->exit_code)
+
+	exit_code = step_ptr->exit_code;
+	if (exit_code == NO_VAL) {
+		comp_status = JOB_CANCELLED;
+		exit_code = 0;
+	} else if (exit_code)
 		comp_status = JOB_FAILED;
 	else
 		comp_status = JOB_COMPLETE;
@@ -740,7 +746,7 @@ extern int jobacct_storage_p_step_complete(void *db_conn,
 		 JOB_STEP,
 		 step_ptr->step_id,	/* stepid */
 		 comp_status,		/* completion status */
-		 step_ptr->exit_code,	/* completion code */
+		 exit_code,	/* completion code */
 		 cpus,          	/* number of tasks */
 		 cpus,                  /* number of cpus */
 		 elapsed,	        /* elapsed seconds */
diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
index d77c5ea2f37..44817f3cf45 100644
--- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
+++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
@@ -2011,7 +2011,8 @@ extern int acct_storage_p_add_coord(mysql_conn_t *mysql_conn, uint32_t uid,
 					   "insert into %s "
 					   "(timestamp, action, name, "
 					   "actor, info) "
-					   "values (%d, %u, '%s', '%s', '%s')",
+					   "values (%d, %u, '%s', "
+					   "'%s', \"%s\")",
 					   txn_table,
 					   now, DBD_ADD_ACCOUNT_COORDS, user,
 					   user_name, acct);
@@ -6705,10 +6706,11 @@ extern int clusteracct_storage_p_node_down(mysql_conn_t *mysql_conn,
 	xstrfmtcat(query,
 		   "insert into %s "
 		   "(node_name, cluster, cpu_count, period_start, reason) "
-		   "values ('%s', '%s', %u, %d, '%s') on duplicate key "
+		   "values ('%s', '%s', %u, %d, \"%s\") on duplicate key "
 		   "update period_end=0;",
 		   event_table, node_ptr->name, cluster, 
 		   cpus, event_time, my_reason);
+	debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query);
 	rc = mysql_db_query(mysql_conn->db_conn, query);
 	xfree(query);
 
@@ -6733,6 +6735,7 @@ extern int clusteracct_storage_p_node_up(mysql_conn_t *mysql_conn,
 		"update %s set period_end=%d where cluster='%s' "
 		"and period_end=0 and node_name='%s';",
 		event_table, event_time, cluster, node_ptr->name);
+	debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query);
 	rc = mysql_db_query(mysql_conn->db_conn, query);
 	xfree(query);
 	return rc;
@@ -6964,7 +6967,7 @@ extern int jobacct_storage_p_job_start(mysql_conn_t *mysql_conn,
 {
 #ifdef HAVE_MYSQL
 	int	rc=SLURM_SUCCESS;
-	char	*jname, *nodes;
+	char	*jname = NULL, *nodes = NULL;
 	long	priority;
 	int track_steps = 0;
 	char *block_id = NULL;
@@ -7001,7 +7004,7 @@ extern int jobacct_storage_p_job_start(mysql_conn_t *mysql_conn,
 	if (job_ptr->nodes && job_ptr->nodes[0])
 		nodes = job_ptr->nodes;
 	else
-		nodes = "(null)";
+		nodes = "None assigned";
 
 	if(job_ptr->batch_flag)
 		track_steps = 1;
@@ -7026,27 +7029,52 @@ extern int jobacct_storage_p_job_start(mysql_conn_t *mysql_conn,
 	if(!job_ptr->db_index) {
 		query = xstrdup_printf(
 			"insert into %s "
-			"(jobid, account, associd, uid, gid, partition, "
-			"blockid, eligible, submit, start, name, track_steps, "
-			"state, priority, req_cpus, alloc_cpus, nodelist) "
-			"values (%u, '%s', %u, %u, %u, '%s', '%s', "
-			"%d, %d, %d, '%s', %u, "
-			"%u, %u, %u, %u, '%s') "
-			"on duplicate key update id=LAST_INSERT_ID(id), "
-			"end=0, state=%u",
-			job_table, job_ptr->job_id, job_ptr->account, 
-			job_ptr->assoc_id,
-			job_ptr->user_id, job_ptr->group_id,
-			job_ptr->partition, block_id,
-			(int)job_ptr->details->begin_time,
-			(int)job_ptr->details->submit_time,
-			(int)job_ptr->start_time,
-			jname, track_steps,
-			job_ptr->job_state & (~JOB_COMPLETING),
-			priority, job_ptr->num_procs,
-			job_ptr->total_procs, nodes,
-			job_ptr->job_state & (~JOB_COMPLETING));
-
+			"(jobid, associd, uid, gid, nodelist, ",
+			job_table);
+
+		if(job_ptr->account) 
+			xstrcat(query, "account, ");
+		if(job_ptr->partition) 
+			xstrcat(query, "partition, ");
+		if(block_id) 
+			xstrcat(query, "blockid, ");
+		
+		xstrfmtcat(query, 
+			   "eligible, submit, start, name, track_steps, "
+			   "state, priority, req_cpus, alloc_cpus) "
+			   "values (%u, %u, %u, %u, '%s', ",
+			   job_ptr->job_id, job_ptr->assoc_id,
+			   job_ptr->user_id, job_ptr->group_id, nodes);
+		
+		if(job_ptr->account) 
+			xstrfmtcat(query, "'%s', ", job_ptr->account);
+		if(job_ptr->partition) 
+			xstrfmtcat(query, "'%s', ", job_ptr->partition);
+		if(block_id) 
+			xstrfmtcat(query, "'%s', ", block_id);
+		
+		xstrfmtcat(query, 
+			   "%d, %d, %d, '%s', %u, %u, %u, %u, %u) "
+			   "on duplicate key update "
+			   "id=LAST_INSERT_ID(id), state=%u, associd=%u",
+			   (int)job_ptr->details->begin_time,
+			   (int)job_ptr->details->submit_time,
+			   (int)job_ptr->start_time,
+			   jname, track_steps,
+			   job_ptr->job_state & (~JOB_COMPLETING),
+			   priority, job_ptr->num_procs,
+			   job_ptr->total_procs, 
+			   job_ptr->job_state & (~JOB_COMPLETING),
+			   job_ptr->assoc_id);
+
+		if(job_ptr->account) 
+			xstrfmtcat(query, ", account='%s'", job_ptr->account);
+		if(job_ptr->partition) 
+			xstrfmtcat(query, ", partition='%s'",
+				   job_ptr->partition);
+		if(block_id)
+			xstrfmtcat(query, ", blockid='%s'", block_id);
+		
 		debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query);
 	try_again:
 		if(!(job_ptr->db_index = mysql_insert_ret_id(
@@ -7065,16 +7093,25 @@ extern int jobacct_storage_p_job_start(mysql_conn_t *mysql_conn,
 				rc = SLURM_ERROR;
 		}
 	} else {
-		query = xstrdup_printf(
-			"update %s set partition='%s', blockid='%s', start=%d, "
-			"name='%s', state=%u, alloc_cpus=%u, nodelist='%s', "
-			"account='%s', end=0 where id=%d",
-			job_table, job_ptr->partition, block_id,
-			(int)job_ptr->start_time,
-			jname, 
-			job_ptr->job_state & (~JOB_COMPLETING),
-			job_ptr->total_procs, nodes, 
-			job_ptr->account, job_ptr->db_index);
+		query = xstrdup_printf("update %s set nodelist='%s', ", 
+				       job_table, nodes);
+
+		if(job_ptr->account) 
+			xstrfmtcat(query, "account='%s', ",
+				   job_ptr->account);
+		if(job_ptr->partition) 
+			xstrfmtcat(query, "partition='%s', ",
+				   job_ptr->partition);
+		if(block_id)
+			xstrfmtcat(query, "blockid='%s', ", block_id);
+
+		/* nodelist is already set above; pass one argument per conversion */
+		xstrfmtcat(query, "start=%d, name='%s', state=%u, "
+			   "alloc_cpus=%u, associd=%u where id=%d",
+			   (int)job_ptr->start_time,
+			   jname, job_ptr->job_state & (~JOB_COMPLETING),
+			   job_ptr->total_procs, job_ptr->assoc_id,
+			   job_ptr->db_index);
 		debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query);
 		rc = mysql_db_query(mysql_conn->db_conn, query);
 	}
@@ -7099,7 +7136,7 @@ extern int jobacct_storage_p_job_complete(mysql_conn_t *mysql_conn,
 #ifdef HAVE_MYSQL
 	char *query = NULL, *nodes = NULL;
 	int rc=SLURM_SUCCESS;
-	
+
 	if (!job_ptr->db_index 
 	    && (!job_ptr->details || !job_ptr->details->submit_time)) {
 		error("jobacct_storage_p_job_complete: "
@@ -7110,15 +7147,19 @@ extern int jobacct_storage_p_job_complete(mysql_conn_t *mysql_conn,
 	if(_check_connection(mysql_conn) != SLURM_SUCCESS)
 		return SLURM_ERROR;
 	debug2("mysql_jobacct_job_complete() called");
+	
+	/* If we get an error with this just fall through to avoid an
+	 * infinite loop
+	 */
 	if (job_ptr->end_time == 0) {
 		debug("mysql_jobacct: job %u never started", job_ptr->job_id);
-		return SLURM_ERROR;
+		return SLURM_SUCCESS;
 	}	
 	
 	if (job_ptr->nodes && job_ptr->nodes[0])
 		nodes = job_ptr->nodes;
 	else
-		nodes = "(null)";
+		nodes = "None assigned";
 
 	if(!job_ptr->db_index) {
 		if(!(job_ptr->db_index =
@@ -7135,7 +7176,6 @@ extern int jobacct_storage_p_job_complete(mysql_conn_t *mysql_conn,
 				      job_ptr->job_id);
 				return SLURM_SUCCESS;
 			}
-			jobacct_storage_p_job_start(mysql_conn, job_ptr);
 		}
 	}
 
@@ -7272,7 +7312,8 @@ extern int jobacct_storage_p_step_complete(mysql_conn_t *mysql_conn,
 	float ave_cpu = 0, ave_cpu2 = 0;
 	char *query = NULL;
 	int rc =SLURM_SUCCESS;
-	
+	uint32_t exit_code = 0;
+
 	if (!step_ptr->job_ptr->db_index 
 	    && (!step_ptr->job_ptr->details
 		|| !step_ptr->job_ptr->details->submit_time)) {
@@ -7309,7 +7350,12 @@ extern int jobacct_storage_p_step_complete(mysql_conn_t *mysql_conn,
 	
 	if ((elapsed=now-step_ptr->start_time)<0)
 		elapsed=0;	/* For *very* short jobs, if clock is wrong */
-	if (step_ptr->exit_code)
+	
+	exit_code = step_ptr->exit_code;
+	if (exit_code == NO_VAL) {
+		comp_status = JOB_CANCELLED;
+		exit_code = 0;
+	} else if (exit_code)
 		comp_status = JOB_FAILED;
 	else
 		comp_status = JOB_COMPLETE;
@@ -7369,7 +7415,7 @@ extern int jobacct_storage_p_step_complete(mysql_conn_t *mysql_conn,
 		step_table, (int)now,
 		comp_status,
 		step_ptr->job_ptr->requid, 
-		step_ptr->exit_code,
+		exit_code,
 		/* user seconds */
 		jobacct->user_cpu_sec,	
 		/* user microseconds */
diff --git a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c
index 318daf6dc28..ace4c6f4cd0 100644
--- a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c
+++ b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c
@@ -500,6 +500,7 @@ no_cond:
 
 		if(row[JOB_REQ_ACCOUNT])
 			job->account = xstrdup(row[JOB_REQ_ACCOUNT]);
+
 		if(row[JOB_REQ_BLOCKID])
 			job->blockid = xstrdup(row[JOB_REQ_BLOCKID]);
 
@@ -569,11 +570,15 @@ no_cond:
 			}
 		} else {
 			job->suspended = atoi(row[JOB_REQ_SUSPENDED]);
-			if(!job->end) {
+
+			if(!job->start) {
+				job->elapsed = 0;
+			} else if(!job->end) {
 				job->elapsed = now - job->start;
 			} else {
 				job->elapsed = job->end - job->start;
 			}
+
 			job->elapsed -= job->suspended;
 		}
 
@@ -581,8 +586,13 @@ no_cond:
 		job->jobname = xstrdup(row[JOB_REQ_NAME]);
 		job->gid = atoi(row[JOB_REQ_GID]);
 		job->exitcode = atoi(row[JOB_REQ_COMP_CODE]);
-		job->partition = xstrdup(row[JOB_REQ_PARTITION]);
-		job->nodes = xstrdup(row[JOB_REQ_NODELIST]);
+
+		if(row[JOB_REQ_PARTITION])
+			job->partition = xstrdup(row[JOB_REQ_PARTITION]);
+
+		if(row[JOB_REQ_NODELIST])
+			job->nodes = xstrdup(row[JOB_REQ_NODELIST]);
+
 		if (!job->nodes || !strcmp(job->nodes, "(null)")) {
 			xfree(job->nodes);
 			job->nodes = xstrdup("(unknown)");
diff --git a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c
index 26c6e05350d..41d4a280848 100644
--- a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c
+++ b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c
@@ -1143,7 +1143,7 @@ extern int jobacct_storage_p_job_start(PGconn *acct_pgsql_db,
 	if (job_ptr->nodes && job_ptr->nodes[0])
 		nodes = job_ptr->nodes;
 	else
-		nodes = "(null)";
+		nodes = "None assigned";
 
 	if(job_ptr->batch_flag)
 		track_steps = 1;
@@ -1250,15 +1250,24 @@ extern int jobacct_storage_p_job_complete(PGconn *acct_pgsql_db,
 	if (job_ptr->nodes && job_ptr->nodes[0])
 		nodes = job_ptr->nodes;
 	else
-		nodes = "(null)";
-
+		nodes = "None assigned";
+	
 	if(!job_ptr->db_index) {
-		job_ptr->db_index = _get_db_index(acct_pgsql_db,
-						  job_ptr->details->submit_time,
-						  job_ptr->job_id,
-						  job_ptr->assoc_id);
-		if(job_ptr->db_index == -1) 
-			return SLURM_ERROR;
+		if(!(job_ptr->db_index =
+		     _get_db_index(acct_pgsql_db,
+				   job_ptr->details->submit_time,
+				   job_ptr->job_id,
+				   job_ptr->assoc_id))) {
+			/* If we get an error with this just fall
+			 * through to avoid an infinite loop
+			 */
+			if(jobacct_storage_p_job_start(acct_pgsql_db, job_ptr)
+			   == SLURM_ERROR) {
+				error("couldn't add job %u at job completion",
+				      job_ptr->job_id);
+				return SLURM_SUCCESS;
+			}
+		}
 	}
 	query = xstrdup_printf("update %s set start=%u, endtime=%u, state=%d, "
 			       "nodelist='%s', comp_code=%u, "
@@ -1385,7 +1394,8 @@ extern int jobacct_storage_p_step_complete(PGconn *acct_pgsql_db,
 	float ave_cpu = 0, ave_cpu2 = 0;
 	char *query = NULL;
 	int rc =SLURM_SUCCESS;
-	
+	uint32_t exit_code;
+
 	if (!step_ptr->job_ptr->db_index 
 	    && (!step_ptr->job_ptr->details
 		|| !step_ptr->job_ptr->details->submit_time)) {
@@ -1425,7 +1435,12 @@ extern int jobacct_storage_p_step_complete(PGconn *acct_pgsql_db,
 
 	if ((elapsed=now-step_ptr->start_time)<0)
 		elapsed=0;	/* For *very* short jobs, if clock is wrong */
-	if (step_ptr->exit_code)
+
+	exit_code = step_ptr->exit_code;
+	if (exit_code == NO_VAL) {
+		comp_status = JOB_CANCELLED;
+		exit_code = 0;
+	} else if (exit_code)
 		comp_status = JOB_FAILED;
 	else
 		comp_status = JOB_COMPLETE;
@@ -1475,7 +1490,7 @@ extern int jobacct_storage_p_step_complete(PGconn *acct_pgsql_db,
 		step_table, (int)now,
 		comp_status,
 		step_ptr->job_ptr->requid, 
-		step_ptr->exit_code, 
+		exit_code, 
 		/* user seconds */
 		jobacct->user_cpu_sec,	
 		/* user microseconds */
diff --git a/src/plugins/sched/wiki/job_modify.c b/src/plugins/sched/wiki/job_modify.c
index 4ac123c9ca7..7cad50eadc1 100644
--- a/src/plugins/sched/wiki/job_modify.c
+++ b/src/plugins/sched/wiki/job_modify.c
@@ -95,11 +95,10 @@ static int	_job_modify(uint32_t jobid, char *bank_ptr,
 				  old_time) * 60);
 		last_job_update = time(NULL);
 	}
-	if (bank_ptr) {
-		info("wiki: change job %u bank %s", jobid, bank_ptr);
-		xfree(job_ptr->account);
-		job_ptr->account = xstrdup(bank_ptr);
-		last_job_update = time(NULL);
+
+	if (bank_ptr &&
+	    (update_job_account("wiki", job_ptr, bank_ptr) != SLURM_SUCCESS)) {
+		return EINVAL;
 	}
 
 	if (new_hostlist) {
diff --git a/src/plugins/sched/wiki2/job_modify.c b/src/plugins/sched/wiki2/job_modify.c
index 9e5f7aec178..714a38976b9 100644
--- a/src/plugins/sched/wiki2/job_modify.c
+++ b/src/plugins/sched/wiki2/job_modify.c
@@ -99,11 +99,9 @@ static int	_job_modify(uint32_t jobid, char *bank_ptr,
 		last_job_update = now;
 	}
 
-	if (bank_ptr) {
-		info("wiki: change job %u bank %s", jobid, bank_ptr);
-		xfree(job_ptr->account);
-		job_ptr->account = xstrdup(bank_ptr);
-		last_job_update = now;
+	if (bank_ptr &&
+	    (update_job_account("wiki", job_ptr, bank_ptr) != SLURM_SUCCESS)) {
+		return EINVAL;
 	}
 
 	if (feature_ptr) {
diff --git a/src/plugins/select/bluegene/block_allocator/block_allocator.c b/src/plugins/select/bluegene/block_allocator/block_allocator.c
index 6001c2ac07f..c55a91280fa 100644
--- a/src/plugins/select/bluegene/block_allocator/block_allocator.c
+++ b/src/plugins/select/bluegene/block_allocator/block_allocator.c
@@ -96,6 +96,11 @@ s_p_options_t bg_conf_file_options[] = {
 	{NULL}
 };
 
+typedef enum {
+	BLOCK_ALGO_FIRST,
+	BLOCK_ALGO_SECOND
+} block_algo_t;
+
 #ifdef HAVE_BG
 /** internal helper functions */
 #ifdef HAVE_BG_FILES
@@ -153,7 +158,7 @@ static void _delete_path_list(void *object);
 static int _find_match(ba_request_t* ba_request, List results);
 
 /** */
-static bool _node_used(ba_node_t* ba_node, int *geometry);
+static bool _node_used(ba_node_t* ba_node, int x_size);
 
 /* */
 static void _switch_config(ba_node_t* source, ba_node_t* target, int dim, 
@@ -167,14 +172,9 @@ static int _set_external_wires(int dim, int count, ba_node_t* source,
 static char *_set_internal_wires(List nodes, int size, int conn_type);
 
 /* */
-static int _find_x_path(List results, ba_node_t *ba_node, 
-			int *start, int *first, 
-			int *geometry, int found, int conn_type);
-
-/* */
-static int _find_x_path2(List results, ba_node_t *ba_node, 
-			 int *start, int *first, 
-			 int *geometry, int found, int conn_type);
+static int _find_x_path(List results, ba_node_t *ba_node, int *start,
+			int x_size, int found, int conn_type, 
+			block_algo_t algo);
 
 /* */
 static int _remove_node(List results, int *node_tar);
@@ -190,7 +190,7 @@ static int _find_next_free_using_port_2(ba_switch_t *curr_switch,
 /* 			     int count, int highest_phys_x);  */
 /* */
 static int _finish_torus(ba_switch_t *curr_switch, int source_port, 
-			   List nodes, int dim, int count, int *start);
+			 int dim, int count, int *start);
 /* */
 static int *_set_best_path();
 
@@ -1432,6 +1432,7 @@ end_it:
  * IN start - where to start the allocation.
  * IN geometry - the requested geometry of the block.
  * IN conn_type - mesh, torus, or small.
+ *
  * RET char * - hostlist of midplanes results represent must be
  *     xfreed.  NULL on failure
  */
@@ -1450,24 +1451,22 @@ extern char *set_bg_block(List results, int *start,
 	   || start[Y]>=DIM_SIZE[Y]
 	   || start[Z]>=DIM_SIZE[Z])
 		return NULL;
-	if(geometry[X]<=0 
-	   || geometry[Y]<=0
-	   || geometry[Z]<=0) {
+
+	if(geometry[X] <= 0 || geometry[Y] <= 0 || geometry[Z] <= 0) {
 		error("problem with geometry %c%c%c, needs to be at least 111",
 		      alpha_num[geometry[X]],
 		      alpha_num[geometry[Y]],
 		      alpha_num[geometry[Z]]);		      
 		return NULL;
 	}
+
 	size = geometry[X] * geometry[Y] * geometry[Z];
-	ba_node = &ba_system_ptr->
-		grid[start[X]][start[Y]][start[Z]];
+	ba_node = &ba_system_ptr->grid[start[X]][start[Y]][start[Z]];
 #else
 	if(start[X]>=DIM_SIZE[X])
 		return NULL;
 	size = geometry[X];
-	ba_node = &ba_system_ptr->
-			grid[start[X]];	
+	ba_node = &ba_system_ptr->grid[start[X]];	
 #endif
 	
 
@@ -1478,36 +1477,34 @@ extern char *set_bg_block(List results, int *start,
 		results = list_create(NULL);
 	else
 		send_results = 1;
-		
+	/* This midplane should have already been checked if it was in
+	   use or not */
 	list_append(results, ba_node);
 	if(conn_type == SELECT_SMALL) {
 		/* adding the ba_node and ending */
 		ba_node->used = true;
-		name = xmalloc(4);
-		snprintf(name, 4, "%c%c%c",
-			 alpha_num[ba_node->coord[X]],
-			 alpha_num[ba_node->coord[Y]],
-			 alpha_num[ba_node->coord[Z]]);
+		name = xstrdup_printf("%c%c%c",
+				      alpha_num[ba_node->coord[X]],
+				      alpha_num[ba_node->coord[Y]],
+				      alpha_num[ba_node->coord[Z]]);
 		goto end_it; 
 	}
 	found = _find_x_path(results, ba_node,
 			     ba_node->coord, 
-			     ba_node->coord, 
-			     geometry, 
+			     geometry[X], 
 			     1,
-			     conn_type);
+			     conn_type, BLOCK_ALGO_FIRST);
 
 	if(!found) {
 		debug2("trying less efficient code");
 		remove_block(results, color_count);
 		list_delete_all(results, &empty_null_destroy_list, "");
 		list_append(results, ba_node);
-		found = _find_x_path2(results, ba_node,
-				      ba_node->coord,
-				      ba_node->coord,
-				      geometry,
-				      1,
-				      conn_type);
+		found = _find_x_path(results, ba_node,
+				     ba_node->coord,
+				     geometry[X],
+				     1,
+				     conn_type, BLOCK_ALGO_SECOND);
 	}
 	if(found) {
 #ifdef HAVE_BG
@@ -2709,10 +2706,25 @@ static int _append_geo(int *geometry, List geos, int rotate)
 }
 
 /*
+ * Fill in the paths and extra midplanes we need for the block.
+ * Basically copy the x path sent in with the start_list in each Y and
+ * Z dimension filling in every midplane for the block and then
+ * completing the Y and Z wiring, tying the whole block together.
  *
+ * IN/OUT results - total list of midplanes after this function
+ *        returns successfully.  Should be
+ *        an exact copy of the start_list at first.
+ * IN start_list - exact copy of results at first, This should only be
+ *        a list of midplanes on the X dim.  We will work off this and
+ *        the geometry to fill in this wiring for the X dim in all the
+ *        Y and Z coords.
+ * IN geometry - What the block looks like
+ * IN conn_type - Mesh or Torus
+ * 
+ * RET: 0 on failure 1 on success
  */
 static int _fill_in_coords(List results, List start_list,
-			    int *geometry, int conn_type)
+			   int *geometry, int conn_type)
 {
 	ba_node_t *ba_node = NULL;
 	ba_node_t *check_node = NULL;
@@ -2722,8 +2734,9 @@ static int _fill_in_coords(List results, List start_list,
 	ba_switch_t *curr_switch = NULL; 
 	ba_switch_t *next_switch = NULL; 
 	
-	if(!start_list)
+	if(!start_list || !results)
 		return 0;
+	/* go through the start_list and add all the midplanes */
 	itr = list_iterator_create(start_list);
 	while((check_node = (ba_node_t*) list_next(itr))) {		
 		curr_switch = &check_node->axis_switch[X];
@@ -2744,18 +2757,23 @@ static int _fill_in_coords(List results, List start_list,
 					[check_node->coord[X]]
 					[check_node->coord[Y]+y]
 					[check_node->coord[Z]+z];
-				if(ba_node->coord[Y] 
-				   == check_node->coord[Y]
-				   && ba_node->coord[Z] 
-				   == check_node->coord[Z])
+
+				if(ba_node->coord[Y] == check_node->coord[Y]
+				   && ba_node->coord[Z] == check_node->coord[Z])
 					continue;
-				if (!_node_used(ba_node,geometry)) {
+
+				if (!_node_used(ba_node, geometry[X])) {
 					debug3("here Adding %c%c%c",
 					       alpha_num[ba_node->coord[X]],
 					       alpha_num[ba_node->coord[Y]],
 					       alpha_num[ba_node->coord[Z]]);
 					list_append(results, ba_node);
 					next_switch = &ba_node->axis_switch[X];
+					
+					/* since we are going off the
+					 * main system we can send NULL
+					 * here
+					 */
 					_copy_the_path(NULL, curr_switch, 
 						       next_switch, 
 						       0, X);
@@ -2788,6 +2806,24 @@ failed:
 	return rc;
 }
 
+/*
+ * Copy a path through the wiring of a switch to another switch on a
+ * starting port on a dimension.
+ *
+ * IN/OUT: nodes - Local list of midplanes you are keeping track of.  If
+ *         you visit any new midplanes a copy from ba_system_grid  
+ *         will be added to the list.  If NULL the path will be
+ *         set in mark_switch of the main virtual system (ba_system_grid).  
+ * IN: curr_switch - The switch you want to copy the path of
+ * IN/OUT: mark_switch - The switch you want to fill in.  On success
+ *         this switch will contain a complete path from the curr_switch
+ *         starting from the source port.
+ * IN: source - source port number (If calling for the first time
+ *         should be 0 since we are looking for 1 at the end)
+ * IN: dim - Dimension XYZ
+ *
+ * RET: on success 1, on error 0
+ */
 static int _copy_the_path(List nodes, ba_switch_t *curr_switch, 
 			  ba_switch_t *mark_switch, 
 			  int source, int dim)
@@ -2798,7 +2834,8 @@ static int _copy_the_path(List nodes, ba_switch_t *curr_switch,
 	int port_tar, port_tar1;
 	ba_switch_t *next_switch = NULL; 
 	ba_switch_t *next_mark_switch = NULL; 
-	/*set the switch to not be used */
+       
+	/* Copy the source used and port_tar */
 	mark_switch->int_wire[source].used = 
 		curr_switch->int_wire[source].used;
 	mark_switch->int_wire[source].port_tar = 
@@ -2806,6 +2843,7 @@ static int _copy_the_path(List nodes, ba_switch_t *curr_switch,
 
 	port_tar = curr_switch->int_wire[source].port_tar;
 	
+	/* Now do the same thing from the other end */
 	mark_switch->int_wire[port_tar].used = 
 		curr_switch->int_wire[port_tar].used;
 	mark_switch->int_wire[port_tar].port_tar = 
@@ -2828,6 +2866,7 @@ static int _copy_the_path(List nodes, ba_switch_t *curr_switch,
 		       port_tar);	
 	
 	if(port_tar == 1) {
+		/* found the end of the line */
 		mark_switch->int_wire[1].used = 
 			curr_switch->int_wire[1].used;
 		mark_switch->int_wire[1].port_tar = 
@@ -2841,12 +2880,18 @@ static int _copy_the_path(List nodes, ba_switch_t *curr_switch,
 	if(node_curr[X] == node_tar[X]
 	   && node_curr[Y] == node_tar[Y]
 	   && node_curr[Z] == node_tar[Z]) {
+		/* We are going to the same node! this should never
+		   happen */
 		debug4("something bad happened!!");
 		return 0;
 	}
+
+	/* see what the next switch is going to be */
 	next_switch = &ba_system_ptr->
 		grid[node_tar[X]][node_tar[Y]][node_tar[Z]].axis_switch[dim];
 	if(!nodes) {
+		/* If no nodes then just get the next switch to fill
+		   in from the main system */
 		next_mark_switch = &ba_system_ptr->
 			grid[mark_node_tar[X]]
 			[mark_node_tar[Y]]
@@ -2855,6 +2900,7 @@ static int _copy_the_path(List nodes, ba_switch_t *curr_switch,
 	} else {
 		ba_node_t *ba_node = NULL;
 		ListIterator itr = list_iterator_create(nodes);
+		/* see if we have already been to this node */
 		while((ba_node = list_next(itr))) {
 			if (ba_node->coord[X] == mark_node_tar[X] &&
 			    ba_node->coord[Y] == mark_node_tar[Y] &&
@@ -2863,6 +2909,7 @@ static int _copy_the_path(List nodes, ba_switch_t *curr_switch,
 		}
 		list_iterator_destroy(itr);
 		if(!ba_node) {
+			/* If not, grab a copy and add it to the list */
 			ba_node = ba_copy_node(&ba_system_ptr->
 					       grid[mark_node_tar[X]]
 					       [mark_node_tar[Y]]
@@ -2877,8 +2924,10 @@ static int _copy_the_path(List nodes, ba_switch_t *curr_switch,
 		next_mark_switch = &ba_node->axis_switch[dim];
 			
 	}
+
+	/* Keep going until we reach the end of the line */
 	return _copy_the_path(nodes, next_switch, next_mark_switch,
-		       port_tar, dim);
+			      port_tar, dim);
 }
 
 static int _find_yz_path(ba_node_t *ba_node, int *first, 
@@ -3350,7 +3399,7 @@ start_again:
 #endif
 			;
 
-		if (!_node_used(ba_node, ba_request->geometry)) {
+		if (!_node_used(ba_node, ba_request->geometry[X])) {
 			debug3("trying this node %c%c%c %c%c%c %d",
 			       alpha_num[start[X]],
 			       alpha_num[start[Y]],
@@ -3419,10 +3468,15 @@ requested_end:
 	return 0;
 }
 
-/* bool _node_used(ba_node_t* ba_node, int geometry,  */
-static bool _node_used(ba_node_t* ba_node, int *geometry)
+/* 
+ * Used to check if midplane is usable in the block we are creating
+ *
+ * IN: ba_node - node to check if is used
+ * IN: x_size - How big is the block in the X dim used to see if the
+ *     wires are full hence making this midplane unusable.
+ */
+static bool _node_used(ba_node_t* ba_node, int x_size)
 {
-	int i=0;
 	ba_switch_t* ba_switch = NULL;
 	
 	/* if we've used this node in another block already */
@@ -3433,17 +3487,27 @@ static bool _node_used(ba_node_t* ba_node, int *geometry)
 		       alpha_num[ba_node->coord[Z]]);
 		return true;
 	}
-	/* if we've used this nodes switches completely in another 
-	   block already */
-	for(i=0;i<1;i++) {
-		if(geometry[i]>1) {
-			ba_switch = &ba_node->axis_switch[i];
-			
-			if(ba_switch->int_wire[3].used 
-			   && ba_switch->int_wire[5].used) {
-				debug3("switch in use dim %d!",i);
-				return true;
-			}
+	/* Check If we've used this node's switches completely in another 
+	   block already.  Right now we are only needing to look at
+	   the X dim since it is the only one with extra wires.  This
+	   can be set up to do all the dims in the future if it is
+	   needed. We only need to check this if we are planning on
+	   using more than 1 midplane in the block creation */
+	if(x_size > 1) {
+		/* get the switch of the X Dimension */
+		ba_switch = &ba_node->axis_switch[X];
+		
+		/* If both of these ports are used then the node
+		   is in use since there are no more wires we
+		   can use since these can not connect to each
+		   other they must be connected to the other ports.
+		*/
+		if(ba_switch->int_wire[3].used && ba_switch->int_wire[5].used) {
+			debug3("switch full in the X dim on node %c%c%c!",
+			       alpha_num[ba_node->coord[X]],
+			       alpha_num[ba_node->coord[Y]],
+			       alpha_num[ba_node->coord[Z]]);
+			return true;
 		}
 	}
 		
@@ -3844,7 +3908,7 @@ static char *_set_internal_wires(List nodes, int size, int conn_type)
 	name = xmalloc(BUFSIZE);
 	hostlist = hostlist_create(NULL);
 	itr = list_iterator_create(nodes);
-	while((ba_node[count] = (ba_node_t*) list_next(itr))) {
+	while((ba_node[count] = list_next(itr))) {
 		snprintf(temp_name, sizeof(temp_name), "%c%c%c", 
 			 alpha_num[ba_node[count]->coord[X]],
 			 alpha_num[ba_node[count]->coord[Y]],
@@ -3894,41 +3958,67 @@ static char *_set_internal_wires(List nodes, int size, int conn_type)
 	return name;
 }				
 
+/*
+ * Used to find a complete path based on the conn_type for an x dim.
+ * When starting to wire a block together this should be called first.
+ *
+ * IN/OUT: results - contains the number of midplanes we are
+ *     potentially going to use in the X dim.  
+ * IN: ba_node - current node we are looking at and have already added
+ *     to results.
+ * IN: start - coordinates of the first midplane (so we know when
+ *     to end with a torus)
+ * IN: x_size - How many midplanes are we looking for in the X dim
+ * IN: found - count of how many midplanes we have found in the x dim
+ * IN: conn_type - MESH or TORUS
+ * IN: algo - algorithm to try an allocation by
+ *
+ * RET: 0 on failure, 1 on success
+ */
 static int _find_x_path(List results, ba_node_t *ba_node, 
-	int *start, int *first, int *geometry, 
-	int found, int conn_type) 
+			int *start, int x_size, 
+			int found, int conn_type, block_algo_t algo) 
 {
 	ba_switch_t *curr_switch = NULL; 
 	ba_switch_t *next_switch = NULL; 
 	
 	int port_tar = 0;
 	int source_port=0;
-	int target_port=0;
+	int target_port=1;
 	int broke = 0, not_first = 0;
-	int ports_to_try[2] = {3,5};
+	int ports_to_try[2] = {4, 2};
 	int *node_tar = NULL;
 	int i = 0;
 	ba_node_t *next_node = NULL;
 	ba_node_t *check_node = NULL;
-/* 	int highest_phys_x = geometry[X] - start[X]; */
+/* 	int highest_phys_x = x_size - start[X]; */
 /* 	info("highest_phys_x is %d", highest_phys_x); */
 
-	ListIterator itr;
+	ListIterator itr = NULL;
 
-	if(!ba_node)
+	if(!ba_node || !results || !start)
 		return 0;
 
-	if(!source_port) {
-		target_port=1;
+	/* we don't need to go any further */
+	if(x_size == 1) 
+		return 1;
+
+	if(algo == BLOCK_ALGO_FIRST) {
 		ports_to_try[0] = 4;
 		ports_to_try[1] = 2;
-			
-	}
+	} else if(algo == BLOCK_ALGO_SECOND) {
+		ports_to_try[0] = 2;
+		ports_to_try[1] = 4;
+	} else {
+		error("Unknown algo %d", algo);
+		return 0;
+	}			
+	
 	curr_switch = &ba_node->axis_switch[X];
-	if(geometry[X] == 1) {
-		goto found_one;
-	}
-	debug3("found - %d",found);
+
+	debug3("Algo(%d) found - %d", algo, found);
+
+	/* Check the 2 ports we can leave through in ports_to_try */
 	for(i=0;i<2;i++) {
 /* 		info("trying port %d", ports_to_try[i]); */
 		/* check to make sure it isn't used */
@@ -3950,53 +4040,48 @@ static int _find_x_path(List results, ba_node_t *ba_node,
 /* 			     port_tar); */
 			/* check to see if we are back at the start of the
 			   block */
-			if((node_tar[X] == 
-			    start[X] && 
-			    node_tar[Y] == 
-			    start[Y] && 
-			    node_tar[Z] == 
-			    start[Z])) {
+			if((node_tar[X] == start[X] 
+			    && node_tar[Y] == start[Y] 
+			    && node_tar[Z] == start[Z])) {
 				broke = 1;
 				goto broke_it;
 			}
 			/* check to see if the port points to itself */
-			if((node_tar[X] == 
-			    ba_node->coord[X] && 
-			    node_tar[Y] == 
-			    ba_node->coord[Y] && 
-			    node_tar[Z] == 
-			    ba_node->coord[Z])) {
+			if((node_tar[X] == ba_node->coord[X]
+			    && node_tar[Y] == ba_node->coord[Y]
+			    && node_tar[Z] == ba_node->coord[Z])) {
 				continue;
 			}
 			/* check to see if I am going to a place I have
 			   already been before */
 			itr = list_iterator_create(results);
-			while((next_node = (ba_node_t*) list_next(itr))) {
-				debug3("looking at %c%c%c and %c%c%c",
+			while((next_node = list_next(itr))) {
+				debug3("Algo(%d) looking at %c%c%c and %c%c%c",
+				       algo,
 				       alpha_num[next_node->coord[X]],
 				       alpha_num[next_node->coord[Y]],
 				       alpha_num[next_node->coord[Z]],
 				       alpha_num[node_tar[X]],
 				       alpha_num[node_tar[Y]],
 				       alpha_num[node_tar[Z]]);
-				if((node_tar[X] == next_node->coord[X] && 
-				    node_tar[Y] == next_node->coord[Y] && 
-				    node_tar[Z] == next_node->coord[Z])) {
+				if((node_tar[X] == next_node->coord[X] 
+				    && node_tar[Y] == next_node->coord[Y]
+				    && node_tar[Z] == next_node->coord[Z])) {
 					not_first = 1;
 					break;
 				}				
 			}
 			list_iterator_destroy(itr);
-			if(not_first && found<DIM_SIZE[X]) {
-				debug2("already been there before");
+			if(not_first && found < DIM_SIZE[X]) {
+				debug2("Algo(%d) already been there before",
+				       algo);
 				not_first = 0;
 				continue;
 			} 
 			not_first = 0;
 				
 		broke_it:
-			next_node = &ba_system_ptr->
-				grid[node_tar[X]]
+			next_node = &ba_system_ptr->grid[node_tar[X]]
 #ifdef HAVE_BG
 				[node_tar[Y]]
 				[node_tar[Z]]
@@ -4004,97 +4089,36 @@ static int _find_x_path(List results, ba_node_t *ba_node,
 				;
 			next_switch = &next_node->axis_switch[X];
 
- 			if((conn_type == SELECT_MESH) 
-			   && (found == (geometry[X]))) {
-				debug2("we found the end of the mesh");
+ 			if((conn_type == SELECT_MESH) && (found == (x_size))) {
+				debug2("Algo(%d) we found the end of the mesh",
+				       algo);
 				return 1;
 			}
-			debug3("Broke = %d Found = %d geometry[X] = %d",
-			       broke, found, geometry[X]);
-
-/* This doesnt' appear to be of any use since we are doing a circular
- * system not a linear one.  Kept just to make sure.
- */
+			debug3("Algo(%d) Broke = %d Found = %d x_size = %d",
+			       algo, broke, found, x_size);
 
-/* 			debug3("Next Phys X %d Highest X %d", */
-/* 			       next_node->phys_x, highest_phys_x); */
-/* 			if(next_node->phys_x >= highest_phys_x) { */
-/* 				debug3("looking for a passthrough"); */
-/* 				if(best_path) */
-/* 					list_destroy(best_path); */
-/* 				best_path = list_create(_delete_path_list); */
-/* 				if(path) */
-/* 					list_destroy(path); */
-/* 				path = list_create(_delete_path_list); */
-	
-/* 				_find_passthrough(curr_switch, */
-/* 						  0, */
-/* 						  results, */
-/* 						  X, */
-/* 						  0, */
-/* 						  highest_phys_x); */
-/* 				if(best_count < BEST_COUNT_INIT) { */
-/* 					debug2("yes found next free %d",  */
-/* 					       best_count); */
-/* 					node_tar = _set_best_path(); */
-/* 					next_node = &ba_system_ptr-> */
-/* 						grid[node_tar[X]] */
-/* #ifdef HAVE_BG */
-/* 						[node_tar[Y]] */
-/* 						[node_tar[Z]] */
-/* #endif */
-/* 						; */
-/* 					next_switch =  */
-/* 						&next_node->axis_switch[X]; */
-					
-/* #ifdef HAVE_BG */
-/* 					debug2("found %d looking at " */
-/* 					       "%c%c%c going to %c%c%c %d", */
-/* 					       found, */
-/* 					       alpha_num[ba_node->coord[X]], */
-/* 					       alpha_num[ba_node->coord[Y]], */
-/* 					       alpha_num[ba_node->coord[Z]], */
-/* 					       alpha_num[node_tar[X]], */
-/* 					       alpha_num[node_tar[Y]], */
-/* 					       alpha_num[node_tar[Z]], */
-/* 					       port_tar); */
-/* #endif		 */
-/* 					list_append(results, next_node); */
-/* 					found++; */
-/* 					if(_find_x_path(results, next_node,  */
-/* 							start, first, geometry, */
-/* 							found, conn_type)) { */
-/* 						return 1; */
-/* 					} else { */
-/* 						found--; */
-/* 						_reset_the_path(curr_switch, 0, */
-/* 								1, X); */
-/* 						_remove_node(results,  */
-/* 							     next_node->coord); */
-/* 						return 0; */
-/* 					} */
-/* 				} */
-/* 			}			 */
-
-			if(broke && (found == geometry[X])) {
+			if(broke && (found == x_size)) {
 				goto found_path;
-			} else if(found == geometry[X]) {
-				debug2("finishing the torus!");
+			} else if(found == x_size) {
+				debug2("Algo(%d) finishing the torus!", algo);
+
 				if(best_path)
-					list_destroy(best_path);
-				best_path = list_create(_delete_path_list);
+					list_flush(best_path);
+				else
+					best_path =
+						list_create(_delete_path_list);
+
 				if(path)
-					list_destroy(path);
-				path = list_create(_delete_path_list);
-				_finish_torus(curr_switch, 
-					      0, 
-					      results, 
-					      X, 
-					      0, 
-					      start);
+					list_flush(path);
+				else
+					path = list_create(_delete_path_list);
+				
+				_finish_torus(curr_switch, 0, X, 0, start);
+
 				if(best_count < BEST_COUNT_INIT) {
-					debug2("Found a best path with %d "
-					       "steps.", best_count);
+					debug2("Algo(%d) Found a best path "
+					       "with %d steps.",
+					       algo, best_count);
 					_set_best_path();
 					return 1;
 				} else {
@@ -4105,10 +4129,11 @@ static int _find_x_path(List results, ba_node_t *ba_node,
 				continue;
 			}
 
-			if (!_node_used(next_node, geometry)) {
+			if (!_node_used(next_node, x_size)) {
 #ifdef HAVE_BG
-				debug2("found %d looking at %c%c%c "
+				debug2("Algo(%d) found %d looking at %c%c%c "
 				       "%d going to %c%c%c %d",
+				       algo,
 				       found,
 				       alpha_num[ba_node->coord[X]],
 				       alpha_num[ba_node->coord[Y]],
@@ -4120,13 +4145,11 @@ static int _find_x_path(List results, ba_node_t *ba_node,
 				       port_tar);
 #endif
 				itr = list_iterator_create(results);
-				while((check_node = 
-				       (ba_node_t*) list_next(itr))) {
-					if((node_tar[X] == 
-					    check_node->coord[X] && 
-					    node_tar[Y] == 
-					    check_node->coord[Y] && 
-					    node_tar[Z] == 
+				while((check_node = list_next(itr))) {
+					if((node_tar[X] == check_node->coord[X]
+					    && node_tar[Y] == 
+					    check_node->coord[Y]
+					    && node_tar[Z] == 
 					    check_node->coord[Z])) {
 						break;
 					}
@@ -4134,7 +4157,8 @@ static int _find_x_path(List results, ba_node_t *ba_node,
 				list_iterator_destroy(itr);
 				if(!check_node) {
 #ifdef HAVE_BG
-					debug2("add %c%c%c",
+					debug2("Algo(%d) add %c%c%c",
+					       algo,
 					       alpha_num[next_node->coord[X]],
 					       alpha_num[next_node->coord[Y]],
 					       alpha_num[next_node->coord[Z]]);
@@ -4142,8 +4166,9 @@ static int _find_x_path(List results, ba_node_t *ba_node,
 					list_append(results, next_node);
 				} else {
 #ifdef HAVE_BG
-					debug2("Hey this is already added "
-					       "%c%c%c",
+					debug2("Algo(%d) Hey this is already "
+					       "added %c%c%c",
+					       algo,
 					       alpha_num[node_tar[X]],
 					       alpha_num[node_tar[Y]],
 					       alpha_num[node_tar[Z]]);
@@ -4151,19 +4176,20 @@ static int _find_x_path(List results, ba_node_t *ba_node,
 					continue;
 				}
 				found++;
-				
+
+				/* look for the next closest midplane */
 				if(!_find_x_path(results, next_node, 
-						 start, first, geometry, 
-						 found, conn_type)) {
-					_remove_node(results,
-						     next_node->coord);
+						 start, x_size, 
+						 found, conn_type, algo)) {
+					_remove_node(results, next_node->coord);
 					found--;
 					continue;
 				} else {
 				found_path:
 #ifdef HAVE_BG
-					debug2("added node %c%c%c %d %d -> "
-					       "%c%c%c %d %d",
+					debug2("Algo(%d) added node %c%c%c "
+					       "%d %d -> %c%c%c %d %d",
+					       algo,
 					       alpha_num[ba_node->coord[X]],
 					       alpha_num[ba_node->coord[Y]],
 					       alpha_num[ba_node->coord[Z]],
@@ -4175,341 +4201,104 @@ static int _find_x_path(List results, ba_node_t *ba_node,
 					       port_tar,
 					       target_port);
 #endif					
-				found_one:			
-					if(geometry[X] != 1) {
-						curr_switch->
-							int_wire
-							[source_port].used = 1;
-						curr_switch->
-							int_wire
-							[source_port].port_tar
-							= ports_to_try[i];
-						curr_switch->
-							int_wire
-							[ports_to_try[i]].used
-							= 1;
-						curr_switch->
-							int_wire
-							[ports_to_try[i]].
-							port_tar = source_port;
+					curr_switch->int_wire[source_port].used
+						= 1;
+					curr_switch->int_wire
+						[source_port].port_tar
+						= ports_to_try[i];
+					curr_switch->int_wire
+						[ports_to_try[i]].used = 1;
+					curr_switch->int_wire
+						[ports_to_try[i]].port_tar 
+						= source_port;
 					
-						next_switch->
-							int_wire[port_tar].used
-							= 1;
-						next_switch->
-							int_wire
-							[port_tar].port_tar
-							= target_port;
-						next_switch->
-							int_wire
-							[target_port].used = 1;
-						next_switch->
-							int_wire
-							[target_port].port_tar
-							= port_tar;
-					}
+					next_switch->int_wire[port_tar].used
+						= 1;
+					next_switch->int_wire[port_tar].port_tar
+						= target_port;
+					next_switch->int_wire[target_port].used
+						= 1;
+					next_switch->int_wire
+						[target_port].port_tar
+						= port_tar;
 					return 1;
-
 				}
 			} 			
 		}
 	}
 
-	debug2("couldn't find path");
-	return 0;
-}
-
-static int _find_x_path2(List results, ba_node_t *ba_node, 
-			 int *start, int *first, int *geometry, 
-			 int found, int conn_type) 
-{
-	ba_switch_t *curr_switch = NULL; 
-	ba_switch_t *next_switch = NULL; 
-	
-	int port_tar = 0;
-	int source_port=0;
-	int target_port=0;
-	int broke = 0, not_first = 0;
-	int ports_to_try[2] = {3,5};
-	int *node_tar = NULL;
-	int i = 0;
-	ba_node_t *next_node = NULL;
-	ba_node_t *check_node = NULL;
-	
-	ListIterator itr;
-	
-	if(!ba_node)
+	if(algo == BLOCK_ALGO_FIRST) {
+		debug2("Algo(%d) couldn't find path", algo);
 		return 0;
-
-	if(!source_port) {
-		target_port=1;
-		ports_to_try[0] = 2;
-		ports_to_try[1] = 4;
+	} else if(algo == BLOCK_ALGO_SECOND) {
+#ifdef HAVE_BG
+		debug2("Algo(%d) looking for the next free node "
+		       "starting at %c%c%c",
+		       algo,
+		       alpha_num[ba_node->coord[X]],
+		       alpha_num[ba_node->coord[Y]],
+		       alpha_num[ba_node->coord[Z]]);
+#endif
+		
+		if(best_path)
+			list_flush(best_path);
+		else
+			best_path = list_create(_delete_path_list);
+		
+		if(path)
+			list_flush(path);
+		else
+			path = list_create(_delete_path_list);
+		
+		_find_next_free_using_port_2(curr_switch, 0, results, X, 0);
+		
+		if(best_count < BEST_COUNT_INIT) {
+			debug2("Algo(%d) yes found next free %d", algo,
+			       best_count);
+			node_tar = _set_best_path();
 			
-	}
-	curr_switch = &ba_node->axis_switch[X];
-	if(geometry[X] == 1) {
-		goto found_one;
-	}
-	debug2("found - %d",found);
-	for(i=0;i<2;i++) {
-		/* check to make sure it isn't used */
-		if(!curr_switch->int_wire[ports_to_try[i]].used) {
-			node_tar = curr_switch->
-				ext_wire[ports_to_try[i]].node_tar;
-			port_tar = curr_switch->
-				ext_wire[ports_to_try[i]].port_tar;
-			if((node_tar[X] == 
-			    start[X] && 
-			    node_tar[Y] == 
-			    start[Y] && 
-			    node_tar[Z] == 
-			    start[Z])) {
-				broke = 1;
-				goto broke_it;
-			}
-			if((node_tar[X] == 
-			    ba_node->coord[X] && 
-			    node_tar[Y] == 
-			    ba_node->coord[Y] && 
-			    node_tar[Z] == 
-			    ba_node->coord[Z])) {
-				continue;
-			}
-			itr = list_iterator_create(results);
-			while((next_node = (ba_node_t*) list_next(itr))) {
-				if((node_tar[X] == 
-				    next_node->coord[X] && 
-				    node_tar[Y] == 
-				    next_node->coord[Y] && 
-				    node_tar[Z] == 
-				    next_node->coord[Z])) {
-					not_first = 1;
-					break;
-				}
-				
-			}
-			list_iterator_destroy(itr);
-			if(not_first && found<DIM_SIZE[X]) {
-				not_first = 0;
-				continue;
-			} 
-			not_first = 0;
-				
-		broke_it:
-			next_node = &ba_system_ptr->
-				grid[node_tar[X]]
+			next_node = &ba_system_ptr->grid[node_tar[X]]
 #ifdef HAVE_BG
 				[node_tar[Y]]
 				[node_tar[Z]]
 #endif
 				;
-
+			
 			next_switch = &next_node->axis_switch[X];
-		
 			
- 			if((conn_type == SELECT_MESH) 
-			   && (found == (geometry[X]))) {
-				debug2("we found the end of the mesh");
+#ifdef HAVE_BG
+			debug2("Algo(%d) found %d looking at %c%c%c "
+			       "going to %c%c%c %d",
+			       algo, found,
+			       alpha_num[ba_node->coord[X]],
+			       alpha_num[ba_node->coord[Y]],
+			       alpha_num[ba_node->coord[Z]],
+			       alpha_num[node_tar[X]],
+			       alpha_num[node_tar[Y]],
+			       alpha_num[node_tar[Z]],
+			       port_tar);
+#endif		
+			list_append(results, next_node);
+			found++;
+			if(_find_x_path(results, next_node, 
+					start, x_size, found,
+					conn_type, algo)) {
 				return 1;
+			} else {
+				found--;
+				_reset_the_path(curr_switch, 0, 1, X);
+				_remove_node(results, next_node->coord);
+				debug2("Algo(%d) couldn't finish "
+				       "the path off this one", algo);
 			}
-			debug3("Broke = %d Found = %d geometry[X] = %d",
-			       broke, found, geometry[X]);
-			if(broke && (found == geometry[X])) {
-				goto found_path;
-			} else if(found == geometry[X]) {
-				debug2("finishing the torus!");
-				if(best_path)
-					list_destroy(best_path);
-				best_path = list_create(_delete_path_list);
-				if(path)
-					list_destroy(path);
-				path = list_create(_delete_path_list);
-				_finish_torus(curr_switch, 
-					      0, 
-					      results, 
-					      X, 
-					      0, 
-					      start);
-				if(best_count < BEST_COUNT_INIT) {
-					debug2("Found a best path with %d "
-					       "steps.", best_count);
-					_set_best_path();
-					return 1;
-				} else {
-					return 0;
-				}
-			} else if(broke) {
-				broke = 0;
-				continue;
-			}
-
-			if (!_node_used(next_node, geometry)) {
-#ifdef HAVE_BG
-				debug2("found %d looking at %c%c%c "
-				       "%d going to %c%c%c %d",
-				       found,
-				       alpha_num[ba_node->coord[X]],
-				       alpha_num[ba_node->coord[Y]],
-				       alpha_num[ba_node->coord[Z]],
-				       ports_to_try[i],
-				       alpha_num[node_tar[X]],
-				       alpha_num[node_tar[Y]],
-				       alpha_num[node_tar[Z]],
-				       port_tar);
-#endif
-				itr = list_iterator_create(results);
-				while((check_node = 
-				       (ba_node_t*) list_next(itr))) {
-					if((node_tar[X] == 
-					    check_node->coord[X] && 
-					    node_tar[Y] == 
-					    check_node->coord[Y] && 
-					    node_tar[Z] == 
-					    check_node->coord[Z])) {
-						break;
-					}
-				}
-				list_iterator_destroy(itr);
-				if(!check_node) {
-#ifdef HAVE_BG
-					debug2("add %c%c%c",
-					       alpha_num[next_node->coord[X]],
-					       alpha_num[next_node->coord[Y]],
-					       alpha_num[next_node->coord[Z]]);
-#endif					       
-					list_append(results, next_node);
-				} else {
-#ifdef HAVE_BG
-					debug2("Hey this is already added "
-					       "%c%c%c",
-					       alpha_num[node_tar[X]],
-					       alpha_num[node_tar[Y]],
-					       alpha_num[node_tar[Z]]);
-#endif
-					continue;
-				}
-				found++;
-				
-				if(!_find_x_path2(results, next_node, 
-						 start, first, geometry, 
-						 found, conn_type)) {
-					_remove_node(results,
-						     next_node->coord);
-					found--;
-					continue;
-				} else {
-				found_path:
-#ifdef HAVE_BG
-					debug2("added node %c%c%c %d %d -> "
-					       "%c%c%c %d %d",
-					       alpha_num[ba_node->coord[X]],
-					       alpha_num[ba_node->coord[Y]],
-					       alpha_num[ba_node->coord[Z]],
-					       source_port,
-					       ports_to_try[i],
-					       alpha_num[node_tar[X]],
-					       alpha_num[node_tar[Y]],
-					       alpha_num[node_tar[Z]],
-					       port_tar,
-					       target_port);
-#endif					
-				found_one:			
-					if(geometry[X] != 1) {
-						curr_switch->
-							int_wire
-							[source_port].used = 1;
-						curr_switch->
-							int_wire
-							[source_port].port_tar
-							= ports_to_try[i];
-						curr_switch->
-							int_wire
-							[ports_to_try[i]].used
-							= 1;
-						curr_switch->
-							int_wire
-							[ports_to_try[i]].
-							port_tar = source_port;
-					
-						next_switch->
-							int_wire[port_tar].used
-							= 1;
-						next_switch->
-							int_wire
-							[port_tar].port_tar
-							= target_port;
-						next_switch->
-							int_wire
-							[target_port].used = 1;
-						next_switch->
-							int_wire
-							[target_port].port_tar
-							= port_tar;
-					}
-					return 1;
-				}
-			} 			
-		}
+		} 
+		
+		debug2("Algo(%d) couldn't find path", algo);
+		return 0;
 	}
-#ifdef HAVE_BG
-	debug2("looking for the next free node starting at %c%c%c",
-	       alpha_num[ba_node->coord[X]],
-	       alpha_num[ba_node->coord[Y]],
-	       alpha_num[ba_node->coord[Z]]);
-#endif
 
-	if(best_path)
-		list_destroy(best_path);
-	best_path = list_create(_delete_path_list);
-	if(path)
-		list_destroy(path);
-	path = list_create(_delete_path_list);
-	
-	_find_next_free_using_port_2(curr_switch, 
-				     0, 
-				     results, 
-				     X, 
-				     0);
-	if(best_count < BEST_COUNT_INIT) {
-		debug2("yes found next free %d", best_count);
-		node_tar = _set_best_path();
-
-		next_node = &ba_system_ptr->
-			grid[node_tar[X]]
-#ifdef HAVE_BG
-			[node_tar[Y]]
-			[node_tar[Z]]
-#endif
-			;
-
-		next_switch = &next_node->axis_switch[X];
-		
-#ifdef HAVE_BG
-		debug2("found %d looking at %c%c%c going to %c%c%c %d",
-		       found,
-		       alpha_num[ba_node->coord[X]],
-		       alpha_num[ba_node->coord[Y]],
-		       alpha_num[ba_node->coord[Z]],
-		       alpha_num[node_tar[X]],
-		       alpha_num[node_tar[Y]],
-		       alpha_num[node_tar[Z]],
-		       port_tar);
-#endif		
-		list_append(results, next_node);
-		found++;
-		if(_find_x_path2(results, next_node, 
-				start, first, geometry, found, conn_type)) {
-			return 1;
-		} else {
-			found--;
-			_reset_the_path(curr_switch, 0, 1, X);
-			_remove_node(results, next_node->coord);
-			debug2("couldn't finish the path off this one");
-		}
-	} 
-	
-	debug2("couldn't find path 2");
+	error("We got here meaning there is a bad algo, "
+	      "but this should never happen algo(%d)", algo);
 	return 0;
 }
 
@@ -4614,8 +4403,8 @@ static int _find_next_free_using_port_2(ba_switch_t *curr_switch,
 		if((source_port%2))
 			target_port=1;
 		
-		list_destroy(best_path);
-		best_path = list_create(_delete_path_list);
+		list_flush(best_path);
+		
 		found = true;
 		path_add->out = target_port;
 		list_push(path, path_add);
@@ -4634,7 +4423,7 @@ static int _find_next_free_using_port_2(ba_switch_t *curr_switch,
 			temp_switch->dim = path_switch->dim;
 			temp_switch->in = path_switch->in;
 			temp_switch->out = path_switch->out;
-			list_append(best_path,temp_switch);
+			list_append(best_path, temp_switch);
 		}
 		list_iterator_destroy(itr);
 		best_count = count;
@@ -4711,205 +4500,28 @@ return_0:
 	return 0;
 }
 
-/* static int _find_passthrough(ba_switch_t *curr_switch, int source_port,  */
-/* 			     List nodes, int dim, int count, int highest_phys_x)  */
-/* { */
-/* 	ba_switch_t *next_switch = NULL;  */
-/* 	ba_path_switch_t *path_add =  */
-/* 		(ba_path_switch_t *) xmalloc(sizeof(ba_path_switch_t)); */
-/* 	ba_path_switch_t *path_switch = NULL; */
-/* 	ba_path_switch_t *temp_switch = NULL; */
-/* 	int port_tar; */
-/* 	int target_port = 0; */
-/* 	int ports_to_try[2] = {3,5}; */
-/* 	int *node_tar= curr_switch->ext_wire[0].node_tar; */
-/* 	int *node_src = curr_switch->ext_wire[0].node_tar; */
-/* 	int i; */
-/* 	int used=0; */
-/* 	int broke = 0; */
-/* 	ba_node_t *ba_node = NULL; */
-	
-/* 	ListIterator itr; */
-/* 	static bool found = false; */
-
-/* 	path_add->geometry[X] = node_src[X]; */
-/* #ifdef HAVE_BG */
-/* 	path_add->geometry[Y] = node_src[Y]; */
-/* 	path_add->geometry[Z] = node_src[Z]; */
-/* #endif */
-/* 	path_add->dim = dim; */
-/* 	path_add->in = source_port; */
-	
-/* 	if(count>=best_count) { */
-/* 		xfree(path_add); */
-/* 		return 0; */
-/* 	} */
-
-/* 	itr = list_iterator_create(nodes); */
-/* 	while((ba_node = (ba_node_t*) list_next(itr))) { */
-		
-/* #ifdef HAVE_BG */
-/* 		if(node_tar[X] == ba_node->coord[X]  */
-/* 		   && node_tar[Y] == ba_node->coord[Y]  */
-/* 		   && node_tar[Z] == ba_node->coord[Z]) { */
-/* 			broke = 1; */
-/* 			break; */
-/* 		} */
-/* #else */
-/* 		if(node_tar[X] == ba_node->coord[X]) { */
-/* 			broke = 1; */
-/* 			break; */
-/* 		} */
-/* #endif */
-		
-/* 	} */
-/* 	list_iterator_destroy(itr); */
-/* 	ba_node = &ba_system_ptr-> */
-/* 		grid[node_tar[X]] */
-/* #ifdef HAVE_BG */
-/* 		[node_tar[Y]] */
-/* 		[node_tar[Z]] */
-/* #endif */
-/* 		; */
-/* 	if(!broke && count>0 */
-/* 	   && !ba_node->used  */
-/* 	   && (ba_node->phys_x < highest_phys_x)) { */
-		
-/* 		debug3("this one not found %c%c%c", */
-/* 		       alpha_num[node_tar[X]], */
-/* 		       alpha_num[node_tar[Y]], */
-/* 		       alpha_num[node_tar[Z]]); */
-		
-/* 		broke = 0; */
-				
-/* 		if((source_port%2)) */
-/* 			target_port=1; */
-		
-/* 		list_destroy(best_path); */
-/* 		best_path = list_create(_delete_path_list); */
-/* 		found = true; */
-/* 		path_add->out = target_port; */
-/* 		list_push(path, path_add); */
-		
-/* 		itr = list_iterator_create(path); */
-/* 		while((path_switch = (ba_path_switch_t*) list_next(itr))){ */
-		
-/* 			temp_switch = (ba_path_switch_t *)  */
-/* 				xmalloc(sizeof(ba_path_switch_t)); */
-			 
-/* 			temp_switch->geometry[X] = path_switch->geometry[X]; */
-/* #ifdef HAVE_BG */
-/* 			temp_switch->geometry[Y] = path_switch->geometry[Y]; */
-/* 			temp_switch->geometry[Z] = path_switch->geometry[Z]; */
-/* #endif */
-/* 			temp_switch->dim = path_switch->dim; */
-/* 			temp_switch->in = path_switch->in; */
-/* 			temp_switch->out = path_switch->out; */
-/* 			list_append(best_path,temp_switch); */
-/* 		} */
-/* 		list_iterator_destroy(itr); */
-/* 		best_count = count; */
-/* 		return 1; */
-/* 	}  */
-
-/* 	if(source_port==0 || source_port==3 || source_port==5) { */
-/* 		if(count==0) { */
-/* 			ports_to_try[0] = 2; */
-/* 			ports_to_try[1] = 4;	 */
-/* 		} else { */
-/* 			ports_to_try[0] = 4; */
-/* 			ports_to_try[1] = 2;	 */
-/* 		} */
-/* 	} */
-			
-/* 	for(i=0;i<2;i++) { */
-/* 		used=0; */
-/* 		if(!curr_switch->int_wire[ports_to_try[i]].used) { */
-/* 			itr = list_iterator_create(path); */
-/* 			while((path_switch =  */
-/* 			       (ba_path_switch_t*) list_next(itr))){ */
-				
-/* 				if(((path_switch->geometry[X] == node_src[X])  */
-/* #ifdef HAVE_BG */
-/* 				    && (path_switch->geometry[Y]  */
-/* 					== node_src[Y]) */
-/* 				    && (path_switch->geometry[Z]  */
-/* 					== node_tar[Z]) */
-/* #endif */
-/* 					   )) { */
-					
-/* 					if( path_switch->out */
-/* 					    == ports_to_try[i]) { */
-/* 						used = 1; */
-/* 						break; */
-/* 					} */
-/* 				} */
-/* 			} */
-/* 			list_iterator_destroy(itr); */
-			
-/* 			if(curr_switch-> */
-/* 			   ext_wire[ports_to_try[i]].node_tar[X] */
-/* 			   == curr_switch->ext_wire[0].node_tar[X]   */
-/* #ifdef HAVE_BG */
-/* 			   && curr_switch-> */
-/* 			   ext_wire[ports_to_try[i]].node_tar[Y]  */
-/* 			   == curr_switch->ext_wire[0].node_tar[Y]  */
-/* 			   && curr_switch-> */
-/* 			   ext_wire[ports_to_try[i]].node_tar[Z]  */
-/* 			   == curr_switch->ext_wire[0].node_tar[Z] */
-/* #endif */
-/* 				) { */
-/* 				continue; */
-/* 			} */
-						
-/* 			if(!used) { */
-/* 				port_tar = curr_switch-> */
-/* 					ext_wire[ports_to_try[i]].port_tar; */
-/* 				node_tar = curr_switch-> */
-/* 					ext_wire[ports_to_try[i]].node_tar; */
-				
-/* 				next_switch = &ba_system_ptr-> */
-/* 					grid[node_tar[X]] */
-/* #ifdef HAVE_BG */
-/* 					[node_tar[Y]] */
-/* 					[node_tar[Z]] */
-/* #endif */
-/* 					.axis_switch[X]; */
-				
-/* 				count++; */
-/* 				path_add->out = ports_to_try[i]; */
-/* 				list_push(path, path_add); */
-/* 				debug3("looking at this one " */
-/* 				       "%c%c%c %d -> %c%c%c %d", */
-/* 				       alpha_num[ba_node->coord[X]], */
-/* 				       alpha_num[ba_node->coord[Y]], */
-/* 				       alpha_num[ba_node->coord[Z]], */
-/* 				       ports_to_try[i], */
-/* 				       alpha_num[node_tar[X]], */
-/* 				       alpha_num[node_tar[Y]], */
-/* 				       alpha_num[node_tar[Z]], */
-/* 				       port_tar); */
-		
-/* 				_find_passthrough(next_switch, port_tar, nodes, */
-/* 						dim, count, highest_phys_x); */
-/* 				while((temp_switch = list_pop(path))  */
-/* 				      != path_add){ */
-/* 					xfree(temp_switch); */
-/* 					debug3("something here 2"); */
-/* 				} */
-/* 			} */
-/* 		} */
-/* 	} */
-/* 	xfree(path_add); */
-/* 	return 0; */
-/* } */
+/*
+ * Used to tie the end of the block to the start. best_path and path
+ * should both be set up before calling this function.
+ *
+ * IN: curr_switch -
+ * IN: source_port - 
+ * IN: dim -
+ * IN: count -
+ * IN: start -
+ * 
+ * RET: 0 on failure, 1 on success
+ *
+ * Sets up global variables best_path and best_count.  On success
+ * best_count will be < BEST_COUNT_INIT, and you can call _set_best_path
+ * to apply this path to the main system (ba_system_ptr).
+ */
 
 static int _finish_torus(ba_switch_t *curr_switch, int source_port,
-			 List nodes, int dim, int count, int *start)
+			 int dim, int count, int *start)
 {
 	ba_switch_t *next_switch = NULL;
-	ba_path_switch_t *path_add =
-		(ba_path_switch_t *) xmalloc(sizeof(ba_path_switch_t));
+	ba_path_switch_t *path_add = xmalloc(sizeof(ba_path_switch_t));
 	ba_path_switch_t *path_switch = NULL;
 	ba_path_switch_t *temp_switch = NULL;
 	int port_tar;
@@ -4945,18 +4557,16 @@ static int _finish_torus(ba_switch_t *curr_switch, int source_port,
 			target_port=1;
 		if(!curr_switch->int_wire[target_port].used) {
 			
-			list_destroy(best_path);
-			best_path = list_create(_delete_path_list);
+			list_flush(best_path);
+			
 			found = true;
 			path_add->out = target_port;
 			list_push(path, path_add);
 			
 			itr = list_iterator_create(path);
-			while((path_switch =
-			       (ba_path_switch_t*) list_next(itr))){
+			while((path_switch = list_next(itr))) {
 				
-				temp_switch = (ba_path_switch_t *)
-					xmalloc(sizeof(ba_path_switch_t));
+				temp_switch = xmalloc(sizeof(ba_path_switch_t));
 				
 				temp_switch->geometry[X] =
 					path_switch->geometry[X];
@@ -4986,8 +4596,7 @@ static int _finish_torus(ba_switch_t *curr_switch, int source_port,
 		used=0;
 		if(!curr_switch->int_wire[ports_to_try[i]].used) {
 			itr = list_iterator_create(path);
-			while((path_switch =
-			       (ba_path_switch_t*) list_next(itr))){
+			while((path_switch = list_next(itr))){
 				
 				if(((path_switch->geometry[X] == node_src[X])
 #ifdef HAVE_BG
@@ -5022,8 +4631,7 @@ static int _finish_torus(ba_switch_t *curr_switch, int source_port,
 				node_tar = curr_switch->
 					ext_wire[ports_to_try[i]].node_tar;
 				
-				next_switch = &ba_system_ptr->
-					grid[node_tar[X]]
+				next_switch = &ba_system_ptr->grid[node_tar[X]]
 #ifdef HAVE_BG
 					[node_tar[Y]]
 					[node_tar[Z]]
@@ -5034,8 +4642,8 @@ static int _finish_torus(ba_switch_t *curr_switch, int source_port,
 				count++;
 				path_add->out = ports_to_try[i];
 				list_push(path, path_add);
-				_finish_torus(next_switch, port_tar, nodes,
-						dim, count, start);
+				_finish_torus(next_switch, port_tar, 
+					      dim, count, start);
 				while((temp_switch = list_pop(path))
 				      != path_add){
 					xfree(temp_switch);
@@ -5048,14 +4656,22 @@ static int _finish_torus(ba_switch_t *curr_switch, int source_port,
        return 0;
 }
 
+/*
+ * Using best_path set up previously by _finish_torus or
+ * _find_next_free_using_port_2, set up the path contained there
+ * in the main virtual system.  This will also set the passthrough
+ * flag if there was a passthrough used.
+ */
 static int *_set_best_path()
 {
 	ListIterator itr;
 	ba_path_switch_t *path_switch = NULL;
 	ba_switch_t *curr_switch = NULL; 
 	int *geo = NULL;
+
 	if(!best_path)
 		return NULL;
+
 	itr = list_iterator_create(best_path);
 	while((path_switch = (ba_path_switch_t*) list_next(itr))) {
 		if(passthrough && path_switch->in > 1 && path_switch->out > 1) {
@@ -5070,15 +4686,13 @@ static int *_set_best_path()
 		       path_switch->in, path_switch->out);
 		if(!geo)
 			geo = path_switch->geometry;
-		curr_switch = &ba_system_ptr->
-			grid
+		curr_switch = &ba_system_ptr->grid
 			[path_switch->geometry[X]]
 			[path_switch->geometry[Y]]
 			[path_switch->geometry[Z]].  
 			axis_switch[path_switch->dim];
 #else
-		curr_switch = &ba_system_ptr->
-			grid[path_switch->geometry[X]].
+		curr_switch = &ba_system_ptr->grid[path_switch->geometry[X]].
 			axis_switch[path_switch->dim];
 #endif
 	
diff --git a/src/plugins/select/bluegene/plugin/bg_job_place.c b/src/plugins/select/bluegene/plugin/bg_job_place.c
index a7435af07c4..f62594eede8 100644
--- a/src/plugins/select/bluegene/plugin/bg_job_place.c
+++ b/src/plugins/select/bluegene/plugin/bg_job_place.c
@@ -380,7 +380,7 @@ static bg_record_t *_find_matching_block(List block_list,
 	      test_only);
 		
 	itr = list_iterator_create(block_list);
-	while ((bg_record = (bg_record_t*) list_next(itr))) {		
+	while ((bg_record = list_next(itr))) {		
 		/* If test_only we want to fall through to tell the 
 		   scheduler that it is runnable just not right now. 
 		*/
@@ -636,6 +636,8 @@ static int _check_for_booted_overlapping_blocks(
 					 * bg_record
 					*/
 					list_remove(bg_record_itr);
+					slurm_mutex_lock(&block_state_mutex);
+
 					if(bg_record->original) {
 						debug3("This was a copy");
 						found_record =
@@ -651,8 +653,10 @@ static int _check_for_booted_overlapping_blocks(
 					}
 					destroy_bg_record(bg_record);
 					if(!found_record) {
-						error("1 this record wasn't "
-						      "found in the list!");
+						debug2("This record wasn't "
+						       "found in the bg_list, "
+						       "no big deal, it "
+						       "probably wasn't added");
 						//rc = SLURM_ERROR;
 					} else {
 						List temp_list =
@@ -663,6 +667,7 @@ static int _check_for_booted_overlapping_blocks(
 						free_block_list(temp_list);
 						list_destroy(temp_list);
 					}
+					slurm_mutex_unlock(&block_state_mutex);
 				} 
 				rc = 1;
 					
diff --git a/src/plugins/select/bluegene/plugin/bluegene.c b/src/plugins/select/bluegene/plugin/bluegene.c
index d3ff7a7749d..8c2b9410510 100644
--- a/src/plugins/select/bluegene/plugin/bluegene.c
+++ b/src/plugins/select/bluegene/plugin/bluegene.c
@@ -488,7 +488,6 @@ extern bg_record_t *find_and_remove_org_from_bg_list(List my_list,
 		if(bit_equal(bg_record->bitmap, found_record->bitmap)
 		   && bit_equal(bg_record->ionode_bitmap,
 				found_record->ionode_bitmap)) {
-			
 			if(!strcmp(bg_record->bg_block_id,
 				   found_record->bg_block_id)) {
 				list_remove(itr);
diff --git a/src/plugins/select/bluegene/plugin/slurm_prolog.c b/src/plugins/select/bluegene/plugin/slurm_prolog.c
index af652b7879c..f4ad1d0201d 100644
--- a/src/plugins/select/bluegene/plugin/slurm_prolog.c
+++ b/src/plugins/select/bluegene/plugin/slurm_prolog.c
@@ -176,10 +176,11 @@ static int _get_job_size(uint32_t job_id)
 }
 
 /*
- * Test if any BG blocks are in deallocating state 
+ * Test if any BG blocks are in deallocating state.  Since they are
+ * probably related to this job, we will want to sleep longer.
  * RET	1:  deallocate in progress
  *	0:  no deallocate in progress
- *	-1: error occurred
+ *     -1: error occurred
  */
 static int _partitions_dealloc()
 {
diff --git a/src/sacct/options.c b/src/sacct/options.c
index b7c2737fc28..da6921117a1 100644
--- a/src/sacct/options.c
+++ b/src/sacct/options.c
@@ -414,7 +414,7 @@ void _help_msg(void)
 	       "-c, --completion\n"
 	       "    Use job completion instead of accounting data.\n"
 	       "-C, --cluster\n"
-	       "    Only send data about this cluster.\n"
+	       "    Only send data about this cluster -1 for all clusters.\n"
 	       "-d, --dump\n"
 	       "    Dump the raw data records\n"
 	       "--duplicates\n"
@@ -622,7 +622,7 @@ void parse_command_line(int argc, char **argv)
 	char *dot = NULL;
 	bool brief_output = FALSE, long_output = FALSE;
 	bool all_users = 0;
-
+	bool all_clusters = 1;
 	static struct option long_options[] = {
 		{"all", 0,0, 'a'},
 		{"accounts", 1, 0, 'A'},
@@ -690,6 +690,10 @@ void parse_command_line(int argc, char **argv)
 			params.opt_completion = 1;
 			break;
 		case 'C':
+			if(!strcasecmp(optarg, "-1")) {
+				all_clusters = 1;
+				break;
+			}
 			if(!params.opt_cluster_list) 
 				params.opt_cluster_list =
 					list_create(slurm_destroy_char);
@@ -934,7 +938,15 @@ void parse_command_line(int argc, char **argv)
 	}
 
 	/* specific clusters requested? */
-	if (params.opt_verbose && params.opt_cluster_list 
+	if(all_clusters) {
+		if(params.opt_cluster_list 
+		   && list_count(params.opt_cluster_list)) {
+			list_destroy(params.opt_cluster_list);
+			params.opt_cluster_list = NULL;
+		}
+		if(params.opt_verbose)
+			fprintf(stderr, "Clusters requested:\n\t: all\n");
+	} else if (params.opt_verbose && params.opt_cluster_list 
 	    && list_count(params.opt_cluster_list)) {
 		fprintf(stderr, "Clusters requested:\n");
 		itr = list_iterator_create(params.opt_cluster_list);
diff --git a/src/sacctmgr/user_functions.c b/src/sacctmgr/user_functions.c
index d87cbb8f55d..66d4289610c 100644
--- a/src/sacctmgr/user_functions.c
+++ b/src/sacctmgr/user_functions.c
@@ -37,6 +37,7 @@
 \*****************************************************************************/
 
 #include "src/sacctmgr/sacctmgr.h"
+#include "src/common/uid.h"
 
 static int _set_cond(int *start, int argc, char *argv[],
 		     acct_user_cond_t *user_cond,
@@ -504,6 +505,7 @@ extern int sacctmgr_add_user(int argc, char *argv[])
 	while((name = list_next(itr))) {
 		user = NULL;
 		if(!sacctmgr_find_user_from_list(local_user_list, name)) {
+			uid_t pw_uid;
 			if(!default_acct) {
 				exit_code=1;
 				fprintf(stderr, " Need a default account for "
@@ -524,6 +526,22 @@ extern int sacctmgr_add_user(int argc, char *argv[])
 				}
 				first = 0;				
 			}
+			pw_uid = uid_from_string(name);
+			if(pw_uid == (uid_t) -1) {
+				char *warning = xstrdup_printf(
+					"There is no uid for user '%s'"
+					"\nAre you sure you want to continue?",
+					name);
+
+				if(!commit_check(warning)) {
+					xfree(warning);
+					rc = SLURM_ERROR;
+					list_flush(user_list);
+					goto no_default;
+				}
+				xfree(warning);
+			}
+
 			user = xmalloc(sizeof(acct_user_rec_t));
 			user->assoc_list = list_create(NULL);
 			user->name = xstrdup(name);
@@ -543,6 +561,7 @@ extern int sacctmgr_add_user(int argc, char *argv[])
 			}
 
 			user->admin_level = admin_level;
+			
 			xstrfmtcat(user_str, "  %s\n", name);
 
 			list_append(user_list, user);
diff --git a/src/salloc/Makefile.am b/src/salloc/Makefile.am
index e2da3019f5c..05fb0f3eb54 100644
--- a/src/salloc/Makefile.am
+++ b/src/salloc/Makefile.am
@@ -3,7 +3,7 @@
 AUTOMAKE_OPTIONS = foreign
 CLEANFILES = core.*
 
-INCLUDES = -I$(top_srcdir) 
+INCLUDES = -I$(top_srcdir) $(BG_INCLUDES)
 
 bin_PROGRAMS = salloc
 
diff --git a/src/salloc/Makefile.in b/src/salloc/Makefile.in
index 91ae04a4039..03315a66ab0 100644
--- a/src/salloc/Makefile.in
+++ b/src/salloc/Makefile.in
@@ -263,7 +263,7 @@ top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
 AUTOMAKE_OPTIONS = foreign
 CLEANFILES = core.*
-INCLUDES = -I$(top_srcdir) 
+INCLUDES = -I$(top_srcdir) $(BG_INCLUDES)
 salloc_SOURCES = salloc.c salloc.h opt.c opt.h
 convenience_libs = $(top_builddir)/src/api/libslurm.o -ldl
 salloc_LDADD = \
diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c
index de913220ffd..c1a48258353 100644
--- a/src/salloc/salloc.c
+++ b/src/salloc/salloc.c
@@ -50,6 +50,14 @@
 #include "src/salloc/salloc.h"
 #include "src/salloc/opt.h"
 
+#ifdef HAVE_BG
+#include "src/api/job_info.h"
+#include "src/api/node_select_info.h"
+#include "src/common/node_select.h"
+#include "src/plugins/select/bluegene/plugin/bg_boot_time.h"
+#include "src/plugins/select/bluegene/wrap_rm_api.h"
+#endif
+
 #define MAX_RETRIES 3
 
 char **command_argv;
@@ -76,6 +84,16 @@ static void _user_msg_handler(srun_user_msg_t *msg);
 static void _ping_handler(srun_ping_msg_t *msg);
 static void _node_fail_handler(srun_node_fail_msg_t *msg);
 
+#ifdef HAVE_BG
+
+#define POLL_SLEEP 3			/* retry interval in seconds  */
+
+static int _wait_bluegene_block_ready(
+	resource_allocation_response_msg_t *alloc);
+static int _blocks_dealloc();
+#endif
+
+
 int main(int argc, char *argv[])
 {
 	log_options_t logopt = LOG_OPTS_STDERR_ONLY;
@@ -187,6 +205,13 @@ int main(int argc, char *argv[])
 	 * Allocation granted!
 	 */
 	info("Granted job allocation %d", alloc->job_id);
+#ifdef HAVE_BG
+	if (!_wait_bluegene_block_ready(alloc)) {
+		error("Something is wrong with the boot of the block.");
+		goto relinquish;
+	}
+
+#endif
 	if (opt.bell == BELL_ALWAYS
 	    || (opt.bell == BELL_AFTER_DELAY
 		&& ((after - before) > DEFAULT_BELL_DELAY))) {
@@ -520,3 +545,93 @@ static void _node_fail_handler(srun_node_fail_msg_t *msg)
 {
 	error("Node failure on %s", msg->nodelist);
 }
+
+#ifdef HAVE_BG
+/* returns 1 if job and nodes are ready for job to begin, 0 otherwise */
+static int _wait_bluegene_block_ready(resource_allocation_response_msg_t *alloc)
+{
+	int is_ready = 0, i, rc;
+	char *block_id = NULL;
+	int cur_delay = 0;
+	int max_delay = BG_FREE_PREVIOUS_BLOCK + BG_MIN_BLOCK_BOOT +
+		(BG_INCR_BLOCK_BOOT * alloc->node_cnt);
+
+	select_g_get_jobinfo(alloc->select_jobinfo, SELECT_DATA_BLOCK_ID,
+			     &block_id);
+
+	for (i=0; (cur_delay < max_delay); i++) {
+		if(i == 1)
+			info("Waiting for block %s to become ready for job",
+			     block_id);
+		if (i) {
+			sleep(POLL_SLEEP);
+			rc = _blocks_dealloc();
+			if ((rc == 0) || (rc == -1)) 
+				cur_delay += POLL_SLEEP;
+			debug("still waiting");
+		}
+
+		rc = slurm_job_node_ready(alloc->job_id);
+
+		if (rc == READY_JOB_FATAL)
+			break;				/* fatal error */
+		if (rc == READY_JOB_ERROR)		/* error */
+			continue;			/* retry */
+		if ((rc & READY_JOB_STATE) == 0)	/* job killed */
+			break;
+		if (rc & READY_NODE_STATE) {		/* job and node ready */
+			is_ready = 1;
+			break;
+		}
+	}
+
+	if (is_ready)
+     		info("Block %s is ready for job", block_id);
+	else
+		error("Block %s still not ready", block_id);
+	xfree(block_id);
+
+	return is_ready;
+}
+
+/*
+ * Test if any BG blocks are in deallocating state.  Since they are
+ * probably related to this job, we will want to sleep longer.
+ * RET	1:  deallocate in progress
+ *	0:  no deallocate in progress
+ *     -1: error occurred
+ */
+static int _blocks_dealloc()
+{
+	static node_select_info_msg_t *bg_info_ptr = NULL, *new_bg_ptr = NULL;
+	int rc = 0, error_code = 0, i;
+	
+	if (bg_info_ptr) {
+		error_code = slurm_load_node_select(bg_info_ptr->last_update, 
+						   &new_bg_ptr);
+		if (error_code == SLURM_SUCCESS)
+			select_g_free_node_info(&bg_info_ptr);
+		else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
+			error_code = SLURM_SUCCESS;
+			new_bg_ptr = bg_info_ptr;
+		}
+	} else {
+		error_code = slurm_load_node_select((time_t) NULL, &new_bg_ptr);
+	}
+
+	if (error_code) {
+		error("slurm_load_partitions: %s\n",
+		      slurm_strerror(slurm_get_errno()));
+		return -1;
+	}
+	for (i=0; i<new_bg_ptr->record_count; i++) {
+		if(new_bg_ptr->bg_info_array[i].state 
+		   == RM_PARTITION_DEALLOCATING) {
+			rc = 1;
+			break;
+		}
+	}
+	bg_info_ptr = new_bg_ptr;
+	return rc;
+}
+#endif
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 165d92c96f2..ff9a8313348 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -304,8 +304,13 @@ int main(int argc, char *argv[])
 	slurmctld_cluster_name = xstrdup(slurmctld_conf.cluster_name);
 	accounting_enforce = slurmctld_conf.accounting_storage_enforce;
 	acct_db_conn = acct_storage_g_get_connection(true, false);
+
+	memset(&assoc_init_arg, 0, sizeof(assoc_init_args_t));
 	assoc_init_arg.enforce = accounting_enforce;
 	assoc_init_arg.remove_assoc_notify = _remove_assoc;
+	assoc_init_arg.refresh = 0;
+	assoc_init_arg.cache_level = ASSOC_MGR_CACHE_ALL;
+
 	if (assoc_mgr_init(acct_db_conn, &assoc_init_arg) &&
 	    accounting_enforce) {
 		error("assoc_mgr_init failure");
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index b45b3d9e2e9..8dd391c7a75 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -1878,6 +1878,7 @@ extern int job_complete(uint32_t job_id, uid_t uid, bool requeue,
 			xfree(job_ptr->state_desc);
 		} else
 			job_ptr->job_state = JOB_COMPLETE | job_comp_flag;
+		
 		if (suspended) {
 			job_ptr->end_time = job_ptr->suspend_time;
 			job_ptr->tot_sus_time += 
@@ -4320,55 +4321,24 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid)
 	}
 
 	if (job_specs->account) {
-		if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL)) {
-			info("update_job: attempt to modify account for "
-			     "non-pending job_id %u", job_specs->job_id);
-			error_code = ESLURM_DISABLED;
-		} else {
-			acct_association_rec_t assoc_rec, *assoc_ptr;
-			bzero(&assoc_rec, sizeof(acct_association_rec_t));
-
-			assoc_rec.uid       = job_ptr->user_id;
-			assoc_rec.partition = job_ptr->partition;
-			assoc_rec.acct      = job_specs->account;
-			if (assoc_mgr_fill_in_assoc(acct_db_conn, &assoc_rec,
-						    accounting_enforce, 
-						    &assoc_ptr)) {
-				info("job_update: invalid account %s for "
-				     "job_id %u",
-				     job_specs->account, job_ptr->job_id);
-				error_code = ESLURM_INVALID_ACCOUNT;
-			} else {
-				xfree(job_ptr->account);
-				if (assoc_rec.acct[0] != '\0') {
-					job_ptr->account = 
-							xstrdup(assoc_rec.acct);
-					info("update_job: setting account to "
-					     "%s for job_id %u",
-					     assoc_rec.acct, job_ptr->job_id);
-				} else {
-					info("update_job: cleared account for "
-					     "job_id %u",
-					     job_specs->job_id);
-				}
-				job_ptr->assoc_id = assoc_rec.id;
-				job_ptr->assoc_ptr = (void *) assoc_ptr;
-			}
-		}
+		int rc = update_job_account("update_job", job_ptr, 
+					    job_specs->account);
+		if (rc != SLURM_SUCCESS)
+			error_code = rc;
 	}
 
 	if (job_specs->ntasks_per_node != (uint16_t) NO_VAL) {
 		if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL))
 			error_code = ESLURM_DISABLED;
 		else if (super_user) {
-			detail_ptr->ntasks_per_node = 
-					job_specs->ntasks_per_node;
+			detail_ptr->ntasks_per_node = job_specs->
+						      ntasks_per_node;
 			info("update_job: setting ntasks_per_node to %u for "
 			     "job_id %u", job_specs->ntasks_per_node,
 			     job_specs->job_id);
 		} else {
-			error("Not super user: setting ntasks_oper_node to job %u",
-			      job_specs->job_id);
+			error("Not super user: setting ntasks_oper_node to "
+			      "job %u", job_specs->job_id);
 			error_code = ESLURM_ACCESS_DENIED;
 		}
 	}
@@ -5077,6 +5047,16 @@ extern void job_completion_logger(struct job_record  *job_ptr)
 	}
 
 	g_slurm_jobcomp_write(job_ptr);
+
+	/* 
+	 * This means the job wasn't ever eligible, but we want to
+	 * keep track of all jobs, so we will set the db_inx to
+	 * INFINITE and the database will understand what happened.
+	 */ 
+	if(!job_ptr->nodes && !job_ptr->db_index) {
+		jobacct_storage_g_job_start(acct_db_conn, job_ptr);
+	}
+
 	jobacct_storage_g_job_complete(acct_db_conn, job_ptr);
 }
 
@@ -5686,7 +5666,8 @@ extern int job_cancel_by_assoc_id(uint32_t assoc_id)
 		if ((job_ptr->assoc_id != assoc_id) || 
 		    IS_JOB_FINISHED(job_ptr))
 			continue;
-		info("Association deleted, cancelling job %u", job_ptr->job_id);
+		info("Association deleted, cancelling job %u", 
+		     job_ptr->job_id);
 		job_signal(job_ptr->job_id, SIGKILL, 0, 0);
 		job_ptr->state_reason = FAIL_BANK_ACCOUNT;
 		xfree(job_ptr->state_desc);
@@ -5695,3 +5676,55 @@ extern int job_cancel_by_assoc_id(uint32_t assoc_id)
 	list_iterator_destroy(job_iterator);
 	return cnt;
 }
+
+/*
+ * Modify the account associated with a pending job
+ * IN module - where this is called from
+ * IN job_ptr - pointer to job which should be modified
+ * IN new_account - desired account name
+ * RET SLURM_SUCCESS or error code
+ */
+extern int update_job_account(char *module, struct job_record *job_ptr, 
+			      char *new_account)
+{
+	acct_association_rec_t assoc_rec, *assoc_ptr;
+
+	if ((!IS_JOB_PENDING(job_ptr)) || (job_ptr->details == NULL)) {
+		info("%s: attempt to modify account for non-pending "
+		     "job_id %u", module, job_ptr->job_id);
+		return ESLURM_DISABLED;
+	}
+
+
+	bzero(&assoc_rec, sizeof(acct_association_rec_t));
+	assoc_rec.uid       = job_ptr->user_id;
+	assoc_rec.partition = job_ptr->partition;
+	assoc_rec.acct      = new_account;
+	if (assoc_mgr_fill_in_assoc(acct_db_conn, &assoc_rec,
+				    accounting_enforce, &assoc_ptr)) {
+		info("%s: invalid account %s for job_id %u",
+		     module, new_account, job_ptr->job_id);
+		return ESLURM_INVALID_ACCOUNT;
+	}
+
+
+	xfree(job_ptr->account);
+	if (assoc_rec.acct[0] != '\0') {
+		job_ptr->account = xstrdup(assoc_rec.acct);
+		info("%s: setting account to %s for job_id %u",
+		     module, assoc_rec.acct, job_ptr->job_id);
+	} else {
+		info("%s: cleared account for job_id %u",
+		     module, job_ptr->job_id);
+	}
+	job_ptr->assoc_id = assoc_rec.id;
+	job_ptr->assoc_ptr = (void *) assoc_ptr;
+
+	if (job_ptr->details && job_ptr->details->begin_time) {
+		/* Update account associated with the eligible time */
+		jobacct_storage_g_job_start(acct_db_conn, job_ptr);
+	}
+	last_job_update = time(NULL);
+
+	return SLURM_SUCCESS;
+}
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index 713f98a0b34..7d36219eba6 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -548,7 +548,11 @@ extern int make_batch_job_cred(batch_job_launch_msg_t *launch_msg_ptr,
 	cred_arg.jobid     = launch_msg_ptr->job_id;
 	cred_arg.stepid    = launch_msg_ptr->step_id;
 	cred_arg.uid       = launch_msg_ptr->uid;
+#ifdef HAVE_FRONT_END
+	cred_arg.hostlist  = node_record_table_ptr[0].name;
+#else
 	cred_arg.hostlist  = launch_msg_ptr->nodes;
+#endif
 	if (job_ptr->details == NULL)
 		cred_arg.job_mem = 0;
 	else if (job_ptr->details->job_min_memory & MEM_PER_CPU) {
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index 6a977229a4d..6f4be965c34 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -519,8 +519,11 @@ static int _make_step_cred(struct step_record *step_rec,
 	cred_arg.stepid   = step_rec->step_id;
 	cred_arg.uid      = job_ptr->user_id;
 	cred_arg.job_mem  = job_ptr->details->job_min_memory;
+#ifdef HAVE_FRONT_END
+	cred_arg.hostlist = node_record_table_ptr[0].name;
+#else
 	cred_arg.hostlist = step_rec->step_layout->node_list;
-	
+#endif
 	cred_arg.alloc_lps_cnt = job_ptr->alloc_lps_cnt;
 	if ((cred_arg.alloc_lps_cnt > 0) &&
 	    bit_equal(job_ptr->node_bitmap, step_rec->step_node_bitmap)) {
@@ -554,8 +557,7 @@ static int _make_step_cred(struct step_record *step_rec,
 		cred_arg.alloc_lps = NULL;
 	}
 
-	*slurm_cred = slurm_cred_create(slurmctld_config.cred_ctx, 
-			&cred_arg);
+	*slurm_cred = slurm_cred_create(slurmctld_config.cred_ctx, &cred_arg);
 	xfree(cred_arg.alloc_lps);
 	if (*slurm_cred == NULL) {
 		error("slurm_cred_create error");
@@ -1742,7 +1744,8 @@ static void _slurm_rpc_shutdown_controller_immediate(slurm_msg_t * msg)
  *	represent the termination of an entire job */
 static void _slurm_rpc_step_complete(slurm_msg_t *msg)
 {
-	int error_code = SLURM_SUCCESS, rc, rem, step_rc;
+	int error_code = SLURM_SUCCESS, rc, rem;
+	uint32_t step_rc;
 	DEF_TIMERS;
 	step_complete_msg_t *req = (step_complete_msg_t *)msg->data;
 	/* Locks: Write job, write node */
@@ -1801,7 +1804,7 @@ static void _slurm_rpc_step_complete(slurm_msg_t *msg)
 		}
 	} else {
 		error_code = job_step_complete(req->job_id, req->job_step_id,
-				uid, job_requeue, step_rc);
+					       uid, job_requeue, step_rc);
 		unlock_slurmctld(job_write_lock);
 		END_TIMER2("_slurm_rpc_step_complete");
 
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index dedbe812713..3895e6fc064 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -1416,7 +1416,7 @@ extern bool step_on_node(struct job_record  *job_ptr,
  * RET 0 on success, otherwise ESLURM error code
  */
 extern int step_partial_comp(step_complete_msg_t *req, int *rem,
-		int *max_rc);
+			     uint32_t *max_rc);
 
 /* Update time stamps for job step suspend */
 extern void suspend_job_step(struct job_record *job_ptr);
@@ -1438,6 +1438,16 @@ extern int sync_job_files(void);
  */
 extern int update_job (job_desc_msg_t * job_specs, uid_t uid);
 
+/*
+ * Modify the account associated with a pending job
+ * IN module - where this is called from
+ * IN job_ptr - pointer to job which should be modified
+ * IN new_account - desired account name
+ * RET SLURM_SUCCESS or error code
+ */
+extern int update_job_account(char *module, struct job_record *job_ptr, 
+			      char *new_account);
+
 /* Reset nodes_completing field for all jobs */
 extern void update_job_nodes_completing(void);
 
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index 5c2c459dbfb..952306bb0ac 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -1468,7 +1468,7 @@ extern int job_step_checkpoint_task_comp(checkpoint_task_comp_msg_t *ckpt_ptr,
  * RET 0 on success, otherwise ESLURM error code
  */
 extern int step_partial_comp(step_complete_msg_t *req, int *rem, 
-		int *max_rc)
+			     uint32_t *max_rc)
 {
 	struct job_record *job_ptr;
 	struct step_record *step_ptr;
diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c
index e02697f002d..2863a5eeebf 100644
--- a/src/slurmd/slurmd/slurmd.c
+++ b/src/slurmd/slurmd/slurmd.c
@@ -589,9 +589,11 @@ _read_config()
 	 * valid aliases */
 	if (conf->node_name == NULL)
 		conf->node_name = slurm_conf_get_aliased_nodename();
-	if (conf->node_name == NULL)
+	
+	if (conf->node_name == NULL) 
 		conf->node_name = slurm_conf_get_nodename("localhost");
-	if (conf->node_name == NULL)
+
+	if (conf->node_name == NULL) 
 		fatal("Unable to determine this slurmd's NodeName");
 
 	_massage_pathname(&conf->logfile);
diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c
index b3a9378be46..4198bdad34a 100644
--- a/src/slurmd/slurmstepd/mgr.c
+++ b/src/slurmd/slurmstepd/mgr.c
@@ -233,9 +233,25 @@ static void
 _batch_finish(slurmd_job_t *job, int rc)
 {
 	int i;
-	for (i = 0; i < job->ntasks; i++)
+	for (i = 0; i < job->ntasks; i++) {
+		/* If signalled we only need to check one and then
+		   break out of the loop */ 
+		if(WIFSIGNALED(job->task[i]->estatus)) {
+			switch(WTERMSIG(job->task[i]->estatus)) {
+			case SIGTERM:
+			case SIGKILL:
+			case SIGINT:
+				step_complete.step_rc = NO_VAL;
+				break;
+			default:
+				step_complete.step_rc = job->task[i]->estatus;
+				break;
+			}
+			break;
+		}
 		step_complete.step_rc = MAX(step_complete.step_rc,
 					    WEXITSTATUS(job->task[i]->estatus));
+	}
 
 	if (job->argv[0] && (unlink(job->argv[0]) < 0))
 		error("unlink(%s): %m", job->argv[0]);
@@ -494,10 +510,25 @@ _wait_for_children_slurmstepd(slurmd_job_t *job)
 	}
 
 	/* Find the maximum task return code */
-	for (i = 0; i < job->ntasks; i++)
+	for (i = 0; i < job->ntasks; i++) {
+		/* If signalled we only need to check one and then
+		   break out of the loop */ 
+		if(WIFSIGNALED(job->task[i]->estatus)) {
+			switch(WTERMSIG(job->task[i]->estatus)) {
+			case SIGTERM:
+			case SIGKILL:
+			case SIGINT:
+				step_complete.step_rc = NO_VAL;
+				break;
+			default:
+				step_complete.step_rc = job->task[i]->estatus;
+				break;
+			}
+			break;
+		}
 		step_complete.step_rc = MAX(step_complete.step_rc,
-					 WEXITSTATUS(job->task[i]->estatus));
-
+					    WEXITSTATUS(job->task[i]->estatus));
+	}
 	step_complete.wait_children = false;
 
 	pthread_mutex_unlock(&step_complete.lock);
diff --git a/src/slurmd/slurmstepd/req.c b/src/slurmd/slurmstepd/req.c
index ebfbd430ac8..65f324d457c 100644
--- a/src/slurmd/slurmstepd/req.c
+++ b/src/slurmd/slurmstepd/req.c
@@ -174,10 +174,16 @@ _domain_socket_create(const char *dir, const char *nodename,
 	 * First check to see if the named socket already exists.
 	 */
 	if (stat(name, &stat_buf) == 0) {
-		error("Socket %s already exists", name);
-		xfree(name);
-		errno = ESLURMD_STEP_EXISTS;
-		return -1;
+		/* Vestigial from a slurmd crash or job requeue that did not
+		 * happen properly (very rare conditions). Try another name */
+		xstrcat(name, ".ALT");
+		if (stat(name, &stat_buf) == 0) {
+			error("Socket %s already exists", name);
+			xfree(name);
+			errno = ESLURMD_STEP_EXISTS;
+			return -1;
+		}
+		error("Using alternate socket name %s", name);
 	}
 
 	fd = _create_socket(name);
diff --git a/src/slurmdbd/slurmdbd.c b/src/slurmdbd/slurmdbd.c
index ae2a361ca39..eb2580e48f7 100644
--- a/src/slurmdbd/slurmdbd.c
+++ b/src/slurmdbd/slurmdbd.c
@@ -97,6 +97,7 @@ int main(int argc, char *argv[])
 	pthread_attr_t thread_attr;
 	char node_name[128];
 	void *db_conn = NULL;
+	assoc_init_args_t assoc_init_arg;
 
 	_init_config();
 	log_init(argv[0], log_opts, LOG_DAEMON, NULL);
@@ -139,7 +140,10 @@ int main(int argc, char *argv[])
 
 	db_conn = acct_storage_g_get_connection(false, false);
 	
-	if(assoc_mgr_init(db_conn, NULL) == SLURM_ERROR) {
+	memset(&assoc_init_arg, 0, sizeof(assoc_init_args_t));
+	assoc_init_arg.cache_level = ASSOC_MGR_CACHE_USER;
+
+	if(assoc_mgr_init(db_conn, &assoc_init_arg) == SLURM_ERROR) {
 		error("Problem getting cache of data");
 		acct_storage_g_close_connection(&db_conn);
 		goto end_it;
diff --git a/src/smap/configure_functions.c b/src/smap/configure_functions.c
index f899b0c3d71..80735294979 100644
--- a/src/smap/configure_functions.c
+++ b/src/smap/configure_functions.c
@@ -1276,7 +1276,7 @@ static void _print_text_command(allocated_block_t *allocated_block)
 	main_xcord += 7;
 
 	mvwprintw(text_win, main_ycord,
-		  main_xcord, "%d",allocated_block->request->size);
+		  main_xcord, "%d", allocated_block->request->size);
 	main_xcord += 10;
 	
 	if(allocated_block->request->conn_type == SELECT_SMALL) {
diff --git a/src/srun/allocate.c b/src/srun/allocate.c
index 5b5c53bd55b..5faf12f08b3 100644
--- a/src/srun/allocate.c
+++ b/src/srun/allocate.c
@@ -96,7 +96,7 @@ static void _signal_while_allocating(int signo)
 {
 	destroy_job = 1;
 	if (pending_job_id != 0) {
-		slurm_complete_job(pending_job_id, 0);
+		slurm_complete_job(pending_job_id, NO_VAL);
 	}
 }
 
diff --git a/src/srun/srun.c b/src/srun/srun.c
index 1f9247d8a9c..d82a554d358 100644
--- a/src/srun/srun.c
+++ b/src/srun/srun.c
@@ -110,7 +110,7 @@
 
 mpi_plugin_client_info_t mpi_job_info[1];
 static struct termios termdefaults;
-int global_rc;
+uint32_t global_rc = 0;
 srun_job_t *job = NULL;
 
 struct {
@@ -250,7 +250,6 @@ int srun(int ac, char **av)
 		if (!job || create_job_step(job) < 0)
 			exit(1);
 	} else {
-		got_alloc = 1;
 		/* Combined job allocation and job step launch */
 #ifdef HAVE_FRONT_END
 		uid_t my_uid = getuid();
@@ -263,9 +262,11 @@ int srun(int ac, char **av)
 	
 		if ( !(resp = allocate_nodes()) ) 
 			exit(1);
+		got_alloc = 1;
 		_print_job_information(resp);
 		_set_cpu_env_var(resp);
 		job = job_create_allocation(resp);
+		
 		opt.exclusive = false;	/* not applicable for this step */
 		if (!job || create_job_step(job) < 0) {
 			slurm_complete_job(job->jobid, 1);
@@ -433,7 +434,7 @@ cleanup:
 	_task_state_struct_free();
 	log_fini();
 
-	return global_rc;
+	return (int)global_rc;
 }
 
 static int _call_spank_local_user (srun_job_t *job)
@@ -953,7 +954,7 @@ _task_finish(task_exit_msg_t *msg)
 	char buf[2048], *core_str = "", *msg_str, *node_list = NULL;
 	static bool first_done = true;
 	static bool first_error = true;
-	int rc = 0;
+	uint32_t rc = 0;
 	int i;
 
 	verbose("%u tasks finished (rc=%u)",
@@ -975,7 +976,6 @@ _task_finish(task_exit_msg_t *msg)
 		}
 	} else if (WIFSIGNALED(msg->return_code)) {
 		bit_or(task_state.finish_abnormal, tasks_exited);
-		rc = 1;
 		msg_str = strsignal(WTERMSIG(msg->return_code));
 #ifdef WCOREDUMP
 		if (WCOREDUMP(msg->return_code))
@@ -983,9 +983,11 @@ _task_finish(task_exit_msg_t *msg)
 #endif
 		node_list = _taskids_to_nodelist(tasks_exited);
 		if (job->state >= SRUN_JOB_CANCELLED) {
+			rc = NO_VAL;
 			verbose("%s: task %s: %s%s", 
 				node_list, buf, msg_str, core_str);
 		} else {
+			rc = msg->return_code;
 			error("%s: task %s: %s%s", 
 			      node_list, buf, msg_str, core_str);
 		}
diff --git a/testsuite/expect/globals b/testsuite/expect/globals
index 0126835ee00..a9003d3dfbc 100755
--- a/testsuite/expect/globals
+++ b/testsuite/expect/globals
@@ -1134,3 +1134,54 @@ proc check_acct_associations { } {
 	log_user 1
 	return $rc
 }
+
+################################################################
+# Proc: check_accounting_admin_level
+# Purpose: look up the current user's accounting admin level
+# Returns: admin level reported by sacctmgr, "" if none was found
+################################################################
+proc check_accounting_admin_level { } {
+	global sacctmgr alpha alpha_numeric_under bin_id
+
+	# Default both results to "" in case no pattern below matches
+	set admin_level ""
+	set user_name ""
+	log_user 0
+
+	spawn $bin_id -u -n
+	expect {
+		-re "($alpha_numeric_under)" {
+			set user_name $expect_out(1,string)
+			exp_continue
+		}
+		eof {
+			wait
+		}
+	}
+
+	if { ![string length $user_name] } {
+		send_user "FAILURE: No name returned from id\n"
+		log_user 1
+		return ""
+	}
+
+	# Use sacctmgr to check admin_level
+	set s_pid [spawn $sacctmgr -n -P list user $user_name format=admin]
+	expect {
+		-re "($alpha)" {
+			set admin_level $expect_out(1,string)
+			exp_continue
+		}
+		timeout {
+			send_user "FAILURE: sacctmgr list not responding\n"
+			slow_kill $s_pid
+			set exit_code 1
+		}
+		eof {
+			wait
+		}
+	}
+
+	log_user 1
+	return $admin_level
+}
diff --git a/testsuite/expect/test21.10 b/testsuite/expect/test21.10
index e40d6a52d9b..f74f24bb17b 100755
--- a/testsuite/expect/test21.10
+++ b/testsuite/expect/test21.10
@@ -70,11 +70,138 @@ set access_err  0
 
 print_header $test_id
 
+#
+# Check accounting config and bail if not found.
+#
 if { [test_account_storage] == 0 } {
 	send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n"
 	exit 0
 }
- 
+
+if { [string compare [check_accounting_admin_level] "Administrator"] } {
+	send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME admin=admin.\n"
+	exit 0
+}
+
+#
+# Use sacctmgr to delete the test cluster (it is created again below).
+# The spawned sacctmgr pid is kept in sadel_pid for the timeout handler.
+	set nothing 0
+	set matches 0
+
+set sadel_pid [spawn $sacctmgr -i $del $clu $tc1]
+
+	expect {
+		-re "privilege to perform this action" {
+			set access_err 1
+			exp_continue
+		}
+		-re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" {
+			send_user "FAILURE: there was a problem with the sacctmgr command\n"
+			incr exit_code 1
+		}
+		-re "Problem getting" {
+			send_user "FAILURE: there was a problem getting information from the database\n"
+			incr exit_code 1
+		}
+		-re "Problem adding" {
+			send_user "FAILURE: there was an unknwon problem\n"
+			incr exit_code 1
+		}
+		-re "No associations" {
+			send_user "FAILURE: your command didn't return anything\n"
+			incr exit_code 1
+		}
+		-re "Deleting clusters" {
+			incr matches
+			exp_continue
+		}
+		-re " Nothing deleted" {
+			incr matches
+			set nothing 1
+			exp_continue
+		}
+		timeout {
+			send_user "\nFAILURE: sacctmgr delete not responding\n"
+			slow_kill $sadel_pid
+			incr exit_code 1
+		}
+		eof {
+			wait
+		}
+	}
+	if {$access_err != 0} {
+		return 1
+	}
+	if {$matches != 1} {
+		send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $matches\n"
+		incr exit_code 1
+	}
+	if { !$nothing } {
+		if { ![check_acct_associations] } {
+			send_user "\nFAILURE:  Our associations don't line up\n"
+			incr exit_code 1
+		}
+	}
+
+#
+# Use sacctmgr to remove an account
+#
+# nothing starts at 0 and is raised only when sacctmgr deleted nothing
+	set matches 0
+	set nothing 0
+	set check "Deleting account"
+
+	set my_pid [eval spawn $sacctmgr -i delete account $nm1]
+	expect {
+		-re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" {
+			send_user "FAILURE: there was a problem with the sacctmgr command\n"
+			incr exit_code 1
+		}
+		-re "Problem getting" {
+			send_user "FAILURE: there was a problem getting information from the database\n"
+			incr exit_code 1
+		}
+		-re "Problem adding" {
+			send_user "FAILURE: there was an unknwon problem\n"
+			incr exit_code 1
+		}
+		-re "No associations" {
+			send_user "FAILURE: your command didn't return anything\n"
+			incr exit_code 1
+		}
+		-re "$check" {
+			incr matches
+			exp_continue
+		}
+		-re " Nothing deleted" {
+			incr matches
+			set nothing 1
+			exp_continue
+		}
+		timeout {
+			send_user "\nFAILURE: sacctmgr delete not responding\n"
+			slow_kill $my_pid
+			incr exit_code 1
+		}
+		eof {
+			wait
+		}
+	}
+
+	if {$matches != 1} {
+		send_user "\nFAILURE:  sacctmgr had a problem deleting account.
+	got $matches\n"
+		incr exit_code 1
+	}
+
+	if { !$nothing } {
+		if { ![check_acct_associations] } {
+			send_user "\nFAILURE:  Our associations don't line up\n"
+			incr exit_code 1
+		}
+	}
+
 #
 # Use sacctmgr to create a cluster
 #
diff --git a/testsuite/expect/test21.11 b/testsuite/expect/test21.11
index 3411f0b80fa..99571a5925d 100755
--- a/testsuite/expect/test21.11
+++ b/testsuite/expect/test21.11
@@ -78,6 +78,14 @@ if { [test_account_storage] == 0 } {
 	exit 0
 }
  
+#
+# Verify that the user has accounting Administrator privileges
+#
+ if { [string compare [check_accounting_admin_level] "Administrator"] } {
+	send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME admin=admin.\n"
+	exit 0
+}
+
 #
 # Use sacctmgr to create a cluster
 #
diff --git a/testsuite/expect/test21.12 b/testsuite/expect/test21.12
index a7a363bbe37..ed897c9542b 100755
--- a/testsuite/expect/test21.12
+++ b/testsuite/expect/test21.12
@@ -79,6 +79,14 @@ if { [test_account_storage] == 0 } {
 	exit 0
 }
  
+#
+# Verify that the user has accounting Administrator privileges
+#
+ if { [string compare [check_accounting_admin_level] "Administrator"] } {
+	send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME admin=admin.\n"
+	exit 0
+}
+
 #
 # Use sacctmgr to create a cluster
 #
diff --git a/testsuite/expect/test21.13 b/testsuite/expect/test21.13
index c6b9074282d..4da00c44013 100755
--- a/testsuite/expect/test21.13
+++ b/testsuite/expect/test21.13
@@ -74,6 +74,13 @@ if { [test_account_storage] == 0 } {
 	exit 0
 }
 
+#
+# Verify that the user has accounting Administrator privileges
+#
+ if { [string compare [check_accounting_admin_level] "Administrator"] } {
+	send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME admin=admin.\n"
+	exit 0
+}
 
 #
 # Use sacctmgr to create a cluster
diff --git a/testsuite/expect/test21.14 b/testsuite/expect/test21.14
index b73c438cc4b..c6b138ebeb0 100755
--- a/testsuite/expect/test21.14
+++ b/testsuite/expect/test21.14
@@ -75,6 +75,11 @@ if { [test_account_storage] == 0 } {
 	exit 0
 }
 
+if { [string compare [check_accounting_admin_level] "Administrator"] } {
+	send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME admin=admin.\n"
+	exit 0
+}
+
 
 #
 # Use sacctmgr to create a cluster
diff --git a/testsuite/expect/test21.15 b/testsuite/expect/test21.15
index 529e24a4cda..c6a7f0eeb68 100755
--- a/testsuite/expect/test21.15
+++ b/testsuite/expect/test21.15
@@ -102,6 +102,10 @@ if { [test_account_storage] == 0 } {
 	exit 0
 }
 
+if { [string compare [check_accounting_admin_level] "Administrator"] } {
+	send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME admin=admin.\n"
+	exit 0
+}
 
 #
 # Use sacctmgr to create a cluster
diff --git a/testsuite/expect/test21.16 b/testsuite/expect/test21.16
index 5d318f87e60..4c8dfc49d6d 100755
--- a/testsuite/expect/test21.16
+++ b/testsuite/expect/test21.16
@@ -88,7 +88,7 @@ set mn		maxnode
 set mw		maxwall
 set dbu		debug
 set access_err  0
-
+#set user_name   "id -u -n"
 
 print_header $test_id
 
@@ -102,6 +102,10 @@ if { [test_account_storage] == 0 } {
 	exit 0
 }
 
+if { [string compare [check_accounting_admin_level] "Administrator"] } {
+	send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME admin=admin.\n"
+	exit 0
+}
 
 #
 # Use sacctmgr to create a cluster
diff --git a/testsuite/expect/test21.17 b/testsuite/expect/test21.17
index e7376f3debc..21d001a5d1e 100755
--- a/testsuite/expect/test21.17
+++ b/testsuite/expect/test21.17
@@ -102,6 +102,11 @@ if { [test_account_storage] == 0 } {
 	exit 0
 }
 
+if { [string compare [check_accounting_admin_level] "Administrator"] } {
+	send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME admin=admin.\n"
+	exit 0
+}
+
 
 #
 # Use sacctmgr to create a cluster
diff --git a/testsuite/expect/test21.18 b/testsuite/expect/test21.18
index 84662a10b0d..20c848f138f 100755
--- a/testsuite/expect/test21.18
+++ b/testsuite/expect/test21.18
@@ -102,6 +102,11 @@ if { [test_account_storage] == 0 } {
 	exit 0
 }
 
+if { [string compare [check_accounting_admin_level] "Administrator"] } {
+	send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME admin=admin.\n"
+	exit 0
+}
+
 
 #
 # Use sacctmgr to create a cluster
diff --git a/testsuite/expect/test21.19 b/testsuite/expect/test21.19
index 4ef4e677330..2d62d68384b 100755
--- a/testsuite/expect/test21.19
+++ b/testsuite/expect/test21.19
@@ -102,6 +102,11 @@ if { [test_account_storage] == 0 } {
 	exit 0
 }
 
+if { [string compare [check_accounting_admin_level] "Administrator"] } {
+	send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME admin=admin.\n"
+	exit 0
+}
+
 
 #
 # Use sacctmgr to create a cluster
diff --git a/testsuite/expect/test21.5 b/testsuite/expect/test21.5
index 4110ad1184a..5daa909f2de 100755
--- a/testsuite/expect/test21.5
+++ b/testsuite/expect/test21.5
@@ -67,6 +67,11 @@ if { [test_account_storage] == 0 } {
 	exit 0
 }
 
+if { [string compare [check_accounting_admin_level] "Administrator"] } {
+	send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME admin=admin.\n"
+	exit 0
+}
+
 #
 # Use sacctmgr to delete the test cluster
 #
diff --git a/testsuite/expect/test21.6 b/testsuite/expect/test21.6
index 3bfb62ac521..ac83876bc07 100755
--- a/testsuite/expect/test21.6
+++ b/testsuite/expect/test21.6
@@ -61,11 +61,80 @@ set access_err  0
 
 print_header $test_id
 
+#
+# Check accounting config and bail if not found.
+#
 if { [test_account_storage] == 0 } {
 	send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n"
 	exit 0
 }
- 
+
+if { [string compare [check_accounting_admin_level] "Administrator"] } {
+	send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME admin=admin.\n"
+	exit 0
+}
+
+#
+# Use sacctmgr to delete the test clusters (created again below).
+# The spawned sacctmgr pid is kept in sadel_pid for the timeout handler.
+	set nothing 0
+	set matches 0
+
+set sadel_pid [spawn $sacctmgr -i $del $clu $tc1,$tc2,$tc3]
+
+	expect {
+		-re "privilege to perform this action" {
+			set access_err 1
+			exp_continue
+		}
+		-re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" {
+			send_user "FAILURE: there was a problem with the sacctmgr command\n"
+			incr exit_code 1
+		}
+		-re "Problem getting" {
+			send_user "FAILURE: there was a problem getting information from the database\n"
+			incr exit_code 1
+		}
+		-re "Problem adding" {
+			send_user "FAILURE: there was an unknwon problem\n"
+			incr exit_code 1
+		}
+		-re "No associations" {
+			send_user "FAILURE: your command didn't return anything\n"
+			incr exit_code 1
+		}
+		-re "Deleting clusters" {
+			incr matches
+			exp_continue
+		}
+		-re " Nothing deleted" {
+			incr matches
+			set nothing 1
+			exp_continue
+		}
+		timeout {
+			send_user "\nFAILURE: sacctmgr delete not responding\n"
+			slow_kill $sadel_pid
+			incr exit_code 1
+		}
+		eof {
+			wait
+		}
+	}
+	if {$access_err != 0} {
+		return 1
+	}
+	if {$matches != 1} {
+		send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $matches\n"
+		incr exit_code 1
+	}
+	if { !$nothing } {
+		if { ![check_acct_associations] } {
+			send_user "\nFAILURE:  Our associations don't line up\n"
+			incr exit_code 1
+		}
+	}
+
 #
 # Use sacctmgr to create a cluster
 #
diff --git a/testsuite/expect/test21.7 b/testsuite/expect/test21.7
index 0f464fdd195..ee0067db4de 100755
--- a/testsuite/expect/test21.7
+++ b/testsuite/expect/test21.7
@@ -65,7 +65,15 @@ if { [test_account_storage] == 0 } {
 	send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n"
 	exit 0
 }
- 
+
+#
+# Verify that the user has accounting Administrator privileges
+#
+ if { [string compare [check_accounting_admin_level] "Administrator"] } {
+	send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME admin=admin.\n"
+	exit 0
+}
+
 #
 # Use sacctmgr to create a cluster
 #
diff --git a/testsuite/expect/test21.8 b/testsuite/expect/test21.8
index 6d414415c6a..e90f04bace4 100755
--- a/testsuite/expect/test21.8
+++ b/testsuite/expect/test21.8
@@ -72,6 +72,14 @@ if { [test_account_storage] == 0 } {
 	exit 0
 }
  
+#
+# Verify that the user has accounting Administrator privileges
+#
+ if { [string compare [check_accounting_admin_level] "Administrator"] } {
+	send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME admin=admin.\n"
+	exit 0
+}
+
 #
 # Use sacctmgr to create a cluster
 #
diff --git a/testsuite/expect/test21.9 b/testsuite/expect/test21.9
index 4ec5194503a..bb5aad587da 100755
--- a/testsuite/expect/test21.9
+++ b/testsuite/expect/test21.9
@@ -72,6 +72,14 @@ if { [test_account_storage] == 0 } {
 	exit 0
 }
  
+#
+# Verify that the user has accounting Administrator privileges
+#
+ if { [string compare [check_accounting_admin_level] "Administrator"] } {
+	send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME admin=admin.\n"
+	exit 0
+}
+
 #
 # Use sacctmgr to create a cluster
 #
diff --git a/testsuite/expect/test7.3 b/testsuite/expect/test7.3
index 86fe053a947..07b72d80a21 100755
--- a/testsuite/expect/test7.3
+++ b/testsuite/expect/test7.3
@@ -57,7 +57,7 @@ send_user "slurm_dir is $slurm_dir\n"
 if {[test_aix]} {
 	send_user "$bin_cc ${test_prog}.c -Wl,-brtl -g -pthread -o ${test_prog} -I${slurm_dir}/include  -L${slurm_dir}/lib -lslurm -lntbl\n"
 	exec       $bin_cc ${test_prog}.c -Wl,-brtl -g -pthread -o ${test_prog} -I${slurm_dir}/include  -L${slurm_dir}/lib -lslurm -lntbl
-} elseif [file exists ${slurm_dir}/lib64] {
+} elseif [file exists ${slurm_dir}/lib64/libslurm.so] {
 	send_user "$bin_cc ${test_prog}.c -g -pthread -o ${test_prog} -I${slurm_dir}/include -Wl,--rpath=${slurm_dir}/lib64 -L${slurm_dir}/lib64 -lslurm\n"
 	exec       $bin_cc ${test_prog}.c -g -pthread -o ${test_prog} -I${slurm_dir}/include -Wl,--rpath=${slurm_dir}/lib64 -L${slurm_dir}/lib64 -lslurm
 } else {
diff --git a/testsuite/expect/test7.7 b/testsuite/expect/test7.7
index c55a44bc164..7a90ba1cfe5 100755
--- a/testsuite/expect/test7.7
+++ b/testsuite/expect/test7.7
@@ -150,7 +150,7 @@ make_bash_script $file_in "
   echo BEGIN
   $bin_sleep 20
   echo FINI
-  exit 123"
+  exit 0"
 set job_id1 0
 set job_id2 0
 set sbatch_pid [spawn $sbatch -N1-1024 --output=$file_out --comment=test -t1 $file_in]
diff --git a/testsuite/expect/test7.7.prog.c b/testsuite/expect/test7.7.prog.c
index cc8a536858b..81ba2a5e36d 100644
--- a/testsuite/expect/test7.7.prog.c
+++ b/testsuite/expect/test7.7.prog.c
@@ -25,6 +25,7 @@
  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
 \*****************************************************************************/
 
+#include <errno.h>
 #include <netdb.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -71,7 +72,7 @@ static int _conn_wiki_port(char *host, int port)
 
 static int _conn_event_port(char *host, int port)
 {
-	int sock_fd;
+	int i, rc, sock_fd;
 	struct sockaddr_in wiki_addr;
 	struct hostent *hptr;
 
@@ -88,11 +89,19 @@ static int _conn_event_port(char *host, int port)
 	wiki_addr.sin_family = AF_INET;
 	wiki_addr.sin_port   = htons(port);
 	memcpy(&wiki_addr.sin_addr.s_addr, hptr->h_addr, hptr->h_length);
-	if (bind(sock_fd, (struct sockaddr *) &wiki_addr,
-			sizeof(wiki_addr))) {
-		printf("WARNING: bind to port %i failed, may not be real error\n",
-			port);
-		return -1;
+	for (i=0; ; i++) {
+		if (i)
+			sleep(5);
+		rc = bind(sock_fd, (struct sockaddr *) &wiki_addr,
+			  sizeof(wiki_addr));
+		if (rc == 0)
+			break;
+		if ((errno != EINVAL) || (i > 5)) {
+			printf("WARNING: bind to port %i; %s\n", 
+			       port, strerror(errno));
+			return -1;
+		}
+		printf("WARNING: port %i in use, retrying\n", port);
 	}
 	listen(sock_fd, 1);
 	return sock_fd;
@@ -179,7 +188,7 @@ static char *_recv_msg(int fd)
 	return buf;
 }	
 
-static void _xmit(char *msg)
+static int _xmit(char *msg)
 {
 	int msg_len = strlen(msg);
 	char *out_msg, *in_msg, sum[20], *sc_ptr;
@@ -199,12 +208,11 @@ static void _xmit(char *msg)
 	printf("recv:%s\n\n", in_msg);
 	sc_ptr = strstr(in_msg, "SC=");
 	sc = atoi(sc_ptr+3);
-	if (sc != 0) {
+	if (sc != 0)
 		fprintf(stderr, "RPC failure\n");
-		exit(1);
-	} 
 	free(in_msg);
 	close(wiki_fd);
+	return sc;
 }
 
 static void _event_mgr(void)
@@ -240,19 +248,22 @@ static void _get_jobs(void)
 	snprintf(out_msg, sizeof(out_msg),
 		"TS=%u AUTH=root DT=%s",
 		(uint32_t) now, "CMD=GETJOBS ARG=0:ALL");
-	_xmit(out_msg);
+	if (_xmit(out_msg))
+		exit(1);
 
 	/* Dump volitile data */
 	snprintf(out_msg, sizeof(out_msg),
 		"TS=%u AUTH=root DT=CMD=GETJOBS ARG=%u:ALL",
 		(uint32_t) now, (uint32_t) 1);
-	_xmit(out_msg);
+	if (_xmit(out_msg))
+		exit(1);
 
 	/* Dump state only */
 	snprintf(out_msg, sizeof(out_msg),
 		"TS=%u AUTH=root DT=CMD=GETJOBS ARG=%u:ALL",
 		(uint32_t) now, (uint32_t) (now+2));
-	_xmit(out_msg);
+	if (_xmit(out_msg))
+		exit(1);
 }
 
 static void _get_nodes(void)
@@ -264,19 +275,22 @@ static void _get_nodes(void)
 	snprintf(out_msg, sizeof(out_msg),
 		"TS=%u AUTH=root DT=%s", 
 		(uint32_t) now, "CMD=GETNODES ARG=0:ALL");
-	_xmit(out_msg);
+	if (_xmit(out_msg))
+		exit(1);
 
 	/* Dump volitile data */
 	snprintf(out_msg, sizeof(out_msg),
 		"TS=%u AUTH=root DT=CMD=GETNODES ARG=%u:ALL",
 		(uint32_t) now, (uint32_t) 1);
-	_xmit(out_msg);
+	if (_xmit(out_msg))
+		exit(1);
 
 	/* Dump state only */
 	snprintf(out_msg, sizeof(out_msg),
 		"TS=%u AUTH=root DT=CMD=GETNODES ARG=%u:ALL",
 		(uint32_t) now, (uint32_t) (now+2));
-	_xmit(out_msg);
+	if (_xmit(out_msg))
+		exit(1);
 }
 
 static void _cancel_job(long my_job_id)
@@ -289,20 +303,32 @@ static void _cancel_job(long my_job_id)
 		"TYPE=ADMIN "
 		"COMMENT=\"cancel comment\" ",
 		(uint32_t) now, my_job_id);
-	_xmit(out_msg);
+	if (_xmit(out_msg))
+		exit(1);
 }
 
 static void _start_job(long my_job_id)
 {
 	time_t now = time(NULL);
 	char out_msg[128];
+	int i, rc;
 
 	snprintf(out_msg, sizeof(out_msg),
 		"TS=%u AUTH=root DT=CMD=STARTJOB ARG=%ld "
 		"COMMENT=\'start comment\' "
 		"TASKLIST=",	/* Empty TASKLIST means we don't care */
 		(uint32_t) now, my_job_id);
-	_xmit(out_msg);
+
+	for (i=0; i<10; i++) {
+		if (i)
+			sleep(10);
+		rc = _xmit(out_msg);
+		if (rc == 0)
+			break;
+		/* Still completing after requeue */
+	}
+	if (rc != 0)
+		exit(1);
 }
 
 static void _suspend_job(long my_job_id)
@@ -313,7 +339,8 @@ static void _suspend_job(long my_job_id)
 	snprintf(out_msg, sizeof(out_msg),
 		"TS=%u AUTH=root DT=CMD=SUSPENDJOB ARG=%ld",
 		(uint32_t) now, my_job_id);
-	_xmit(out_msg);
+	if (_xmit(out_msg))
+		exit(1);
 }
 
 static void _signal_job(long my_job_id)
@@ -324,7 +351,8 @@ static void _signal_job(long my_job_id)
 	snprintf(out_msg, sizeof(out_msg),
 		"TS=%u AUTH=root DT=CMD=SIGNALJOB ARG=%ld VALUE=URG",
 		(uint32_t) now, my_job_id);
-	_xmit(out_msg);
+	if (_xmit(out_msg))
+		exit(1);
 }
 
 static void _modify_job(long my_job_id)
@@ -343,7 +371,8 @@ static void _modify_job(long my_job_id)
 		/* "INVALID=123 " */
 		"TIMELIMIT=10 BANK=test_bank",
 		(uint32_t) now, my_job_id);
-	_xmit(out_msg);
+	if (_xmit(out_msg))
+		exit(1);
 }
 
 static void _notify_job(long my_job_id)
@@ -355,7 +384,8 @@ static void _notify_job(long my_job_id)
 		"TS=%u AUTH=root DT=CMD=NOTIFYJOB ARG=%ld "
 		"MSG=this_is_a_test",
 		(uint32_t) now, my_job_id);
-	_xmit(out_msg);
+	if (_xmit(out_msg))
+		exit(1);
 }
 
 static void _resume_job(long my_job_id)
@@ -366,7 +396,8 @@ static void _resume_job(long my_job_id)
 	snprintf(out_msg, sizeof(out_msg),
 		"TS=%u AUTH=root DT=CMD=RESUMEJOB ARG=%ld",
 		(uint32_t) now, my_job_id);
-	_xmit(out_msg);
+	if (_xmit(out_msg))
+		exit(1);
 }
 
 static void _job_requeue(long my_job_id)
@@ -377,7 +408,8 @@ static void _job_requeue(long my_job_id)
 	snprintf(out_msg, sizeof(out_msg),
 		"TS=%u AUTH=root DT=CMD=REQUEUEJOB ARG=%ld",
 		(uint32_t) now, my_job_id);
-	_xmit(out_msg);
+	if (_xmit(out_msg))
+		exit(1);
 }
 
 static void _job_will_run(long my_job_id)
@@ -389,7 +421,8 @@ static void _job_will_run(long my_job_id)
 		"TS=%u AUTH=root DT=CMD=JOBWILLRUN ARG=JOBID=%ld,%s",
 		(uint32_t) now, my_job_id,
 		"");		/* put available node list here */
-	_xmit(out_msg);
+	if (_xmit(out_msg))
+		exit(1);
 }
 
 static void _initialize(void)
@@ -400,7 +433,8 @@ static void _initialize(void)
 	snprintf(out_msg, sizeof(out_msg),
 		"TS=%u AUTH=root DT=CMD=INITIALIZE ARG=USEHOSTEXP=N EPORT=%u",
 		(uint32_t) now, e_port);
-	_xmit(out_msg);
+	if (_xmit(out_msg))
+		exit(1);
 }
 
 static void _single_msg(void)
@@ -411,8 +445,10 @@ static void _single_msg(void)
 	snprintf(out_msg, sizeof(out_msg),
 		"TS=%u AUTH=root DT=CMD=%s",
 		(uint32_t) now, 
-		"JOBWILLRUN ARG=JOBID=65537,bgl[000x733] JOBID=65539,bgl[000x733] JOBID=65538,bgl[000x733]");
-	_xmit(out_msg);
+		"JOBWILLRUN ARG=JOBID=65537,bgl[000x733] "
+		"JOBID=65539,bgl[000x733] JOBID=65538,bgl[000x733]");
+	if (_xmit(out_msg))
+		exit(1);
 }
 
 int main(int argc, char * argv[])
@@ -458,7 +494,7 @@ int main(int argc, char * argv[])
 	}
 	_cancel_job(job_id+1);
 	_job_requeue(job_id);	/* Put job back into HELD state */
-	sleep(15);
+	sleep(10);
 	_start_job(job_id);
 	_get_jobs();
 #endif
diff --git a/testsuite/expect/test7.8 b/testsuite/expect/test7.8
index 0713a576b7f..e170fa2f7ee 100755
--- a/testsuite/expect/test7.8
+++ b/testsuite/expect/test7.8
@@ -110,7 +110,7 @@ make_bash_script $file_in "
   echo BEGIN
   $bin_sleep 20
   echo FINI
-  exit 123"
+  exit 0"
 
 set job_id1 0
 set job_id2 0
-- 
GitLab