From cc0f39dfdd4d8db5312bf6abccfad87c5df1829c Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Wed, 5 Jul 2006 16:59:51 +0000
Subject: [PATCH] svn merge -r8457:8480
 https://eris.llnl.gov/svn/slurm/branches/slurm-1.1

---
 NEWS                                          |  13 +-
 doc/html/configurator.html                    |   8 +-
 doc/html/switchplugins.shtml                  |  34 ++-
 doc/man/man5/slurm.conf.5                     |  13 +-
 slurm/slurm.h.in                              |   1 -
 src/api/step_client_io.c                      |  17 +-
 src/common/bitstring.h                        |  10 +-
 src/common/fd.c                               |  11 +
 src/common/slurm_protocol_defs.c              |   4 -
 src/common/slurm_protocol_defs.h              |   1 -
 src/common/slurm_protocol_pack.c              |   2 -
 src/common/switch.c                           |  23 ++
 src/common/switch.h                           |  20 +-
 src/plugins/switch/elan/switch_elan.c         |  13 +-
 src/plugins/switch/federation/federation.c    | 129 +++++++++--
 .../switch/federation/switch_federation.c     |  21 +-
 src/plugins/switch/none/switch_none.c         |  13 +-
 src/slurmctld/job_mgr.c                       | 201 ++++++------------
 src/slurmctld/node_mgr.c                      |  16 +-
 src/slurmctld/node_scheduler.c                |   4 +-
 src/slurmctld/proc_req.c                      |  25 +--
 src/slurmctld/slurmctld.h                     |  15 +-
 src/slurmctld/step_mgr.c                      | 182 +++++++++++++---
 src/slurmd/slurmd/req.c                       |   4 +-
 src/slurmd/slurmstepd/mgr.c                   |   4 +-
 src/slurmd/slurmstepd/req.c                   |   8 +
 src/smap/job_functions.c                      |   1 -
 src/squeue/print.c                            |   1 -
 src/srun/opt.c                                |   2 +-
 testsuite/expect/globals                      |   1 +
 testsuite/expect/test14.4                     |  11 +-
 testsuite/slurm_unit/common/bitstring-test.c  |   2 +-
 32 files changed, 558 insertions(+), 252 deletions(-)

diff --git a/NEWS b/NEWS
index c81fc7053a1..134a3b8ca47 100644
--- a/NEWS
+++ b/NEWS
@@ -14,6 +14,13 @@ documents those changes that are of interest to users and admins.
     the code)
  -- Added support for OSX build.
 
+* Changes in SLURM 1.1.3
+========================
+ -- Fix big-endian bug in the bitstring code which plagued AIX.
+ -- Fix bug in handling srun's --multi-prog option that could run off the
+    end of a buffer.
+ -- Added support for job step completion (and switch window release) on a
+    subset of allocated nodes.
+
 * Changes in SLURM 1.1.2
 ========================
  -- Fix bug in jobcomp/filetxt plugin to report proper NodeCnt when a job 
@@ -39,9 +46,6 @@ documents those changes that are of interest to users and admins.
     completed.
  -- BLUEGENE - added configure option --with-bg-link to choose dynamic linking
     or static linking with the bridgeapi.
- -- Fix to make sure all steps are complete on job before removing allocation
- -- send SIGKILL to all steps when an allocation has been completed.
- -- new job state JOB_DEALLOCATING
 
 * Changes in SLURM 1.1.1
 ========================
@@ -197,6 +201,9 @@ documents those changes that are of interest to users and admins.
 
 * Changes in SLURM 1.0.15
 =========================
+ -- In srun, reset stdin to blocking mode (if it was originally blocking before
+    we set it to O_NONBLOCK) on exit to avoid trouble with things like running
+    srun under a bash shell in an emacs *shell* buffer.
 
 * Changes in SLURM 1.0.14
 =========================
diff --git a/doc/html/configurator.html b/doc/html/configurator.html
index e95b3439495..2654e9e596e 100644
--- a/doc/html/configurator.html
+++ b/doc/html/configurator.html
@@ -280,12 +280,12 @@ Slurmctld state save directory
 <input type="text" name="slurmd_spool_dir" value="/tmp/slurmd"> <B>SlurmdSpoolDir</B>:
 Slurmd state save directory
 <P>
-Define when a DOWN node is returned to service.<BR>
+Define when a non-responding (DOWN) node is returned to service.<BR>
 Select one value for <B>ReturnToService</B>:<BR>
+<input type="radio" name="return_to_service" value="0" checked>
+<B>0</B>: When explicitly restored to service by an administrator.<BR>
 <input type="radio" name="return_to_service" value="1"> 
 <B>1</B>: Automatically, when slurmd daemon registers with valid configuration<BR>
-<input type="radio" name="return_to_service" value="0" checked>
-<B>0</B>: When explicitly restored to service by an administrator.
 <P>
  
 <H2>Scheduling</H2>
@@ -464,6 +464,6 @@ before terminating all remaining tasks. A value of zero indicates unlimited wait
 </FORM>
 <HR>
 <p class="footer">UCRL-WEB-217616<br>
-Last modified 6 June 2006</p>
+Last modified 5 July 2006</p>
 </BODY>
 
diff --git a/doc/html/switchplugins.shtml b/doc/html/switchplugins.shtml
index eedf4c676bc..9703dba8a53 100644
--- a/doc/html/switchplugins.shtml
+++ b/doc/html/switchplugins.shtml
@@ -285,10 +285,10 @@ to indicate the reason for failure.</p>
 <p class="commandline">int switch_p_job_step_complete (switch_jobinfo_t switch_job, 
 char *nodelist);</p>
 <p style="margin-left:.2in"><b>Description</b>: Note that the job step associated 
-with the specified node has completed execution.</p>
-<p style="margin-left:.2in"><b>Arguments</b>:<span class="commandline"> switch_job</span>
-&nbsp;
-&nbsp;&nbsp;(input) The completed job's switch credential.<br>
+with the specified nodelist has completed execution.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<br>
+<span class="commandline"> switch_job</span> &nbsp;&nbsp;&nbsp;(input) 
+The completed job's switch credential.<br>
 <span class="commandline"> nodelist</span>&nbsp; &nbsp;&nbsp;(input) A list of nodes 
 on which the job has completed. This may contain expressions to specify node ranges.
 (e.g. "linux[1-20]" or "linux[2,4,6,8]").</p> 
@@ -296,6 +296,30 @@ on which the job has completed. This may contain expressions to specify node ran
 the plugin should return SLURM_ERROR and set the errno to an appropriate value
 to indicate the reason for failure.</p>
 
+<p class="commandline">int switch_p_job_step_part_comp (switch_jobinfo_t switch_job,
+char *nodelist);</p>
+<p style="margin-left:.2in"><b>Description</b>: Note that the job step has completed 
+execution on the specified node list. The job step is not necessarily completed on all
+nodes, but switch resources associated with it on the specified nodes are no longer 
+in use.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<br>
+<span class="commandline"> switch_job</span> &nbsp;&nbsp;&nbsp;(input) 
+The completed job's switch credential.<br>
+<span class="commandline"> nodelist</span>&nbsp; &nbsp;&nbsp;(input) A list of nodes
+on which the job step has completed. This may contain expressions to specify node ranges.
+(e.g. "linux[1-20]" or "linux[2,4,6,8]").</p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure,
+the plugin should return SLURM_ERROR and set the errno to an appropriate value
+to indicate the reason for failure.</p>
+
+<p class="commandline">bool switch_p_part_comp (void);</p>
+<p style="margin-left:.2in"><b>Description</b>: Indicate if the switch plugin should 
+process partitial job step completions (i.e. switch_g_job_step_part_comp). Support
+of partition completions is compute intensive, so it should be avoided unless switch 
+resources are in short supply (e.g. switch/federation).</p>
+<p style="margin-left:.2in"><b>Returns</b>: True if partition step completions are 
+to be recorded. False if only full job step completions are to be noted.</p>
+
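+<p style="margin-left:.2in">For example, a plugin with plentiful switch resources
+can skip partial completion processing entirely by stubbing both functions, as
+the switch/none plugin does:</p>
+<pre>
+extern int switch_p_job_step_part_comp(switch_jobinfo_t jobinfo,
+	char *nodelist)
+{
+	return SLURM_SUCCESS;
+}
+
+extern bool switch_p_part_comp(void)
+{
+	return false;
+}
+</pre>
+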
 <p class="commandline">void switch_p_print_jobinfo(FILE *fp, switch_jobinfo_t switch_job);</p>
 <p style="margin-left:.2in"><b>Description</b>: Print the contents of a job's 
 switch credential to a file.</p>
@@ -495,6 +519,6 @@ plugin that transmitted it. It is at the discretion of the plugin author whether
 to maintain data format compatibility across different versions of the plugin.</p>
 <p class="footer"><a href="#top">top</a></p>
 
-<p style="text-align:center;">Last modified 1 October 2005</p>
+<p style="text-align:center;">Last modified 3 July 2006</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
index a0c8d71750c..24b1dfe7cb4 100644
--- a/doc/man/man5/slurm.conf.5
+++ b/doc/man/man5/slurm.conf.5
@@ -1,4 +1,4 @@
-.TH "slurm.conf" "5" "June 2006" "slurm.conf 1.2" "Slurm configuration file"
+.TH "slurm.conf" "5" "July 2006" "slurm.conf 1.1" "Slurm configuration file"
 .SH "NAME"
 slurm.conf \- Slurm configuration file 
 .SH "DESCRIPTION"
@@ -305,10 +305,13 @@ appearing in this list.   The user can override this by specifying which
 resource limits to propagate with the srun commands "--propagate" option.
 .TP
 \fBReturnToService\fR
-If set to 1, then a DOWN node will become available for use 
-upon registration. The default value is 0, which 
-means that a node will remain in the DOWN state 
-until a system administrator explicitly changes its state
+If set to 1, then a non-responding (DOWN) node will become available 
+for use upon registration. Note that a DOWN node's state will be changed 
+only if it was set DOWN due to being non-responsive. If the node was 
+set DOWN for any other reason (low memory, prolog failure, epilog 
+failure, etc.), its state will not automatically be changed.  The 
+default value is 0, which means that a node will remain in the 
+DOWN state until a system administrator explicitly changes its state
 (even if the slurmd daemon registers and resumes communications).
 .TP
 \fBSchedulerAuth\fR
diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in
index dfce7e56137..e43918ced9d 100644
--- a/slurm/slurm.h.in
+++ b/slurm/slurm.h.in
@@ -144,7 +144,6 @@ BEGIN_C_DECLS
 enum job_states {
 	JOB_PENDING,		/* queued waiting for initiation */
 	JOB_RUNNING,		/* allocated resources and executing */
-	JOB_DEALLOCATING,	/* Cleaning up allocation of job */
 	JOB_SUSPENDED,		/* allocated resources, execution suspended */
 	JOB_COMPLETE,		/* completed execution successfully */
 	JOB_CANCELLED,		/* cancelled by user */
diff --git a/src/api/step_client_io.c b/src/api/step_client_io.c
index 92d26d3cff4..7dfb750b860 100644
--- a/src/api/step_client_io.c
+++ b/src/api/step_client_io.c
@@ -1,8 +1,8 @@
 /****************************************************************************\
- *  io.c - process stdin, stdout, and stderr for parallel jobs.
+ *  step_client_io.c - process stdin, stdout, and stderr for parallel jobs.
  *  $Id$
  *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
+ *  Copyright (C) 2002-2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  *  Written by Mark Grondona <grondona@llnl.gov>, et. al.
  *  UCRL-CODE-217948.
@@ -170,6 +170,7 @@ struct file_read_info {
 	uint32_t nodeid;
 
 	bool eof;
+	bool was_blocking;	/* fd was blocking before we set O_NONBLOCK */
 };
 
 
@@ -695,6 +696,12 @@ create_file_read_eio_obj(int fd, uint32_t taskid, uint32_t nodeid,
 	info->header.ltaskid = (uint16_t)-1;
 	info->eof = false;
 
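+	/* remember the fd's original blocking mode; _file_readable() restores
+	 * it before closing the fd at shutdown */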
+	if (fd_is_blocking(fd)) {
+		fd_set_nonblocking(fd);
+		info->was_blocking = true;
+	} else {
+		info->was_blocking = false;
+	}
 	eio = eio_obj_create(fd, &file_read_ops, (void *)info);
 
 	return eio;
@@ -717,6 +724,11 @@ static bool _file_readable(eio_obj_t *obj)
 	}
 	if (obj->shutdown == true) {
 		debug3("  false, shutdown");
+		/* if the file descriptor was in blocking mode before we set it
+		 * to O_NONBLOCK, then set it back to blocking mode before
+		 * closing */
+		if (info->was_blocking)
+			fd_set_blocking(obj->fd);
 		close(obj->fd);
 		obj->fd = -1;
 		info->eof = true;
@@ -1015,7 +1027,6 @@ _init_stdio_eio_objs(client_io_fds_t fds, client_io_t *cio)
 	 * build stdin eio_obj_t
 	 */
 	if (fds.in.fd > -1) {
-		fd_set_nonblocking(fds.in.fd);
 		fd_set_close_on_exec(fds.in.fd);
 		cio->stdin_obj = create_file_read_eio_obj(
 			fds.in.fd, fds.in.taskid, fds.in.nodeid, cio);
diff --git a/src/common/bitstring.h b/src/common/bitstring.h
index ca23991f3ff..432543287ac 100644
--- a/src/common/bitstring.h
+++ b/src/common/bitstring.h
@@ -36,6 +36,10 @@
  * 32 bit words.
  * 
  * bitstrings are zero origin
+ *
+ * bitstrings are always stored in a little-endian fashion.  In other words,
+ * bit "1" is always in the byte of a word at the lowest memory address,
+ * regardless of the native architecture endianness.
  */
 
 #ifndef _BITSTRING_H_
@@ -90,7 +94,11 @@ typedef bitstr_t bitoff_t;
 	((char *)((name) + BITSTR_OVERHEAD) + ((bit) >> BITSTR_SHIFT_WORD8))
 
 /* mask for the bit within its word */
-#define	_bit_mask(bit) 		((bitstr_t)1 << ((bit)&BITSTR_MAXPOS))
+#ifdef SLURM_BIGENDIAN
+#define	_bit_mask(bit) ((bitstr_t)1 << (BITSTR_MAXPOS - ((bit)&BITSTR_MAXPOS)))
+#else
+#define	_bit_mask(bit) ((bitstr_t)1 << ((bit)&BITSTR_MAXPOS))
+#endif
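+
+/* Example: with 32-bit words, bit 0 yields mask 0x00000001 on a
+ * little-endian build but 0x80000000 with SLURM_BIGENDIAN set, so the
+ * byte containing bit 0 sits at the word's lowest address either way */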
 
 /* number of bits actually allocated to a bitstr */
 #define _bitstr_bits(name) 	((name)[1])
diff --git a/src/common/fd.c b/src/common/fd.c
index 1ed8201ef09..89a282b3eaa 100644
--- a/src/common/fd.c
+++ b/src/common/fd.c
@@ -61,6 +61,17 @@ void fd_set_noclose_on_exec(int fd)
     return;
 }
 
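+/* Return 1 if file descriptor "fd" is in blocking mode, 0 if O_NONBLOCK is set */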
+int fd_is_blocking(int fd)
+{
+    int val = 0;
+
+    assert(fd >= 0);
+
+    if ((val = fcntl(fd, F_GETFL, 0)) < 0)
+	error("fnctl(F_GET_FL) failed: %m");
+    return (val & O_NONBLOCK) ? 0 : 1;
+}
+
 void fd_set_nonblocking(int fd)
 {
     int fval;
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c
index 41471d31916..2ceeef93358 100644
--- a/src/common/slurm_protocol_defs.c
+++ b/src/common/slurm_protocol_defs.c
@@ -520,8 +520,6 @@ char *job_state_string(enum job_states inx)
 			return "PENDING";
 		case JOB_RUNNING:
 			return "RUNNING";
-		case JOB_DEALLOCATING:
-			return "DEALLOCATING";
 		case JOB_SUSPENDED:
 			return "SUSPENDED";
 		case JOB_COMPLETE:
@@ -549,8 +547,6 @@ char *job_state_string_compact(enum job_states inx)
 			return "PD";
 		case JOB_RUNNING:
 			return "R";
-		case JOB_DEALLOCATING:
-			return "DE";
 		case JOB_SUSPENDED:
 			return "S";
 		case JOB_COMPLETE:
diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h
index 051b79265c9..cf10651182a 100644
--- a/src/common/slurm_protocol_defs.h
+++ b/src/common/slurm_protocol_defs.h
@@ -313,7 +313,6 @@ typedef struct complete_job_allocation {
 
 typedef struct complete_batch_script {
 	uint32_t job_id;
-	uint32_t step_id;
 	uint32_t job_rc;
 	uint32_t slurm_rc;
 	char *node_name;
diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c
index 70b86d22ec2..54e9f3b89d3 100644
--- a/src/common/slurm_protocol_pack.c
+++ b/src/common/slurm_protocol_pack.c
@@ -2834,7 +2834,6 @@ _pack_complete_batch_script_msg(
 	complete_batch_script_msg_t * msg, Buf buffer)
 {
 	pack32((uint32_t)msg->job_id, buffer);
-	pack32((uint32_t)msg->step_id, buffer);
 	pack32((uint32_t)msg->job_rc, buffer);
 	pack32((uint32_t)msg->slurm_rc, buffer);
 	packstr(msg->node_name, buffer);
@@ -2851,7 +2850,6 @@ _unpack_complete_batch_script_msg(
 	*msg_ptr = msg;
 
 	safe_unpack32(&msg->job_id, buffer);
-	safe_unpack32(&msg->step_id, buffer);
 	safe_unpack32(&msg->job_rc, buffer);
 	safe_unpack32(&msg->slurm_rc, buffer);
 	safe_unpackstr_xmalloc(&msg->node_name, &uint16_tmp, buffer);
diff --git a/src/common/switch.c b/src/common/switch.c
index b81ebc5a8b8..935e3e6a5c9 100644
--- a/src/common/switch.c
+++ b/src/common/switch.c
@@ -94,6 +94,9 @@ typedef struct slurm_switch_ops {
 						char *buf, size_t size );
 	int          (*step_complete)     ( switch_jobinfo_t jobinfo,
 						char *nodelist );
+	int          (*step_part_comp)    ( switch_jobinfo_t jobinfo,
+						char *nodelist );
+	bool         (*part_comp)         ( void );
 	int          (*step_allocated)    ( switch_jobinfo_t jobinfo,
 					        char *nodelist );
 	int          (*state_clear)       ( void );
@@ -202,6 +205,8 @@ _slurm_switch_get_ops( slurm_switch_context_t c )
 		"switch_p_free_node_info",
 		"switch_p_sprintf_node_info",
 		"switch_p_job_step_complete",
+		"switch_p_job_step_part_comp",
+		"switch_p_part_comp",
 		"switch_p_job_step_allocated",
 		"switch_p_libstate_clear",
 		"switch_p_slurmctld_init",
@@ -549,6 +554,24 @@ extern int switch_g_job_step_complete(switch_jobinfo_t jobinfo,
 	return (*(g_context->ops.step_complete))( jobinfo, nodelist );
 }
 
+extern int switch_g_job_step_part_comp(switch_jobinfo_t jobinfo,
+	char *nodelist)
+{
+	if ( switch_init() < 0 )
+		return SLURM_ERROR;
+
+	return (*(g_context->ops.step_part_comp))( jobinfo, nodelist );
+}
+
+extern bool switch_g_part_comp(void)
+{
+	if ( switch_init() < 0 )
+		return false;
+
+	return (*(g_context->ops.part_comp))( );
+}
+
 extern int switch_g_job_step_allocated(switch_jobinfo_t jobinfo,
 	char *nodelist)
 {
diff --git a/src/common/switch.h b/src/common/switch.h
index b1f32167012..27747567fa3 100644
--- a/src/common/switch.h
+++ b/src/common/switch.h
@@ -157,12 +157,30 @@ extern int  switch_g_get_jobinfo(switch_jobinfo_t jobinfo,
 	int data_type, void *data);
 
 /*
- * Note that the job step associated with the specified node 
+ * Note that the job step associated with the specified nodelist 
  * has completed execution.
  */
 extern int switch_g_job_step_complete(switch_jobinfo_t jobinfo,
 	char *nodelist);
 
+/*
+ * Note that the job step has completed execution on the specified
+ * nodelist. The job step is not necessarily completed on all 
+ * nodes, but switch resources associated with it on the specified 
+ * nodes are no longer in use. 
+ */
+extern int switch_g_job_step_part_comp(switch_jobinfo_t jobinfo,
+	char *nodelist);
+
+/*
+ * Return TRUE if the switch plugin processes partial job step 
+ * completion calls (i.e. switch_g_job_step_part_comp). Support 
+ * for partial completions is compute intensive, so it should 
+ * be avoided unless switch resources are in short supply (e.g.
+ * switch/federation). Otherwise return FALSE.
+ */
+extern bool switch_g_part_comp(void);
+
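+/*
+ * A typical slurmctld caller (see step_mgr.c) gates partial completions
+ * on both of the calls above:
+ *
+ *	if (switch_g_part_comp() && step_ptr->switch_job)
+ *		switch_g_job_step_part_comp(step_ptr->switch_job, node_list);
+ */
+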
 /*
  * Restore the switch allocation information "jobinfo" for an already
  * allocated job step, most likely to restore the switch information
diff --git a/src/plugins/switch/elan/switch_elan.c b/src/plugins/switch/elan/switch_elan.c
index 1d96ac15d08..41cf0e0936b 100644
--- a/src/plugins/switch/elan/switch_elan.c
+++ b/src/plugins/switch/elan/switch_elan.c
@@ -2,7 +2,7 @@
  *  switch_elan.c - Library routines for initiating jobs on QsNet. 
  *  $Id$
  *****************************************************************************
- *  Copyright (C) 2003 The Regents of the University of California.
+ *  Copyright (C) 2003-2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  *  Written by Kevin Tew <tew1@llnl.gov>, et. al.
  *  UCRL-CODE-217948.
@@ -804,6 +804,17 @@ extern int switch_p_job_step_complete(switch_jobinfo_t jobinfo,
 	return SLURM_SUCCESS;
 }
 
+extern int switch_p_job_step_part_comp(switch_jobinfo_t jobinfo,
+	char *nodelist)
+{
+	return SLURM_SUCCESS;
+}
+
+extern bool switch_p_part_comp(void)
+{
+	return false;
+}
+
 extern int switch_p_job_step_allocated(switch_jobinfo_t jobinfo, char *nodelist)
 {
 	return qsw_restore_jobinfo((qsw_jobinfo_t) jobinfo);
diff --git a/src/plugins/switch/federation/federation.c b/src/plugins/switch/federation/federation.c
index 8be66221d6e..c2916d73c0d 100644
--- a/src/plugins/switch/federation/federation.c
+++ b/src/plugins/switch/federation/federation.c
@@ -82,6 +82,7 @@ mode_t fed_umask;
 typedef struct fed_window {
 	uint16_t id;
 	uint32_t status;
+	uint16_t job_key;	/* key of the job step using this window, 0 if unloaded */
 } fed_window_t;
 	
 typedef struct fed_adapter {
@@ -126,6 +127,9 @@ struct fed_jobinfo {
 	uint8_t bulk_xfer;  /* flag */
 	uint16_t tables_per_task;
 	fed_tableinfo_t *tableinfo;
+
+	hostlist_t nodenames;
+	int num_tasks;
 };
 
 typedef struct {
@@ -884,6 +888,7 @@ fed_pack_nodeinfo(fed_nodeinfo_t *n, Buf buf)
 		for(j = 0; j < a->window_count; j++) {
 			pack16(a->window_list[j].id, buf);
 			pack32(a->window_list[j].status, buf);
+			pack16(a->window_list[j].job_key, buf);
 		}
 	}
 
@@ -953,8 +958,7 @@ _hash_index (char *name)
 	return index;
 }
 
-/* Tries to find a node fast using the hash table if possible, 
- * otherwise falls back to a linear search.
+/* Tries to find a node fast using the hash table
  *
  * Used by: slurmctld
  */
@@ -1208,8 +1212,11 @@ _unpack_nodeinfo(fed_nodeinfo_t *n, Buf buf, bool believe_window_status)
 		for(j = 0; j < tmp_a->window_count; j++) {
 			safe_unpack16(&tmp_w[j].id, buf);
 			safe_unpack32(&tmp_w[j].status, buf);
-			if (!believe_window_status)
+			safe_unpack16(&tmp_w[j].job_key, buf);
+			if (!believe_window_status) {
 				tmp_w[j].status = NTBL_UNLOADED_STATE;
+				tmp_w[j].job_key = 0;
+			}
 		}
 		tmp_a->window_list = tmp_w;
 	}
@@ -1340,7 +1347,7 @@ _find_window(fed_adapter_t *adapter, int window_id) {
  */
 static int
 _allocate_windows_all(int adapter_cnt, fed_tableinfo_t *tableinfo,
-		      char *hostname, int task_id)
+		      char *hostname, int task_id, uint16_t job_key)
 {
 	fed_nodeinfo_t *node;
 	fed_adapter_t *adapter;
@@ -1367,6 +1374,7 @@ _allocate_windows_all(int adapter_cnt, fed_tableinfo_t *tableinfo,
 			return SLURM_ERROR;
 		}
 		window->status = NTBL_LOADED_STATE;
+		window->job_key = job_key;
 
 		table = tableinfo[i].table[task_id];
 		table->task_id = task_id;
@@ -1390,7 +1398,7 @@ _allocate_windows_all(int adapter_cnt, fed_tableinfo_t *tableinfo,
  */
 static int
 _allocate_window_single(char *adapter_name, fed_tableinfo_t *tableinfo,
-			char *hostname, int task_id)
+			char *hostname, int task_id, uint16_t job_key)
 {
 	fed_nodeinfo_t *node;
 	fed_adapter_t *adapter = NULL;
@@ -1432,6 +1440,7 @@ _allocate_window_single(char *adapter_name, fed_tableinfo_t *tableinfo,
 		return SLURM_ERROR;
 	}
 	window->status = NTBL_LOADED_STATE;
+	window->job_key = job_key;
 
 	table = tableinfo[0].table[task_id];
 	table->task_id = task_id;
@@ -1452,7 +1461,8 @@ _allocate_window_single(char *adapter_name, fed_tableinfo_t *tableinfo,
  */
 static int
 _window_state_set(int adapter_cnt, fed_tableinfo_t *tableinfo,
-		  char *hostname, int task_id, enum NTBL_RC state)
+		  char *hostname, int task_id, enum NTBL_RC state,
+		  uint16_t job_key)
 {
 	fed_nodeinfo_t *node = NULL;
 	fed_adapter_t *adapter = NULL;
@@ -1511,8 +1521,11 @@ _window_state_set(int adapter_cnt, fed_tableinfo_t *tableinfo,
 		       adapter->name,
 		       table->lid, table->window_id, task_id);
 		window = _find_window(adapter, table->window_id);
-		if (window)
+		if (window) {
 			window->status = state;
+			window->job_key =
+				(state == NTBL_UNLOADED_STATE) ? 0 : job_key;
+		}
 	}
 	
 	return SLURM_SUCCESS;
@@ -1617,7 +1630,7 @@ _job_step_window_state(fed_jobinfo_t *jp, hostlist_t hl, enum NTBL_RC state)
 			rc = _window_state_set(jp->tables_per_task,
 					       jp->tableinfo,
 					       host, proc_cnt,
-					       state);
+					       state, jp->job_key);
 			proc_cnt++;
 		}
 		free(host);
@@ -1628,15 +1641,101 @@ _job_step_window_state(fed_jobinfo_t *jp, hostlist_t hl, enum NTBL_RC state)
 	return SLURM_SUCCESS;
 }
 
-/* Find all of the windows used by job step "jp" and mark their
- * state NTBL_UNLOADED_STATE.
+/*
+ * For one node, free all of the windows belonging to a particular
+ * job step (as identified by the job_key).
+ */
+static inline void
+_free_windows_by_job_key(uint16_t job_key, char *nodename)
+{
+	fed_nodeinfo_t *node;
+	fed_adapter_t *adapter;
+	fed_window_t *window;
+	int i, j;
+
+	/* debug3("_free_windows_by_job_key(%hu, %s)", job_key, nodename); */
+	if ((node = _find_node(fed_state, nodename)) == NULL)
+		return;
+
+	if (node->adapter_list == NULL) {
+		error("_free_windows_by_job_key, "
+		      "adapter_list NULL for node %s", nodename);
+		return;
+	}
+	for (i = 0; i < node->adapter_count; i++) {
+		adapter = &node->adapter_list[i];
+		if (adapter->window_list == NULL) {
+			error("_free_windows_by_job_key, "
+			      "window_list NULL for node %s adapter %s",
+			      nodename, adapter->name);
+			continue;
+		}
+		/* We could check here to see if this adapter's name
+		 * is in the fed_jobinfo tableinfo list to avoid the next
+		 * loop if the adapter isn't in use by the job step.
+		 * However, the added searching and string comparisons
+		 * probably aren't worth it, especially since MOST job
+		 * steps will use all of the adapters.
+		 */
+		for (j = 0; j < adapter->window_count; j++) {
+			window = &adapter->window_list[j];
+
+			if (window->job_key == job_key) {
+				/* debug3("Freeing adapter %s window %d",
+				   adapter->name, window->id); */
+				window->status = NTBL_UNLOADED_STATE;
+				window->job_key = 0;
+			}
+		}
+	}
+}
+
+/* Find all of the windows used by job step "jp" on the hosts
+ * designated in hostlist "hl" and mark their state NTBL_UNLOADED_STATE.
  *
  * Used by: slurmctld
  */
 int
 fed_job_step_complete(fed_jobinfo_t *jp, hostlist_t hl)
 {
-	return _job_step_window_state(jp, hl, NTBL_UNLOADED_STATE);
+	hostlist_t uniq_hl;
+	hostlist_iterator_t hi;
+	char *nodename;
+
+	xassert(!hostlist_is_empty(hl));
+	xassert(jp);
+	xassert(jp->magic == FED_JOBINFO_MAGIC);
+
+	if ((jp == NULL)
+	    || (jp->magic != FED_JOBINFO_MAGIC)
+	    || (hostlist_is_empty(hl)))
+		return SLURM_ERROR;
+
+	if ((jp->tables_per_task == 0)
+	    || !jp->tableinfo
+	    || (jp->tableinfo[0].table_length == 0))
+		return SLURM_SUCCESS;
+
+	/* The hl hostlist may contain duplicate nodenames (poe -hostfile
+	 * triggers duplicates in the hostlist).  Since there
+	 * is no reason to call _free_windows_by_job_key more than once
+	 * per nodename, we create a new unique hostlist.
+	 */
+	uniq_hl = hostlist_copy(hl);
+	hostlist_uniq(uniq_hl);
+	hi = hostlist_iterator_create(uniq_hl);
+
+	_lock();
+	while ((nodename = hostlist_next(hi)) != NULL) {
+		_free_windows_by_job_key(jp->job_key, nodename);
+		free(nodename);
+	}
+	_unlock();
+	
+	hostlist_iterator_destroy(hi);
+	hostlist_destroy(uniq_hl);
+	return SLURM_SUCCESS;
 }
 
 
@@ -1645,7 +1744,7 @@ fed_job_step_complete(fed_jobinfo_t *jp, hostlist_t hl)
  *
  * Used by the slurmctld at startup time to restore the allocation
  * status of any job steps that were running at the time the previous
- * slurmctld was shutdown.  Also used to restore teh allocation
+ * slurmctld was shutdown.  Also used to restore the allocation
  * status after a call to switch_clear().
  */
 int
@@ -1746,11 +1845,13 @@ fed_build_jobinfo(fed_jobinfo_t *jp, hostlist_t hl, int nprocs,
 			if (adapter_name == NULL) {
 				rc = _allocate_windows_all(jp->tables_per_task,
 							   jp->tableinfo,
-							   host, proc_cnt);
+							   host, proc_cnt,
+							   jp->job_key);
 			} else {
 				rc = _allocate_window_single(adapter_name,
 							     jp->tableinfo,
-							     host, proc_cnt);
+							     host, proc_cnt,
+							     jp->job_key);
 			}
 			if (rc != SLURM_SUCCESS) {
 				_unlock();
diff --git a/src/plugins/switch/federation/switch_federation.c b/src/plugins/switch/federation/switch_federation.c
index e07bf130512..897df04c179 100644
--- a/src/plugins/switch/federation/switch_federation.c
+++ b/src/plugins/switch/federation/switch_federation.c
@@ -3,7 +3,7 @@
  **	Federation
  **  $Id$
  *****************************************************************************
- *  Copyright (C) 2004 The Regents of the University of California.
+ *  Copyright (C) 2004-2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  *  Written by Jason King <jking@llnl.gov>
  *  UCRL-CODE-217948.
@@ -454,7 +454,7 @@ extern int switch_p_get_jobinfo(switch_jobinfo_t switch_job, int key,
 	return fed_get_jobinfo((fed_jobinfo_t *)switch_job, key, resulting_data);
 } 
 
-int switch_p_job_step_complete(switch_jobinfo_t jobinfo, char *nodelist)
+static inline int _make_step_comp(switch_jobinfo_t jobinfo, char *nodelist)
 {
 	hostlist_t list = NULL;
 	int rc;
@@ -466,7 +466,22 @@ int switch_p_job_step_complete(switch_jobinfo_t jobinfo, char *nodelist)
 	return rc;
 }
 
-int switch_p_job_step_allocated(switch_jobinfo_t jobinfo, char *nodelist)
+extern int switch_p_job_step_complete(switch_jobinfo_t jobinfo, char *nodelist)
+{
+	return _make_step_comp(jobinfo, nodelist);
+}
+
+extern int switch_p_job_step_part_comp(switch_jobinfo_t jobinfo, char *nodelist)
+{
+	return _make_step_comp(jobinfo, nodelist);
+}
+
+extern bool switch_p_part_comp(void)
+{
+	return true;
+}
+
+extern int switch_p_job_step_allocated(switch_jobinfo_t jobinfo, char *nodelist)
 {
 	hostlist_t list = NULL;
 	int rc;
diff --git a/src/plugins/switch/none/switch_none.c b/src/plugins/switch/none/switch_none.c
index a58f4e22e44..bc15b22e212 100644
--- a/src/plugins/switch/none/switch_none.c
+++ b/src/plugins/switch/none/switch_none.c
@@ -1,7 +1,7 @@
 /*****************************************************************************\
  *  switch_none.c - Library for managing a switch with no special handling.
  *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
+ *  Copyright (C) 2002-2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  *  Written by Morris Jette <jette1@llnl.gov>
  *  UCRL-CODE-217948.
@@ -278,6 +278,17 @@ extern int switch_p_job_step_complete(switch_jobinfo_t jobinfo,
 	return SLURM_SUCCESS;
 }
 
+extern int switch_p_job_step_part_comp(switch_jobinfo_t jobinfo,
+	char *nodelist)
+{
+	return SLURM_SUCCESS;
+}
+
+extern bool switch_p_part_comp(void)
+{
+	return false;
+}
+
 extern int switch_p_job_step_allocated(switch_jobinfo_t jobinfo,
 	char *nodelist)
 {
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index e92ca1b4acd..f390eee23ba 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -129,7 +129,6 @@ static int  _resume_job_nodes(struct job_record *job_ptr);
 static void _set_job_id(struct job_record *job_ptr);
 static void _set_job_prio(struct job_record *job_ptr);
 static void _signal_batch_job(struct job_record *job_ptr, uint16_t signal);
-static void _kill_signal_job(struct job_record *job_ptr);
 static void _signal_job(struct job_record *job_ptr, int signal);
 static void _suspend_job(struct job_record *job_ptr, uint16_t op);
 static int  _suspend_job_nodes(struct job_record *job_ptr);
@@ -174,14 +173,12 @@ struct job_record *create_job_record(int *error_code)
 	xassert (job_ptr->magic = JOB_MAGIC); /* sets value */
 	job_ptr->details = detail_ptr;
 	job_ptr->step_list = list_create(NULL);
-	job_ptr->suspended = false;
-	
 	if (job_ptr->step_list == NULL)
 		fatal("memory allocation failure");
 
 	xassert (detail_ptr->magic = DETAILS_MAGIC); /* set value */
 	detail_ptr->submit_time = time(NULL);
-		
+
 	if (list_append(job_list, job_ptr) == 0)
 		fatal("list_append memory allocation failure");
 
@@ -1031,22 +1028,24 @@ extern int kill_job_by_part_name(char *part_name)
 
 	job_iterator = list_iterator_create(job_list);
 	while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
+		bool suspended = false;
 		if (job_ptr->part_ptr != part_ptr)
 			continue;
 		job_ptr->part_ptr = NULL;
 
-		if ((job_ptr->job_state == JOB_RUNNING) 
-		    || job_ptr->suspended) {
+		if (job_ptr->job_state == JOB_SUSPENDED)
+			suspended = true;
+		if ((job_ptr->job_state == JOB_RUNNING) || suspended) {
 			job_count++;
 			info("Killing job_id %u on defunct partition %s",
 			      job_ptr->job_id, part_name);
 			job_ptr->job_state = JOB_NODE_FAIL | JOB_COMPLETING;
-			if (job_ptr->suspended)
+			if (suspended)
 				job_ptr->end_time = job_ptr->suspend_time;
 			else
 				job_ptr->end_time = time(NULL);
 			job_completion_logger(job_ptr);
-			deallocate_nodes(job_ptr, false, job_ptr->suspended);
+			deallocate_nodes(job_ptr, false, suspended);
 		}
 
 	}
@@ -1079,11 +1078,12 @@ extern int kill_running_job_by_node_name(char *node_name, bool step_test)
 
 	job_iterator = list_iterator_create(job_list);
 	while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
+		bool suspended = false;
 		if ((job_ptr->node_bitmap == NULL) ||
 		    (!bit_test(job_ptr->node_bitmap, bit_position)))
 			continue;	/* job not on this node */
 		if (job_ptr->job_state == JOB_SUSPENDED)
-			job_ptr->suspended = true;
+			suspended = true;
 		if (job_ptr->job_state & JOB_COMPLETING) {
 			job_count++;
 			bit_clear(job_ptr->node_bitmap, bit_position);
@@ -1100,8 +1100,7 @@ extern int kill_running_job_by_node_name(char *node_name, bool step_test)
 				error("Node %s comp_job_cnt underflow, "
 					"JobId=%u", 
 					node_ptr->name, job_ptr->job_id);
-		} else if ((job_ptr->job_state == JOB_RUNNING) 
-			   || job_ptr->suspended) {
+		} else if ((job_ptr->job_state == JOB_RUNNING) || suspended) {
 			if (step_test && 
 			    (step_on_node(job_ptr, node_ptr) == 0))
 				continue;
@@ -1115,14 +1114,12 @@ extern int kill_running_job_by_node_name(char *node_name, bool step_test)
 				      job_ptr->job_id, node_name);
 				job_ptr->job_state = JOB_NODE_FAIL | 
 						     JOB_COMPLETING;
-				if (job_ptr->suspended)
-					job_ptr->end_time = 
-						job_ptr->suspend_time;
+				if (suspended)
+					job_ptr->end_time = job_ptr->suspend_time;
 				else
 					job_ptr->end_time = time(NULL);
 				job_completion_logger(job_ptr);
-				deallocate_nodes(job_ptr, false, 
-						 job_ptr->suspended);
+				deallocate_nodes(job_ptr, false, suspended);
 			} else {
 				error("Removing failed node %s from job_id %u",
 				      node_name, job_ptr->job_id);
@@ -1441,7 +1438,8 @@ extern int job_fail(uint32_t job_id)
 {
 	struct job_record *job_ptr;
 	time_t now = time(NULL);
-	
+	bool suspended = false;
+
 	job_ptr = find_job_record(job_id);
 	if (job_ptr == NULL) {
 		error("job_fail: invalid job id %u", job_id);
@@ -1450,16 +1448,18 @@ extern int job_fail(uint32_t job_id)
 
 	if (IS_JOB_FINISHED(job_ptr))
 		return ESLURM_ALREADY_DONE;
-	if ((job_ptr->job_state == JOB_RUNNING) || job_ptr->suspended) {
+	if (job_ptr->job_state == JOB_SUSPENDED)
+		suspended = true;
+	if ((job_ptr->job_state == JOB_RUNNING) || suspended) {
 		/* No need to signal steps, deallocate kills them */
 		job_ptr->time_last_active       = now;
-		if (job_ptr->suspended)
-			job_ptr->end_time = job_ptr->suspend_time;
+		if (suspended)
+			job_ptr->end_time       = job_ptr->suspend_time;
 		else
-			job_ptr->end_time = now;
+			job_ptr->end_time       = now;
 		last_job_update                 = now;
 		job_ptr->job_state = JOB_FAILED | JOB_COMPLETING;
-		deallocate_nodes(job_ptr, false, job_ptr->suspended);
+		deallocate_nodes(job_ptr, false, suspended);
 		job_completion_logger(job_ptr);
 		return SLURM_SUCCESS;
 	}
@@ -1481,7 +1481,7 @@ extern int job_fail(uint32_t job_id)
  *	last_job_update - time of last job table update
  */
 extern int job_signal(uint32_t job_id, uint16_t signal, uint16_t batch_flag, 
-		      uid_t uid)
+		uid_t uid)
 {
 	struct job_record *job_ptr;
 	time_t now = time(NULL);
@@ -1511,14 +1511,23 @@ extern int job_signal(uint32_t job_id, uint16_t signal, uint16_t batch_flag,
 
 	if ((job_ptr->job_state == JOB_PENDING) &&
 	    (signal == SIGKILL)) {
-		job_complete(job_id, uid, false, NO_VAL);
+		last_job_update		= now;
+		job_ptr->job_state	= JOB_CANCELLED;
+		job_ptr->start_time	= now;
+		job_ptr->end_time	= now;
+		job_completion_logger(job_ptr);
+		delete_job_details(job_ptr);
 		verbose("job_signal of pending job %u successful", job_id);
 		return SLURM_SUCCESS;
 	}
 
 	if ((job_ptr->job_state == JOB_SUSPENDED)
 	&&  (signal == SIGKILL)) {
-		job_complete(job_id, uid, false, NO_VAL);
+		last_job_update         = now;
+		job_ptr->end_time       = job_ptr->suspend_time;
+		job_ptr->job_state      = JOB_CANCELLED | JOB_COMPLETING;
+		deallocate_nodes(job_ptr, false, true);
+		job_completion_logger(job_ptr);
 		verbose("job_signal %u of suspended job %u successful",
 			signal, job_id);
 		return SLURM_SUCCESS;
@@ -1526,8 +1535,13 @@ extern int job_signal(uint32_t job_id, uint16_t signal, uint16_t batch_flag,
 	
 	if (job_ptr->job_state == JOB_RUNNING) {
 		if (signal == SIGKILL) {
-			job_ptr->time_last_active = now;
-			job_complete(job_id, uid, false, NO_VAL);
+			/* No need to signal steps, deallocate kills them */
+			job_ptr->time_last_active	= now;
+			job_ptr->end_time		= now;
+			last_job_update			= now;
+			job_ptr->job_state = JOB_CANCELLED | JOB_COMPLETING;
+			deallocate_nodes(job_ptr, false, false);
+			job_completion_logger(job_ptr);
 		} else if (batch_flag) {
 			if (job_ptr->batch_flag)
 				_signal_batch_job(job_ptr, signal);
@@ -1593,16 +1607,19 @@ _signal_batch_job(struct job_record *job_ptr, uint16_t signal)
  *	last_job_update - time of last job table update
  */
 extern int job_complete(uint32_t job_id, uid_t uid, bool requeue,
-	     uint32_t job_return_code){
+	     uint32_t job_return_code)
+{
 	struct job_record *job_ptr;
 	time_t now = time(NULL);
 	uint32_t job_comp_flag = 0;
+	bool suspended = false;
+
 	job_ptr = find_job_record(job_id);
 	if (job_ptr == NULL) {
 		info("job_complete: invalid JobId=%u", job_id);
 		return ESLURM_INVALID_JOB_ID;
 	}
-	
+
 	if (IS_JOB_FINISHED(job_ptr))
 		return ESLURM_ALREADY_DONE;
 
@@ -1614,18 +1631,13 @@ extern int job_complete(uint32_t job_id, uid_t uid, bool requeue,
 	if (job_ptr->job_state & JOB_COMPLETING)
 		return SLURM_SUCCESS;	/* avoid replay */
 
-	last_job_update = now;
-
-	/* make sure all the steps know they are suppost to be done */
-	if (job_ptr->job_state == JOB_RUNNING
-	    || job_ptr->job_state == JOB_SUSPENDED)
-		_kill_signal_job(job_ptr);
-	
-	if (job_ptr->job_state == JOB_RUNNING
-	    || job_ptr->job_state == JOB_SUSPENDED
-	    || job_ptr->job_state == JOB_DEALLOCATING)
+	if (job_ptr->job_state == JOB_RUNNING)
 		job_comp_flag = JOB_COMPLETING;
-	
+	if (job_ptr->job_state == JOB_SUSPENDED) {
+		job_comp_flag = JOB_COMPLETING;
+		suspended = true;
+	}
+
 	if (requeue && (job_ptr->batch_flag > 1)) {
 		/* Failed one requeue, just kill it */
 		requeue = 0;
@@ -1644,7 +1656,6 @@ extern int job_complete(uint32_t job_id, uid_t uid, bool requeue,
 		job_ptr->end_time   = now;
 		job_completion_logger(job_ptr);
 	} else {
-		job_ptr->kill_on_step_done = 1;
 		if (job_return_code == NO_VAL)
 			job_ptr->job_state = JOB_CANCELLED| job_comp_flag;
 		else if (job_return_code)
@@ -1652,39 +1663,19 @@ extern int job_complete(uint32_t job_id, uid_t uid, bool requeue,
 		else if (job_comp_flag &&		/* job was running */
 			 (job_ptr->end_time < now))	/* over time limit */
 			job_ptr->job_state = JOB_TIMEOUT  | job_comp_flag;
-		else 
-			job_ptr->job_state = JOB_DEALLOCATING;
+		else
+			job_ptr->job_state = JOB_COMPLETE | job_comp_flag;
+		if (suspended)
+			job_ptr->end_time = job_ptr->suspend_time;
+		else
+			job_ptr->end_time = now;
+		job_completion_logger(job_ptr);
 	}
 
-	/* job was running */
-	if (job_comp_flag) {
-		if (list_is_empty(job_ptr->step_list))	{
-			if (job_ptr->job_state == JOB_DEALLOCATING)
-				job_ptr->job_state = 
-					JOB_COMPLETE | job_comp_flag;
-			if (job_ptr->suspended)
-				job_ptr->end_time = job_ptr->suspend_time;
-			else
-				job_ptr->end_time = now;
-			job_completion_logger(job_ptr);
-			deallocate_nodes(job_ptr, false, job_ptr->suspended);
-			info("job_complete for JobId=%u successful", job_id);
-		} else if (job_ptr->job_state != JOB_DEALLOCATING) {
-			if (job_ptr->suspended) 
-				job_ptr->end_time = job_ptr->suspend_time;
-			else
-				job_ptr->end_time = now;
-			job_completion_logger(job_ptr);
-			deallocate_nodes(job_ptr, false, job_ptr->suspended);
-			info("1 job_complete for JobId=%u successful", job_id);
-		} else {
-			debug("%d job steps not complete", 
-			      list_count(job_ptr->step_list));
-		}
-	} else {
-		info("job_complete for non-running JobId=%u successful", 
-		     job_id);
-	}
+	last_job_update = now;
+	if (job_comp_flag) 	/* job was running */
+		deallocate_nodes(job_ptr, false, suspended);
+	info("job_complete for JobId=%u successful", job_id);
 
 	return SLURM_SUCCESS;
 }
@@ -3827,6 +3818,7 @@ extern bool job_epilog_complete(uint32_t job_id, char *node_name,
 	}
 #endif
 
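+	/* release any switch windows steps still hold on this node */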
+	step_epilog_complete(job_ptr, node_name);
 	if (!(job_ptr->job_state & JOB_COMPLETING)) {	/* COMPLETED */
 		if ((job_ptr->job_state == JOB_PENDING)
 		&&  (job_ptr->batch_flag)) {
@@ -3943,14 +3935,13 @@ static void _signal_job(struct job_record *job_ptr, int signal)
 	signal_job_msg_t *signal_job_msg = NULL;
 	int i, buf_rec_size = 0;
 
-	debug3("signaling job %d with signal %d", job_ptr->job_id, signal);
 	agent_args = xmalloc(sizeof(agent_arg_t));
 	agent_args->msg_type = REQUEST_SIGNAL_JOB;
 	agent_args->retry = 1;
 	signal_job_msg = xmalloc(sizeof(signal_job_msg_t));
 	signal_job_msg->job_id = job_ptr->job_id;
 	signal_job_msg->signal = signal;
-	
+
 	for (i = 0; i < node_record_count; i++) {
 		if (bit_test(job_ptr->node_bitmap, i) == 0)
 			continue;
@@ -3978,64 +3969,12 @@ static void _signal_job(struct job_record *job_ptr, int signal)
 		xfree(agent_args);
 		return;
 	}
-	agent_args->msg_args = signal_job_msg;
-	agent_queue_request(agent_args);
-	return;
-}
-
-/* Send specified SIGTERM to all steps associated with a job */
-static void _kill_signal_job(struct job_record *job_ptr)
-{
-	agent_arg_t *agent_args;
-	kill_job_msg_t *kill_job_msg;
-	int i, buf_rec_size = 0;
-
-	debug3("sending SIGTERM to job %d", job_ptr->job_id);
-	agent_args = xmalloc(sizeof(agent_arg_t));
-	
-	agent_args->msg_type = REQUEST_TERMINATE_JOB;
-	agent_args->retry = 1;
-	
-	kill_job_msg = xmalloc(sizeof(kill_job_msg_t));
-	kill_job_msg->job_id = job_ptr->job_id;
-	kill_job_msg->job_uid = job_ptr->user_id;
-	kill_job_msg->nodes = xstrdup(job_ptr->nodes);
-	kill_job_msg->select_jobinfo = select_g_copy_jobinfo(
-		job_ptr->select_jobinfo);
-
-	for (i = 0; i < node_record_count; i++) {
-		if (bit_test(job_ptr->node_bitmap, i) == 0)
-			continue;
-		if ((agent_args->node_count + 1) > buf_rec_size) {
-			buf_rec_size += 128;
-			xrealloc((agent_args->slurm_addr),
-				(sizeof(struct sockaddr_in) *
-				buf_rec_size));
-			xrealloc((agent_args->node_names),
-				(MAX_SLURM_NAME * buf_rec_size));
-		}
-		agent_args->slurm_addr[agent_args->node_count] =
-			node_record_table_ptr[i].slurm_addr;
-		strncpy(&agent_args->
-			node_names[MAX_SLURM_NAME * agent_args->node_count],
-			node_record_table_ptr[i].name, MAX_SLURM_NAME);
-		agent_args->node_count++;
-#ifdef HAVE_FRONT_END	/* Operate only on front-end */
-		break;
-#endif
-	}
 
-	if (agent_args->node_count == 0) {
-		slurm_free_kill_job_msg(kill_job_msg);
-		xfree(agent_args);
-		return;
-	}
-	agent_args->msg_args = kill_job_msg;
+	agent_args->msg_args = signal_job_msg;
 	agent_queue_request(agent_args);
 	return;
 }
 
-
 /* Send suspend request to slumrd of all nodes associated with a job */
 static void _suspend_job(struct job_record *job_ptr, uint16_t op)
 {
@@ -4099,7 +4038,7 @@ static int _suspend_job_nodes(struct job_record *job_ptr)
 		if (node_ptr->run_job_cnt)
 			(node_ptr->run_job_cnt)--;
 		else {
-			error("1 Node %s run_job_cnt underflow", 
+			error("Node %s run_job_cnt underflow", 
 				node_ptr->name);
 		}
 		if (job_ptr->details
@@ -4107,7 +4046,7 @@ static int _suspend_job_nodes(struct job_record *job_ptr)
 			if (node_ptr->no_share_job_cnt)
 				(node_ptr->no_share_job_cnt)--;
 			else {
-				error("1 Node %s no_share_job_cnt "
+				error("Node %s no_share_job_cnt "
 					"underflow", node_ptr->name);
 			}
 			if (node_ptr->no_share_job_cnt == 0)
@@ -4235,7 +4174,6 @@ extern int job_suspend(suspend_msg_t *sus_ptr, uid_t uid,
 			goto reply;
 		_suspend_job(job_ptr, sus_ptr->op);
 		job_ptr->job_state = JOB_SUSPENDED;
-		job_ptr->suspended = true;
 		if (job_ptr->suspend_time) {
 			job_ptr->pre_sus_time +=
 				difftime(now, 
@@ -4255,7 +4193,6 @@ extern int job_suspend(suspend_msg_t *sus_ptr, uid_t uid,
 			goto reply;
 		_suspend_job(job_ptr, sus_ptr->op);
 		job_ptr->job_state = JOB_RUNNING;
-		job_ptr->suspended = false;
 		if (job_ptr->time_limit != INFINITE) {
 			/* adjust effective time_limit */
 			job_ptr->end_time = now +
diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c
index 1656b6d852e..250346b0b13 100644
--- a/src/slurmctld/node_mgr.c
+++ b/src/slurmctld/node_mgr.c
@@ -1708,14 +1708,15 @@ extern void make_node_comp(struct node_record *node_ptr,
 		if (node_ptr->run_job_cnt)
 			(node_ptr->run_job_cnt)--;
 		else
-			error("2 Node %s run_job_cnt underflow", node_ptr->name);
+			error("Node %s run_job_cnt underflow in "
+				"make_node_comp", node_ptr->name);
 
 		if (job_ptr->details && (job_ptr->details->shared == 0)) {
 			if (node_ptr->no_share_job_cnt)
 				(node_ptr->no_share_job_cnt)--;
 			else
-				error("2 Node %s no_share_job_cnt underflow", 
-					node_ptr->name);
+				error("Node %s no_share_job_cnt underflow in "
+					"make_node_comp", node_ptr->name);
 			if (node_ptr->no_share_job_cnt == 0)
 				bit_set(share_node_bitmap, inx);
 		}
@@ -1796,14 +1797,15 @@ void make_node_idle(struct node_record *node_ptr,
 			if (node_ptr->run_job_cnt)
 				(node_ptr->run_job_cnt)--;
 			else
-				error("3 Node %s run_job_cnt underflow", 
-				      node_ptr->name);
+				error("Node %s run_job_cnt underflow in "
+					"make_node_idle", node_ptr->name);
 		} else {
 			if (node_ptr->comp_job_cnt)
 				(node_ptr->comp_job_cnt)--;
 			else
-				error("3 Node %s comp_job_cnt underflow, job_id %u", 
-				      node_ptr->name, job_ptr->job_id);
+				error("Node %s comp_job_cnt underflow in "
+					"make_node_idle, job_id %u", 
+					node_ptr->name, job_ptr->job_id);
 			if (node_ptr->comp_job_cnt > 0) 
 				return;		/* More jobs completing */
 		}
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index 89dc3daf9a0..00cde811e13 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -149,7 +149,7 @@ extern int count_cpus(unsigned *bitmap)
  *	node_record_table_ptr - pointer to global node table
  */
 extern void deallocate_nodes(struct job_record *job_ptr, bool timeout, 
-			     bool suspended)
+		bool suspended)
 {
 	int i;
 	kill_job_msg_t *kill_job;
@@ -215,6 +215,8 @@ extern void deallocate_nodes(struct job_record *job_ptr, bool timeout,
 	if (agent_args->node_count == 0) {
 		error("Job %u allocated no nodes to be killed on",
 		      job_ptr->job_id);
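+		/* free the unsent message's contents to avoid a memory leak */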
+		xfree(kill_job->nodes);
+		select_g_free_jobinfo(&kill_job->select_jobinfo);
 		xfree(kill_job);
 		xfree(agent_args);
 		return;
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index 051aa126341..605937ac7b4 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -916,8 +916,8 @@ static void _slurm_rpc_complete_batch_script(slurm_msg_t * msg)
 
 	/* init */
 	START_TIMER;
-	debug2("Processing RPC: REQUEST_COMPLETE_BATCH_SCRIPT %u.%u",
-	       comp_msg->job_id, comp_msg->step_id);
+	debug2("Processing RPC: REQUEST_COMPLETE_BATCH_SCRIPT %u",
+	       comp_msg->job_id);
 	uid = g_slurm_auth_get_uid(msg->auth_cred);
 
 	if (!_is_super_user(uid)) {
@@ -932,19 +932,16 @@ static void _slurm_rpc_complete_batch_script(slurm_msg_t * msg)
 	/* First set node DOWN if fatal error */
 	if (comp_msg->slurm_rc == ESLURM_ALREADY_DONE) {
 		/* race condition on job termination, not a real error */
-		info("slurmd error running JobId=%u.%u from node=%s: %s",
-		     comp_msg->job_id,
-		     comp_msg->step_id,
-		     comp_msg->node_name,
-		     slurm_strerror(comp_msg->slurm_rc));
+		info("slurmd error running JobId=%u from node=%s: %s",
+		      comp_msg->job_id,
+		      comp_msg->node_name,
+		      slurm_strerror(comp_msg->slurm_rc));
 		comp_msg->slurm_rc = SLURM_SUCCESS;
 	}
 	if (comp_msg->slurm_rc != SLURM_SUCCESS) {
-		error("Fatal slurmd error %u running JobId=%u.%u "
-		      "on node=%s: %s",
+		error("Fatal slurmd error %u running JobId=%u on node=%s: %s",
 		      comp_msg->slurm_rc,
 		      comp_msg->job_id,
-		      comp_msg->step_id,
 		      comp_msg->node_name,
 		      slurm_strerror(comp_msg->slurm_rc));
 		if (error_code == SLURM_SUCCESS) {
@@ -961,10 +958,6 @@ static void _slurm_rpc_complete_batch_script(slurm_msg_t * msg)
 		}
 	}
 
-	/* ignore step complete will catch it on job_complete */
-	job_step_complete(comp_msg->job_id, comp_msg->step_id,
-			  uid, job_requeue, comp_msg->job_rc);
-	
 	/* Mark job allocation complete */
 	error_code = job_complete(comp_msg->job_id, uid,
 				  job_requeue, comp_msg->job_rc);
@@ -1732,8 +1725,8 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg)
 
 		lock_slurmctld(job_write_lock);
 		error_code = job_allocate(job_desc_msg, 
-					  job_desc_msg->immediate, false,
-					  false, uid, &job_ptr);
+				job_desc_msg->immediate, false,
+				false, uid, &job_ptr);
 		unlock_slurmctld(job_write_lock);
 		END_TIMER;
 	}
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index 679779ac0c7..17728a3f862 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -342,8 +342,6 @@ struct job_record {
                                            each of the ntask_cnt hosts */
 	uint16_t mail_type;		/* see MAIL_JOB_* in slurm.h */
 	char *mail_user;		/* user to get e-mail notification */
-	bool suspended;                 /* marker to tell if job was 
-					 * suspended or not */
 };
 
 struct 	step_record {
@@ -541,7 +539,8 @@ extern struct job_record *find_job_record (uint32_t job_id);
  */
 extern struct node_record *find_first_node_record (bitstr_t *node_bitmap);
 
-/* find_node_record - find a record for node with specified name */
+/* find_node_record - find a record for node with specified name, 
+ *	returns pointer to record or NULL if not found */
 extern struct node_record *find_node_record (char *name);
 
 /* 
@@ -1160,6 +1159,16 @@ extern int step_create ( job_step_create_request_msg_t *step_specs,
 			 bool kill_job_when_step_done,
 			 bool batch_step );
 
+/*
+ * step_epilog_complete - note completion of epilog on some node and 
+ *	release its switch windows if appropriate; can perform partial 
+ *	switch window releases.
+ * IN job_ptr - pointer to job which has completed epilog
+ * IN node_name - name of node which has completed epilog
+ * RET count of job steps whose switch windows were released
+ */
+extern int step_epilog_complete(struct job_record  *job_ptr, 
+	char *node_name);
+
 /* 
  * step_on_node - determine if the specified job has any job steps allocated to 
  * 	the specified node 
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index 90130b6a777..3242fde6b41 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -61,6 +61,11 @@
 static void _pack_ctld_job_step_info(struct step_record *step, Buf buffer);
 static bitstr_t * _pick_step_nodes (struct job_record  *job_ptr, 
 				    job_step_create_request_msg_t *step_spec );
+static hostlist_t _step_range_to_hostlist(struct step_record *step_ptr,
+				uint32_t range_first, uint32_t range_last);
+static int _step_hostname_to_inx(struct step_record *step_ptr,
+				char *node_name);
+
 /* 
  * create_step_record - create an empty step_record for the specified job.
  * IN job_ptr - pointer to job table entry to have step record added
@@ -251,8 +256,7 @@ int job_step_signal(uint32_t job_id, uint32_t step_id,
 
 	if (IS_JOB_FINISHED(job_ptr))
 		return ESLURM_ALREADY_DONE;
-	if (job_ptr->job_state != JOB_RUNNING 
-	    && job_ptr->job_state != JOB_DEALLOCATING) {
+	if (job_ptr->job_state != JOB_RUNNING) {
 		verbose("job_step_signal: step %u.%u can not be sent signal "
 			"%u from state=%s", job_id, step_id, signal,
 			job_state_string(job_ptr->job_state));
@@ -351,7 +355,6 @@ int job_step_complete(uint32_t job_id, uint32_t step_id, uid_t uid,
 	struct job_record *job_ptr;
 	struct step_record *step_ptr;
 	int error_code;
-	int nodes;
 
 	job_ptr = find_job_record(job_id);
 	if (job_ptr == NULL) {
@@ -362,42 +365,27 @@ int job_step_complete(uint32_t job_id, uint32_t step_id, uid_t uid,
 	step_ptr = find_step_record(job_ptr, step_id);
 	if (step_ptr == NULL)
 		return ESLURM_INVALID_JOB_ID;
+	jobacct_g_step_complete_slurmctld(step_ptr);
 	
-	if (step_ptr->exit_code == NO_VAL) {
-		/* initialize the node bitmap for exited nodes */
-		nodes = bit_set_count(step_ptr->step_node_bitmap);
-		xassert(step_ptr->exit_node_bitmap == NULL);
-		step_ptr->exit_node_bitmap = bit_alloc(nodes);
-		if (step_ptr->exit_node_bitmap == NULL)
-			fatal("bit_alloc: %m");
-		step_ptr->exit_code = job_return_code;
-	}
+	if ((job_ptr->kill_on_step_done)
+	&&  (list_count(job_ptr->step_list) <= 1)
+	&&  (!IS_JOB_FINISHED(job_ptr)))
+		return job_complete(job_id, uid, requeue, job_return_code);
 
-	jobacct_g_step_complete_slurmctld(step_ptr);
-	
 	if ((job_ptr->user_id != uid) && (uid != 0) && (uid != getuid())) {
 		error("Security violation, JOB_COMPLETE RPC from uid %d",
 		      uid);
 		return ESLURM_USER_ID_MISSING;
 	}
-	last_job_update = time(NULL);	
 
+	last_job_update = time(NULL);
 	error_code = delete_step_record(job_ptr, step_id);
 	if (error_code == ENOENT) {
 		info("job_step_complete step %u.%u not found", job_id,
 		     step_id);
 		return ESLURM_ALREADY_DONE;
 	}
-
-	debug2("have %d steps, kill %d, state %d %d", 
-	       list_count(job_ptr->step_list),
-	       job_ptr->kill_on_step_done,
-	       job_ptr->job_state, IS_JOB_FINISHED(job_ptr));
-	if ((job_ptr->kill_on_step_done)
-	    && (list_is_empty(job_ptr->step_list))
-	    && (!IS_JOB_FINISHED(job_ptr)))
-		return job_complete(job_id, uid, requeue, job_return_code);
-
 	return SLURM_SUCCESS;
 }
 
@@ -998,7 +986,7 @@ extern int job_step_checkpoint_comp(checkpoint_comp_msg_t *ckpt_ptr,
 		rc = ESLURM_JOB_PENDING;
 		goto reply;
 	} else if ((job_ptr->job_state != JOB_RUNNING)
-		   && (job_ptr->job_state != JOB_SUSPENDED)) {
+	&&         (job_ptr->job_state != JOB_SUSPENDED)) {
 		rc = ESLURM_ALREADY_DONE;
 		goto reply;
 	}
@@ -1034,7 +1022,7 @@ extern int step_partial_comp(step_complete_msg_t *req, int *rem,
 {
 	struct job_record *job_ptr;
 	struct step_record *step_ptr;
-	int nodes;
+	int nodes, rem_nodes;
 
 	/* find the job, step, and validate input */
 	job_ptr = find_job_record (req->job_id);
@@ -1045,16 +1033,22 @@ extern int step_partial_comp(step_complete_msg_t *req, int *rem,
 	step_ptr = find_step_record(job_ptr, req->job_step_id);
 	if (step_ptr == NULL)
 		return ESLURM_INVALID_JOB_ID;
-	if (req->range_last < req->range_first)
+	if (req->range_last < req->range_first) {
+		error("step_partial_comp: range: %u-%u", req->range_first, 
+			req->range_last);
 		return EINVAL;
+	}
 
 	jobacct_g_aggregate(step_ptr->jobacct, req->jobacct);
 
 	if (step_ptr->exit_code == NO_VAL) {
 		/* initialize the node bitmap for exited nodes */
 		nodes = bit_set_count(step_ptr->step_node_bitmap);
-		if (req->range_last >= nodes)	/* range is zero origin */
+		if (req->range_last >= nodes) {	/* range is zero origin */
+			error("step_partial_comp: last=%u, nodes=%d",
+				req->range_last, nodes);
 			return EINVAL;
+		}
 		xassert(step_ptr->exit_node_bitmap == NULL);
 		step_ptr->exit_node_bitmap = bit_alloc(nodes);
 		if (step_ptr->exit_node_bitmap == NULL)
@@ -1063,18 +1057,144 @@ extern int step_partial_comp(step_complete_msg_t *req, int *rem,
 	} else {
 		xassert(step_ptr->exit_node_bitmap);
 		nodes = _bitstr_bits(step_ptr->exit_node_bitmap);
-		if (req->range_last >= nodes)	/* range is zero origin */
+		if (req->range_last >= nodes) {	/* range is zero origin */
+			error("step_partial_comp: last=%u, nodes=%d",
+				req->range_last, nodes);
 			return EINVAL;
+		}
 		step_ptr->exit_code = MAX(step_ptr->exit_code, req->step_rc);
 	}
 
 	bit_nset(step_ptr->exit_node_bitmap, req->range_first,
 		req->range_last);
+	rem_nodes = bit_clear_count(step_ptr->exit_node_bitmap);
 	if (rem)
-		*rem = bit_clear_count(step_ptr->exit_node_bitmap);
+		*rem = rem_nodes;
+	if (rem_nodes == 0) {
+		/* release all switch windows */
+		if (step_ptr->switch_job) {
+			debug2("full switch release for step %u.%u, "
+				"nodes %s", req->job_id, 
+				req->job_step_id, 
+				step_ptr->step_node_list);
+			switch_g_job_step_complete(
+				step_ptr->switch_job,
+				step_ptr->step_node_list);
+			switch_free_jobinfo (step_ptr->switch_job);
+			step_ptr->switch_job = NULL;
+		}
+	} else if (switch_g_part_comp() && step_ptr->switch_job) {
+		/* release switch windows on completed nodes,
+		 * must translate range numbers to nodelist */
+		hostlist_t hl;
+		char *node_list;
+		int new_size = 8096;
+
+		hl = _step_range_to_hostlist(step_ptr,
+			req->range_first, req->range_last);
+		node_list = (char *) xmalloc(new_size);
+		while (hostlist_ranged_string(hl, new_size,
+				node_list) == -1) {
+			new_size *= 2;
+			xrealloc(node_list, new_size);
+		}
+		debug2("partial switch release for step %u.%u, "
+			"nodes %s", req->job_id, 
+			req->job_step_id, node_list);
+		switch_g_job_step_part_comp(
+			step_ptr->switch_job, node_list);
+		hostlist_destroy(hl);
+		xfree(node_list);
+	}
+
 	if (max_rc)
 		*max_rc = step_ptr->exit_code;
 
 	return SLURM_SUCCESS;
 }
 
+/* convert a range of nodes allocated to a step to a hostlist with 
+ * names of those nodes */
+static hostlist_t _step_range_to_hostlist(struct step_record *step_ptr,
+		uint32_t range_first, uint32_t range_last)
+{
+	int i, node_inx = -1;
+	hostlist_t hl = hostlist_create("");
+
+	for (i = 0; i < node_record_count; i++) {
+		if (bit_test(step_ptr->step_node_bitmap, i) == 0)
+			continue;
+		node_inx++;
+		if ((node_inx >= range_first)
+		&&  (node_inx <= range_last)) {
+			hostlist_push(hl, 
+				node_record_table_ptr[i].name);
+		}
+	}
+
+	return hl;
+}
+
+/* convert a single node name to its offset within a step's
+ * node allocation. Returns -1 on error */
+static int _step_hostname_to_inx(struct step_record *step_ptr,
+		char *node_name)
+{
+	struct node_record *node_ptr;
+	int i, node_inx, node_offset = 0; 
+
+	node_ptr = find_node_record(node_name);
+	if (node_ptr == NULL)
+		return -1;
+	node_inx = node_ptr - node_record_table_ptr;
+
+	for (i = 0; i < node_inx; i++) {
+		if (bit_test(step_ptr->step_node_bitmap, i))
+			node_offset++;
+	}
+	return node_offset;
+}
+
+extern int step_epilog_complete(struct job_record  *job_ptr, 
+		char *node_name)
+{
+	int rc = 0, node_inx, step_offset;
+	ListIterator step_iterator;
+	struct step_record *step_ptr;
+	struct node_record *node_ptr;
+
+	if (!switch_g_part_comp()) {
+		/* don't bother with partial completions */
+		return 0;
+	}
+	if ((node_ptr = find_node_record(node_name)) == NULL)
+		return 0;
+	node_inx = node_ptr - node_record_table_ptr;
+	
+	step_iterator = list_iterator_create(job_ptr->step_list);
+	while ((step_ptr = (struct step_record *) list_next (step_iterator))) {
+		if ((!step_ptr->switch_job)
+		||  (bit_test(step_ptr->step_node_bitmap, node_inx) == 0))
+			continue;
+		if (step_ptr->exit_node_bitmap) {
+			step_offset = _step_hostname_to_inx(
+					step_ptr, node_name);
+			if ((step_offset < 0)
+			||  bit_test(step_ptr->exit_node_bitmap,
+					step_offset))
+				continue;
+			bit_set(step_ptr->exit_node_bitmap,
+				step_offset);
+		}
+		rc++;
+		debug2("partial switch release for step %u.%u, "
+			"epilog on %s", job_ptr->job_id, 
+			step_ptr->step_id, node_name);
+		switch_g_job_step_part_comp(
+			step_ptr->switch_job, node_name);
+	}
+	list_iterator_destroy (step_iterator);
+
+	return rc;
+}
+
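The helpers added to step_mgr.c above rest on one invariant: a step numbers its nodes zero-origin over the set bits of step_node_bitmap, not over raw node-table indices, so translating a completion range into hostnames means counting set bits as you walk the table. Below is a minimal standalone sketch of that same walk; the flag array, name table, and printf are stand-ins of this sketch only, in place of SLURM's bitstr_t, node_record_table_ptr, and hostlist_push().

#include <stdio.h>

/* Stand-ins (assumptions of this sketch, not SLURM code): one flag
 * per node instead of bitstr_t, a name table instead of
 * node_record_table_ptr, printf instead of hostlist_push(). */
#define NODE_CNT 5
static const char *node_names[NODE_CNT] =
	{ "tux0", "tux1", "tux2", "tux3", "tux4" };
static const int step_node_bitmap[NODE_CNT] = { 1, 0, 1, 1, 0 };

/* Print names of the step's nodes whose zero-origin offset, counted
 * over allocated nodes only, lies in [range_first, range_last]. */
static void step_range_to_names(int range_first, int range_last)
{
	int i, node_inx = -1;

	for (i = 0; i < NODE_CNT; i++) {
		if (!step_node_bitmap[i])
			continue;	/* node not in this step */
		node_inx++;		/* offset within the step */
		if ((node_inx >= range_first) && (node_inx <= range_last))
			printf("%s\n", node_names[i]);
	}
}

int main(void)
{
	step_range_to_names(1, 2);	/* prints tux2 and tux3 */
	return 0;
}

The step's nodes here are tux0, tux2, and tux3 (offsets 0, 1, 2), so the range 1-2 selects tux2 and tux3, skipping the unallocated tux1 entirely.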
diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c
index 340058c141c..6b21305edc7 100644
--- a/src/slurmd/slurmd/req.c
+++ b/src/slurmd/slurmd/req.c
@@ -665,8 +665,8 @@ _rpc_launch_tasks(slurm_msg_t *msg)
 	}
 
 	slurm_get_ip_str(cli, &port, host, sizeof(host));
-	info("launch task %u.%u request from %u.%u@%s", req->job_id, 
-	     req->job_step_id, req->uid, req->gid, host);
+	info("launch task %u.%u request from %u.%u@%s (port %hu)", req->job_id,
+	     req->job_step_id, req->uid, req->gid, host, port);
 
 	if (_check_job_credential(req->cred, jobid, stepid, req_uid,
 				  req->tasks_to_launch[req->srun_node_id],
diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c
index d8f31496d38..48a8002bea6 100644
--- a/src/slurmd/slurmstepd/mgr.c
+++ b/src/slurmd/slurmstepd/mgr.c
@@ -1381,7 +1381,6 @@ _complete_batch_script(slurmd_job_t *job, int err, int status)
 	complete_batch_script_msg_t  req;
 
 	req.job_id	= job->jobid;
-	req.step_id	= job->stepid;
 	req.job_rc      = status;
 	req.slurm_rc	= err; 
 		
@@ -1392,8 +1391,7 @@ _complete_batch_script(slurmd_job_t *job, int err, int status)
 	req_msg.ret_list = NULL;
 	req_msg.forward_struct_init = 0;
 	
-	info("sending REQUEST_COMPLETE_BATCH_SCRIPT %u.%u %d",
-	     job->jobid, job->stepid, status);
+	info("sending REQUEST_COMPLETE_BATCH_SCRIPT");
 
 	/* Note: these log messages don't go to slurmd.log from here */
 	for (i=0; i<=MAX_RETRY; i++) {
diff --git a/src/slurmd/slurmstepd/req.c b/src/slurmd/slurmstepd/req.c
index a1c8073e5dc..b1527e4893e 100644
--- a/src/slurmd/slurmstepd/req.c
+++ b/src/slurmd/slurmstepd/req.c
@@ -995,6 +995,7 @@ _handle_completion(int fd, slurmd_job_t *job, uid_t uid)
 	int last;
 	jobacctinfo_t *jobacct = NULL;
 	int step_rc;
+/* 	char bits_string[128]; */
 
 	debug("_handle_completion for job %u.%u",
 	      job->jobid, job->stepid);
@@ -1021,9 +1022,16 @@ _handle_completion(int fd, slurmd_job_t *job, uid_t uid)
 	 * Record the completed nodes
 	 */
 	pthread_mutex_lock(&step_complete.lock);
+/* 	debug2("Setting range %d(bit %d) through %d(bit %d)", */
+/* 	       first, first-(step_complete.rank+1), */
+/* 	       last, last-(step_complete.rank+1)); */
+/* 	bit_fmt(bits_string, 128, step_complete.bits); */
+/* 	debug2("  before bits: %s", bits_string); */
 	bit_nset(step_complete.bits,
 		 first - (step_complete.rank+1),
 		 last - (step_complete.rank+1));
+/* 	bit_fmt(bits_string, 128, step_complete.bits); */
+/* 	debug2("  after bits: %s", bits_string); */
 	step_complete.step_rc = MAX(step_complete.step_rc, step_rc);
 	
 	/************* acct stuff ********************/
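The offset arithmetic in the hunk above comes from slurmstepd's fan-out tree: children report completion ranges in global rank numbering, while each tree node stores one bit per descendant, with bit 0 standing for its own rank plus one. A toy model of just that index translation follows; the array, rank value, and function name are invented for this example and are not slurmstepd's real structures.

#include <stdio.h>

/* A tree node of rank R keeps one flag per descendant; flag 0
 * stands for global rank R+1 (a sketch of step_complete.bits and
 * step_complete.rank, not the real data structures). */
#define NUM_DESCENDANTS 8
static char bits[NUM_DESCENDANTS];
static const int my_rank = 3;

/* Record that global ranks [first, last] have completed. */
static void record_completion(int first, int last)
{
	int i;

	for (i = first; i <= last; i++)
		bits[i - (my_rank + 1)] = 1;
}

int main(void)
{
	int i;

	record_completion(4, 6);	/* global 4-6 -> local flags 0-2 */
	for (i = 0; i < NUM_DESCENDANTS; i++)
		printf("%d", bits[i]);
	printf("\n");			/* prints 11100000 */
	return 0;
}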
diff --git a/src/smap/job_functions.c b/src/smap/job_functions.c
index 151a85365bb..7ac434bbfc4 100644
--- a/src/smap/job_functions.c
+++ b/src/smap/job_functions.c
@@ -83,7 +83,6 @@ extern void get_job()
 		
 		if ((job.job_state != JOB_PENDING)
 		    &&  (job.job_state != JOB_RUNNING)
-		    &&  (job.job_state != JOB_DEALLOCATING)
 		    &&  (job.job_state != JOB_SUSPENDED)
 		    &&  ((job.job_state & JOB_COMPLETING) == 0))
 			continue;	/* job has completed */
diff --git a/src/squeue/print.c b/src/squeue/print.c
index b79668ab189..a37445ddff2 100644
--- a/src/squeue/print.c
+++ b/src/squeue/print.c
@@ -1110,7 +1110,6 @@ static int _filter_job(job_info_t * job)
 	} else {
 		if ((job->job_state != JOB_PENDING)
 		&&  (job->job_state != JOB_RUNNING)
-		&&  (job->job_state != JOB_DEALLOCATING)
 		&&  (job->job_state != JOB_SUSPENDED)
 		&&  ((job->job_state & JOB_COMPLETING) == 0))
 			return 4;
diff --git a/src/srun/opt.c b/src/srun/opt.c
index 64b7d794d9b..08432d596cd 100644
--- a/src/srun/opt.c
+++ b/src/srun/opt.c
@@ -1554,7 +1554,7 @@ static void _load_multi(int *argc, char **argv)
 			argv[0]);
 		exit(1);
 	}
-	data_buf = xmalloc(stat_buf.st_size);
+	data_buf = xmalloc(stat_buf.st_size + 1);
 	while ((i = read(config_fd, &data_buf[data_read], stat_buf.st_size 
 			- data_read)) != 0) {
 		if (i < 0) {
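The one-byte change in opt.c is the --multi-prog overrun fix noted in NEWS: the config file is read into the buffer and then handled as a NUL-terminated string, so the allocation must exceed st_size by one. A standalone sketch of the safe pattern follows, with plain malloc/read in place of SLURM's xmalloc and most error handling trimmed.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

/* Read a whole file into a NUL-terminated buffer.  The "+ 1" is the
 * point of the fix: without it, treating the buffer as a string reads
 * one byte past the end of the allocation. */
static char *slurp_file(const char *path)
{
	struct stat st;
	char *buf;
	ssize_t n, off = 0;
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return NULL;
	if (fstat(fd, &st) < 0) {
		close(fd);
		return NULL;
	}
	buf = malloc(st.st_size + 1);		/* room for the NUL */
	if (buf == NULL) {
		close(fd);
		return NULL;
	}
	while (off < st.st_size &&
	       (n = read(fd, buf + off, st.st_size - off)) > 0)
		off += n;
	buf[off] = '\0';			/* terminate */
	close(fd);
	return buf;
}

int main(int argc, char **argv)
{
	char *text = (argc > 1) ? slurp_file(argv[1]) : NULL;

	if (text) {
		printf("read %zu bytes\n", strlen(text));
		free(text);
	}
	return 0;
}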
diff --git a/testsuite/expect/globals b/testsuite/expect/globals
index de40a675cae..8288d5d9bd7 100755
--- a/testsuite/expect/globals
+++ b/testsuite/expect/globals
@@ -90,6 +90,7 @@ cset bin_bash   [exec which bash]
 cset bin_cat	"cat"
 cset bin_cc	"gcc"
 cset bin_chmod	"chmod"
+cset bin_cmp	"cmp"
 cset bin_cp	"cp"
 cset bin_diff	"diff"
 cset bin_echo	"echo"
diff --git a/testsuite/expect/test14.4 b/testsuite/expect/test14.4
index b5206cd01af..07f176b4c20 100755
--- a/testsuite/expect/test14.4
+++ b/testsuite/expect/test14.4
@@ -52,20 +52,23 @@ if {[test_front_end] != 0} {
 #
 # Delete left-over stdout/err files
 # Build input script file that broadcasts a file
+# NOTE: we broadcast the file "sbcast", just for convenience
 #
 set pid         [pid]
 set file1       "/tmp/test.$pid.1.$test_id"
 set file2       "/tmp/test.$pid.2.$test_id"
 exec $bin_rm -f $file_out $file_err
 make_bash_script $file_in "
-  $srun $bin_touch $file1
+  $srun $bin_rm -f $file1
+  $srun $bin_echo dummy >$file1
   $sbcast $sbcast $file1
-  $srun $bin_diff $sbcast $file1
+  $srun $bin_cmp $sbcast $file1
   $srun $bin_rm -f $file1
 
-  $srun $bin_touch $file2
+  $srun $bin_rm -f $file2
+  $srun $bin_echo dummy >$file2
   $sbcast $sbcast --force $file2
-  $srun $bin_diff $sbcast $file2
+  $srun $bin_cmp $sbcast $file2
   $srun $bin_rm -f $file2
 "
 
diff --git a/testsuite/slurm_unit/common/bitstring-test.c b/testsuite/slurm_unit/common/bitstring-test.c
index 0e3338f2fd2..8fe079e50a1 100644
--- a/testsuite/slurm_unit/common/bitstring-test.c
+++ b/testsuite/slurm_unit/common/bitstring-test.c
@@ -8,7 +8,7 @@
 /* Test for failure: 
 */
 #define TEST(_tst, _msg) do {			\
-	if (! _tst) 				\
+	if (! (_tst))				\
 		fail( _msg );       \
 	else					\
 		pass( _msg );       \
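The one-character bitstring-test change is a macro-hygiene fix: `!` binds more tightly than binary operators such as `&`, so an unparenthesized macro argument gets negated term-by-term instead of as a whole. A self-contained demonstration follows; TEST_BAD and TEST_GOOD are invented names for the before and after forms of the macro's condition.

#include <stdio.h>

#define TEST_BAD(_tst)	(! _tst)	/* the pre-fix expansion */
#define TEST_GOOD(_tst)	(! (_tst))	/* the fixed expansion */

int main(void)
{
	/* "1 & 2" is 0, so its negation should be true (1).  Without
	 * parentheses the expansion is "(!1) & 2" == "0 & 2" == 0,
	 * the opposite answer. */
	printf("unparenthesized: %d\n", TEST_BAD(1 & 2));	/* 0 (wrong) */
	printf("parenthesized:   %d\n", TEST_GOOD(1 & 2));	/* 1 (right) */
	return 0;
}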
-- 
GitLab