From f05e7f763a56ffe6641dba42ef0a1837b866376e Mon Sep 17 00:00:00 2001
From: Mark Grondona <mgrondona@llnl.gov>
Date: Wed, 23 Oct 2002 15:30:57 +0000
Subject: [PATCH]  o Check in of new slurmd codebase. Fixes for many bugs in
 old slurmd.

---
 src/common/util_signals.c               |   87 --
 src/common/util_signals.h               |    9 -
 src/slurmd/Makefile.am                  |   50 +-
 src/slurmd/circular_buffer.c            |  485 ---------
 src/slurmd/circular_buffer.h            |  117 ---
 src/slurmd/cntl.h                       |   40 -
 src/slurmd/elan_interconnect.c          |   97 +-
 src/slurmd/forked_ctrl.c                |  107 --
 src/slurmd/forked_io.c                  |  165 ----
 src/slurmd/interconnect.h               |   43 +-
 src/slurmd/io.c                         | 1202 +++++++++++++++--------
 src/slurmd/io.h                         |  115 ++-
 src/slurmd/io_threads.c                 |  362 -------
 src/slurmd/job.c                        |  272 +++++
 src/slurmd/job.h                        |  123 +++
 src/slurmd/locks.c                      |  164 ----
 src/slurmd/locks.h                      |  115 ---
 src/slurmd/mgr.c                        |  312 ++++++
 src/slurmd/{reconnect_utils.h => mgr.h} |   39 +-
 src/slurmd/nbio.c                       |  741 --------------
 src/slurmd/nbio.h                       |   32 -
 src/slurmd/no_interconnect.c            |   43 +-
 src/slurmd/pipes.c                      |  101 --
 src/slurmd/pipes.h                      |   58 --
 src/slurmd/reconnect_utils.c            |  152 ---
 src/slurmd/semaphore.c                  |  321 ++++++
 src/slurmd/semaphore.h                  |   93 ++
 src/slurmd/setenvpf.c                   |    9 +-
 src/slurmd/setenvpf.h                   |   25 +
 src/slurmd/shm.c                        |  763 ++++++++++++++
 src/slurmd/shm.h                        |  229 +++++
 src/slurmd/shmem_struct.c               |  263 -----
 src/slurmd/shmem_struct.h               |   82 --
 src/slurmd/slurmd.c                     |  171 ++--
 src/slurmd/task_mgr.c                   |  384 --------
 src/slurmd/task_mgr.h                   |  102 --
 src/slurmd/threaded_ctrl.c              |   89 --
 src/slurmd/threaded_io.c                |  114 ---
 38 files changed, 3255 insertions(+), 4421 deletions(-)
 delete mode 100644 src/common/util_signals.c
 delete mode 100644 src/common/util_signals.h
 delete mode 100644 src/slurmd/circular_buffer.c
 delete mode 100644 src/slurmd/circular_buffer.h
 delete mode 100644 src/slurmd/cntl.h
 delete mode 100644 src/slurmd/forked_ctrl.c
 delete mode 100644 src/slurmd/forked_io.c
 delete mode 100644 src/slurmd/io_threads.c
 create mode 100644 src/slurmd/job.c
 create mode 100644 src/slurmd/job.h
 delete mode 100644 src/slurmd/locks.c
 delete mode 100644 src/slurmd/locks.h
 create mode 100644 src/slurmd/mgr.c
 rename src/slurmd/{reconnect_utils.h => mgr.h} (56%)
 delete mode 100644 src/slurmd/nbio.c
 delete mode 100644 src/slurmd/nbio.h
 delete mode 100644 src/slurmd/pipes.c
 delete mode 100644 src/slurmd/pipes.h
 delete mode 100644 src/slurmd/reconnect_utils.c
 create mode 100644 src/slurmd/semaphore.c
 create mode 100644 src/slurmd/semaphore.h
 create mode 100644 src/slurmd/shm.c
 create mode 100644 src/slurmd/shm.h
 delete mode 100644 src/slurmd/shmem_struct.c
 delete mode 100644 src/slurmd/shmem_struct.h
 delete mode 100644 src/slurmd/task_mgr.c
 delete mode 100644 src/slurmd/task_mgr.h
 delete mode 100644 src/slurmd/threaded_ctrl.c
 delete mode 100644 src/slurmd/threaded_io.c

diff --git a/src/common/util_signals.c b/src/common/util_signals.c
deleted file mode 100644
index 03b773a0d85..00000000000
--- a/src/common/util_signals.c
+++ /dev/null
@@ -1,87 +0,0 @@
-#include <signal.h>
-#include <errno.h>
-#include <src/common/log.h>
-#include <src/common/slurm_errno.h>
-#include <src/common/util_signals.h> 
-int posix_signal_pipe_ignore ()
-{
-	return posix_signal_ignore ( SIGPIPE ) ;
-}
-
-int posix_signal_ignore ( int signal )
-{
-	struct sigaction newaction ;
-        struct sigaction oldaction ;
-	newaction . sa_handler = SIG_IGN ;
-	if ( sigaction( signal , &newaction, &oldaction) )/* ignore tty input */
-	{
-		error ("posix_signal_ignore: sigaction %m errno %d", errno);
-		return SLURM_ERROR ;
-	}
-	return SLURM_SUCCESS ;
-}
-
-int unblock_all_signals_pthread ( )
-{
-	sigset_t set;
-	if (sigfillset (&set))
-	{
-		error ("unblock_all_signals_pthread: sigfillset %m errno %d", errno);
-		return SLURM_ERROR ;
-	}
-	if (pthread_sigmask (SIG_UNBLOCK, &set, NULL))
-	{
-		error ("unblock_all_signals_pthread: pthread_sigmask %m errno %d", errno);
-		return SLURM_ERROR ;
-	}
-	return SLURM_SUCCESS ;
-}
-
-int block_all_signals_pthread ( )
-{
-	sigset_t set;
-	if (sigfillset (&set))
-	{
-		error ("block_all_signals_pthread: sigfillset %m errno %d", errno);
-		return SLURM_ERROR ;
-	}
-	if (pthread_sigmask (SIG_BLOCK, &set, NULL))
-	{
-		error ("block_all_signals_pthread: pthread_sigmask %m errno %d", errno);
-		return SLURM_ERROR ;
-	}
-	return SLURM_SUCCESS ;
-}
-
-int unblock_all_signals ( )
-{
-	sigset_t set;
-	if (sigfillset (&set))
-	{
-		error ("unblock_all_signals: sigfillset %m errno %d", errno);
-		return SLURM_ERROR ;
-	}
-	if (sigprocmask (SIG_UNBLOCK, &set, NULL))
-	{
-		error ("unblock_all_signals: sigprocmask %m errno %d", errno);
-		return SLURM_ERROR ;
-	}
-	return SLURM_SUCCESS ;
-}
-
-int block_all_signals ( )
-{
-	sigset_t set;
-	if (sigfillset (&set))
-	{
-		error ("block_all_signals: sigfillset %m errno %d", errno);
-		return SLURM_ERROR ;
-	}
-	if (sigprocmask (SIG_BLOCK, &set, NULL))
-	{
-		error ("block_all_signals: sigprocmask %m errno %d", errno);
-		return SLURM_ERROR ;
-	}
-	return SLURM_SUCCESS ;
-}
-
diff --git a/src/common/util_signals.h b/src/common/util_signals.h
deleted file mode 100644
index 1ac345f8a05..00000000000
--- a/src/common/util_signals.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _UTIL_SIGNALS_H
-#define _UTIL_SIGNALS_H
-int posix_signal_pipe_ignore () ;
-int posix_signal_ignore ( int signal ) ;
-int unblock_all_signals_pthread ( ) ;
-int block_all_signals_pthread ( ) ;
-int unblock_all_signals ( ) ;
-int block_all_signals ( ) ;
-#endif
diff --git a/src/slurmd/Makefile.am b/src/slurmd/Makefile.am
index 5245377e4a0..c1098d33420 100644
--- a/src/slurmd/Makefile.am
+++ b/src/slurmd/Makefile.am
@@ -7,20 +7,12 @@ sbin_PROGRAMS = slurmd
 
 if WITH_ELAN
 interconnect_lib = libelan_interconnect.la
-process_lib = libforked.la
-io_lib = libnbio.la
-noinst_LTLIBRARIES = \
-	libelan_interconnect.la	\
-	libforked.la		\
-	libnbio.la
+noinst_LTLIBRARIES = 		\
+	libelan_interconnect.la	
 else
 interconnect_lib = libno_interconnect.la
-process_lib = libthreaded.la
-io_lib = libthreaded_io.la
-noinst_LTLIBRARIES = \
-	libno_interconnect.la	\
-	libthreaded.la		\
-	libthreaded_io.la
+noinst_LTLIBRARIES = 		\
+	libno_interconnect.la	
 endif
 
 LDADD = $(top_srcdir)/src/common/libcommon.la \
@@ -28,29 +20,21 @@ LDADD = $(top_srcdir)/src/common/libcommon.la \
 	$(top_srcdir)/src/common/libcred.la \
 	$(SSL_LIBS)
 
-slurmd_LDADD = $(process_lib) $(io_lib) $(LDADD) $(interconnect_lib)
+slurmd_LDADD = $(LDADD) $(interconnect_lib)
 	
-common_sources = slurmd.c \
-		batch_mgr.c \
-		get_mach_stat.c \
-		read_proc.c \
-		task_mgr.c \
-		shmem_struct.c \
-		circular_buffer.c \
-		pipes.c \
-		locks.c \
-		setenvpf.c
+common_sources = 	        \
+	slurmd.c 	        \
+	mgr.c			\
+	get_mach_stat.c         \
+	read_proc.c 	        \
+	job.c job.h		\
+	io.c io.h		\
+	semaphore.c semaphore.h	\
+	shm.c shm.h		\
+	setenvpf.c setenvpf.h
 
 slurmd_SOURCES = $(common_sources)
 
-libforked_la_SOURCES = forked_ctrl.c
-libthreaded_la_SOURCES = threaded_ctrl.c
-#libforked_io_la_SOURCES = forked_io.c reconnect_utils.c io_threads.c 
-libthreaded_io_la_SOURCES = threaded_io.c reconnect_utils.c io_threads.c 
-
-libnbio_la_SOURCES =  nbio.c reconnect_utils.c
-
-libelan_interconnect_la_SOURCES =  elan_interconnect.c
-libno_interconnect_la_SOURCES =  no_interconnect.c 
-
+libelan_interconnect_la_SOURCES = elan_interconnect.c
+libno_interconnect_la_SOURCES   = no_interconnect.c
 
diff --git a/src/slurmd/circular_buffer.c b/src/slurmd/circular_buffer.c
deleted file mode 100644
index 40b2c5afbd6..00000000000
--- a/src/slurmd/circular_buffer.c
+++ /dev/null
@@ -1,485 +0,0 @@
-/*****************************************************************************\
- *  circular_buffers.c - 
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <http://www.llnl.gov/linux/slurm/>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-\*****************************************************************************/
-
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-#include <src/common/log.h>
-#include <src/common/xmalloc.h>
-#include <src/common/slurm_errno.h>
-
-#include <src/slurmd/circular_buffer.h>
-
-#define DEF_INITIAL_BUFFER_SIZE 	8192
-#define DEF_INCREMENTAL_BUFFER_SIZE 	8192
-#define DEF_MAX_BUFFER_SIZE 		( ( 8192 * 10 ) )
-#define BUFFER_FULL_DUMP_SIZE 		( buf->min_size/2 )
-
-static int assert_checks(circular_buffer_t * buf);
-static int assert_checks_2(circular_buffer_t * buf);
-static int expand_buffer(circular_buffer_t * buf);
-static int shrink_buffer(circular_buffer_t * buf);
-static void common_init(circular_buffer_t * buf);
-
-void free_circular_buffer(circular_buffer_t * buf_ptr)
-{
-	if (buf_ptr) {
-		if (buf_ptr->buffer) 
-			xfree(buf_ptr->buffer);
-		xfree(buf_ptr);
-	}
-}
-
-int init_circular_buffer(circular_buffer_t ** buf_ptr)
-{
-	circular_buffer_t *buf;
-	*buf_ptr = xmalloc(sizeof(circular_buffer_t));
-	buf = *buf_ptr;
-
-	buf->min_size = DEF_INITIAL_BUFFER_SIZE;
-	buf->max_size = DEF_MAX_BUFFER_SIZE;
-	buf->incremental_size = DEF_INCREMENTAL_BUFFER_SIZE;
-
-	common_init(buf);
-
-	return SLURM_SUCCESS;
-}
-
-int init_circular_buffer2(circular_buffer_t ** buf_ptr, int min_size,
-			  int max_size, int incremental_size)
-{
-	circular_buffer_t *buf;
-	*buf_ptr = xmalloc(sizeof(circular_buffer_t));
-	buf = *buf_ptr;
-
-	buf->min_size = min_size;
-	buf->max_size = max_size;
-	buf->incremental_size = incremental_size;
-
-	common_init(buf);
-
-	return SLURM_SUCCESS;
-}
-
-static void common_init(circular_buffer_t * buf)
-{
-	buf->buffer = xmalloc(buf->min_size);
-	buf->buf_size = buf->min_size;
-
-	buf->start = buf->buffer;
-	buf->end = buf->start + buf->buf_size;
-
-	buf->head = buf->start;
-	buf->tail = buf->start;
-
-	buf->read_size = 0;
-	buf->write_size = buf->min_size;
-}
-
-void print_circular_buffer(circular_buffer_t * buf)
-{
-	info("--");
-	info("buffer  %X", buf->buffer);
-	info("start   %X", buf->start);
-	info("end     %X", buf->end);
-	info("head    %X", buf->head);
-	info("tail    %X", buf->tail);
-	info("rhead   %i", buf->head - buf->start);
-	info("rtail   %i", buf->tail - buf->start);
-	info("size    %i", buf->buf_size);
-	info("read s  %i", buf->read_size);
-	info("write s %i", buf->write_size);
-}
-
-int cir_buf_read_update(circular_buffer_t * buf, unsigned int size)
-{
-	/*if zero read, just return */
-	if (size == 0) {
-		debug("zero length read in cirular buffer");
-		return SLURM_SUCCESS;
-	}
-
-	/* before modifing the buffer lets do some sanity checks */
-	assert(size <= buf->read_size);
-	assert_checks(buf);
-	assert_checks_2(buf);
-
-	/* modify head position of the buffer */
-	buf->head = buf->head + size;
-
-	/* after modifing the buffer lets do some sanity checks */
-	assert_checks(buf);
-
-	/* take care of wrap around issues */
-	if (buf->tail > buf->head) {	/* CASE tail after head */
-		buf->read_size = buf->tail - buf->head;
-		buf->write_size = buf->end - buf->tail;
-	} else if (buf->tail < buf->head) {	/* CASE tail befpre head */
-		if (buf->head == buf->end) {	/* CASE tail == end */
-			if (buf->tail == buf->start) {	/* CASE head == start */
-				/* buffer empty */
-				shrink_buffer(buf);
-			} else {
-				buf->head = buf->start;
-				buf->read_size = buf->tail - buf->head;
-				buf->write_size = buf->end - buf->tail;
-			}
-		} else {
-			buf->read_size = buf->end - buf->head;
-			buf->write_size = buf->head - buf->tail;
-		}
-	} else if (buf->tail == buf->head) {	/* CASE head == tail */
-		/* buffer empty */
-		shrink_buffer(buf);
-	}
-
-	/* final sanity check */
-	assert_checks(buf);
-	assert_checks_2(buf);
-
-	return SLURM_SUCCESS;
-}
-
-int cir_buf_write_update(circular_buffer_t * buf, unsigned int size)
-{
-	/* if zero read, just return */
-	if (size == 0) {
-		info("zero length write in cirular buffer");
-		return SLURM_SUCCESS;
-	}
-
-	/* before modifing the buffer lets do some sanity checks */
-	assert(size <= buf->write_size);
-	assert_checks(buf);
-	assert_checks_2(buf);
-
-	/*modify headning position of the buffer */
-	buf->tail = buf->tail + size;
-
-	/* after modifing the buffer lets do some sanity checks */
-	assert_checks(buf);
-
-	/* take care of wrap around issues */
-	if (buf->tail > buf->head) {	/* CASE tail after head */
-		if (buf->tail == buf->end) {	/* CASE tail == end */
-			if (buf->head == buf->start) {	/* CASE head == start */
-				/* buffer full */
-				buf->write_size -= size;
-				buf->read_size += size;
-				expand_buffer(buf);
-			} else {
-				buf->tail = buf->start;
-				buf->write_size = buf->head - buf->tail;
-				buf->read_size = buf->end - buf->head;
-			}
-		} else {
-			buf->write_size = buf->end - buf->tail;
-			buf->read_size = buf->tail - buf->head;
-		}
-	} else if (buf->tail < buf->head) {	/* CASE tail before head */
-		buf->write_size = buf->head - buf->tail;
-		buf->read_size = buf->end - buf->head;
-	} else if (buf->tail == buf->head) {	/* CASE head == tail */
-		/* buffer full */
-		buf->write_size -= size;
-		buf->read_size += size;
-		expand_buffer(buf);
-	}
-
-	/* final sanity check */
-	assert_checks(buf);
-	assert_checks_2(buf);
-	return SLURM_SUCCESS;
-}
-
-static int assert_checks_2(circular_buffer_t * buf)
-{
-	/* sanity checks */
-
-	/* head pointer is between start and end */
-	assert(buf->head >= buf->start);
-	assert(buf->head < buf->end);
-
-	/* tail pointer is between start and end */
-	assert(buf->tail >= buf->start);
-	assert(buf->tail < buf->end);
-
-	if (buf->tail > buf->head) {
-		assert(buf->write_size == buf->end - buf->tail);
-		assert(buf->read_size == buf->tail - buf->head);
-	} else if (buf->tail < buf->head) {
-		assert(buf->write_size == buf->head - buf->tail);
-		assert(buf->read_size == buf->end - buf->head);
-	} else if (buf->tail == buf->head) {
-		assert(buf->write_size == buf->buf_size);
-		assert(buf->read_size == 0);
-	}
-
-	return SLURM_SUCCESS;
-}
-
-static int assert_checks(circular_buffer_t * buf)
-{
-	/* sanity checks */
-	/* insures that dump data when MAX_BUFFER_SIZE is full will work correctly */
-
-	assert(buf != NULL);	/* buf struct is not null */
-	assert(buf->start == buf->buffer);	/* stat hasn't moved */
-	assert((buf->start) < (buf->end));	/* buf_end is after start */
-	assert(buf->end - buf->start == buf->buf_size);	/* buffer start and end haven't moved */
-
-	/* head pointer is between start and end */
-	assert(buf->head >= buf->start);
-	assert(buf->head <= buf->end);
-
-	/* tail pointer is between start and end */
-	assert(buf->tail >= buf->start);
-	assert(buf->tail <= buf->end);
-
-	return SLURM_SUCCESS;
-}
-
-static int shrink_buffer(circular_buffer_t * buf)
-{
-	char *new_buffer;
-
-	if (buf->buf_size == buf->min_size) {
-		/*      info ( "circular buffer at minimum" ) ; */
-
-		buf->head = buf->start;
-		buf->tail = buf->start;
-
-		buf->read_size = 0;
-		buf->write_size = buf->min_size;
-
-		return SLURM_SUCCESS;
-	} else {
-		new_buffer = xmalloc(buf->min_size);
-		xfree(buf->buffer);
-		buf->buffer = new_buffer;
-		buf->buf_size = buf->min_size;
-
-		buf->start = new_buffer;
-		buf->end = new_buffer + buf->min_size;
-
-		buf->head = new_buffer;
-		buf->tail = new_buffer;
-
-		buf->read_size = 0;
-		buf->write_size = buf->min_size;
-
-		return SLURM_SUCCESS;
-	}
-}
-
-static int expand_buffer(circular_buffer_t * buf)
-{
-	char *new_buffer;
-	int data_size;
-	int data_size_blk1;
-	int data_size_blk2;
-
-	debug3("expanding circular buffer");
-	/* print_circular_buffer(buf); */
-
-	/* buffer has reached its maximum size going to dump some data 
-	 * out the bit bucket 
-	 */
-	if (buf->buf_size == buf->max_size) {
-
-		debug3("circular buffer maxed out, dumping %d bytes of data", 
-				BUFFER_FULL_DUMP_SIZE);
-
-		if (buf->tail - buf->start >= BUFFER_FULL_DUMP_SIZE) {
-			buf->tail = buf->tail - BUFFER_FULL_DUMP_SIZE;
-			buf->write_size = BUFFER_FULL_DUMP_SIZE;
-
-			if (buf->tail > buf->head) /* CASE tail after head */
-				buf->read_size -= BUFFER_FULL_DUMP_SIZE;
-
-			/* CASE tail befpre head */
-			/* read_size stays the same */
-
-		} else {
-			int datasize_blk1 = buf->tail - buf->start;
-			int datasize_blk2 =
-			    BUFFER_FULL_DUMP_SIZE - datasize_blk1;
-			buf->tail = buf->end - datasize_blk2;
-			buf->write_size = datasize_blk2;
-			buf->read_size = buf->tail - buf->head;
-		}
-
-		return SLURM_SUCCESS;
-	}
-
-	if (buf->tail > buf->head) {
-		new_buffer =
-		    xmalloc(buf->buf_size + buf->incremental_size);
-		data_size = buf->tail - buf->head;
-		memcpy(new_buffer, buf->head, data_size);
-		xfree(buf->buffer);
-
-	} else if (buf->tail <= buf->head) {	/* CASE B */
-		new_buffer =
-		    xmalloc(buf->buf_size + buf->incremental_size);
-		data_size_blk1 = buf->end - buf->head;
-		data_size_blk2 = buf->tail - buf->start;
-		data_size = data_size_blk1 + data_size_blk2;
-		memcpy(new_buffer, buf->head, data_size_blk1);
-		memcpy(new_buffer + data_size_blk1, buf->start,
-		       data_size_blk2);
-		xfree(buf->buffer);
-	} else {
-		fatal("Logical impossibility found in circular buffer");
-	}
-
-	/* set up new state variables 
-	 * !!!statement order below does matter 
-	 */
-	buf->buffer = new_buffer;
-	buf->start = new_buffer;
-	buf->head = new_buffer;
-	buf->tail = new_buffer + data_size;
-	buf->end = new_buffer + buf->buf_size + buf->incremental_size;
-	buf->buf_size += buf->incremental_size;
-	buf->read_size = data_size;
-	buf->write_size = buf->end - buf->tail;
-
-	return SLURM_SUCCESS;
-}
-
-int cir_buf_get_line(circular_buffer_t * buf, cir_buf_line_t * line)
-{
-	char *tmp_head;
-	size_t tmp_length = 0;
-	size_t tmp_length2 = 0;
-
-	/* stage one */
-	line->line[0] = buf->head;
-	tmp_head = buf->head;
-
-	while (true) {
-		/* check for max line length for transmit */
-		if (tmp_length >= line->max_line_length) {
-			debug("cir_buf_get_line: max line length reached");
-			line->line_count = 1;
-			line->line_length[0] = tmp_length;
-			return SLURM_SUCCESS;
-		}
-
-		/* physical end of buffer reached need to wrap */
-		if (tmp_head >= buf->end) {
-			debug("cir_buf_get_line: end of buffer reached");
-			line->line_length[0] = tmp_length;
-			break;
-		}
-
-		/* logical end of data reached, we are done, 
-		 * no more data in buffer 
-		 */
-		if (tmp_head == buf->tail) {
-			debug("cir_buf_get_line: end of data");
-			line->line_count = 1;
-			line->line_length[0] = tmp_length;
-			return SLURM_SUCCESS;
-		}
-
-		/* new line found */
-		if (*tmp_head == '\n') {
-			debug("cir_buf_get_line: newline character found");
-			line->line_count = 1;
-			line->line_length[0] = tmp_length + 1;
-			return SLURM_SUCCESS;
-		}
-
-		tmp_head++;
-		tmp_length++;
-	}
-
-	/* stage one */
-	line->line[1] = buf->start;
-	tmp_head = buf->start;
-
-	while (true) {
-		/* max line length for transmit reached */
-		if (tmp_length >= line->max_line_length) {
-			debug("cir_buf_get_line: max line length reached");
-			line->line_count = 2;
-			line->line_length[1] = tmp_length2;
-			return SLURM_SUCCESS;
-		}
-
-		/* physical end of buffer reached this shouldn't happen in stage two */
-		if (tmp_head >= buf->end) {
-			error("VERY BAD - End of buffer reached in stage 2");
-			line->line_length[1] = tmp_length2;
-			break;
-		}
-
-		/* XXX: Is this if block redundant or what? */
-		/* 
-		if (*tmp_head == '\n') {
-			debug("cir_buf_get_line: newline character found");
-			line->line_count = 1;
-			line->line_length[0] = tmp_length + 1;
-			return SLURM_SUCCESS;
-		}
-		*/
-		if (tmp_head == buf->tail) {
-			debug("cir_buf_get_line: end of data reached");
-			line->line_count = 2;
-			line->line_length[1] = tmp_length2;
-			return SLURM_SUCCESS;
-		}
-		/* new line found */
-		if (*tmp_head == '\n') {
-			info("New line character found in ");
-			line->line_count = 1;
-			line->line_length[0] = tmp_length2 + 1;
-			return SLURM_SUCCESS;
-		}
-
-		tmp_head++;
-		tmp_length++;
-		tmp_length2++;
-	}
-
-	return SLURM_SUCCESS;
-}
-
-int cir_buf_update_line(circular_buffer_t * buf, cir_buf_line_t * line)
-{
-	int i;
-	if (line->line_count > 2) {
-		error(" VERY BAD line -> line_count is too big %i ",
-		     line->line_count);
-		return SLURM_ERROR;
-	}
-	for (i = 0; i < line->line_count; i++) {
-		cir_buf_write_update(buf, line->line_length[i]);
-	}
-	return SLURM_SUCCESS;
-}
diff --git a/src/slurmd/circular_buffer.h b/src/slurmd/circular_buffer.h
deleted file mode 100644
index 995418129fa..00000000000
--- a/src/slurmd/circular_buffer.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*****************************************************************************\
- *  circular_buffer.h
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <http://www.llnl.gov/linux/slurm/>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-\*****************************************************************************/
-
-#ifndef _CIRCULAR_BUFFER_H
-#define _CIRCULAR_BUFFER_H
-
-typedef struct circular_buffer {
-	char         *buffer;	/* buffer pointer - this never changes 
-				   except during allocate and deallocate */
-
-	char         *start;	/* buffer pointer copy - this never changes 
-				   except during allocate and deallocate , 
-				   but it is used in a lot of arithmetic 
-				   hence the paranoia copy 		 */
-
-	char         *end;	/* one char past the last char of the buffer
-				   - ths never changes except during allocate 
-				   and deallocate , but it is used in a lot 
-				   of arithmetic 			 */
-	
-	unsigned int buf_size;	/* buffer size - this never changes except 
-				   during allocate and deallocate 	 */
-
-	unsigned int read_size;	/* buffer size that can be read 	 */
-
-	unsigned int write_size;/* buffer size that can be written 	 */
-
-	char         *head;	/* beginning of the used portion of the 
-				   buffer 				 */
-
-	char         *tail;	/* end of the used portion of the buffer */
-
-	unsigned int min_size;	/* min buffer size 			 */
-	unsigned int max_size;	/* max buffer size 			 */
-
-	unsigned int incremental_size;	/* incremental buffer size 	 */
-
-} circular_buffer_t;
-
-typedef struct cir_buf_line {
-	char   *line[2];
-	size_t line_length[2];
-	size_t line_count;
-	size_t max_line_length;
-} cir_buf_line_t;
-
-/* init_circular_buffer2
- * allocated buffer structure and sets default parameter according to passed parameters
- * OUT buf_ptr		- the allocate buffer 
- * IN min_size		- buffer min size default 8K
- * IN max_size		- buffer max size 10 * 8K
- * IN incremental_size	- buffer increment size 8K
- */
-int inline init_circular_buffer2(circular_buffer_t ** buf_ptr,
-				 int min_size, int max_size,
-				 int incremental_size);
-
-/* init_circular_buffer2
- * allocated buffer structure and sets default parameter according to passed parameters
- * OUT buf_ptr		- the allocate buffer 
- */
-int inline init_circular_buffer(circular_buffer_t ** buf_ptr);
-
-/* free_circular_buffer
- * deallocates the buffer
- * IN buf_ptr		- the allocated buffer 
- */
-void inline free_circular_buffer(circular_buffer_t * buf_ptr);
-
-/* print_circular_buffer
- * prints the buffer
- * IN buf_ptr		- the buffer to print
- */
-void inline print_circular_buffer(circular_buffer_t * buf_ptr);
-
-/* cir_buf_read_update
- * updated the buffer state after a read from the buffer
- * IN buf_ptr		- the allocated buffer 
- * IN size 		- size of the read 
- */
-int cir_buf_read_update(circular_buffer_t * buf, unsigned int size);
-
-/* cir_buf_write_update
- * updated the buffer state after a write to the buffer
- * IN buf_ptr		- the allocated buffer 
- * IN size 		- size of the write 
- */
-int cir_buf_write_update(circular_buffer_t * buf, unsigned int size);
-
-int cir_buf_get_line(circular_buffer_t * buf, cir_buf_line_t * line);
-
-int cir_buf_update_line(circular_buffer_t * buf, cir_buf_line_t * line);
-
-#endif /* !_CIRCULAR_BUFFER_H */
diff --git a/src/slurmd/cntl.h b/src/slurmd/cntl.h
deleted file mode 100644
index 12952479570..00000000000
--- a/src/slurmd/cntl.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*****************************************************************************\
- *  cntl.h
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <http://www.llnl.gov/linux/slurm/>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-\*****************************************************************************/
-
-#ifndef _SLURMD_IO_H_
-#define _SLURMD_IO_H_
-
-#include <src/slurmd/task_mgr.h>
-
-int launch_task(task_start_t * task_start);
-
-int wait_for_tasks(launch_tasks_request_msg_t * launch_msg,
-		   task_start_t ** task_start);
-
-int kill_launched_tasks(launch_tasks_request_msg_t * launch_msg,
-			task_start_t ** task_start, int i);
-
-#endif /* !_SLURMD_IO_H_ */
diff --git a/src/slurmd/elan_interconnect.c b/src/slurmd/elan_interconnect.c
index 00cfc7f50cf..2032eb604f3 100644
--- a/src/slurmd/elan_interconnect.c
+++ b/src/slurmd/elan_interconnect.c
@@ -1,11 +1,10 @@
 /*****************************************************************************\
- *  elan_interconnect.c - Demo the routines in common/qsw.c
- *  This can run mping on the local node (uses shared memory comms).
- *  ./runqsw /usr/lib/mpi-test/mping 1 1024
+ *  src/slurmd/elan_interconnect.c Elan interconnect implementation
  *****************************************************************************
  *  Copyright (C) 2002 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
+ *  Written by Kevin Tew <tew1@llnl.gov> 
+ *         and Mark Grondona <mgrondona@llnl.gov>
  *  UCRL-CODE-2002-040.
  *  
  *  This file is part of SLURM, a resource management program.
@@ -22,11 +21,11 @@
  *  details.
  *  
  *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
  *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
 \*****************************************************************************/
 
-#define HAVE_LIBELAN3 
+#include <src/slurmd/interconnect.h>
 
 #include <sys/types.h>
 #include <sys/wait.h>
@@ -43,21 +42,9 @@
 #include <src/common/qsw.h>
 #include <src/common/slurm_errno.h>
 #include <src/common/slurm_protocol_api.h>
-#include <src/slurmd/task_mgr.h>
 #include <src/slurmd/interconnect.h>
 #include <src/slurmd/setenvpf.h>
-
-
-
-/* exported module funtion to launch tasks */
-/*launch_tasks should really be named launch_job_step*/
-int 
-launch_tasks(launch_tasks_request_msg_t * launch_msg)
-{
-	pthread_atfork(NULL, NULL, pthread_fork_child_after);
-	debug("launch_tasks: calling interconnect_init()");
-	return interconnect_init(launch_msg);
-}
+#include <src/slurmd/shm.h>
 
 static int 
 _wait_and_destroy_prg(qsw_jobinfo_t qsw_job, pid_t pid)
@@ -65,10 +52,15 @@ _wait_and_destroy_prg(qsw_jobinfo_t qsw_job, pid_t pid)
 	int i = 0;
 	int sleeptime = 1;
 
+	shm_init();
+
 	debug3("waiting to destory program description...");
+  again:
 	if (waitpid(pid, NULL, 0) < 0) {
+		if (errno == EINTR)
+			goto again;
 		error("waitpid: %m");
-		return SLURM_ERROR;
+		exit(1);
 	}
 
 	while(qsw_prgdestroy(qsw_job) < 0) {
@@ -88,12 +80,16 @@ _wait_and_destroy_prg(qsw_jobinfo_t qsw_job, pid_t pid)
 		sleep(sleeptime*=2);
 	}
 
+	shm_fini();
+	exit(0);
 	return SLURM_SUCCESS;
 }
 
-/* Contains interconnect specific setup instructions and then calls 
- * fan_out_task_launch */
-int interconnect_init ( launch_tasks_request_msg_t * launch_msg )
+/* 
+ * prepare node for interconnect use
+ */
+int 
+interconnect_init(slurmd_job_t *job)
 {
 	pid_t pid;
 
@@ -101,43 +97,44 @@ int interconnect_init ( launch_tasks_request_msg_t * launch_msg )
 	switch ((pid = fork())) 
 	{
 		case -1:
-			error ("elan_interconnect_init fork(): %m");
+			error ("elan_interconnect_prepare fork(): %m");
 			return SLURM_ERROR ;
 		case 0: /* child falls thru */
 			break;
 		default: /* parent */
-			return _wait_and_destroy_prg(launch_msg->qsw_job, pid);
+			return _wait_and_destroy_prg(job->qsw_job, pid);
 	}
 
 	/* Process 2: */
 	debug("calling qsw_prog_init from process %ld", getpid());
-	if (qsw_prog_init(launch_msg->qsw_job, launch_msg->uid) < 0) {
+	if (qsw_prog_init(job->qsw_job, job->uid) < 0) {
 		error ("elan interconnect_init: qsw_prog_init: %m");
 		/* we may lose the following info if not logging to stderr */
-		qsw_print_jobinfo(stderr, launch_msg->qsw_job);
-		_exit(1) ;
+		qsw_print_jobinfo(stderr, job->qsw_job);
+		return SLURM_ERROR;
 	}
 	
-	fan_out_task_launch(launch_msg);
-	_exit(0);
-
-	return SLURM_ERROR; /* XXX: why? */
+	return SLURM_SUCCESS; 
 }
 
-int interconnect_set_capabilities(task_start_t * task_start)
+int 
+interconnect_fini(slurmd_job_t *job)
 {
-	pid_t pid;
-	int nodeid, nnodes, nprocs, procid; 
+	return SLURM_SUCCESS;
+}
+int 
+interconnect_attach(slurmd_job_t *job, int procid)
+{
+	int nodeid, nnodes, nprocs; 
 
-	nodeid = task_start->launch_msg->srun_node_id;
-	nnodes = task_start->launch_msg->nnodes;
-	procid = task_start->local_task_id;
-	nprocs = task_start->launch_msg->nprocs;
+	nodeid = job->nodeid;
+	nnodes = job->nnodes;
+	nprocs = job->nprocs;
 
 	debug3("nodeid=%d nnodes=%d procid=%d nprocs=%d", 
 	       nodeid, nnodes, procid, nprocs);
 	debug3("setting capability in process %ld", getpid());
-	if (qsw_setcap(task_start->launch_msg->qsw_job, procid) < 0) {
+	if (qsw_setcap(job->qsw_job, procid) < 0) {
 		error("qsw_setcap: %m");
 		return SLURM_ERROR;
 	}
@@ -148,25 +145,21 @@ int interconnect_set_capabilities(task_start_t * task_start)
 /*
  * Set environment variables needed by QSW MPICH / libelan.
  */
-int interconnect_env(char ***env, uint16_t *envc, int nodeid, int nnodes, 
-	             int procid, int nprocs)
+int interconnect_env(slurmd_job_t *job, int taskid)
 {
-	int cnt = *envc;
+	int cnt  = job->envc;
+	int rank = job->task[taskid]->gid; 
 
-	if (setenvpf(env, &cnt, "RMS_RANK=%d", procid) < 0)
+	if (setenvpf(&job->env, &cnt, "RMS_RANK=%d",   rank       ) < 0)
 		return -1;
-	if (setenvpf(env, &cnt, "RMS_NODEID=%d", nodeid) < 0)
+	if (setenvpf(&job->env, &cnt, "RMS_NODEID=%d", job->nodeid) < 0)
 		return -1;
-	if (setenvpf(env, &cnt, "RMS_PROCID=%d", procid) < 0)
+	if (setenvpf(&job->env, &cnt, "RMS_PROCID=%d", rank       ) < 0)
 		return -1;
-	if (setenvpf(env, &cnt, "RMS_NNODES=%d", nnodes) < 0)
+	if (setenvpf(&job->env, &cnt, "RMS_NNODES=%d", job->nnodes) < 0)
 		return -1;
-	if (setenvpf(env, &cnt, "RMS_NPROCS=%d", nprocs) < 0)
+	if (setenvpf(&job->env, &cnt, "RMS_NPROCS=%d", job->nprocs) < 0)
 		return -1;
 	return 0;
 }
 
-
-void pthread_fork_child()
-{
-}
diff --git a/src/slurmd/forked_ctrl.c b/src/slurmd/forked_ctrl.c
deleted file mode 100644
index 0ed37932174..00000000000
--- a/src/slurmd/forked_ctrl.c
+++ /dev/null
@@ -1,107 +0,0 @@
-/*****************************************************************************\
- *  forked_ctrl.c - 
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <http://www.llnl.gov/linux/slurm/>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-\*****************************************************************************/
-
-#include <stdlib.h>
-#include <sys/types.h>
-#include <pwd.h>
-#include <grp.h>
-#include <sys/wait.h>
-#include <errno.h>
-#include <unistd.h>
-#include <string.h>
-#include <pthread.h>
-
-#include <src/common/log.h>
-#include <src/common/list.h>
-#include <src/common/xmalloc.h>
-#include <src/common/slurm_protocol_api.h>
-#include <src/common/slurm_errno.h>
-#include <src/common/util_signals.h>
-
-#include <src/slurmd/task_mgr.h>
-#include <src/slurmd/shmem_struct.h>
-#include <src/slurmd/circular_buffer.h>
-#include <src/slurmd/io.h>
-#include <src/slurmd/pipes.h>
-#include <src/slurmd/reconnect_utils.h>
-
-/* global variables */
-
-
-/******************************************************************
- *task launch method call hierarchy
- *
- *launch_tasks()
- *	interconnect_init()
- *		fan_out_task_launch() (pthread_create)
- *			task_exec_thread() (fork) for task exec
- *			task_exec_thread() (pthread_create) for io piping 
- ******************************************************************/
-
-int launch_task(task_start_t * task_start)
-{
-	pid_t pid = fork();
-	switch (pid) {
-	case -1:
-		slurm_perror("fork");
-		return SLURM_ERROR;
-		break;
-	case 0:
-		task_exec_thread(task_start);
-		_exit(0);
-	default:
-		task_start->pthread_id = pid;
-		return SLURM_SUCCESS;
-		break;
-
-	}
-	return SLURM_SUCCESS;
-}
-
-int wait_for_tasks(launch_tasks_request_msg_t * launch_msg,
-		   task_start_t ** task_start)
-{
-	int i;
-	int rc;
-	for (i = 0; i < launch_msg->tasks_to_launch; i++) {
-		rc = waitpid(task_start[i]->pthread_id, NULL, 0);
-		debug3 ("fan_out_task_launch: thread %i pthread_id %i joined ",
-		     i, task_start[i]->pthread_id);
-	}
-	return SLURM_SUCCESS;
-}
-
-int kill_launched_tasks(launch_tasks_request_msg_t * launch_msg,
-			task_start_t ** task_start, int i)
-{
-	/*
-	   int rc ;
-	   for (  i-- ; i >= 0  ; i -- ) {
-	           rc = kill(task_start[i]->pthread_id, SIGKILL);
-	   }
-	 */
-	return SLURM_SUCCESS;
-}
diff --git a/src/slurmd/forked_io.c b/src/slurmd/forked_io.c
deleted file mode 100644
index 6307c942cd9..00000000000
--- a/src/slurmd/forked_io.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/*****************************************************************************\
- *  forked_io.c - 
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <http://www.llnl.gov/linux/slurm/>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-\*****************************************************************************/
-
-#include <stdlib.h>
-#include <sys/types.h>
-#include <pwd.h>
-#include <grp.h>
-#include <sys/wait.h>
-#include <errno.h>
-#include <unistd.h>
-#include <string.h>
-#include <pthread.h>
-
-#include <src/common/log.h>
-#include <src/common/list.h>
-#include <src/common/xmalloc.h>
-#include <src/common/slurm_protocol_api.h>
-#include <src/common/slurm_errno.h>
-#include <src/common/util_signals.h>
-
-#include <src/slurmd/task_mgr.h>
-#include <src/slurmd/shmem_struct.h>
-#include <src/slurmd/circular_buffer.h>
-#include <src/slurmd/io.h>
-#include <src/slurmd/pipes.h>
-#include <src/slurmd/reconnect_utils.h>
-
-/* global variables */
-
-
-/******************************************************************
- *task launch method call hierarchy
- *
- *launch_tasks()
- *	interconnect_init()
- *		fan_out_task_launch() (pthread_create)
- *			task_exec_thread() (fork) for task exec
- *			task_exec_thread() (pthread_create) for io piping 
- ******************************************************************/			
-int forward_io ( task_start_t * task_start ) 
-{
-	pid_t cpid ;
-	int * pipes = task_start -> pipes ;
-
-#define FORK_ERROR -1
-
-	//posix_signal_pipe_ignore ( ) ;
-
-	/* open stdout*/
-	connect_io_stream ( task_start , STDIN_OUT_SOCK ) ;
-	/* open stderr*/
-	connect_io_stream ( task_start , SIG_STDERR_SOCK ) ;
-
-	switch ( ( cpid = fork () ) )
-	{
-		case FORK_ERROR :
-			goto return_label;
-			break;
-		case 0 : /*CHILD*/
-			close ( pipes[CHILD_IN_RD_PIPE] );
-			close ( pipes[CHILD_OUT_RD_PIPE] );
-			close ( pipes[CHILD_OUT_WR_PIPE] );
-			close ( pipes[CHILD_ERR_RD_PIPE] );
-			close ( pipes[CHILD_ERR_WR_PIPE] );
-			stdin_io_pipe_thread ( task_start ) ;
-			_exit( 0 ) ;
-			break;
-		default : /*PARENT*/
-			task_start->io_pthread_id[STDIN_FILENO]	= cpid ;
-			break ;
-	}
-
-	switch ( ( cpid = fork () ) )
-	{
-		case FORK_ERROR :
-			goto kill_stdin_thread;
-			break;
-		case 0 : /*CHILD*/
-			close ( pipes[CHILD_IN_RD_PIPE] );
-			close ( pipes[CHILD_IN_WR_PIPE] );
-			close ( pipes[CHILD_OUT_WR_PIPE] );
-			close ( pipes[CHILD_ERR_RD_PIPE] );
-			close ( pipes[CHILD_ERR_WR_PIPE] );
-			stdout_io_pipe_thread ( task_start ) ;
-			_exit( 0 ) ;
-			break;
-		default : /*PARENT*/
-			task_start->io_pthread_id[STDOUT_FILENO] = cpid ;
-			break ;
-	}
-
-	switch ( ( cpid = fork ( ) ) )  
-	{
-		case FORK_ERROR :
-			goto kill_stdout_thread;
-			break;
-		case 0 : /*CHILD*/
-			close ( pipes[CHILD_IN_RD_PIPE] );
-			close ( pipes[CHILD_IN_WR_PIPE] );
-			close ( pipes[CHILD_OUT_RD_PIPE] );
-			close ( pipes[CHILD_OUT_WR_PIPE] );
-			close ( pipes[CHILD_ERR_WR_PIPE] );
-			stderr_io_pipe_thread ( task_start ) ;
-			_exit( 0 ) ;
-			break;
-		default : /*PARENT*/
-			task_start->io_pthread_id[STDERR_FILENO] = cpid ;
-			break ;
-	}
-
-	goto return_label;
-
-kill_stdout_thread:
-	kill ( task_start->io_pthread_id[STDOUT_FILENO] , SIGKILL );
-kill_stdin_thread:
-	kill ( task_start->io_pthread_id[STDIN_FILENO] , SIGKILL );
-return_label:
-	return SLURM_SUCCESS ;
-}
-
-int wait_on_io_threads ( task_start_t * task_start ) 
-{
-	info ( "%i: err pid: %i " , task_start -> local_task_id , task_start->io_pthread_id[STDERR_FILENO] ) ;
-	info ( "%i: out pid: %i " , task_start -> local_task_id , task_start->io_pthread_id[STDOUT_FILENO] ) ;
-	info ( "%i: in pid: %i " , task_start -> local_task_id , task_start->io_pthread_id[STDIN_FILENO] ) ;
-	/* threads have been detatched*/
-	waitpid (  task_start->io_pthread_id[STDERR_FILENO] , NULL , 0 ) ;
-	info ( "%i: errexit pid: %i " , task_start -> local_task_id , task_start->io_pthread_id[STDERR_FILENO] ) ;
-	waitpid ( task_start->io_pthread_id[STDOUT_FILENO] , NULL , 0 ) ;
-	info ( "%i: outexit pid: %i " , task_start -> local_task_id , task_start->io_pthread_id[STDOUT_FILENO] ) ;
-	/* waitpid ( task_start->io_pthread_id[STDIN_FILENO] , NULL ) ;*/
-	kill ( task_start->io_pthread_id[STDIN_FILENO] , SIGKILL );
-	info ( "%i: inexit pid: %i " , task_start -> local_task_id , task_start->io_pthread_id[STDIN_FILENO] ) ;
-	/* thread join on stderr or stdout signifies task termination we should kill the stdin thread */
-	info ( "leaving wait_on_io_threads" ) ;
-	return SLURM_SUCCESS ;
-}
-
-int iotype_init_pipes ( int * pipes )
-{
-	return SLURM_SUCCESS ;
-}
diff --git a/src/slurmd/interconnect.h b/src/slurmd/interconnect.h
index d083909c33d..405bdb00420 100644
--- a/src/slurmd/interconnect.h
+++ b/src/slurmd/interconnect.h
@@ -1,9 +1,11 @@
 /*****************************************************************************\
- *  interconnect.h -
+ *  src/slurmd/interconnect.h - general interconnect routines for slurmd
+ *  $Id$
  *****************************************************************************
  *  Copyright (C) 2002 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
+ *  Written by Kevin Tew <tew1@llnl.gov> 
+ *          modified by Mark Grondona <mgrondona@llnl.gov>
  *  UCRL-CODE-2002-040.
  *  
  *  This file is part of SLURM, a resource management program.
@@ -20,41 +22,34 @@
  *  details.
  *  
  *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
  *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
 \*****************************************************************************/
 
-#ifndef _SLURMD_INTERCONNECT_H_
-#define _SLURMD_INTERCONNECT_H_
+#ifndef _INTERCONNECT_H_
+#define _INTERCONNECT_H_
 
 #include <src/common/slurm_protocol_api.h>
-#include <src/slurmd/task_mgr.h>
+#include <src/slurmd/job.h>
 
-/* interconnect_init
- * called by launch_tasks to initialize the interconnect
- * IN launch_msg	- launch_tasks_msg
- * RET int		- return_code
+/* 
+ * initialize interconnect on node
  */
-int interconnect_init ( launch_tasks_request_msg_t * launch_msg );
+int interconnect_init(slurmd_job_t *job);
 
-/* fan_out_task_launch
- * called by launch_tasks to do the task fan out
- * IN launch_msg	- launch_tasks_msg
- * RET int		- return_code
+/*
+ * finalize and detach from interconnect on node
  */
-int fan_out_task_launch ( launch_tasks_request_msg_t * launch_msg );
+int interconnect_fini(slurmd_job_t *job);
 
-/* interconnect_set_capabilities
- * called by fan_out_task_launch to set interconnect capabilities
- * IN task_start	- task_start structure
- * RET int		- return_code
+/* 
+ * attach process to interconnect
  */
-int interconnect_set_capabilities ( task_start_t * task_start ) ;
+int interconnect_attach(slurmd_job_t *job, int taskid);
 
 /*
  * Set environment variables needed.
  */
-int interconnect_env(char ***env, uint16_t *envc, int nodeid, int nnodes, 
-	             int procid, int nprocs) ;
+int interconnect_env(slurmd_job_t *job, int taskid);
 
-#endif
+#endif /* _INTERCONNECT_H_ */
diff --git a/src/slurmd/io.c b/src/slurmd/io.c
index dae35c43044..5ca27c814df 100644
--- a/src/slurmd/io.c
+++ b/src/slurmd/io.c
@@ -1,9 +1,10 @@
 /*****************************************************************************\
- *  io.c - 
+ * src/slurmd/io.c - I/O handling routines for slurmd
+ * $Id$
  *****************************************************************************
  *  Copyright (C) 2002 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
+ *  Written by Mark Grondona <mgrondona@llnl.gov>.
  *  UCRL-CODE-2002-040.
  *  
  *  This file is part of SLURM, a resource management program.
@@ -20,455 +21,890 @@
  *  details.
  *  
  *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
  *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
 \*****************************************************************************/
 
-#include <stdlib.h>
-#include <sys/types.h>
-#include <pwd.h>
-#include <grp.h>
-#include <sys/wait.h>
-#include <errno.h>
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#if HAVE_UNISTD_H
+#  include <unistd.h>
+#endif
+
+#if HAVE_STRING_H
+#  include <string.h>
+#endif
+
+#if HAVE_STDLIB_H
+#  include <stdlib.h>
+#endif
+
 #include <unistd.h>
-#include <string.h>
-#include <pthread.h>
+#include <errno.h>
 
+#include <src/common/eio.h>
+#include <src/common/cbuf.h>
 #include <src/common/log.h>
+#include <src/common/fd.h>
 #include <src/common/list.h>
-#include <src/common/pack.h>
 #include <src/common/xmalloc.h>
-#include <src/common/slurm_protocol_api.h>
-#include <src/common/slurm_errno.h>
-#include <src/common/util_signals.h>
 
-#include <src/slurmd/task_mgr.h>
-#include <src/slurmd/shmem_struct.h>
-#include <src/slurmd/circular_buffer.h>
+#include <src/slurmd/job.h>
+#include <src/slurmd/shm.h>
 #include <src/slurmd/io.h>
-#include <src/slurmd/pipes.h>
-
-/* global variables */
-int connect_io_stream(task_start_t * task_start, int out_or_err);
-int send_io_stream_header(task_start_t * task_start, int out_or_err);
-ssize_t read_EINTR(int fd, void *buf, size_t count);
-
-
-/******************************************************************
- *task launch method call hierarchy
- *
- *launch_tasks()
- *	interconnect_init()
- *		fan_out_task_launch() (pthread_create)
- *			task_exec_thread() (fork) for task exec
- *			task_exec_thread() (pthread_create) for io piping 
- ******************************************************************/
-int forward_io(task_start_t * task_start)
-{
-	pthread_attr_t pthread_attr;
-
-#define STDIN_OUT_SOCK 0
-#define SIG_STDERR_SOCK 1
-
-	/* open stdout */
-	connect_io_stream(task_start, STDIN_OUT_SOCK);
-	/* open stderr */
-	connect_io_stream(task_start, SIG_STDERR_SOCK);
-
-	/* spawn io pipe threads */
-	/* set detatch state */
-	pthread_attr_init(&pthread_attr);
-
-	if (pthread_create
-	    (&task_start->io_pthread_id[STDIN_FILENO], NULL,
-	     stdin_io_pipe_thread, task_start))
-		goto return_label;
-	if (pthread_create
-	    (&task_start->io_pthread_id[STDOUT_FILENO], NULL,
-	     stdout_io_pipe_thread, task_start))
-		goto kill_stdin_thread;
-	if (pthread_create
-	    (&task_start->io_pthread_id[STDERR_FILENO], NULL,
-	     stderr_io_pipe_thread, task_start))
-		goto kill_stdout_thread;
-
-
-
-	goto return_label;
-
-      kill_stdout_thread:
-	pthread_kill(task_start->io_pthread_id[STDOUT_FILENO], SIGKILL);
-      kill_stdin_thread:
-	pthread_kill(task_start->io_pthread_id[STDIN_FILENO], SIGKILL);
-      return_label:
-	return SLURM_SUCCESS;
+
+typedef enum slurmd_io_tupe {
+	TASK_STDERR,
+	TASK_STDOUT,
+	TASK_STDIN,
+	CLIENT_STDERR,
+	CLIENT_STDOUT,
+} slurmd_io_type_t;
+
+static char *slurmd_io_str[] =      /* printable names for slurmd_io_type_t */
+{
+	/* indexed by slurmd_io_type_t: entry i must name enumerator i */
+	"task stderr",                  /* TASK_STDERR   */
+	"task stdout",                  /* TASK_STDOUT   */
+	"task stdin",                   /* TASK_STDIN    */
+	"client stderr",                /* CLIENT_STDERR */
+	"client stdout"                 /* CLIENT_STDOUT */
+};
+
+
+struct io_info {
+#ifndef NDEBUG
+#define IO_MAGIC 0x10101
+	int magic;
+#endif
+	uint32_t id;
+	cbuf_t buf;
+	List readers;
+	List writers;
+	slurmd_io_type_t type;
+	unsigned eof:1;
+	unsigned disconnected:1;
+};
+
+
+static int    _io_init_pipes(task_info_t *t);
+static void   _io_prepare_clients(slurmd_job_t *);
+static void   _io_prepare_tasks(slurmd_job_t *);
+static void * _io_thr(void *);
+static int    _io_write_header(struct io_info *, srun_info_t *);
+static void   _io_connect_objs(io_obj_t *, io_obj_t *);
+static int    _validate_io_list(List objList);
+static int    _shutdown_task_obj(struct io_info *t);
+
+static struct io_obj  * _io_obj_create(int fd, void *arg);
+static struct io_info * _io_info_create(uint32_t id);
+static struct io_obj  * _io_obj(int fd, uint id, int type);
+static void           * _io_thr(void *arg);
+
+
+/* Slurmd I/O objects:
+ * N   task   stderr, stdout objs (read-only)
+ * N*M client stderr, stdout objs (read-write) (possibly a file)
+ * N   task   stdin          objs (write only) (possibly a file)
+ */
+
+static bool _readable(io_obj_t *);
+static bool _writable(io_obj_t *);
+static int  _write(io_obj_t *, List);
+static int  _task_read(io_obj_t *, List);
+static int  _client_read(io_obj_t *, List);
+static int  _task_error(io_obj_t *, List);
+static int  _client_error(io_obj_t *, List);
+
+
+struct io_operations task_out_ops = {
+        readable:	&_readable,
+	handle_read:	&_task_read,
+        handle_error:	&_task_error
+};
+
+struct io_operations task_in_ops = {
+	writable:	&_writable,
+	handle_write:	&_write,
+	handle_error:	&_task_error,
+};
+			
+struct io_operations client_ops = {
+	readable:	&_readable,
+	writable:	&_writable,
+	handle_read:	&_client_read,
+	handle_write:	&_write,
+	handle_error:	&_client_error,
+};
+
+int
+io_spawn_handler(slurmd_job_t *job) 
+{
+	pthread_attr_t attr;
+	
+	if (io_init_pipes(job) == SLURM_FAILURE) {
+		error("io_handler: init_pipes failed: %m");
+		return SLURM_FAILURE;
+	}
+
+	/* create task IO objects and append these to the objs list
+	 *
+	 * XXX check for errors?
+	 */
+	_io_prepare_tasks(job);
+
+	/* open 2*ntask initial connections or files for stdout/err 
+	 * append these to objs list 
+	 */
+	_io_prepare_clients(job);
+
+	if ((errno = pthread_attr_init(&attr)) != 0)
+		error("pthread_attr_init: %m");
+
+#ifdef PTHREAD_SCOPE_SYSTEM
+	if ((errno = pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM)) != 0)
+		error("pthread_attr_setscope: %m");
+#endif 
+	xassert(_validate_io_list(job->objs));
+
+	return pthread_create(&job->ioid, &attr, &_io_thr, (void *)job);
 }
 
-int wait_on_io_threads(task_start_t * task_start)
+static int
+_xclose(int fd)
 {
-	/* threads have been detatched */
-	pthread_join(task_start->io_pthread_id[STDERR_FILENO], NULL);
-	info("errexit");
-	pthread_join(task_start->io_pthread_id[STDOUT_FILENO], NULL);
-	info("outexit");
-	/*pthread_join ( task_start->io_pthread_id[STDIN_FILENO] , NULL ) ; */
-	pthread_cancel(task_start->io_pthread_id[STDIN_FILENO]);
-	info("inexit");
-	/* thread join on stderr or stdout signifies task termination we should kill the stdin thread */
-	return SLURM_SUCCESS;
+	int rc;
+	do rc = close(fd);
+	while (rc == -1 && errno == EINTR);
+	return rc;
 }
 
-void *stdin_io_pipe_thread(void *arg)
+/* Close child fds in parent */
+static void
+_io_finalize(task_info_t *t)
 {
-	task_start_t *task_start = (task_start_t *) arg;
-	int bytes_read;
-	int bytes_written;
-	int local_errno;
-	circular_buffer_t *cir_buf;
+	if (_xclose(t->pin[0] ) < 0)
+		error("close(stdin) : %m");
+	if (_xclose(t->pout[1]) < 0)
+		error("close(stdout): %m");
+	if (_xclose(t->perr[1]) < 0)
+		error("close(stderr): %m");
+}
+
+void 
+io_close_all(slurmd_job_t *job)
+{
+	int i;
+	for (i = 0; i < job->ntasks; i++)
+		_io_finalize(job->task[i]);
+}
+
+static void *
+_io_thr(void *arg)
+{
+	slurmd_job_t *job = (slurmd_job_t *) arg;
+	log_reinit();
+	io_handle_events(job->objs);
+	verbose("IO handler exited");
+	return (void *)1;
+}
 
-	init_circular_buffer(&cir_buf);
+static void
+_io_prepare_tasks(slurmd_job_t *job)
+{
+	int          i;
+	srun_info_t *srun;
+	task_info_t *t;
 
-	posix_signal_pipe_ignore();
+	srun = list_peek(job->sruns);
 
-	while (true) {
-		if ((cir_buf->write_size == 0)) {
-			info("stdin cir_buf->write_size == 0 this shouldn't happen");
-			continue;
+	for (i = 0; i < job->ntasks; i++) {
+		t = job->task[i];
+
+		t->in  = _io_obj(t->pin[1],  t->gid, TASK_STDIN );
+		list_append(job->objs, (void *)t->in );
+
+		t->out = _io_obj(t->pout[0], t->gid, TASK_STDOUT);
+		list_append(job->objs, (void *)t->out);
+
+		t->err = _io_obj(t->perr[0], t->gid, TASK_STDERR);
+		list_append(job->objs, (void *)t->err);
+	}
+}
+
+#if 0
+/*
+ * create initial file objs for N tasks
+ */
+static void
+_io_prepare_files(slurmd_job_t *job)
+{
+	int       i, fd;
+	int       err_flags = O_WRONLY | O_CREAT | O_EXCL;
+	int       out_flags = O_WRONLY | O_CREAT | O_EXCL;
+	int       in_flags  = O_RDONLY | O_CREAT | O_EXCL;
+	char     *filename;
+	io_obj_t *obj;
+
+	if (job->outf) {
+		if 
+		for (i = 0; i < job->ntasks; i++) {
+			char *buf[4096];
+			snprintf(buf, 4096, job->outf, i);
+			if (open(buf, out_flags) < 0)
+				error("can't open file `%s': %m", buf);
+			
 		}
+	}
+}
+#endif
+
+/* 
+ * create initial client objs for N tasks
+ */
+static void
+_io_prepare_clients(slurmd_job_t *job)
+{
+	int          i, sock;
+	io_obj_t    *obj;
+	srun_info_t *srun;
 
-		if ((bytes_read =
-		     slurm_read_stream(task_start->sockets[STDIN_OUT_SOCK],
-				       cir_buf->tail,
-				       cir_buf->write_size)) <= 0) {
-			local_errno = errno;
-			if (bytes_read == 0) {
-				info("0 returned EOF on socket ");
-				break;
-			} else if (bytes_read == -1) {
-				switch (local_errno) {
-				case EBADF:
-				case EPIPE:
-				case ECONNREFUSED:
-				case ECONNRESET:
-				case ENOTCONN:
-					break;
-				default:
-					info("error reading stdin  stream for task %i, %m errno: %i , bytes read %i ", task_start->launch_msg->global_task_ids[task_start->local_task_id], local_errno, bytes_read);
-					error("uncaught errno %i",
-					      local_errno);
-					break;
-				}
-			} else {
-				info("bytes_read: %i don't know what to do with this return code ", bytes_read);
-			}
-		} else {
-			cir_buf_write_update(cir_buf, bytes_read);
+	xassert(list_count(job->sruns) == 1);
+
+	srun = list_peek(job->sruns);
+
+	/* create sockets for stdout/err 
+	 */
+	for (i = 0; i < job->ntasks; i++) {
+		task_info_t *t = job->task[i];
+
+		sock = (int) slurm_open_stream(&srun->ioaddr);
+		if (sock < 1) {
+			error("connect io: %m");
+			return;
 		}
+		fd_set_nonblocking(sock);
+		fd_set_close_on_exec(sock);
+		obj  = _io_obj(sock, t->gid, CLIENT_STDOUT);
+		_io_write_header(obj->arg, srun);
+		list_append(job->objs, obj);
+
+		_io_connect_objs(t->out, obj);
+		_io_connect_objs(obj, t->in );
+
+		sock = (int) slurm_open_stream(&srun->ioaddr);
+		fd_set_nonblocking(sock);
+		fd_set_close_on_exec(sock);
+		obj  = _io_obj(sock, t->gid, CLIENT_STDERR);
+		_io_write_header(obj->arg, srun);
+		list_append(job->objs, obj);
+
+		_io_connect_objs(t->err, obj);
+	}
+}
 
-		/* debug */
-		//write ( 1 ,  "stdin-", 6 ) ;
-		//write ( 1 , cir_buf->head , cir_buf->read_size ) ;
-		info("%i stdin bytes read", bytes_read);
-		/* debug */
-
-		while (true) {
-
-			if ((bytes_written =
-			     write(task_start->pipes[CHILD_IN_WR],
-				   cir_buf->head,
-				   cir_buf->read_size)) <= 0) {
-				if ((bytes_written == SLURM_PROTOCOL_ERROR)
-				    && (errno == EINTR)) {
-					continue;
-				} else {
-
-					local_errno = errno;
-					info("error sending stdin  stream for task %i, %m errno: %i , bytes read %i ", task_start->launch_msg->global_task_ids[task_start->local_task_id], local_errno, bytes_read);
-					goto stdin_return;
-				}
-			} else {
-				cir_buf_read_update(cir_buf,
-						    bytes_written);
-				break;
-			}
+static void
+_io_connect_objs(io_obj_t *obj1, io_obj_t *obj2)
+{
+	struct io_info *src = (struct io_info *) obj1->arg;
+	struct io_info *dst = (struct io_info *) obj2->arg;
+	xassert(src->magic == IO_MAGIC);
+	xassert(dst->magic == IO_MAGIC);
+	list_append(src->readers, dst);
+	list_append(dst->writers, src);
+}
+
+static int
+_validate_task_out(struct io_info *t, int type)
+{
+	ListIterator i;
+	struct io_info *r;
+	int retval = 1;
+
+	xassert(t->magic == IO_MAGIC);
+	
+	if (t->writers)
+		retval = 0;
+
+	i = list_iterator_create(t->readers);
+	while ((r = list_next(i))) {
+		if (r->type != type) {
+			fatal("_validate_io: %s reader is %s",
+					slurmd_io_str[t->type],
+					slurmd_io_str[r->type]);
 		}
 	}
-      stdin_return:
-	free_circular_buffer(cir_buf);
-	close(task_start->pipes[CHILD_IN_WR]);
-	pthread_exit(NULL);
+	list_iterator_destroy(i);
+
+	return retval;
 }
 
-#define RECONNECT_RETRY_TIME 1
-void *stdout_io_pipe_thread(void *arg)
+static int
+_validate_task_in(struct io_info *t)
 {
-	task_start_t *task_start = (task_start_t *) arg;
-	int bytes_read;
-	int sock_bytes_written;
-	int local_errno;
-	int attempt_reconnect = false;
-	time_t last_reconnect_try = 0;
-	circular_buffer_t *cir_buf;
+	ListIterator i;
+	struct io_info *r;
+	int retval = 1;
 
-	init_circular_buffer(&cir_buf);
+	xassert(t->magic == IO_MAGIC);
 
-	posix_signal_pipe_ignore();
+	if (t->readers)
+		retval = 0;
 
-	while (true) {
-		if ((cir_buf->write_size == 0)) {
-			info("stdout cir_buf->write_size == 0 this shouldn't happen");
-			continue;
+	i = list_iterator_create(t->writers);
+	while ((r = list_next(i)) != NULL) {
+		if (r->magic != IO_MAGIC) {
+			error("_validate_io: %s writer is invalid", 
+					slurmd_io_str[t->type]);
+			return 0;
+		}
+		if (r->type != CLIENT_STDOUT) {
+			error("_validate_io: %s writer is %s",
+					slurmd_io_str[t->type],
+					slurmd_io_str[r->type]);
+			retval = 0;
 		}
+	}
+	list_iterator_destroy(i);
+
+	return retval;
+}
 
-		/* read stdout code */
-		if ((bytes_read =
-		     read_EINTR(task_start->pipes[CHILD_OUT_RD],
-				cir_buf->tail,
-				cir_buf->write_size)) <= 0) {
-			local_errno = errno;
-			info("error reading stdout stream for task %i, %m errno: %i , bytes read %i ", task_start->launch_msg->global_task_ids[task_start->local_task_id], local_errno, bytes_read);
-			goto stdout_return;
-		} else {
-			cir_buf_write_update(cir_buf, bytes_read);
+
+static int
+_validate_client_stdout(struct io_info *client)
+{
+	ListIterator i;
+	struct io_info *t;
+	int retval = 1;
+
+	xassert(client->magic == IO_MAGIC);
+	
+	i = list_iterator_create(client->readers);
+	while ((t = list_next(i))) {
+		if (t->type != TASK_STDIN) {
+			error("_validate_io: client stdin reader is %s",
+					slurmd_io_str[t->type]);
+			retval = 0;
 		}
+	}
+	list_iterator_destroy(i);
+
+	i = list_iterator_create(client->writers);
+	while ((t = list_next(i))) {
+		if (t->type != TASK_STDOUT) {
+			error("_validate_io: client stdout writer is %s",
+					slurmd_io_str[t->type]);
+			retval = 0;
+		}
+	}
+	list_iterator_destroy(i);
+	
+	return retval;
+}
 
-		/* debug */
-		write(1, cir_buf->head, cir_buf->read_size);
-		info("%i stdout bytes read", bytes_read);
-		/* debug */
-
-		/* reconnect code */
-		if (attempt_reconnect) {
-			time_t curr_time = time(NULL);
-			if (difftime(curr_time, last_reconnect_try) >
-			    RECONNECT_RETRY_TIME) {
-				slurm_close_stream(task_start->
-						   sockets
-						   [STDIN_OUT_SOCK]);
-				if ((task_start->sockets[STDIN_OUT_SOCK] =
-				     slurm_open_stream(&
-						       (task_start->
-							io_streams_dest)))
-				    == SLURM_PROTOCOL_ERROR) {
-					local_errno = errno;
-					info("error reconnecting socket to srun to pipe stdout errno %i", local_errno);
-					last_reconnect_try = time(NULL);
-					continue;
-				}
-				attempt_reconnect = false;
-			} else {
-				continue;
-			}
+static int
+_validate_client_stderr(struct io_info *client)
+{
+	ListIterator i;
+	struct io_info *t;
+	int retval = 1;
+
+	xassert(client->magic == IO_MAGIC);
+
+	if (client->readers)
+		retval = 0;
+	
+	i = list_iterator_create(client->writers);
+	while ((t = list_next(i))) {
+		if (t->type != TASK_STDERR) {
+			error("_validate_io: client stderr writer is %s",
+					slurmd_io_str[t->type]);
+			retval = 0;
 		}
+	}
+	list_iterator_destroy(i);
 
-		/* write out socket code */
-		if ((sock_bytes_written =
-		     slurm_write_stream(task_start->
-					sockets[STDIN_OUT_SOCK],
-					cir_buf->head,
-					cir_buf->read_size)) ==
-		    SLURM_PROTOCOL_ERROR) {
-			local_errno = errno;
-			switch (local_errno) {
-			case EBADF:
-			case EPIPE:
-			case ECONNREFUSED:
-			case ECONNRESET:
-			case ENOTCONN:
-				info("std out connection losti %i",
-				     local_errno);
-				attempt_reconnect = true;
-				slurm_close_stream(task_start->
-						   sockets
-						   [STDIN_OUT_SOCK]);
-				break;
-			default:
-				info("error sending stdout stream for task %i, errno %i", task_start->launch_msg->global_task_ids[task_start->local_task_id], local_errno);
-				error("uncaught errno %i", local_errno);
-				break;
-			}
-			continue;
+	return retval;
+}
+
+static int 
+_validate_io_list(List objList)
+{
+	io_obj_t *obj;
+	int retval = 1;
+	ListIterator i = list_iterator_create(objList);
+	while ((obj = list_next(i))) {
+		struct io_info *io = (struct io_info *) obj->arg;
+		switch (io->type) {
+		case TASK_STDOUT:
+			xassert(_validate_task_out(io, CLIENT_STDOUT));
+			break;
+		case TASK_STDERR:
+			xassert(_validate_task_out(io, CLIENT_STDERR));
+			break;
+		case TASK_STDIN:
+			xassert(_validate_task_in(io));
+			break;
+		case CLIENT_STDERR:
+			xassert(_validate_client_stderr(io));
+			break;
+		case CLIENT_STDOUT:
+			xassert(_validate_client_stdout(io));
+			break;
 		}
-		cir_buf_read_update(cir_buf, sock_bytes_written);
 	}
+	list_iterator_destroy(i);
+	return retval;
+}
+
+static int 
+find_obj(struct io_info *obj, struct io_info *key)
+{
+	xassert(obj != NULL);
+	xassert(key != NULL);
 
-      stdout_return:
-	free_circular_buffer(cir_buf);
-	slurm_close_stream(task_start->sockets[STDIN_OUT_SOCK]);
-	close(task_start->pipes[CHILD_OUT_RD]);
-	pthread_exit(NULL);
+	return (obj == key);
 }
 
-void *stderr_io_pipe_thread(void *arg)
+
+static void
+_io_disconnect_client(struct io_info *client)
 {
-	task_start_t *task_start = (task_start_t *) arg;
-	int bytes_read;
-	int sock_bytes_written;
-	int local_errno;
-	int attempt_reconnect = false;
-	time_t last_reconnect_try = 0;
-	circular_buffer_t *cir_buf;
+	struct io_info *t;
+	int n;
+
+	xassert(client->magic == IO_MAGIC);
+	client->disconnected = 1;
+
+	/* delete client from its writer->readers list (a client should have
+	 * only one writer); a writer's only remaining reader is left linked
+	 */
+	t = list_peek(client->writers);
+
+	xassert(!t || t->type == TASK_STDERR || t->type == TASK_STDOUT);
+	if (t && list_count(t->readers) > 1) {
+		n = list_delete_all(t->readers, (ListFindF)find_obj, client);
+		if (n <= 0)
+			error("deleting client from readers");
+	}
 
-	init_circular_buffer(&cir_buf);
+	if (!client->readers)
+		return;
+
+	/* delete STDOUT client from its reader->writers list
+	 * (a client obj should have only one reader)
+	 */
+	t = list_peek(client->readers);
+	if (t) {
+		n = list_delete_all(t->writers, (ListFindF)find_obj, client);
+		if (n <= 0)
+			error("deleting client from writers");
+	}
+}
 
-	posix_signal_pipe_ignore();
 
-	while (true) {
-		if ((cir_buf->write_size == 0)) {
-			info("stderr cir_buf->write_size == 0 this shouldn't happen");
-			continue;
-		}
+io_obj_t *
+_io_obj(int fd, uint32_t id, int type)
+{
+	struct io_info *io = _io_info_create(id);
+	struct io_obj *obj = _io_obj_create(fd, (void *)io);
+	/* Build the I/O object for fd: io rides along as obj->arg; ops, lists and buffer are chosen by type. */
+	xassert(io->magic == IO_MAGIC);
+	xassert(type >= 0);
+
+	io->type = type;
+	switch (type) {
+	 case TASK_STDERR:
+	 case TASK_STDOUT:
+		 obj->ops    = &task_out_ops;
+		 io->readers = list_create(NULL);
+		 break;
+	 case TASK_STDIN:
+		 obj->ops    = &task_in_ops;
+		 io->buf     = cbuf_create(512, 10240);
+		 io->writers = list_create(NULL);
+		 break;
+	 case CLIENT_STDOUT:
+		 io->readers = list_create(NULL); /* fallthrough: shares the client setup below */
+	 case CLIENT_STDERR:
+		 obj->ops    = &client_ops;
+		 io->buf     = cbuf_create(16, 1048576);
+		 io->writers = list_create(NULL);
+		 break;
+	 default:
+		 error("io: unknown I/O obj type %d", type); /* obj is still returned, ops stays NULL */
+	}
+	return obj;
+}
 
-		/* read stderr code */
-		if ((bytes_read =
-		     read(task_start->pipes[CHILD_ERR_RD], cir_buf->tail,
-			  cir_buf->write_size)) <= 0) {
-			debug("bytes_read: %i , errno: %i", bytes_read,
-			      errno);
-			if ((bytes_read == SLURM_PROTOCOL_ERROR)
-			    && (errno == EINTR)) {
-				continue;
-			} else {
-
-				local_errno = errno;
-				info("error reading stderr stream for task %i, errno %i , bytes read %i ", task_start->launch_msg->global_task_ids[task_start->local_task_id], local_errno, bytes_read);
-				goto stderr_return;
-			}
-		} else {
-			cir_buf_write_update(cir_buf, bytes_read);
-		}
+void
+io_obj_destroy(io_obj_t *obj)
+{
+	struct io_info *io;
+
+	xassert(obj != NULL);	/* must hold before obj->arg is read */
+	io = (struct io_info *) obj->arg;
+	xassert(io  != NULL);
+	xassert(io->magic == IO_MAGIC);
+	switch (io->type) {	/* release what _io_obj() set up for this type */
+	 case TASK_STDERR:
+	 case TASK_STDOUT:
+		 list_destroy(io->readers);
+		 break;
+	 case TASK_STDIN:
+		 cbuf_destroy(io->buf);
+		 list_destroy(io->writers);
+		 break;
+	 case CLIENT_STDOUT:
+		 list_destroy(io->readers);	/* fallthrough: rest is shared */
+	 case CLIENT_STDERR:
+		 cbuf_destroy(io->buf);
+		 list_destroy(io->writers);
+		 break;
+	 default:
+		 error("unknown IO object type: %d", (int) io->type);
+	}
 
-		/* debug */
-		/*
-		   info ( "%i stderr bytes read", bytes_read ) ;
-		   write ( 2 , cir_buf->head , cir_buf->read_size ) ;
-		 */
-		/* debug */
-
-		/* reconnect code */
-		if (attempt_reconnect) {
-			time_t curr_time = time(NULL);
-			if (difftime(curr_time, last_reconnect_try) >
-			    RECONNECT_RETRY_TIME) {
-				slurm_close_stream(task_start->
-						   sockets
-						   [SIG_STDERR_SOCK]);
-				if ((task_start->sockets[SIG_STDERR_SOCK] =
-				     slurm_open_stream(&
-						       (task_start->
-							io_streams_dest)))
-				    == SLURM_PROTOCOL_ERROR) {
-					local_errno = errno;
-					info("error reconnecting socket to srun to pipe stderr errno %i", local_errno);
-					last_reconnect_try = time(NULL);
-					continue;
-				}
-				attempt_reconnect = false;
-			} else {
-				continue;
-			}
-		}
+	xassert(io->magic = ~IO_MAGIC);	/* assignment intended: poison magic before free */
+	xfree(io);
+	xfree(obj);
+}
 
-		/* write out socket code */
-		if ((sock_bytes_written =
-		     slurm_write_stream(task_start->
-					sockets[SIG_STDERR_SOCK],
-					cir_buf->head,
-					cir_buf->read_size)) ==
-		    SLURM_PROTOCOL_ERROR) {
-			local_errno = errno;
-			switch (local_errno) {
-			case EBADF:
-			case EPIPE:
-			case ECONNREFUSED:
-			case ECONNRESET:
-			case ENOTCONN:
-				info("std err connection lost %i ",
-				     local_errno);
-				attempt_reconnect = true;
-				slurm_close_stream(task_start->
-						   sockets
-						   [SIG_STDERR_SOCK]);
-				break;
-			default:
-				info("error sending stderr stream for task %i , %m errno: %i", task_start->launch_msg->global_task_ids[task_start->local_task_id], local_errno);
-				error("uncaught errno %i", local_errno);
-				break;
-			}
-			continue;
+static io_obj_t *
+_io_obj_create(int fd, void *arg)
+{
+	io_obj_t *obj = xmalloc(sizeof(*obj));
+	obj->fd  = fd;
+	obj->arg = arg;
+	obj->ops = NULL;
+	return obj;
+}
+
+static struct io_info *
+_io_info_create(uint32_t id)
+{
+	struct io_info *io = (struct io_info *) xmalloc(sizeof(*io));
+	xassert(io->magic = IO_MAGIC);
+	io->id  = id;
+	io->buf = NULL;
+	io->type = -1;
+	return io;
+}
+
+int
+io_init_pipes(slurmd_job_t *job)
+{
+	int i;
+	for (i = 0; i < job->ntasks; i++) {
+		if (_io_init_pipes(job->task[i]) == SLURM_FAILURE) {
+			error("init_pipes <task %d> failed", i);
+			return SLURM_FAILURE;
 		}
-		cir_buf_read_update(cir_buf, sock_bytes_written);
 	}
+	return SLURM_SUCCESS;
+}
 
-      stderr_return:
-	free_circular_buffer(cir_buf);
-	slurm_close_stream(task_start->sockets[SIG_STDERR_SOCK]);
-	close(task_start->pipes[CHILD_ERR_RD]);
-	pthread_exit(NULL);
+static int
+_io_write_header(struct io_info *client, srun_info_t *srun)
+{
+	slurm_io_stream_header_t hdr;
+	char *buf;
+	int retval;
+	int size   = sizeof(hdr);
+	Buf buffer = init_buf(size);
+
+	hdr.version = SLURM_PROTOCOL_VERSION;
+	memcpy(hdr.key, srun->key->data, SLURM_SSL_SIGNATURE_LENGTH);
+	hdr.task_id = client->id;
+	hdr.type    = client->type == CLIENT_STDOUT ? 0 : 1;
+
+	pack_io_stream_header(&hdr, buffer);
+
+	/* XXX Shouldn't have to jump through these hoops to 
+	 * support slurm Buf type. Need a better way to do this
+	 */
+	size   = buffer->processed;
+	buf    = xfer_buf_data(buffer);
+	retval = cbuf_write(client->buf, buf, size, NULL);
+	xfree(buf);
+	return retval;
 }
 
-int connect_io_stream(task_start_t * task_start, int out_or_err)
+static int
+_io_init_pipes(task_info_t *t)
 {
-	int local_errno;
-	if ((task_start->sockets[out_or_err] =
-	     slurm_open_stream(&(task_start->io_streams_dest))) ==
-	    SLURM_PROTOCOL_ERROR) {
-		local_errno = errno;
-		info("error opening socket to srun to pipe %s %m errno: %i", out_or_err ? "stdout" : "stderr", local_errno);
-		return SLURM_PROTOCOL_ERROR;
-	} else {
-		return send_io_stream_header(task_start, out_or_err);
+	if (  (pipe(t->pin)  < 0) 
+	   || (pipe(t->pout) < 0) 
+	   || (pipe(t->perr) < 0) ) {
+		error("io_init_pipes: pipe: %m");
+		return SLURM_FAILURE;
 	}
 
+	fd_set_close_on_exec(t->pin[1]);
+	fd_set_close_on_exec(t->pout[0]);
+	fd_set_close_on_exec(t->perr[0]);
+
+	fd_set_nonblocking(t->pin[1]);
+	fd_set_nonblocking(t->pout[0]);
+	fd_set_nonblocking(t->perr[0]);
+
+	return SLURM_SUCCESS;
 }
 
-int send_io_stream_header(task_start_t * task_start, int out_or_err)
+/* prepare for child I/O:
+ * dup stdin,stdout,stderr onto appropriate pipes and
+ * close write end of stdin, and read end of stdout/err
+ */
+int 
+io_prepare_child(task_info_t *t)
 {
-	slurm_io_stream_header_t io_header;
-	Buf buffer;
-	int rc;
+	if (dup2(t->pin[0], STDIN_FILENO  ) < 0) {
+		error("dup2(stdin): %m");
+		return SLURM_FAILURE;
+	}
 
-	buffer = init_buf (sizeof(slurm_io_stream_header_t));
-	if (out_or_err == STDIN_OUT_SOCK) {
-		init_io_stream_header(&io_header,
-				      task_start->launch_msg->credential->
-				      signature,
-				      task_start->launch_msg->
-				      global_task_ids[task_start->local_task_id],
-				      SLURM_IO_STREAM_INOUT);
-		pack_io_stream_header(&io_header, buffer);
-		rc = slurm_write_stream(task_start->sockets[STDIN_OUT_SOCK], 
-		                        get_buf_data(buffer), get_buf_offset(buffer));
-	} else {
-
-		init_io_stream_header(&io_header,
-				      task_start->launch_msg->credential->
-				      signature,
-				      task_start->launch_msg->
-				      global_task_ids[task_start->local_task_id],
-				      SLURM_IO_STREAM_SIGERR);
-		pack_io_stream_header(&io_header, buffer);
-		rc = slurm_write_stream(task_start->sockets[SIG_STDERR_SOCK], 
-		                        get_buf_data(buffer), get_buf_offset(buffer));
+	if (dup2(t->pout[1], STDOUT_FILENO) < 0) {
+		error("dup2(stdout): %m");
+		return SLURM_FAILURE;
 	}
 
-	free_buf(buffer);
+	if (dup2(t->perr[1], STDERR_FILENO) < 0) {
+		error("dup2(stderr): %m");
+		return SLURM_FAILURE;
+	}
+
+	/* ignore errors on close */
+	close(t->pin[1] );
+	close(t->pout[0]);
+	close(t->perr[0]);
+	return SLURM_SUCCESS;
+}
+
+static bool 
+_readable(io_obj_t *obj)
+{
+	bool rc;
+	struct io_info *io = (struct io_info *) obj->arg;
+
+	xassert(io->magic == IO_MAGIC);
+
+	if ((rc = (!io->disconnected && !io->eof && (obj->fd > 0))))
+		debug3("readable %s", slurmd_io_str[io->type]);
+
+	return rc;
+}
+
+static bool 
+_writable(io_obj_t *obj)
+{
+	bool rc;
+	struct io_info *io = (struct io_info *) obj->arg;
+
+	xassert(io->magic == IO_MAGIC);
+
+	rc = (!io->disconnected 
+		&& ((cbuf_used(io->buf) > 0) || io->eof));
+	if (rc)
+		debug3("writable %s", slurmd_io_str[io->type]);
 	return rc;
 }
 
+static int
+_write(io_obj_t *obj, List objs)
+{
+	struct io_info *io = (struct io_info *) obj->arg;
+	int n;
+
+	xassert(io->magic == IO_MAGIC);
+	xassert(io->type >= 0);
+
+	if (io->disconnected)
+		return 0;
+
+	verbose("Need to write %ld bytes to %s %d", 
+		(long) cbuf_used(io->buf), slurmd_io_str[io->type], (int) io->id);
+
+	/* EOF seen and buffer drained: close fd, unlink io, drop obj from objs */
+	if (io->eof && (cbuf_used(io->buf) == 0)) {
+		if (close(obj->fd) < 0)
+			error("close: %m");
+		obj->fd = -1;
+		if (io->type == CLIENT_STDERR || io->type == CLIENT_STDOUT)
+			_io_disconnect_client(io);
+		else
+			_shutdown_task_obj(io);
+		list_delete_all(objs, (ListFindF)find_obj, obj); 
+		return 0;
+	}
+
+	if ((n = cbuf_read_fd(io->buf, obj->fd, -1)) < 0) {
+		int local_errno = errno;
+		if ((local_errno == EAGAIN) || (local_errno == EWOULDBLOCK))
+			return 0; /* fd not ready: don't spin, retry on a later call */
+		error("task <%ld> write failed: %s", (long) io->id,
+				slurm_strerror(local_errno));
+		return -1;
+	}
+
+	verbose("Wrote %d bytes to %s %d", 
+		 n, slurmd_io_str[io->type], (int) io->id);
+
+	return 0;
+}
+
+/* Flag every peer of task object t as EOF and unlink t from that peer; always returns 0 */
+static int
+_shutdown_task_obj(struct io_info *t)
+{
+	List l;
+	ListIterator i;
+	struct io_info *r;
+
+	l = (t->type == TASK_STDIN) ? t->writers : t->readers; /* peers: writers of a stdin obj, else readers */
+	
+	i = list_iterator_create(l);
+	while ((r = list_next(i))) {
+		List rlist = (t->type == TASK_STDIN) ? r->readers : r->writers; /* the peer's list that holds t */
+		r->eof = 1;
+		list_delete_all(rlist, (ListFindF) find_obj, t);
+	}
+	list_iterator_destroy(i);
+
+	return 0;
+}
 
-ssize_t read_EINTR(int fd, void *buf, size_t count)
+static int
+_task_read(io_obj_t *obj, List objs)
 {
-	ssize_t bytes_read;
-	while (true) {
-		if ((bytes_read = read(fd, buf, count)) <= 0) {
-			debug("bytes_read: %i , %m errno: %i", bytes_read,
-			      errno);
-			if ((bytes_read == SLURM_PROTOCOL_ERROR)
-			    && (errno == EINTR)) {
-				continue;
-			}
+	struct io_info *r, *t;
+	char buf[4096]; /* XXX Configurable? */
+	ssize_t n, nw, len = sizeof(buf); /* n: bytes read, nw: bytes buffered */
+	ListIterator i;
+
+	t = (struct io_info *) obj->arg;
+
+	xassert(t->magic == IO_MAGIC);
+	xassert((t->type == TASK_STDOUT) || (t->type == TASK_STDERR));
+	xassert(_validate_io_list(objs));
+
+   again:
+	if ((n = read(obj->fd, (void *) buf, len)) < 0) {
+		if (errno == EINTR)
+			goto again;
+		if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
+			error("%s %d: read returned EAGAIN",
+			       slurmd_io_str[t->type], (int) t->id);
+			return 0;
 		}
-		return bytes_read;
+		error("Unable to read from task %ld fd %d errno %d %m", 
+				(long) t->id, obj->fd, errno);
+		return -1;
+	}
+	verbose("read %d bytes from %s %d", 
+		(int) n, slurmd_io_str[t->type], (int) t->id);
+
+	if (n == 0) {  /* got eof */
+		verbose("got eof on task %ld", (long) t->id);
+		_shutdown_task_obj(t);
+		close(obj->fd);
+		obj->fd = -1;
+		if (list_delete_all(objs, (ListFindF) find_obj, obj) <= 0)
+			error("Unable to remove task object from list");
+		return 0;
+	}
+
+	/* copy buf to all readers; n stays intact so each reader gets all of it */
+	i = list_iterator_create(t->readers);
+	while((r = list_next(i))) {
+		nw = cbuf_write(r->buf, (void *) buf, n, NULL);
+		verbose("wrote %ld bytes into %s buf", (long) nw,
+				slurmd_io_str[r->type]);
+	}
+	list_iterator_destroy(i);
+
+	return 0;
+}
+
+static int 
+_task_error(io_obj_t *obj, List objs)
+{
+	struct io_info *t = (struct io_info *) obj->arg;
+	xassert(t->magic == IO_MAGIC);
+
+	error("error on %s %d", slurmd_io_str[t->type], t->id);
+	_shutdown_task_obj(t);
+	obj->fd = -1;
+	list_delete_all(objs, (ListFindF) find_obj, obj);
+
+	xassert(_validate_io_list(objs));
+	return -1;
+}
+
+static int 
+_client_read(io_obj_t *obj, List objs)
+{
+	struct io_info *client = (struct io_info *) obj->arg;
+	struct io_info *reader;
+	char buf[1024]; /* XXX Configurable? */
+	ssize_t n, len = sizeof(buf);
+	ListIterator i;
+
+	xassert(client->magic == IO_MAGIC);
+	xassert(_validate_io_list(objs));
+	xassert((client->type == CLIENT_STDOUT) 
+		 || (client->type == CLIENT_STDERR));
+
+   again:
+	if ((n = read(obj->fd, (void *) buf, len)) < 0) {
+		if (errno == EINTR)
+			goto again;
+		if ((errno == EAGAIN) || (errno == EWOULDBLOCK))
+			fatal("client read");
+		error("read from client %ld: %m", (long) client->id);
+		return -1;
+	}
+
+	debug("read %d bytes from %s %d", (int) n, slurmd_io_str[client->type],
+			(int) client->id);
+
+	if (n == 0)  { /* got eof, disconnect this client */
+		verbose("client %d closed connection", (int) client->id);
+		if (!client->disconnected)
+			_io_disconnect_client(client);
+		xassert(_validate_io_list(objs));
+		return 0;
+	}
+
+	if (client->type == CLIENT_STDERR) {
+		/* unsigned long int signo = strtoul(buf, NULL, 10); */
+		/* return kill(client->id, signo); */
+		return 0;
+	}
+
+	/* copy buf to all readers; n is left untouched so that every reader is
+	 * offered the same byte count (XXX more than one reader supported?)
+	 */
+	i = list_iterator_create(client->readers);
+	while((reader = list_next(i))) {
+		if (cbuf_write(reader->buf, (void *) buf, n, NULL) < 0)
+			error("client %d: cbuf_write: %m", (int) client->id);
 	}
+	list_iterator_destroy(i);
+
+	return 0;
+}
+
+static int 
+_client_error(io_obj_t *obj, List objs)
+{
+	struct io_info *io = (struct io_info *) obj->arg;
+
+	xassert(io->magic == IO_MAGIC);
+
+	error("%s task %d", slurmd_io_str[io->type], io->id); 
+	return 0;
 }
+
diff --git a/src/slurmd/io.h b/src/slurmd/io.h
index 52691f9b5c2..d15c4ca519d 100644
--- a/src/slurmd/io.h
+++ b/src/slurmd/io.h
@@ -1,9 +1,10 @@
 /*****************************************************************************\
- *  io.h -
+ * src/slurmd/io.h - slurmd IO routines
+ * $Id$
  *****************************************************************************
  *  Copyright (C) 2002 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
+ *  Written by Mark Grondona <mgrondona@llnl.gov>.
  *  UCRL-CODE-2002-040.
  *  
  *  This file is part of SLURM, a resource management program.
@@ -20,61 +21,79 @@
  *  details.
  *  
  *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
  *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
 \*****************************************************************************/
 
-#ifndef _SLURMD_IO_H_
-#define _SLURMD_IO_H_
-#include <src/slurmd/task_mgr.h>
+#ifndef _IO_H
+#define _IO_H
 
-/* file descriptor defines */
-
-#define MAX_TASKS_PER_LAUNCH 64
-
-enum {
-	CHILD_IN_PIPE = 0,
-	CHILD_IN_RD_PIPE = 0,
-	CHILD_IN_WR_PIPE = 1,
-	CHILD_OUT_PIPE = 2,
-	CHILD_OUT_RD_PIPE = 2,
-	CHILD_OUT_WR_PIPE = 3,
-	CHILD_ERR_PIPE = 4,
-	CHILD_ERR_RD_PIPE = 4,
-	CHILD_ERR_WR_PIPE = 5
-};
-
-/* prototypes */
-enum {
-	STDIN_OUT_SOCK = 0,
-	SIG_STDERR_SOCK = 1
-};
+#include <src/slurmd/job.h>
+#include <src/common/eio.h>
 
+/*
+ * Spawn IO handling thread.
+ * Initializes IO pipes, creates IO objects and appends them to job->objs,
+ * and opens 2*ntask initial connections for stdout/err, also appending these
+ * to job->objs list.
+ */
+int io_spawn_handler(slurmd_job_t *job);
 
-/* forward_io
- * controlling thread for io forwarding or io piping
- * IN task_arg		- task_arg structure containing per task launch info
- * RET int		- return_code
+/*
+ * Frees memory associated with the given IO object
  */
-int forward_io(task_start_t * task_arg);
+void io_obj_destroy(io_obj_t *obj);
 
-/* individual io piping threads called by forward_io */
-void *stdin_io_pipe_thread(void *arg);
-void *stdout_io_pipe_thread(void *arg);
-void *stderr_io_pipe_thread(void *arg);
+int  io_init_pipes(slurmd_job_t *job);
+int  io_prepare_child(task_info_t *t);
 
-/* wait_on_io_threads
- * called by exec_task_thread parent proccess to insure streams have been flushed before returning task exit status
- * IN task_arg		- task_arg structure containing per task launch info
- * RET int		- return_code
- */
-int wait_on_io_threads(task_start_t * task_start);
+void io_close_all(slurmd_job_t *job);
 
-int launch_task(task_start_t * task_start);
 
-int wait_for_tasks(launch_tasks_request_msg_t * launch_msg,
-		   task_start_t ** task_start);
+/* Notes:
+ *
+ * slurmd <-+---> client (e.g. srun, file)
+ *          `---> client
+ * 
+ * slurmd can handle multiple client connections. Each task writes
+ * stdout and stderr data to the client and reads stdin and signals
+ * from the client streams. 
+ *
+ * I/O objects:
+ * task stdout: R/O pipe created by slurmd
+ *  - buffer is null
+ *  - readers list has at least one client reader (may be a file obj)
+ *  - writers list is empty
+ *
+ *  task stderr: R/O pipe created by slurmd
+ *  - buffer is null
+ *  - readers list has at least one client reader (may be a file obj)
+ *  - writers list is empty
+ *
+ *  task stdin: W/O pipe created by slurmd
+ *  - circular buffer
+ *  - readers list is empty
+ *  - writers list contains only one client (may be a file obj)
+ *
+ *  client stdout/in socket:
+ *  - circular buffer for stdout data
+ *  - readers list is one task stdin obj or empty
+ *  - writers list is one task stdout obj
+ *
+ *  client stderr/sig socket:
+ *  - circular buffer for stderr data
+ *  - readers list is null (data read is converted to signal)
+ *  - writers list is one task stderr obj
+ *
+ *  stdout/err file obj:
+ *  - circular buffer for stdout/err data
+ *  - readers list is empty
+ *  - writers list is one task stdout/err obj
+ *
+ *  stdin file obj
+ *  - buffer is null
+ *  - readers list is one or more task stdin obj's
+ *  - writers list is empty
+ */
 
-int kill_launched_tasks(launch_tasks_request_msg_t * launch_msg,
-			task_start_t ** task_start, int i);
-#endif
+#endif /* !_IO_H */
diff --git a/src/slurmd/io_threads.c b/src/slurmd/io_threads.c
deleted file mode 100644
index e4bf7b778dc..00000000000
--- a/src/slurmd/io_threads.c
+++ /dev/null
@@ -1,362 +0,0 @@
-/*****************************************************************************\
- *  io_threads.c - 
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <http://www.llnl.gov/linux/slurm/>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-\*****************************************************************************/
-
-#include <stdlib.h>
-#include <sys/types.h>
-#include <pwd.h>
-#include <grp.h>
-#include <sys/wait.h>
-#include <errno.h>
-#include <unistd.h>
-#include <string.h>
-#include <pthread.h>
-
-#include <src/common/log.h>
-#include <src/common/list.h>
-#include <src/common/xmalloc.h>
-#include <src/common/slurm_protocol_api.h>
-#include <src/common/slurm_errno.h>
-#include <src/common/util_signals.h>
-
-#include <src/slurmd/task_mgr.h>
-#include <src/slurmd/shmem_struct.h>
-#include <src/slurmd/circular_buffer.h>
-#include <src/slurmd/io.h>
-#include <src/slurmd/pipes.h>
-#include <src/slurmd/reconnect_utils.h>
-
-void * stdin_io_pipe_thread ( void * arg )
-{
-	task_start_t * task_start = ( task_start_t * ) arg ;
-	int bytes_read ;
-	int bytes_written ;
-	int local_errno ;
-	circular_buffer_t * cir_buf ;
-
-	init_circular_buffer ( & cir_buf ) ;
-	
-	posix_signal_pipe_ignore ( ) ;
-
-	while ( true )
-	{
-		if ( ( cir_buf->write_size == 0 ) )
-		{
-			debug3 ( "stdin cir_buf->write_size == 0 this shouldn't happen" ) ;
-			break ;
-		}
-		
-		if ( ( bytes_read = slurm_read_stream ( task_start->sockets[STDIN_OUT_SOCK] , cir_buf->tail , cir_buf->write_size ) ) <= 0 )
-		{
-			sleep (1) ;
-			local_errno = errno ;	
-			if ( bytes_read == 0)
-			{
-				debug3 ( "STDIN stdin 0 returned EOF on socket ") ;
-				continue ;
-				//break ;
-			}
-			else if ( bytes_read == -1 )
-			{
-				switch ( local_errno )
-				{
-					case EBADF:
-					case EPIPE:
-					case ECONNREFUSED:
-					case ECONNRESET:
-					case ENOTCONN:
-						debug3 ( "STDIN stdin connection lost %m errno: %i", local_errno ) ;
-						continue ;
-						//break ;
-					default:
-						debug3 ( "%i STDIN uncaught error reading stdin sock stream, %m errno: %i , bytes read %i ", 
-								task_start -> launch_msg -> global_task_ids[ task_start -> local_task_id ] , local_errno , bytes_read ) ;
-						continue ;
-						//break;
-				}
-			}
-			else
-			{
-				debug3 ( "STDIN bytes_read: %i don't know what to do with this return code ", bytes_read ) ;
-				continue ;
-				//break ;
-			}
-		}
-		else
-		{
-			cir_buf_write_update ( cir_buf , bytes_read ) ;
-		}
-		
-		/* debug */
-		/*
-		write ( 1 ,  "stdin-", 6 ) ;
-		write ( 1 , cir_buf->head , cir_buf->read_size ) ;
-		debug3 ( "%i stdin bytes read", bytes_read ) ;
-		*/
-		/* debug */
-
-
-		if ( ( bytes_written = write_EINTR ( task_start->pipes[CHILD_IN_WR_PIPE] , cir_buf->head , cir_buf->read_size ) ) <= 0 )
-		{
-
-			local_errno = errno ;	
-			debug3 ( "%i error sending stdin pipe stream, %m errno: %i , bytes written %i ", 
-					task_start -> launch_msg -> global_task_ids[ task_start -> local_task_id ] , local_errno , bytes_written) ;
-			goto stdin_return ;
-		}
-		else
-		{
-			cir_buf_read_update ( cir_buf , bytes_written ) ;
-		}
-	}
-	stdin_return:
-	free_circular_buffer ( cir_buf ) ;
-	close ( task_start->pipes[CHILD_IN_WR_PIPE] ) ;
-	pthread_exit ( NULL ) ;
-}
-
-#define RECONNECT_RETRY_TIME 1
-void * stdout_io_pipe_thread ( void * arg )
-{
-	task_start_t * task_start = ( task_start_t * ) arg ;
-	int bytes_read ;
-	int sock_bytes_written ;
-	int local_errno ;
-	int attempt_reconnect = false ;
-	time_t last_reconnect_try = 0 ;
-	circular_buffer_t * cir_buf ;
-
-	init_circular_buffer ( & cir_buf ) ;
-	
-	posix_signal_pipe_ignore ( ) ;
-
-	while ( true )
-	{
-		if ( ( cir_buf->write_size == 0 ) )
-		{
-			debug3 ( "stdout cir_buf->write_size == 0 this shouldn't happen" ) ;
-			break ;
-		}	
-
-		/* read stdout code */
-		if ( ( bytes_read = read_EINTR ( task_start->pipes[CHILD_OUT_RD_PIPE] , cir_buf->tail , cir_buf->write_size ) ) <= 0 )
-		{
-			local_errno = errno ;	
-			debug3 ( "%i error reading stdout pipe stream, %m errno: %i , bytes read %i ", 
-					task_start -> launch_msg -> global_task_ids[ task_start -> local_task_id ] , local_errno , bytes_read ) ;
-			goto stdout_return ;
-		}
-		else
-		{
-			cir_buf_write_update ( cir_buf , bytes_read ) ;
-		}
-		
-		/* debug */
-		/*
-		write ( 1 , cir_buf->head , cir_buf->read_size ) ;
-		debug3 ( "%i stdout bytes read", bytes_read ) ;
-		*/
-		/* debug */
-		
-		/* reconnect code */
-		if ( attempt_reconnect )
-		{
-			time_t curr_time = time ( NULL ) ;
-			if ( difftime ( curr_time , last_reconnect_try )  > RECONNECT_RETRY_TIME )
-			{
-				slurm_close_stream ( task_start->sockets[STDIN_OUT_SOCK] ) ;
-				if ( ( task_start->sockets[STDIN_OUT_SOCK] = slurm_open_stream ( & ( task_start -> io_streams_dest ) ) ) == SLURM_PROTOCOL_ERROR )
-				{
-					local_errno = errno ;	
-					debug3 ( "error reconnecting socket to srun to pipe stdout errno %i" , local_errno ) ;
-					last_reconnect_try = time ( NULL ) ;
-					continue ;
-				}
-				attempt_reconnect = false ;
-			}
-			else
-			{
-				continue ;
-			}
-		}
-
-		/* write out socket code */
-		if ( ( sock_bytes_written = slurm_write_stream ( task_start->sockets[STDIN_OUT_SOCK] , cir_buf->head , cir_buf->read_size ) ) <= 0 )
-		{
-			local_errno = errno ;	
-			if ( sock_bytes_written == 0)
-			{
-				debug3 ( "stdout 0 returned EOF on socket ") ;
-				break ;
-			}
-			else if ( sock_bytes_written == -1 )
-			{
-				switch ( local_errno )
-				{
-					case EBADF:
-					case EPIPE:
-					case ECONNREFUSED:
-					case ECONNRESET:
-					case ENOTCONN:
-						debug3 ( "stdout connection lost %m errno: %i", local_errno ) ;
-						attempt_reconnect = true ;
-						slurm_close_stream ( task_start->sockets[STDIN_OUT_SOCK] ) ;
-						break ;
-					default:
-						debug3 ( "%i uncaught error sending stdout sock stream, errno %i sock bytes written %i",  
-								task_start -> launch_msg -> global_task_ids[ task_start -> local_task_id ] , local_errno , sock_bytes_written ) ;
-						break ;
-				}
-			}
-			else
-			{
-				debug3 ( "bytes_read: %i don't know what to do with this return code ", bytes_read ) ;
-				break ;
-			}
-		}
-		else
-		{
-			cir_buf_read_update ( cir_buf , sock_bytes_written ) ;
-		}
-	}
-
-	stdout_return:
-	free_circular_buffer ( cir_buf ) ;
-	slurm_close_stream ( task_start->sockets[STDIN_OUT_SOCK] ) ;
-	close ( task_start->pipes[CHILD_OUT_RD_PIPE] ) ;
-	pthread_exit ( NULL ) ; 
-}
-
-void * stderr_io_pipe_thread ( void * arg )
-{
-	task_start_t * task_start = ( task_start_t * ) arg ;
-	int bytes_read ;
-	int sock_bytes_written ;
-	int local_errno ;
-	int attempt_reconnect = false ;
-	time_t last_reconnect_try = 0 ;
-	circular_buffer_t * cir_buf ;
-
-	init_circular_buffer ( & cir_buf ) ;
-	
-	posix_signal_pipe_ignore ( ) ;
-	
-	while ( true )
-	{
-		if ( ( cir_buf->write_size == 0 ) )
-		{
-			debug3 ( "stderr cir_buf->write_size == 0 this shouldn't happen" ) ;
-			break ;
-		}	
-
-		/* read stderr code */
-		if ( ( bytes_read = read_EINTR ( task_start->pipes[CHILD_ERR_RD_PIPE] , cir_buf->tail , cir_buf->write_size ) ) <= 0 )
-		{
-			local_errno = errno ;	
-				debug3 ( "%i error reading stderr pipe stream, errno %i , bytes read %i ", 
-						task_start -> launch_msg -> global_task_ids[ task_start -> local_task_id ] , local_errno , bytes_read ) ;
-				goto stderr_return ;
-		}
-		else
-		{
-			cir_buf_write_update ( cir_buf , bytes_read ) ;
-		}
-		
-		/* debug */
-		/*
-		debug3 ( "%i stderr bytes read", bytes_read ) ;
-		write ( 2 , cir_buf->head , cir_buf->read_size ) ;
-		*/
-		/* debug */
-
-		/* reconnect code */
-		if ( attempt_reconnect )
-		{
-			time_t curr_time = time ( NULL ) ;
-			if ( difftime ( curr_time , last_reconnect_try )  > RECONNECT_RETRY_TIME )
-			{
-				slurm_close_stream ( task_start->sockets[SIG_STDERR_SOCK] ) ;
-				if ( ( task_start->sockets[SIG_STDERR_SOCK] = slurm_open_stream ( &( task_start -> io_streams_dest ) ) ) == SLURM_PROTOCOL_ERROR )
-				{
-					local_errno = errno ;	
-					debug3 ( "error reconnecting socket to srun to pipe stderr errno %i" , local_errno ) ;
-					last_reconnect_try = time ( NULL ) ;
-					continue ;
-				}
-				attempt_reconnect = false ;
-			}
-			else
-			{
-				continue ;
-			}
-		}
-
-		/* write out socket code */
-		if ( ( sock_bytes_written = slurm_write_stream ( task_start->sockets[SIG_STDERR_SOCK] , cir_buf->head , cir_buf->read_size ) ) <= 0 )
-		{
-			local_errno = errno ;	
-			if ( sock_bytes_written == 0)
-			{
-				debug3 ( "stderr 0 returned EOF on socket ") ;
-				break ;
-			}
-			else if ( sock_bytes_written == -1 )
-			{
-				switch ( local_errno )
-				{
-					case EBADF:
-					case EPIPE:
-					case ECONNREFUSED:
-					case ECONNRESET:
-					case ENOTCONN:
-						debug3 ( "stderr connection lost %m errno: %i", local_errno ) ;
-						attempt_reconnect = true ;
-						slurm_close_stream ( task_start->sockets[SIG_STDERR_SOCK] ) ;
-						break ;
-					default:
-						debug3 ( "%i uncaught error sending stderr sock stream, %m errno: %i sock bytes %i", 
-								task_start -> launch_msg -> global_task_ids[ task_start -> local_task_id ] , local_errno , sock_bytes_written ) ;
-						break ;
-				}
-			}
-			else
-			{
-				debug3 ( "bytes_read: %i don't know what to do with this return code ", bytes_read ) ;
-				break ;
-			}
-		}
-		else
-		{
-			cir_buf_read_update ( cir_buf , sock_bytes_written ) ;
-		}
-	}
-
-	stderr_return:
-	free_circular_buffer ( cir_buf ) ;
-	slurm_close_stream ( task_start->sockets[SIG_STDERR_SOCK] ) ;
-	close ( task_start->pipes[CHILD_ERR_RD_PIPE] ) ;
-	pthread_exit ( NULL ) ; 
-}
-
diff --git a/src/slurmd/job.c b/src/slurmd/job.c
new file mode 100644
index 00000000000..5d7c2515d23
--- /dev/null
+++ b/src/slurmd/job.c
@@ -0,0 +1,272 @@
+/*****************************************************************************\
+ * src/slurmd/job.c - slurmd_job_t routines
+ * $Id$
+ *****************************************************************************
+ *  Copyright (C) 2002 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Mark Grondona <mgrondona@llnl.gov>.
+ *  UCRL-CODE-2002-040.
+ *  
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *  
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *  
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *  
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#if HAVE_STRING_H
+#  include <string.h>
+#endif
+
+#include <src/common/xmalloc.h>
+#include <src/common/xassert.h>
+#include <src/common/xstring.h>
+#include <src/common/log.h>
+#include <src/common/eio.h>
+#include <src/common/slurm_protocol_api.h>
+
+#include <src/slurmd/job.h>
+#include <src/slurmd/shm.h>
+#include <src/slurmd/io.h>
+
+static char ** _array_copy(int n, char **src);
+static void _array_free(int n, char ***array);
+static void _srun_info_destructor(void *arg);
+static void _job_init_task_info(slurmd_job_t *job, 
+		                launch_tasks_request_msg_t *msg);
+
+
+/* Build a slurmd_job_t (copying env, argv and cwd) from a launch tasks message */
+slurmd_job_t * 
+job_create(launch_tasks_request_msg_t *msg)
+{
+	slurmd_job_t  *job;
+	srun_info_t   *srun;
+
+	xassert(msg != NULL);
+
+	job = xmalloc(sizeof(*job));
+
+	job->jobid   = msg->job_id;
+	job->stepid  = msg->job_step_id;
+	job->uid     = msg->uid;
+	job->nprocs  = msg->nprocs;
+	job->nnodes  = msg->nnodes;
+	job->nodeid  = msg->srun_node_id;
+	job->ntasks  = msg->tasks_to_launch;
+
+	job->timelimit   = msg->credential->expiration_time;
+
+	job->envc    = msg->envc;
+	job->env     = _array_copy(job->envc, msg->env);   /* own copy  */
+	job->argc    = msg->argc;
+	job->argv    = _array_copy(job->argc, msg->argv);  /* own copy  */
+
+	job->cwd     = xstrdup(msg->cwd);
+
+#ifdef HAVE_ELAN
+	job->qsw_job = msg->qsw_job;
+#endif
+
+	job->objs    = list_create((ListDelF) io_obj_destroy);
+
+	srun = srun_info_create( (void *)msg->credential->signature,
+			         msg->response_addr,   /* resp_addr */
+			         msg->streams          /* ioaddr    */
+			       );
+
+	job->sruns  = list_create((ListDelF) _srun_info_destructor);
+
+	list_append(job->sruns, (void *) srun);
+	/* must follow the append: task setup peeks at job->sruns */
+	_job_init_task_info(job, msg);
+
+	return job;
+}
+
+static void
+_job_init_task_info(slurmd_job_t *job, launch_tasks_request_msg_t *msg)
+{
+	/* Every task starts out attached to the first srun on job->sruns. */
+	srun_info_t *first = (srun_info_t *) list_peek(job->sruns);
+	int          idx, count = job->ntasks;
+
+	job->task = (task_info_t **) xmalloc(count * sizeof(task_info_t *));
+	for (idx = 0; idx < count; idx++) {
+		uint32_t     global = msg->global_task_ids[idx];
+		task_info_t *t      = task_info_create(idx, global);
+		list_append(t->srun_list, (void *) first);
+		job->task[idx] = t;
+	}
+}
+
+
+/* Mark the job step as ending in shared memory (task signalling is still TODO) */
+void 
+job_kill(slurmd_job_t *job)
+{
+	job_state_t *state;
+
+	xassert(job != NULL);
+
+	if (!(state = shm_lock_step_state(job->jobid, job->stepid))) 
+		return;
+
+	if (*state > SLURMD_JOB_STARTING) {
+		/* signal all tasks on step->task_list 
+		 * This will result in task exit msgs being sent to srun
+		 * XXX IMPLEMENT
+		 */
+		/* job_signal_tasks(job, SIGKILL); */
+	}
+	*state = SLURMD_JOB_ENDING;
+	shm_unlock_step_state(job->jobid, job->stepid);
+	
+	/* forward remaining task_exit messages? */
+	/* send_exit_codes() 			 */
+}
+
+void 
+job_destroy(slurmd_job_t *job)
+{	/* free a job built by job_create() and everything it owns */
+	int i;
+
+	_array_free(job->envc, &job->env);
+	_array_free(job->argc, &job->argv);
+	xfree(job->cwd);		/* xstrdup()ed in job_create()       */
+	for (i = 0; i < job->ntasks; i++)
+		task_info_destroy(job->task[i]);
+	xfree(job->task);		/* the pointer array itself          */
+	list_destroy(job->sruns);
+	list_destroy(job->objs);
+	xfree(job);
+}
+
+static char **
+_array_copy(int n, char **src)
+{	/* duplicate the n strings of src into a new NULL-terminated array */
+	char **dst = xmalloc((n+1) * sizeof(char *));
+	dst[n] = NULL;	/* terminate first: n is -1 once the loop ends */
+	while (--n >= 0)
+		dst[n] = xstrdup(src[n]);
+	return dst;
+}
+
+static void
+_array_free(int n, char ***array)
+{	/* free the n strings of *array, then the array; *array ends up NULL */
+	while (--n >= 0)
+		xfree((*array)[n]);	/* [] binds tighter than unary * */
+	xfree(*array);
+	*array = NULL;
+}
+
+
+struct srun_info *
+srun_info_create(void *keydata, slurm_addr resp_addr, slurm_addr ioaddr)
+{
+	struct srun_info *srun = xmalloc(sizeof(*srun));
+	srun_key_t       *key  = xmalloc(sizeof(*key ));
+	/* copy the whole SLURM_KEY_SIZE-byte key, not just its first byte */
+	memcpy((void *) key->data, keydata, sizeof(key->data));
+
+	srun->key       = key;
+	srun->ioaddr    = ioaddr;
+	srun->resp_addr = resp_addr;
+	return srun;
+}
+
+/* element destructor handed to list_create() for job->sruns */
+static void
+_srun_info_destructor(void *arg)
+{
+	/* the list passes an untyped pointer; treat it as the srun it is */
+	srun_info_destroy((struct srun_info *) arg);
+}
+
+void
+srun_info_destroy(struct srun_info *srun)
+{	/* release the key allocated in srun_info_create(), then srun itself */
+	xfree(srun->key);
+	xfree(srun);
+}
+
+task_info_t *
+task_info_create(int taskid, int gtaskid)
+{
+	task_info_t *t = (task_info_t *) xmalloc(sizeof(*t));
+
+	xassert(taskid >= 0);
+	xassert(gtaskid >= 0);
+
+	slurm_mutex_init(&t->mutex);
+	slurm_mutex_lock(&t->mutex);
+	t->state     = SLURMD_TASK_INIT;
+	t->id        = taskid;
+	t->gid	     = gtaskid;
+	t->pid       = (pid_t) -1;          /* no process yet             */
+	t->pin[0]    = t->pin[1]  = -1;     /* -1 == pipe end not open    */
+	t->pout[0]   = t->pout[1] = -1;
+	t->perr[0]   = t->perr[1] = -1;
+	t->estatus   = -1;
+	t->in        = NULL;
+	t->out       = NULL;
+	t->err       = NULL;
+	/* set explicitly, like in/out/err, rather than relying on xmalloc */
+	t->ofile     = NULL;
+	t->errfile   = NULL;
+	t->srun_list = list_create(NULL); 
+	slurm_mutex_unlock(&t->mutex);
+	return t;
+}
+
+
+void 
+task_info_destroy(task_info_t *t)
+{	/* tear down a task_info_t made by task_info_create() */
+	slurm_mutex_lock(&t->mutex);
+	list_destroy(t->srun_list);	/* list was made with list_create(NULL) */
+	slurm_mutex_unlock(&t->mutex);
+	slurm_mutex_destroy(&t->mutex);
+	xfree(t);
+}
+
+void
+job_update_shm(slurmd_job_t *job)
+{	/* describe this job step in a job_step_t and insert it into shm */
+	job_step_t s;
+	/* NOTE(review): members of s not assigned below stay uninitialized */
+	s.uid	    = job->uid;
+	s.jobid     = job->jobid;
+	s.stepid    = job->stepid;
+	s.ntasks    = job->ntasks;
+	s.timelimit = job->timelimit;
+
+	s.sw_id     = 0;
+
+	if (shm_insert_step(&s) == SLURM_FAILURE)
+		error("Updating shmem with new step info: %m");
+}
+
+void 
+job_delete_shm(slurmd_job_t *job)
+{	/* drop this step from shm; a failure is logged, not returned */
+	if (shm_delete_step(job->jobid, job->stepid) == SLURM_FAILURE)
+		error("deleting step:  %u.%u not found in shmem", 
+				job->jobid, job->stepid); 
+}
diff --git a/src/slurmd/job.h b/src/slurmd/job.h
new file mode 100644
index 00000000000..8778f854dae
--- /dev/null
+++ b/src/slurmd/job.h
@@ -0,0 +1,123 @@
+/*****************************************************************************\
+ * src/slurmd/job.h  slurmd_job_t definition
+ * $Id$
+ *****************************************************************************
+ *  Copyright (C) 2002 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Mark Grondona <mgrondona@llnl.gov>.
+ *  UCRL-CODE-2002-040.
+ *  
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *  
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *  
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *  
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+\*****************************************************************************/
+
+#ifndef _JOB_H
+#define _JOB_H
+
+#ifdef HAVE_PTHREAD
+#include <pthread.h>
+#endif
+
+#include <src/common/macros.h>
+#include <src/common/slurm_protocol_api.h>
+#include <src/common/list.h>
+#include <src/common/eio.h>
+
+
+#define SLURM_KEY_SIZE	SLURM_SSL_SIGNATURE_LENGTH
+typedef struct srun_key {
+	unsigned char data[SLURM_KEY_SIZE];
+} srun_key_t;
+
+typedef enum task_state {
+	SLURMD_TASK_INIT,
+	SLURMD_TASK_STARTING,
+	SLURMD_TASK_RUNNING,
+	SLURMD_TASK_COMPLETE
+} task_state_t;
+
+typedef struct task_info {
+	pthread_mutex_t mutex;	   /* mutex to protect task state         */
+	task_state_t    state;	   /* task state                          */
+
+	int             id;	   /* local task id                       */
+	uint32_t        gid;	   /* global task id                      */
+	pid_t           pid;	   /* task pid                            */
+	int             pin[2];    /* stdin pipe                          */
+	int             pout[2];   /* stdout pipe                         */
+	int             perr[2];   /* stderr pipe                         */
+	io_obj_t       *in, 
+	               *out,       /* I/O objects used in IO event loop   */
+		       *err;       
+	int             estatus;   /* this task's exit status             */
+	char *		ofile;	   /* output file (if any)                */
+	char *		errfile;   /* error file (if any)		  */
+	List            srun_list; /* List of srun objs for this task     */
+} task_info_t;
+
+
+typedef struct srun_info {
+	srun_key_t *key;	/* srun key for IO verification       */
+	slurm_addr resp_addr;	/* response addr for task exit msg    */
+	slurm_addr ioaddr;      /* Address to connect on for I/O      */
+} srun_info_t;
+
+typedef struct slurmd_job {
+	uint32_t      jobid;     /* msg->job_id                           */
+	uint32_t      stepid;    /* msg->job_step_id                      */
+	uint32_t      nnodes;    /* msg->nnodes                           */
+	uint32_t      nprocs;    /* msg->nprocs                           */
+	uint32_t      nodeid;    /* msg->srun_node_id                     */
+	uint32_t      ntasks;    /* msg->tasks_to_launch; size of task[]  */
+	uint16_t      envc;      /* number of strings in env              */
+	uint16_t      argc;      /* number of strings in argv             */
+	char        **env;       /* copy of msg->env                      */
+	char        **argv;      /* copy of msg->argv                     */
+	char         *cwd;       /* copy of msg->cwd                      */
+#ifdef HAVE_ELAN
+	qsw_jobinfo_t qsw_job;
+#endif
+	uid_t         uid;       /* msg->uid                              */
+	time_t        timelimit; /* credential expiration_time            */
+	task_info_t **task;      /* one task_info_t per local task        */
+	List          objs;      /* list made with io_obj_destroy         */
+	List 	      sruns;     /* list of srun_info_t                   */
+	int           unixsock;
+	pthread_t     ioid;      /* thread joined in slurmd_run_job()     */
+} slurmd_job_t;
+
+
+slurmd_job_t * job_create(launch_tasks_request_msg_t *msg);
+
+void job_kill(slurmd_job_t *job);
+
+void job_destroy(slurmd_job_t *job);
+
+struct srun_info * srun_info_create(void *keydata, slurm_addr resp_addr, 
+		                    slurm_addr ioaddr);
+
+void  srun_info_destroy(struct srun_info *srun);
+
+struct task_info * task_info_create(int taskid, int gtaskid);
+
+void task_info_destroy(struct task_info *t);
+
+void job_update_shm(slurmd_job_t *job);
+
+void job_delete_shm(slurmd_job_t *job);
+
+#endif /* !_JOB_H */
diff --git a/src/slurmd/locks.c b/src/slurmd/locks.c
deleted file mode 100644
index a0c1991301f..00000000000
--- a/src/slurmd/locks.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/*****************************************************************************\
- *  locks.c - semaphore functions for slurmd
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Moe Jette <jette@llnl.gov>, Randy Sanchez <rsancez@llnl.gov>
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <http://www.llnl.gov/linux/slurm/>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with SLURM; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-\*****************************************************************************/
-
-#ifdef HAVE_CONFIG_H
-#  include <config.h>
-#endif
-
-#include <errno.h>
-#include <pthread.h>
-#include <string.h>
-#include <sys/types.h>
-
-#include <src/slurmd/locks.h>
-#include <src/common/log.h>
-
-pthread_mutex_t locks_mutex = PTHREAD_MUTEX_INITIALIZER;
-pthread_cond_t locks_cond = PTHREAD_COND_INITIALIZER;
-slurmd_lock_flags_t slurmd_locks;
-
-void wr_rdlock (lock_datatype_t datatype);
-void wr_rdunlock (lock_datatype_t datatype);
-void wr_wrlock (lock_datatype_t datatype);
-void wr_wrunlock (lock_datatype_t datatype);
-
-/* init_locks - create locks used for slurmd data structure access control */
-void
-init_locks ( void )
-{
-	/* just clear all semaphores */
-	memset ((void *)&slurmd_locks, 0, sizeof (slurmd_locks) );
-}
-
-/* lock_slurmd - Issue the required lock requests in a well defined order
- * Returns 0 on success, -1 on failure */
-void 
-lock_slurmd (slurmd_lock_t lock_levels)
-{
-	if (lock_levels.jobs == READ_LOCK)
-		wr_rdlock (JOB_LIST_LOCK);
-	else if (lock_levels.jobs == WRITE_LOCK)
-		wr_wrlock (JOB_LIST_LOCK);
-
-	if (lock_levels.tasks == READ_LOCK)
-		wr_rdlock (TASK_LIST_LOCK);
-	else if (lock_levels.tasks == WRITE_LOCK)
-		wr_wrlock (TASK_LIST_LOCK);
-
-	if (lock_levels.credentials == READ_LOCK)
-		wr_rdlock (CREDENTIAL_STATE_LOCK);
-	else if (lock_levels.credentials == WRITE_LOCK)
-		wr_wrlock (CREDENTIAL_STATE_LOCK);
-}
-
-/* unlock_slurmd - Issue the required unlock requests in a well defined order */
-void 
-unlock_slurmd (slurmd_lock_t lock_levels)
-{
-	if (lock_levels.credentials == READ_LOCK)
-		wr_rdunlock (CREDENTIAL_STATE_LOCK);
-	else if (lock_levels.credentials == WRITE_LOCK)
-		wr_wrunlock (CREDENTIAL_STATE_LOCK);
-
-	if (lock_levels.tasks == READ_LOCK)
-		wr_rdunlock (TASK_LIST_LOCK);
-	else if (lock_levels.tasks == WRITE_LOCK)
-		wr_wrunlock (TASK_LIST_LOCK);
-
-	if (lock_levels.jobs == READ_LOCK)
-		wr_rdunlock (JOB_LIST_LOCK);
-	else if (lock_levels.jobs == WRITE_LOCK)
-		wr_wrunlock (JOB_LIST_LOCK);
-}
-
-/* wr_rdlock - Issue a read lock on the specified data type */
-void 
-wr_rdlock (lock_datatype_t datatype)
-{
-	pthread_mutex_lock (&locks_mutex);
-	while (1) {
-		if ((slurmd_locks.entity [write_wait_lock (datatype)] == 0) &&
-		    (slurmd_locks.entity [write_lock (datatype)] == 0)) {
-			slurmd_locks.entity [read_lock (datatype)]++;
-			break;
-		} 
-		else {	/* wait for state change and retry */
-			pthread_cond_wait (&locks_cond, &locks_mutex);
-		}
-	}
-	pthread_mutex_unlock (&locks_mutex);
-}
-
-/* wr_rdunlock - Issue a read unlock on the specified data type */
-void
-wr_rdunlock (lock_datatype_t datatype)
-{
-	pthread_mutex_lock (&locks_mutex);
-	slurmd_locks.entity [read_lock (datatype)]--;
-	pthread_mutex_unlock (&locks_mutex);
-	pthread_cond_broadcast (&locks_cond);
-}
-
-/* wr_wrlock - Issue a write lock on the specified data type */
-void
-wr_wrlock (lock_datatype_t datatype)
-{
-	pthread_mutex_lock (&locks_mutex);
-	slurmd_locks.entity [write_wait_lock (datatype)]++;
-
-	while (1) {
-		if ((slurmd_locks.entity [read_lock (datatype)] == 0) &&
-		    (slurmd_locks.entity [write_lock (datatype)] == 0)) {
-			slurmd_locks.entity [write_lock (datatype)]++;
-			slurmd_locks.entity [write_wait_lock (datatype)]--;
-			break;
-		} 
-		else {	/* wait for state change and retry */
-			pthread_cond_wait (&locks_cond, &locks_mutex);
-		}
-	}
-	pthread_mutex_unlock (&locks_mutex);
-}
-
-/* wr_wrunlock - Issue a write unlock on the specified data type */
-void
-wr_wrunlock (lock_datatype_t datatype)
-{
-	pthread_mutex_lock (&locks_mutex);
-	slurmd_locks.entity [write_lock (datatype)]--;
-	pthread_mutex_unlock (&locks_mutex);
-	pthread_cond_broadcast (&locks_cond);
-}
-
-/* get_lock_values - Get the current value of all locks */
-void
-get_lock_values (slurmd_lock_flags_t *lock_flags)
-{
-	if (lock_flags == NULL)
-		fatal ("get_lock_values passed null pointer");
-
-	memcpy ((void *)lock_flags, (void *) &slurmd_locks, sizeof (slurmd_locks) );
-}
diff --git a/src/slurmd/locks.h b/src/slurmd/locks.h
deleted file mode 100644
index 608b818d3fc..00000000000
--- a/src/slurmd/locks.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/*****************************************************************************\
- *  locks.h - definitions for semaphore functions for slurmd (locks.c)
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Moe Jette <jette@llnl.gov>, Randy Sanchez <rsancez@llnl.gov>
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <http://www.llnl.gov/linux/slurm/>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with SLURM; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-\*****************************************************************************/
-
-/*****************************************************************************\
- * Read/write locks are implemented by the routines in this directory by using
- * a set of three (3) UNIX semaphores to lock a resource.
- * 
- * The set of three (3) semaphores represent a reader semaphore,
- * a writer semaphore and a writers waiting semaphore.
- * 
- * The reader semaphore indicates the number of readers that currently have a
- * read lock on the resource.
- * The writers semaphore indicates that a writer has the resource locked.
- * The writers waiting semaphore indicates the number of writers waiting to
- * lock the resource.
- * 
- * Readers cannot lock the resource until there are no writers waiting for the
- * resource and the resource is not locked by a writer.
- * 
- * Writers cannot lock the resource if the resource is locked by other writers
- * or if any readers have the resource locked.
- * 
- * Writers will have priority in locking the resource over readers because
- * of the writers waiting semaphore.  The writers waiting semaphore is incremented
- * by a writer that is waiting to lock the resource.  A reader cannot lock
- * the resource until there are no writers waiting to lock the resource and
- * the resource is not locked by a writer.
- * 
- * So, if the resource is locked by an unspecified number of readers,
- * and a writer trys to lock the resource, then the writer will be blocked
- * until all of the previous readers have unlocked the resource.  But,
- * just before the writer checked to see if there were any readers locking
- * the resource, the writer incremented the writers waiting semaphore, 
- * indicating that there is now a writer waiting to lock the resource.
- * In the mean time, if an unspecified number of readers try to lock the 
- * resource after a writer (or writers) has tried to lock the resource,
- * those readers will be blocked until all writers have obtained the lock on
- * the resource, used the resource and unlocked the resource.  The subsequent
- * unspecified number of readers are blocked because they are waiting for the
- * number of writers waiting semaphore to become 0, meaning that there are no
- * writers waiting to lock the resource.
- *
- * use init_locks() to initialize the locks then
- * lock_slurd() and unlock_slurmd() to get the ordering so as to 
- * prevent deadlock. The arguments indicate the lock type required for 
- * each entity (job, node, etc.) in a well defined order.
- * For example: no lock on the config data structure, read lock on the job 
- * and node data structures, and write lock on the partition data structure 
- * would look like this: "{ NO_LOCK, READ_LOCK, READ_LOCK, WRITE_LOCK }"
-\*****************************************************************************/
-
-/* levels of locking required for each data structure */
-typedef enum {
-	NO_LOCK,
-	READ_LOCK,
-	WRITE_LOCK
-}	lock_level_t;
-
-/* slurmd specific data structures to lock via APIs */
-typedef struct {
-	lock_level_t	jobs;
-	lock_level_t	tasks;
-	lock_level_t	credentials;
-}	slurmd_lock_t;
-
-/* Interval lock structure
- * we actually use three semaphores for each data type, see macros below
- *	(lock_datatype_t * 3 + 0) = read_lock
- *	(lock_datatype_t * 3 + 1) = write_lock
- *	(lock_datatype_t * 3 + 2) = write_wait_lock
- */
-typedef enum {
-	JOB_LIST_LOCK,
-	TASK_LIST_LOCK,
-	CREDENTIAL_STATE_LOCK,
-	ENTITY_COUNT
-}	lock_datatype_t;
-
-#define read_lock(data_type)		(data_type * 3 + 0) 
-#define write_lock(data_type)		(data_type * 3 + 1) 
-#define write_wait_lock(data_type)	(data_type * 3 + 2)
-
-typedef struct {
-	int entity[ENTITY_COUNT * 3];
-}	slurmd_lock_flags_t;
-
-
-extern void get_lock_values (slurmd_lock_flags_t *lock_flags);
-extern void init_locks ( void );
-extern void lock_slurmd (slurmd_lock_t lock_levels);
-extern void unlock_slurmd (slurmd_lock_t lock_levels);
-
diff --git a/src/slurmd/mgr.c b/src/slurmd/mgr.c
new file mode 100644
index 00000000000..a2885ab179a
--- /dev/null
+++ b/src/slurmd/mgr.c
@@ -0,0 +1,312 @@
+/*****************************************************************************\
+ * src/slurmd/mgr.c - job manager functions for slurmd
+ * $Id$
+ *****************************************************************************
+ *  Copyright (C) 2002 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Mark Grondona <mgrondona@llnl.gov>.
+ *  UCRL-CODE-2002-040.
+ *  
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *  
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *  
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *  
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#if HAVE_SYS_TYPES_H
+#  include <sys/types.h>
+#endif
+
+#include <sys/wait.h>
+#include <unistd.h>
+#include <pwd.h>
+#include <grp.h>
+#include <signal.h>
+
+#if HAVE_STDLIB_H
+#  include <stdlib.h>
+#endif
+
+#include <src/common/log.h>
+
+#include <src/slurmd/mgr.h>
+#include <src/slurmd/io.h>
+#include <src/slurmd/shm.h>
+#include <src/slurmd/interconnect.h>
+
+static int _unblock_all_signals(void);
+static int _send_exit_msg(int rc, task_info_t *t);
+
+/* Launch a job step on this node. Returns (SLURM_ERROR) only when setup
+ * fails; once the step has run, the calling process exit()s. */
+int
+mgr_launch_tasks(launch_tasks_request_msg_t *msg)
+{
+	slurmd_job_t *job;
+
+	log_reinit();
+	if (shm_init() < 0) 
+		return SLURM_ERROR;
+	if (!(job = job_create(msg)))
+		return SLURM_ERROR;
+	slurmd_run_job(job); 
+	debug2("%ld returned from slurmd_run_job()", (long) getpid());
+	shm_fini();
+	exit(0); 
+	return 0; /* not reached */
+}
+
+/* Instance of a slurmd "job" or job step:
+ * We run:
+ *  interconnect_prepare()       : (NOTE: not called in the body below)
+ *  interconnect_init()          : initialize interconnect on node
+ *  fork() N tasks --> wait() --> interconnect_fini()
+ *   \
+ *    `--> interconnect_attach() : attach each proc to interconnect
+ *         interconnect_env()    : setup child environment 
+ *         exec()
+ */
+void
+slurmd_run_job(slurmd_job_t *job)
+{
+	int rc;	/* NOTE(review): assigned on failure but never read */
+	/* Insert job info into shared memory */
+	job_update_shm(job);
+
+	if (interconnect_init(job) == SLURM_ERROR) {
+		error("interconnect_init failed");
+		rc = 2;
+		goto done;
+	}
+
+	/* initialize I/O, connect back to srun, and spawn thread for
+	 * forwarding I/O.
+	 */
+	/* Option: connect slurmd stderr to srun local task 0: stderr? */
+	if (io_spawn_handler(job) == SLURM_ERROR) {
+		error("unable to spawn io handler");
+		rc = 3;
+		goto done;
+	}
+
+	job_launch_tasks(job);
+	verbose("job %d.%d complete, waiting on IO", job->jobid, job->stepid);
+	io_close_all(job);
+	pthread_join(job->ioid, NULL);
+	verbose("job %d.%d IO complete", job->jobid, job->stepid);
+	/* cleanup below runs on both the success and the failure paths */
+  done:
+	interconnect_fini(job); /* ignore errors        */
+	verbose("removing job %d.%d from system", job->jobid, job->stepid);
+	job_delete_shm(job);    /* again, ignore errors */
+	return;
+}
+
+static void
+xsignal(int signo, void (*handler)(int))
+{
+	struct sigaction act;
+	/* Install handler for signo, with signo itself in the handler mask. */
+	sigemptyset(&act.sa_mask);
+	sigaddset(&act.sa_mask, signo);
+	act.sa_flags   = 0;
+	act.sa_handler = handler;
+	sigaction(signo, &act, NULL);
+}
+
+static void
+_wait_for_all_tasks(slurmd_job_t *job)
+{
+	int i, waiting = job->ntasks;
+	/* Reap every forked task; forward each exit status to its sruns. */
+	while (waiting > 0) {
+		int status;
+		pid_t pid = waitpid(0, &status, 0);
+		if (pid < (pid_t) 0) {
+			if (errno == EINTR)
+				continue;	/* interrupted, just retry */
+			error("waitpid: %m");
+			break;	/* e.g. ECHILD: retrying can never succeed */
+		}
+		for (i = 0; i < job->ntasks; i++) {
+			if (job->task[i]->pid == pid) {
+				_send_exit_msg(status, job->task[i]);
+				waiting--;
+			}
+		}
+	}
+}
+
+static void
+_task_exec(slurmd_job_t *job, int i)
+{
+	struct passwd *pwd;
+	log_options_t opts = LOG_OPTS_STDERR_ONLY;
+	/* Runs in the forked child for task i: never returns (exec or exit) */
+	io_prepare_child(job->task[i]);
+
+	/* 
+	 * Reinitialize slurm log facility to send errors back to client 
+	 */
+	log_init("slurmd", opts, 0, NULL);
+
+	if (_unblock_all_signals() == SLURM_ERROR) {
+		error("unable to unblock signals");
+		exit(1);
+	}
+
+	/* attach to interconnect */
+	if (interconnect_attach(job, i) < 0) {
+		error("interconnect attach failed: %m");
+		exit(1);
+	}
+	/* a failure here is logged but does not stop the launch */
+	if (interconnect_env(job, i) < 0) {
+		error("interconnect_env: %m");
+	}
+	/* drop privileges: gid, supplementary groups, then uid */
+	if ((pwd = getpwuid(job->uid)) == NULL) {
+		error("User not found on node");
+		exit(1);
+	}
+
+	if (setgid(pwd->pw_gid) < 0) {
+		error("setgid: %m");
+		exit(1);
+	}
+
+	if (initgroups(pwd->pw_name, pwd->pw_gid) < 0) {
+		;	/* NOTE(review): failure ignored; groups stay unchanged */
+		/* error("initgroups: %m"); */
+	}
+
+	if (setuid(job->uid) < 0) {
+		error("setuid: %m");
+		exit(1);
+	}
+
+	if (chdir(job->cwd) < 0) {
+		error("couldn't chdir to `%s': %m: going to /tmp instead",
+				job->cwd); 
+		if (chdir("/tmp") < 0) {
+			error("couldn't chdir to /tmp either. dying.");
+			exit(1);
+		}
+	}
+
+	/* exec the cmdline */
+	execve(job->argv[0], job->argv, job->env);
+
+	/* error and clean up if execve() returns:
+	 * NOTE(review): error() may alter errno before exit() reads it */
+	error("execve(): %s: %m", job->argv[0]); 
+	exit(errno);
+}
+
+void 
+job_launch_tasks(slurmd_job_t *job)
+{
+	pid_t sid;
+	int i;
+	/* Fork one child per task; parent waits, each child execs its task. */
+	debug3("%ld entered job_launch_tasks", (long) getpid());
+
+	xsignal(SIGPIPE, SIG_IGN);
+
+	if ((sid = setsid()) < (pid_t) 0) 
+		error("setsid: %m");
+
+	if (shm_update_step_sid(job->jobid, job->stepid, sid) < 0)
+		error("shm_update_step_sid: %m");
+	
+	debug2("invoking %d tasks for job %d.%d", job->ntasks, job->jobid,
+			job->stepid);
+
+	for (i = 0; i < job->ntasks; i++) {
+		task_t t;
+		verbose("going to fork task %d", i);
+		t.id = i;
+		t.global_id = job->task[i]->gid;
+		t.ppid      = getpid();
+
+		if ((t.pid = fork()) < 0) {
+			error("fork: %m");
+			exit(1);
+			/* job_cleanup() */
+		} else if (t.pid == 0)
+			break;	/* child: leave with i == its task index */
+
+		/* Parent continues loop: */
+
+		job->task[i]->pid = t.pid;
+
+		debug2("%ld: forked child process %ld for task %d", 
+				(long) getpid(), (long) t.pid, i);  
+		debug2("going to add task %d to shm", i);
+		if (shm_add_task(job->jobid, job->stepid, &t) < 0)
+			error("shm_add_task: %m");
+		debug2("task %d added to shm", i);
+
+	}
+	/* only the parent completes the loop, so i == ntasks identifies it */
+	if (i == job->ntasks) 
+		_wait_for_all_tasks(job);
+	else
+		_task_exec(job, i);
+
+	return;
+}
+
+static int 
+_send_exit_msg(int rc, task_info_t *t)
+{
+	slurm_msg_t resp;
+	task_exit_msg_t msg;
+	ListIterator i;
+	srun_info_t *srun;
+	/* Send a MESSAGE_TASK_EXIT for task t to every srun on its list. */
+       	msg.return_code = rc;
+	msg.task_id     = t->gid;
+	resp.data       = &msg;
+	resp.msg_type   = MESSAGE_TASK_EXIT;
+	/* NOTE(review): other members of resp are left unset here */
+	i = list_iterator_create(t->srun_list);
+	while ((srun = list_next(i))) {
+		resp.address = srun->resp_addr;
+		slurm_send_only_node_msg(&resp);	/* result ignored */
+	}
+	list_iterator_destroy(i);
+
+	return SLURM_SUCCESS;
+}
+
+static int
+_unblock_all_signals(void)
+{
+	/* Unblock every signal; SLURM_ERROR if either libc call fails. */
+	sigset_t everything;
+	if (sigfillset(&everything) != 0) {
+		error("sigfillset: %m");
+		return SLURM_ERROR;
+	} else if (sigprocmask(SIG_UNBLOCK, &everything, NULL) != 0) {
+		error("sigprocmask: %m");
+		return SLURM_ERROR;
+	}
+	return SLURM_SUCCESS;
+}
diff --git a/src/slurmd/reconnect_utils.h b/src/slurmd/mgr.h
similarity index 56%
rename from src/slurmd/reconnect_utils.h
rename to src/slurmd/mgr.h
index 794fbd039f7..9c426ccea3f 100644
--- a/src/slurmd/reconnect_utils.h
+++ b/src/slurmd/mgr.h
@@ -1,9 +1,9 @@
 /*****************************************************************************\
- *  reconnect_utils.h - 
+ * src/slurmd/mgr.h - job management functions for slurmd
  *****************************************************************************
  *  Copyright (C) 2002 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
+ *  Written by Mark Grondona <mgrondona@llnl.gov>.
  *  UCRL-CODE-2002-040.
  *  
  *  This file is part of SLURM, a resource management program.
@@ -20,22 +20,35 @@
  *  details.
  *  
  *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
  *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
 \*****************************************************************************/
+#ifndef _MGR_H
+#define _MGR_H
 
-#ifndef _SLURMD_RECONNECT_UTILS_H_
-#define _SLURMD_RECONNECT_UTILS_H_
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#include <src/common/slurm_protocol_defs.h>
 
-/* connect_io_stream
- * called by the io_threads to establish a connection to srun
+#include <src/slurmd/job.h>
+
+/* Launch a job step on this node
  */
-int connect_io_stream(task_start_t * task_start, int out_or_err);
+int mgr_launch_tasks(launch_tasks_request_msg_t *msg);
 
-/* connect_io_stream
- * called by connect_io_stream to send stream identification info
+/* Instance of a slurmd "job" or job step:
+ * We run:
+ *  interconnect_prepare()       : prepare node for interconnect (if any)
+ *  interconnect_init()          : initialize interconnect on node
+ *  fork() N tasks --> wait() --> interconnect_fini()
+ *   \
+ *    `--> interconnect_attach() : attach each proc to interconnect
+ *         interconnect_env()    : setup child environment 
+ *         exec()
  */
-int send_io_stream_header(task_start_t * task_start, int out_or_err);
-ssize_t read_EINTR(int fd, void *buf, size_t count);
-ssize_t write_EINTR(int fd, void *buf, size_t count);
+void slurmd_run_job(slurmd_job_t *job);
+void job_launch_tasks(slurmd_job_t *job);
+
 #endif
diff --git a/src/slurmd/nbio.c b/src/slurmd/nbio.c
deleted file mode 100644
index 1de4de1a3a3..00000000000
--- a/src/slurmd/nbio.c
+++ /dev/null
@@ -1,741 +0,0 @@
-/*****************************************************************************\
- *  nbio.c - Non-blocking I/O
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <http://www.llnl.gov/linux/slurm/>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-\*****************************************************************************/
-
-#include <stdlib.h>
-#include <sys/types.h>
-#include <pwd.h>
-#include <grp.h>
-#include <sys/wait.h>
-#include <errno.h>
-#include <unistd.h>
-#include <string.h>
-#include <pthread.h>
-
-#include <src/common/log.h>
-#include <src/common/list.h>
-#include <src/common/xmalloc.h>
-#include <src/common/slurm_protocol_api.h>
-#include <src/common/slurm_errno.h>
-#include <src/common/util_signals.h>
-
-#include <src/slurmd/task_mgr.h>
-#include <src/slurmd/shmem_struct.h>
-#include <src/slurmd/circular_buffer.h>
-#include <src/slurmd/reconnect_utils.h>
-#include <src/slurmd/io.h>
-#include <src/slurmd/pipes.h>
-#include <src/slurmd/nbio.h>
-
-#define RECONNECT_TIMEOUT_SECONDS 1
-#define RECONNECT_TIMEOUT_MICROSECONDS 0
-typedef enum {
-	IN_OUT_FD,
-	SIG_ERR_FD,
-	CHILD_IN_WR_FD,
-	CHILD_OUT_RD_FD,
-	CHILD_ERR_RD_FD
-} nbio_fd_t;
-
-typedef enum {
-	RD_SET,
-	WR_SET,
-	ER_SET
-} nbio_set_t;
-
-typedef enum {
-	CONNECTED,
-	RECONNECT,
-	DRAIN,
-	DRAINED
-} reconnect_state_t;
-
-typedef struct nbio_attr {
-	task_start_t *task_start;
-	slurm_fd_set init_set[3];
-	slurm_fd_set next_set[3];
-	slurm_fd fd[5];
-	circular_buffer_t *in_cir_buf;
-	circular_buffer_t *out_cir_buf;
-	circular_buffer_t *err_cir_buf;
-	int flush_flag;
-	int die;
-	int reconnect_flags[2];
-	time_t reconnect_timers[2];
-	int max_fd;
-	struct timeval select_timer;
-} nbio_attr_t;
-
-typedef struct io_debug {
-	char *name;
-	int local_task_id;
-	int global_task_id;
-} io_debug_t;
-
-/* TODO
- * timers on reconnect
- * line oriented code
- */
-int forward_io(task_start_t *tsk);
-int nbio_set_init(nbio_attr_t * nbio_attr, slurm_fd_set * set_ptr);
-int memcpy_sets(slurm_fd_set * init_set, slurm_fd_set * next_set);
-int write_task_socket(circular_buffer_t * cir_buf, slurm_fd write_fd,
-		      io_debug_t * dbg);
-int read_task_pipe(circular_buffer_t * cir_buf, slurm_fd write_fd,
-		   io_debug_t * dbg);
-int write_task_pipe(circular_buffer_t * cir_buf, slurm_fd write_fd,
-		    io_debug_t * dbg);
-int read_task_socket(circular_buffer_t * cir_buf, slurm_fd read_fd,
-		     io_debug_t * dbg);
-int error_task_pipe(nbio_attr_t * nbio_attr, int fd_index);
-int error_task_socket(nbio_attr_t * nbio_attr, int fd_index);
-int set_max_fd(nbio_attr_t * nbio_attr);
-int nbio_cleanup(nbio_attr_t * nbio_attr);
-int reconnect(nbio_attr_t * nbio_attr);
-int test_error_conditions(nbio_attr_t * nbio_attr);
-int print_nbio_sets(nbio_attr_t * nbio_attr, slurm_fd_set * set_ptr);
-
-int forward_io(task_start_t *tsk)
-{
-	return do_nbio((void *)tsk);
-}
-
-int wait_on_io_threads(task_start_t *tsk)
-{
-	return SLURM_SUCCESS;
-}
-
-static void delay(struct timeval *tv)
-{
-	select(0, NULL, NULL, NULL, tv);
-}
-
-
-int init_io_debug(io_debug_t * io_dbg, task_start_t * task_start,
-		  char *name)
-{
-	io_dbg->name = name;
-	io_dbg->local_task_id = task_start->local_task_id;
-	io_dbg->global_task_id =
-	    task_start->launch_msg->global_task_ids[task_start->
-						    local_task_id];
-	return SLURM_SUCCESS;
-}
-
-int init_nbio_attr(nbio_attr_t * nbio_attr, task_start_t * task_start)
-{
-	int i;
-	nbio_attr->max_fd = 0;
-	nbio_attr->flush_flag = false;
-	nbio_attr->die = false;
-	nbio_attr->task_start = task_start;
-	nbio_attr->fd[IN_OUT_FD] = task_start->sockets[STDIN_OUT_SOCK];
-	nbio_attr->fd[SIG_ERR_FD] = task_start->sockets[SIG_STDERR_SOCK];
-	nbio_attr->fd[CHILD_IN_WR_FD] =
-	    task_start->pipes[CHILD_IN_WR_PIPE];
-	nbio_attr->fd[CHILD_OUT_RD_FD] =
-	    task_start->pipes[CHILD_OUT_RD_PIPE];
-	nbio_attr->fd[CHILD_ERR_RD_FD] =
-	    task_start->pipes[CHILD_ERR_RD_PIPE];
-	init_circular_buffer(&nbio_attr->in_cir_buf);
-	init_circular_buffer(&nbio_attr->out_cir_buf);
-	init_circular_buffer(&nbio_attr->err_cir_buf);
-	for (i = 0; i < 2; i++) {
-		nbio_attr->reconnect_flags[i] = RECONNECT;
-		nbio_attr->reconnect_timers[i] = 0;
-	}
-	nbio_set_init(nbio_attr, nbio_attr->init_set);
-	nbio_attr->select_timer.tv_sec = RECONNECT_TIMEOUT_SECONDS;
-	nbio_attr->select_timer.tv_usec = RECONNECT_TIMEOUT_MICROSECONDS;
-	return SLURM_SUCCESS;
-}
-
-int do_nbio(void *arg)
-{
-	nbio_attr_t nbio_attr;
-	task_start_t *task_start = (task_start_t *) arg;
-	io_debug_t in_dbg;
-	io_debug_t out_dbg;
-	io_debug_t err_dbg;
-	struct timeval tv;
-
-	debug3("do_nbio: enter");
-
-	/* init_io_debug(&in_dbg, task_start, "stdin");   */
-	/* init_io_debug(&out_dbg, task_start, "stdout"); */
-	/* init_io_debug(&err_dbg, task_start, "stderr"); */
-	init_nbio_attr(&nbio_attr, task_start);
-	debug3("after init_nbio_attr()");
-
-	posix_signal_pipe_ignore();
-	debug3("after posix_signal_pipe_ignore()");
-
-	reconnect(&nbio_attr);
-
-	while (true) {
-		int rc;
-
-		set_max_fd(&nbio_attr);
-
-		/* print_nbio_sets(&nbio_attr, nbio_attr.init_set); */
-		debug3("entering slurm_select");
-		rc = slurm_select(nbio_attr.max_fd,
-				  &nbio_attr.init_set[RD_SET],
-				  &nbio_attr.init_set[WR_SET],
-				  &nbio_attr.init_set[ER_SET],
-				  &nbio_attr.select_timer);
-
-		debug3("returned from slurm_select() with rc = %d", rc);
-
-		/* print_nbio_sets(&nbio_attr, nbio_attr.init_set); */
-		if (rc == SLURM_ERROR) {
-			error("select error in IO loop %m");
-			nbio_set_init(&nbio_attr, nbio_attr.init_set);
-			continue;
-		} else if (rc == 0) {
-			reconnect(&nbio_attr);
-			nbio_set_init(&nbio_attr, nbio_attr.init_set);
-			/* these are here to set the write set after the fd 
-			 * numbers could have changed in reconnect */
-			if (nbio_attr.out_cir_buf->read_size > 0) {
-				slurm_FD_SET(nbio_attr.fd[IN_OUT_FD],
-					     &nbio_attr.init_set[WR_SET]);
-			}
-			if (nbio_attr.err_cir_buf->read_size > 0) {
-				slurm_FD_SET(nbio_attr.fd[SIG_ERR_FD],
-					     &nbio_attr.init_set[WR_SET]);
-			}
-			if (test_error_conditions(&nbio_attr))
-				break;
-
-			nbio_attr.select_timer.tv_sec =
-			    RECONNECT_TIMEOUT_SECONDS;
-			nbio_attr.select_timer.tv_usec =
-			    RECONNECT_TIMEOUT_MICROSECONDS;
-			continue;
-		} else if (rc < 0) {
-			debug3("select has unknown error: %i", rc);
-			break;
-		}
-
-		if (test_error_conditions(&nbio_attr))
-			break;
-
-		nbio_set_init(&nbio_attr, nbio_attr.next_set);
-
-		/* error fd set */
-		if (slurm_FD_ISSET( nbio_attr.fd[CHILD_IN_WR_FD],
-		                    &nbio_attr.init_set[ER_SET])  )
-			error_task_pipe(&nbio_attr, CHILD_IN_WR_FD);
-
-		if (slurm_FD_ISSET( nbio_attr.fd[CHILD_OUT_RD_FD],
-		                    &nbio_attr.init_set[ER_SET])  ) 
-			error_task_pipe(&nbio_attr, CHILD_OUT_RD_FD);
-
-		if (slurm_FD_ISSET( nbio_attr.fd[CHILD_ERR_RD_FD],
-		                    &nbio_attr.init_set[ER_SET])  ) 
-			error_task_pipe(&nbio_attr, CHILD_ERR_RD_FD);
-
-		if (slurm_FD_ISSET( nbio_attr.fd[IN_OUT_FD],
-		                    &nbio_attr.init_set[ER_SET]) ) 
-			error_task_socket(&nbio_attr, IN_OUT_FD);
-
-		if (slurm_FD_ISSET( nbio_attr.fd[SIG_ERR_FD],
-		                    &nbio_attr.init_set[ER_SET]) ) 
-			error_task_socket(&nbio_attr, SIG_ERR_FD);
-
-		/* read fd set */
-		if (slurm_FD_ISSET
-		    (nbio_attr.fd[IN_OUT_FD], &nbio_attr.init_set[RD_SET])
-		    && nbio_attr.reconnect_flags[IN_OUT_FD] == CONNECTED) {
-			if (read_task_socket
-			    (nbio_attr.in_cir_buf, nbio_attr.fd[IN_OUT_FD],
-			     &in_dbg)) {
-				error_task_socket(&nbio_attr, IN_OUT_FD);
-			} else
-				slurm_FD_SET(nbio_attr.fd[CHILD_IN_WR_FD],
-					     &nbio_attr.next_set[WR_SET]);
-		}
-		if (slurm_FD_ISSET
-		    (nbio_attr.fd[CHILD_OUT_RD_FD],
-		     &nbio_attr.init_set[RD_SET])) {
-			if (read_task_pipe
-			    (nbio_attr.out_cir_buf,
-			     nbio_attr.fd[CHILD_OUT_RD_FD], &out_dbg)) {
-				error_task_pipe(&nbio_attr,
-						CHILD_OUT_RD_FD);
-			} else
-				slurm_FD_SET(nbio_attr.fd[IN_OUT_FD],
-					     &nbio_attr.next_set[WR_SET]);
-		}
-		if (slurm_FD_ISSET
-		    (nbio_attr.fd[CHILD_ERR_RD_FD],
-		     &nbio_attr.init_set[RD_SET])) {
-			if (read_task_pipe
-			    (nbio_attr.err_cir_buf,
-			     nbio_attr.fd[CHILD_ERR_RD_FD], &err_dbg)) {
-				error_task_pipe(&nbio_attr,
-						CHILD_ERR_RD_FD);
-			} else
-				slurm_FD_SET(nbio_attr.fd[SIG_ERR_FD],
-					     &nbio_attr.next_set[WR_SET]);
-		}
-
-		/* write fd set */
-		if (slurm_FD_ISSET
-		    (nbio_attr.fd[CHILD_IN_WR_FD],
-		     &nbio_attr.next_set[WR_SET])) {
-			if (write_task_pipe
-			    (nbio_attr.in_cir_buf,
-			     nbio_attr.fd[CHILD_IN_WR_FD], &in_dbg)) {
-				error_task_pipe(&nbio_attr,
-						CHILD_IN_WR_FD);
-			} else
-				slurm_FD_CLR(nbio_attr.fd[CHILD_IN_WR_FD],
-					     &nbio_attr.next_set[WR_SET]);
-		}
-		if (slurm_FD_ISSET
-		    (nbio_attr.fd[IN_OUT_FD], &nbio_attr.next_set[WR_SET])
-		    && nbio_attr.reconnect_flags[IN_OUT_FD] == CONNECTED) {
-			if (write_task_socket
-			    (nbio_attr.out_cir_buf,
-			     nbio_attr.fd[IN_OUT_FD], &out_dbg)) {
-				error_task_socket(&nbio_attr, IN_OUT_FD);
-			} else
-				slurm_FD_CLR(nbio_attr.fd[IN_OUT_FD],
-					     &nbio_attr.next_set[WR_SET]);
-		}
-		if (slurm_FD_ISSET
-		    (nbio_attr.fd[SIG_ERR_FD], &nbio_attr.next_set[WR_SET])
-		    && nbio_attr.reconnect_flags[IN_OUT_FD] == CONNECTED) {
-			if (write_task_socket
-			    (nbio_attr.err_cir_buf,
-			     nbio_attr.fd[SIG_ERR_FD], &err_dbg)) {
-				error_task_socket(&nbio_attr, SIG_ERR_FD);
-			} else
-				slurm_FD_CLR(nbio_attr.fd[SIG_ERR_FD],
-					     &nbio_attr.next_set[WR_SET]);
-		}
-
-		if (nbio_attr.flush_flag) 
-			nbio_set_init(&nbio_attr, nbio_attr.init_set);
-		else 
-			memcpy_sets(nbio_attr.init_set,
-				    nbio_attr.next_set);
-	}
-
-	nbio_cleanup(&nbio_attr);
-	return SLURM_SUCCESS;
-}
-
-int memcpy_sets(slurm_fd_set * init_set, slurm_fd_set * next_set)
-{
-	int i;
-
-	for (i = 0; i < 3; i++) {
-		memcpy(&init_set[i], &next_set[i], sizeof(slurm_fd_set));
-	}
-	return SLURM_SUCCESS;
-}
-
-int read_task_pipe(circular_buffer_t * cir_buf, slurm_fd read_fd,
-		   io_debug_t * dbg)
-{
-	int bytes_read;
-	int local_errno;
-
-	debug3("read_task_pipe: enter");
-
-	/* test for wierd state */
-	if ((cir_buf->write_size == 0)) {
-		if (dbg)
-			debug3("%s write_size == 0 this shouldn't happen",
-			     dbg->name);
-		slurm_seterrno_ret(ESLURMD_CIRBUF_POINTER_0);
-	}
-
-	/* read stdout code */
-	if ((bytes_read = read_EINTR(read_fd, cir_buf->tail,
-			             cir_buf->write_size    )) <= 0) {
-		if (dbg)
-			debug3("%d: read_EINTR: %m: bytes read %d",
-			     dbg->global_task_id, dbg->name, local_errno,
-			     bytes_read);
-		slurm_seterrno_ret(ESLURMD_PIPE_DISCONNECT);
-	} else {
-		cir_buf_write_update(cir_buf, bytes_read);
-		debug3("read_task_pipe fd: %d bytes_read %d", read_fd,
-		       bytes_read);
-		return SLURM_SUCCESS;
-	}
-}
-
-
-int write_task_pipe(circular_buffer_t * cir_buf, slurm_fd write_fd,
-		    io_debug_t * dbg)
-{
-	int bytes_written;
-	int local_errno;
-
-	/* test for wierd state */
-	if ((cir_buf->read_size == 0)) {
-		if (dbg)
-			debug3("%s read_size == 0 this shouldn't happen",
-			     dbg->name);
-		slurm_seterrno_ret(ESLURMD_CIRBUF_POINTER_0);
-	}
-
-	if ((bytes_written =
-	     write_EINTR(write_fd, cir_buf->head,
-			 cir_buf->read_size)) <= 0) {
-		local_errno = errno;
-		if (dbg)
-			debug3("%d: %s: write_EINTR: %m: bytes written %d",
-			     dbg->global_task_id, bytes_written);
-		slurm_seterrno_ret(ESLURMD_PIPE_DISCONNECT);
-	} else {
-		cir_buf_read_update(cir_buf, bytes_written);
-		//debug3 ( "write_task_pipe fd: %i bytes_written %i" , write_fd , bytes_written ) ;
-		return SLURM_SUCCESS;
-	}
-}
-
-int read_task_socket(circular_buffer_t * cir_buf, slurm_fd read_fd,
-		     io_debug_t * dbg)
-{
-	int bytes_read;
-	int local_errno;
-
-	/* test for wierd state */
-	if ((cir_buf->write_size == 0)) {
-		if (dbg)
-			debug3
-			    ("%s cir_buf->write_size == 0 this shouldn't happen",
-			     dbg->name);
-		slurm_seterrno_ret(ESLURMD_CIRBUF_POINTER_0);
-	}
-
-	if ((bytes_read =
-	     slurm_read_stream(read_fd, cir_buf->tail,
-			       cir_buf->write_size)) <= 0) {
-		local_errno = errno;
-		/* test for EOF on socket */
-		if (bytes_read == 0) {
-			if (dbg)
-				debug3("%i 0 returned EOF on socket",
-				       dbg->global_task_id);
-			slurm_seterrno_ret(ESLURMD_EOF_ON_SOCKET);
-		} else if (bytes_read == -1) {
-			switch (local_errno) {
-			case EBADF:
-			case EPIPE:
-			case ECONNREFUSED:
-			case ECONNRESET:
-			case ENOTCONN:
-				if (dbg)
-					debug3
-					    ("lost %s socket connection %m errno: %i",
-					     dbg->name, local_errno);
-				slurm_seterrno_ret
-				    (ESLURMD_SOCKET_DISCONNECT);
-				break;
-			default:
-				if (dbg)
-					debug3
-					    ("%i error reading %s sock stream, %m errno: %i , bytes read %i ",
-					     dbg->global_task_id,
-					     dbg->name, local_errno,
-					     bytes_read);
-				slurm_seterrno_ret
-				    (ESLURMD_UNKNOWN_SOCKET_ERROR);
-				break;
-			}
-		} else {
-			debug3
-			    ("bytes_read: %i don't know what to do with this return code ",
-			     bytes_read);
-			slurm_seterrno_ret(ESLURMD_UNKNOWN_SOCKET_ERROR);
-		}
-	} else {
-		cir_buf_write_update(cir_buf, bytes_read);
-		//debug3 ( "read_task_socket fd: %i bytes_read %i" , read_fd , bytes_read ) ;
-		return SLURM_SUCCESS;
-	}
-}
-
-int write_task_socket(circular_buffer_t * cir_buf, slurm_fd write_fd,
-		      io_debug_t * dbg)
-{
-	int sock_bytes_written;
-	int local_errno;
-
-	debug3("write_task_socket: entered");
-	/* test for wierd state */
-	if ((cir_buf->read_size == 0)) {
-		if (dbg)
-			debug3
-			    ("%s cir_buf->read_size == 0 this shouldn't happen",
-			     dbg->name);
-		slurm_seterrno_ret(ESLURMD_CIRBUF_POINTER_0);
-	}
-
-	if ((sock_bytes_written =
-	     slurm_write_stream(write_fd, cir_buf->head,
-				cir_buf->read_size)) <= 0) {
-		local_errno = errno;
-		/* test for EOF on socket */
-		if (sock_bytes_written == 0) {
-			if (dbg)
-				debug3("%i 0 returned EOF on socket",
-				       dbg->global_task_id);
-			slurm_seterrno_ret(ESLURMD_EOF_ON_SOCKET);
-		} else if (sock_bytes_written == -1) {
-			switch (local_errno) {
-			case EBADF:
-			case EPIPE:
-			case ECONNREFUSED:
-			case ECONNRESET:
-			case ENOTCONN:
-				if (dbg)
-					debug3
-					    ("lost %s socket connection %m errno: %i",
-					     dbg->name, local_errno);
-				slurm_seterrno_ret
-				    (ESLURMD_SOCKET_DISCONNECT);
-				break;
-			default:
-				if (dbg)
-					debug3
-					    ("%i error sending %s sock stream, %m errno %i, sock bytes written %i",
-					     dbg->global_task_id,
-					     dbg->name, local_errno,
-					     sock_bytes_written);
-				slurm_seterrno_ret
-				    (ESLURMD_UNKNOWN_SOCKET_ERROR);
-				break;
-			}
-		} else {
-			debug3
-			    ("bytes_read: %i don't know what to do with this return code ",
-			     sock_bytes_written);
-			slurm_seterrno_ret(ESLURMD_UNKNOWN_SOCKET_ERROR);
-		}
-	} else {
-		cir_buf_read_update(cir_buf, sock_bytes_written);
-		debug3("write_task_socket fd: %i bytes_written %i",
-		       write_fd, sock_bytes_written);
-		return SLURM_SUCCESS;
-	}
-}
-
-int error_task_pipe(nbio_attr_t * nbio_attr, int fd_index)
-{
-	switch (errno) {
-	case ESLURMD_CIRBUF_POINTER_0:
-		break;
-	case ESLURMD_PIPE_DISCONNECT:
-		nbio_attr->flush_flag = true;
-		break;
-	}
-	return SLURM_SUCCESS;
-}
-
-int error_task_socket(nbio_attr_t * nbio_attr, int fd_index)
-{
-	switch (errno) {
-	case ESLURMD_CIRBUF_POINTER_0:
-		if (nbio_attr->flush_flag) {
-			nbio_attr->reconnect_flags[fd_index] = DRAINED;
-		} else {
-			debug3
-			    ("ESLURMD_CIRBUF_POINTER_0 shouldn't have occured");
-		}
-		break;
-	case ESLURMD_UNKNOWN_SOCKET_ERROR:
-	case ESLURMD_SOCKET_DISCONNECT:
-	case ESLURMD_EOF_ON_SOCKET:
-		if (!slurm_close_stream(nbio_attr->fd[fd_index])); 
-			nbio_attr->fd[fd_index] = -1;
-
-		switch (nbio_attr->reconnect_flags[fd_index]) {
-		case CONNECTED:
-			nbio_attr->reconnect_flags[fd_index] = RECONNECT;
-			break;
-		case DRAIN:
-		case DRAINED:
-			nbio_attr->die = true;
-			break;
-		case RECONNECT:
-			break;
-		default:
-			debug3
-			    ("Unknown case in error_task_socket:ESLURMD_EOF_ON_SOCKET: %i",
-			     nbio_attr->reconnect_flags[fd_index]);
-			break;
-		}
-		break;
-	default:
-		debug3("Unknown case in error_task_socket: %i",
-		       nbio_attr->reconnect_flags[fd_index]);
-		break;
-	}
-	return SLURM_SUCCESS;
-}
-
-int nbio_set_init(nbio_attr_t * nbio_attr, slurm_fd_set * set_ptr)
-{
-	int i;
-
-	for (i = 0; i < 3; i++) {
-		FD_ZERO(&set_ptr[i]);
-	}
-
-	if (nbio_attr->flush_flag) {
-		/* write fds */
-		slurm_FD_SET(nbio_attr->fd[IN_OUT_FD], &set_ptr[WR_SET]);
-		slurm_FD_SET(nbio_attr->fd[SIG_ERR_FD], &set_ptr[WR_SET]);
-
-		/* error fds */
-		slurm_FD_SET(nbio_attr->fd[IN_OUT_FD], &set_ptr[ER_SET]);
-		slurm_FD_SET(nbio_attr->fd[SIG_ERR_FD], &set_ptr[ER_SET]);
-	}
-	{
-		/* read fds */
-		slurm_FD_SET(nbio_attr->fd[IN_OUT_FD], &set_ptr[RD_SET]);
-		slurm_FD_SET(nbio_attr->fd[CHILD_OUT_RD_FD],
-			     &set_ptr[RD_SET]);
-		slurm_FD_SET(nbio_attr->fd[CHILD_ERR_RD_FD],
-			     &set_ptr[RD_SET]);
-
-		/* error fds */
-		for (i = 0; i < 5; i++) {
-			slurm_FD_SET(nbio_attr->fd[i], &set_ptr[ER_SET]);
-		}
-
-	}
-	return SLURM_SUCCESS;
-}
-
-int set_max_fd(nbio_attr_t * nbio_attr)
-{
-	int i;
-	nbio_attr->max_fd = 0;
-	for (i = 0; i < 5; i++) {
-		nbio_attr->max_fd =
-		    MAX(nbio_attr->max_fd, nbio_attr->fd[i]);
-	}
-	nbio_attr->max_fd++;
-	return SLURM_SUCCESS;
-}
-
-int nbio_cleanup(nbio_attr_t * nbio_attr)
-{
-	free_circular_buffer(nbio_attr->in_cir_buf);
-	free_circular_buffer(nbio_attr->out_cir_buf);
-	free_circular_buffer(nbio_attr->err_cir_buf);
-
-	slurm_close_stream(nbio_attr->fd[IN_OUT_FD]);
-	slurm_close_stream(nbio_attr->fd[SIG_ERR_FD]);
-	close(nbio_attr->fd[CHILD_IN_WR_FD]);
-	close(nbio_attr->fd[CHILD_OUT_RD_FD]);
-	close(nbio_attr->fd[CHILD_ERR_RD_FD]);
-
-	return SLURM_SUCCESS;
-}
-
-int reconnect(nbio_attr_t * nbio_attr)
-{
-	if (nbio_attr->reconnect_flags[IN_OUT_FD] == RECONNECT) {
-		if (connect_io_stream(nbio_attr->task_start, STDIN_OUT_SOCK) > 0) {
-			nbio_attr->fd[IN_OUT_FD] =
-			    nbio_attr->task_start->sockets[STDIN_OUT_SOCK];
-			slurm_set_stream_non_blocking(nbio_attr->
-						      fd[IN_OUT_FD]);
-			nbio_attr->reconnect_flags[IN_OUT_FD] = CONNECTED;
-		}
-	}
-	if (nbio_attr->reconnect_flags[SIG_ERR_FD] == RECONNECT) {
-		if (connect_io_stream
-		    (nbio_attr->task_start, SIG_STDERR_SOCK) > 0) {
-			nbio_attr->fd[SIG_ERR_FD] =
-			    nbio_attr->task_start->
-			    sockets[SIG_STDERR_SOCK];
-			slurm_set_stream_non_blocking(nbio_attr->
-						      fd[SIG_ERR_FD]);
-			nbio_attr->reconnect_flags[SIG_ERR_FD] = CONNECTED;
-		}
-	}
-	return SLURM_SUCCESS;
-}
-
-int test_error_conditions(nbio_attr_t * nbio_attr)
-{
-	/* task has died and io is flushed */
-	if (nbio_attr->out_cir_buf->read_size == 0
-	    && nbio_attr->err_cir_buf->read_size == 0
-	    && nbio_attr->flush_flag) {
-		return SLURM_ERROR;
-	}
-
-	if (nbio_attr->die) {
-		return SLURM_ERROR;
-	}
-        /* if ( waitpid ( nbio_attr -> task_start -> exec_pid , NULL , 
-	               WNOHANG ) > 0 )
-		return SLURM_ERROR ;
-	*/
-	return SLURM_SUCCESS;
-}
-
-int print_nbio_sets(nbio_attr_t * nbio_attr, slurm_fd_set * set_ptr)
-{
-	int i;
-	printf("fds ");
-	for (i = 0; i < 5; i++)
-		printf(" %i ", nbio_attr->fd[i]);
-	printf("\n");
-	printf(" %i %i %i %i %i %i \n",
-	       nbio_attr->in_cir_buf->read_size,
-	       nbio_attr->in_cir_buf->write_size,
-	       nbio_attr->out_cir_buf->read_size,
-	       nbio_attr->out_cir_buf->write_size,
-	       nbio_attr->err_cir_buf->read_size,
-	       nbio_attr->err_cir_buf->write_size);
-	printf("--- 00000000001111111111222222222233\n");
-	printf("--- 01234567890123456789012345678901\n");
-	printf("rd  ");
-	for (i = 0; i < 32; i++)
-		printf("%i", slurm_FD_ISSET(i, &set_ptr[RD_SET]));
-	printf("\n");
-	printf("wr  ");
-	for (i = 0; i < 32; i++)
-		printf("%i", slurm_FD_ISSET(i, &set_ptr[WR_SET]));
-	printf("\n");
-	printf("er  ");
-	for (i = 0; i < 32; i++)
-		printf("%i", slurm_FD_ISSET(i, &set_ptr[ER_SET]));
-	printf("\n");
-	return SLURM_SUCCESS;
-}
diff --git a/src/slurmd/nbio.h b/src/slurmd/nbio.h
deleted file mode 100644
index d7ebe243ae7..00000000000
--- a/src/slurmd/nbio.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*****************************************************************************\
- *  nbio.h - Non-blocking I/O header for nbio.c
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <http://www.llnl.gov/linux/slurm/>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-\*****************************************************************************/
-
-#ifndef _SLURMD_NBIO_H
-#define _SLURMD_NBIO_H
-
-int do_nbio(void *arg);
-
-#endif
diff --git a/src/slurmd/no_interconnect.c b/src/slurmd/no_interconnect.c
index 63a7087625d..0e445b50f13 100644
--- a/src/slurmd/no_interconnect.c
+++ b/src/slurmd/no_interconnect.c
@@ -25,37 +25,42 @@
  *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
 \*****************************************************************************/
 
-#include <src/common/slurm_protocol_api.h>
-#include <src/slurmd/task_mgr.h>
 #include <src/slurmd/interconnect.h>
+#include <src/slurmd/setenvpf.h>
 
-/* exported module funtion to launch tasks */
-/*launch_tasks should really be named launch_job_step*/
-int launch_tasks ( launch_tasks_request_msg_t * launch_msg )
+int interconnect_init (slurmd_job_t *job)
 {
-	pthread_atfork ( NULL , NULL , pthread_fork_child_after ) ;
-	return interconnect_init ( launch_msg );
+	return SLURM_SUCCESS;	/* stub for the no-interconnect build: job is unused, always reports success */
 }
 
-/* Contains interconnect specific setup instructions and then calls 
- * fan_out_task_launch */
-int interconnect_init ( launch_tasks_request_msg_t * launch_msg )
+int interconnect_attach (slurmd_job_t *job, int taskid) 
 {
-	return fan_out_task_launch ( launch_msg ) ;
-}
-
-int interconnect_set_capabilities ( task_start_t * task_start ) 
-{
-	return SLURM_SUCCESS ;
+	return SLURM_SUCCESS;	/* stub for the no-interconnect build: job and taskid are unused, always reports success */
 }
 
 /*
  * Set env variables needed for this interconnect
  */
-int interconnect_env(char ***env, uint16_t *envc, int nodeid, int nnodes, 
-	             int procid, int nprocs)
+int interconnect_env(slurmd_job_t *job, int taskid)
+{	/* Passes SLURM_NODEID, SLURM_PROCID, SLURM_NNODES and SLURM_NPROCS to setenvpf() for job->env; returns SLURM_SUCCESS, or -1 on the first failure. */
+	int cnt = job->envc;	/* local count handed to setenvpf(); NOTE(review): never stored back into job->envc here - confirm callers do not rely on it */
+	task_info_t *t = job->task[taskid];	/* taskid indexes job->task[] without a range check in this function */
+
+	if (setenvpf(&job->env, &cnt, "SLURM_NODEID=%d", job->nodeid) < 0)
+		return -1;	/* a negative setenvpf() result aborts immediately; earlier variables are not rolled back */
+	if (setenvpf(&job->env, &cnt, "SLURM_PROCID=%d", t->gid     ) < 0)	/* NOTE(review): t->gid is presumably the task's global id - confirm against job.h */
+		return -1;
+	if (setenvpf(&job->env, &cnt, "SLURM_NNODES=%d", job->nnodes) < 0)
+		return -1;
+	if (setenvpf(&job->env, &cnt, "SLURM_NPROCS=%d", job->nprocs) < 0)
+		return -1;
+
+	return SLURM_SUCCESS;	/* reached only when all four setenvpf() calls returned >= 0 */
+}
+
+int interconnect_fini(slurmd_job_t *job)
 {
-	return SLURM_SUCCESS ;
+	return SLURM_SUCCESS;	/* stub for the no-interconnect build: job is unused, always reports success */
 }
 
 
diff --git a/src/slurmd/pipes.c b/src/slurmd/pipes.c
deleted file mode 100644
index db6f0b41816..00000000000
--- a/src/slurmd/pipes.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/*****************************************************************************\
- *  pipes.c -   
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <http://www.llnl.gov/linux/slurm/>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-\*****************************************************************************/
-
-#include <unistd.h>
-#include <errno.h>
-
-#include <src/common/slurm_errno.h>
-#include <src/common/log.h>
-#include <src/slurmd/pipes.h>
-#include <src/slurmd/io.h>
-
-void setup_parent_pipes(int *pipes)
-{
-	close(pipes[CHILD_IN_RD_PIPE]);
-	close(pipes[CHILD_OUT_WR_PIPE]);
-	close(pipes[CHILD_ERR_WR_PIPE]);
-}
-
-void cleanup_parent_pipes(int *pipes)
-{
-	close(pipes[CHILD_IN_WR_PIPE]);
-	close(pipes[CHILD_OUT_RD_PIPE]);
-	close(pipes[CHILD_ERR_RD_PIPE]);
-}
-
-int init_parent_pipes(int *pipes)
-{
-	int rc;
-
-	/* open pipes to be used in dup after fork */
-	if ((rc = pipe(&pipes[CHILD_IN_PIPE]))) 
-		slurm_seterrno_ret(ESLRUMD_PIPE_ERROR_ON_TASK_SPAWN);
-	if ((rc = pipe(&pipes[CHILD_OUT_PIPE]))) 
-		slurm_seterrno_ret(ESLRUMD_PIPE_ERROR_ON_TASK_SPAWN);
-	if ((rc = pipe(&pipes[CHILD_ERR_PIPE]))) 
-		slurm_seterrno_ret(ESLRUMD_PIPE_ERROR_ON_TASK_SPAWN);
-
-	return SLURM_SUCCESS;
-}
-
-int setup_child_pipes(int *pipes)
-{
-	int error_code = SLURM_SUCCESS;
-
-	/* dup stdin */
-	/* close ( STDIN_FILENO ); */
-
-	if (SLURM_ERROR ==
-	    (error_code |= dup2(pipes[CHILD_IN_RD_PIPE], STDIN_FILENO))) {
-		error("dup failed on child standard in pipe %d: %m",
-		     pipes[CHILD_IN_RD_PIPE]);
-	}
-	close(pipes[CHILD_IN_RD_PIPE]);
-	close(pipes[CHILD_IN_WR_PIPE]);
-
-	/* dup stdout */
-	/* close ( STDOUT_FILENO ); */
-	if (SLURM_ERROR ==
-	    (error_code |=
-	     dup2(pipes[CHILD_OUT_WR_PIPE], STDOUT_FILENO))) {
-		error("dup failed on child standard out pipe %i: %m",
-				pipes[CHILD_OUT_WR_PIPE]);
-	}
-	close(pipes[CHILD_OUT_RD_PIPE]);
-	close(pipes[CHILD_OUT_WR_PIPE]);
-
-	/* dup stderr  */
-	/* close ( STDERR_FILENO ); */
-	if (SLURM_ERROR ==
-	    (error_code |=
-	     dup2(pipes[CHILD_ERR_WR_PIPE], STDERR_FILENO))) {
-		error("dup failed on child standard err pipe %i: %m",
-		     pipes[CHILD_ERR_WR_PIPE]);
-	}
-	close(pipes[CHILD_ERR_RD_PIPE]);
-	close(pipes[CHILD_ERR_WR_PIPE]);
-	return error_code;
-}
diff --git a/src/slurmd/pipes.h b/src/slurmd/pipes.h
deleted file mode 100644
index f499f991b4a..00000000000
--- a/src/slurmd/pipes.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*****************************************************************************\
- *  pipes.h - headers for slurmd pipes (pipes.c)
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <http://www.llnl.gov/linux/slurm/>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-\*****************************************************************************/
-
-#ifndef _SLURMD_PIPES_H_
-#define _SLURMD_PIPES_H_
-
-/*pipes.c*/
-/* init_parent_pipes
- * initializes pipes in the parent to be used for child io ipc after fork and exec
- * IN pipes	- array of six file desciptors
- * OUT int	- return_code
- */
-int init_parent_pipes(int *pipes);
-
-/* setup_parent_pipes 
- * setups the parent side of the pipes after fork 
- * IN pipes	- array of six file desciptors
- */
-void setup_parent_pipes(int *pipes);
-
-
-/* setup_child_pipes
- * setups the child side of the pipes after fork
- * IN pipes	- array of six file desciptors
- * OUT int	- return_code
- */
-int setup_child_pipes(int *pipes);
-
-/* cleanup_parent_pipes
- * cleans up the parent side of the pipes after task exit
- * IN pipes	- array of six file desciptors
- */
-void cleanup_parent_pipes(int *pipes);
-
-#endif /* !_SLURMD_PIPES_H */
diff --git a/src/slurmd/reconnect_utils.c b/src/slurmd/reconnect_utils.c
deleted file mode 100644
index f8c1fb30491..00000000000
--- a/src/slurmd/reconnect_utils.c
+++ /dev/null
@@ -1,152 +0,0 @@
-/*****************************************************************************\
- *  reconnect_utils.c - 
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <http://www.llnl.gov/linux/slurm/>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-\*****************************************************************************/
-
-#include <stdlib.h>
-#include <sys/types.h>
-#include <pwd.h>
-#include <grp.h>
-#include <sys/wait.h>
-#include <errno.h>
-#include <unistd.h>
-#include <string.h>
-#include <pthread.h>
-
-#include <src/common/log.h>
-#include <src/common/list.h>
-#include <src/common/xmalloc.h>
-#include <src/common/slurm_protocol_api.h>
-#include <src/common/slurm_errno.h>
-#include <src/common/util_signals.h>
-
-#include <src/slurmd/task_mgr.h>
-#include <src/slurmd/shmem_struct.h>
-#include <src/slurmd/circular_buffer.h>
-#include <src/slurmd/io.h>
-#include <src/slurmd/pipes.h>
-#include <src/slurmd/reconnect_utils.h>
-
-int connect_io_stream(task_start_t * task_start, int out_or_err)
-{
-	if ((task_start->sockets[out_or_err] =
-	     slurm_open_stream(&(task_start->io_streams_dest))) ==
-	    SLURM_PROTOCOL_ERROR) {
-		error("connect_io_stream: <%s>: %m", 
-				out_or_err ? "stdout" : "stderr");
-		return SLURM_PROTOCOL_ERROR;
-	} else 
-		return send_io_stream_header(task_start, out_or_err);
-}
-
-int send_io_stream_header(task_start_t * task_start, int out_or_err)
-{
-	slurm_io_stream_header_t io_header;
-	Buf buffer;
-	int rc;
-
-	buffer = init_buf (sizeof(slurm_io_stream_header_t));
-	if (out_or_err == STDIN_OUT_SOCK) {
-		init_io_stream_header(&io_header,
-				      task_start->launch_msg->credential->
-				      signature,
-				      task_start->launch_msg->
-				      global_task_ids[task_start->local_task_id],
-				      SLURM_IO_STREAM_INOUT);
-		pack_io_stream_header(&io_header, buffer);
-		rc = slurm_write_stream(task_start->sockets[STDIN_OUT_SOCK], 
-		                        get_buf_data(buffer), get_buf_offset(buffer));
-	} else {
-
-		init_io_stream_header(&io_header,
-				      task_start->launch_msg->credential->
-				      signature,
-				      task_start->launch_msg->
-				      global_task_ids[task_start->
-						      local_task_id],
-				      SLURM_IO_STREAM_SIGERR);
-		pack_io_stream_header(&io_header, buffer);
-		rc = slurm_write_stream(task_start->sockets[SIG_STDERR_SOCK], 
-		                        get_buf_data(buffer), get_buf_offset(buffer));
-	}
-
-	free_buf(buffer);
-	return rc;
-}
-
-ssize_t read_EINTR(int fd, void *buf, size_t count)
-{
-	ssize_t bytes_read;
-	while (true) {
-		if ((bytes_read = read(fd, buf, count)) <= 0) {
-			if ((bytes_read == SLURM_PROTOCOL_ERROR)
-			    && (errno == EINTR)) {
-				debug
-				    ("read_EINTR: bytes_read: %i , fd: %i %m errno: %i",
-				     bytes_read, fd, errno);
-				continue;
-			}
-		}
-		return bytes_read;
-	}
-}
-
-ssize_t write_EINTR(int fd, void *buf, size_t count)
-{
-	ssize_t bytes_written;
-	while (true) {
-		if ((bytes_written = write(fd, buf, count)) <= 0) {
-			if ((bytes_written == SLURM_PROTOCOL_ERROR)
-			    && (errno == EINTR)) {
-				debug
-				    ("write_EINTR: bytes_written: %i , fd: %i %m errno: %i",
-				     bytes_written, fd, errno);
-				continue;
-			}
-		}
-		return bytes_written;
-	}
-}
-
-struct timeval timeval_diff(struct timeval *last, struct timeval *first)
-{
-	struct timeval temp;
-	double lastd = last->tv_sec * 1000000 + last->tv_usec;
-	double firstd = first->tv_sec * 1000000 + first->tv_usec;
-	double diffd = lastd - firstd;
-	temp.tv_sec = diffd / 1000000;
-	temp.tv_usec = (long long) diffd % 1000000;
-	return temp;
-}
-
-double timeval_diffd(struct timeval *last, struct timeval *first,
-		     struct timeval *remaining)
-{
-	double lastd = last->tv_sec * 1000000 + last->tv_usec;
-	double firstd = first->tv_sec * 1000000 + first->tv_usec;
-	double diffd = lastd - firstd;
-	remaining->tv_sec = diffd / 1000000;
-	remaining->tv_usec = (long long) diffd % 1000000;
-	return diffd;
-}
diff --git a/src/slurmd/semaphore.c b/src/slurmd/semaphore.c
new file mode 100644
index 00000000000..9b9e0be9caa
--- /dev/null
+++ b/src/slurmd/semaphore.c
@@ -0,0 +1,321 @@
+/*****************************************************************************\
+ * semaphore.c - POSIX semaphore implementation via SysV semaphores
+ * $Id$
+ *****************************************************************************
+ *  Copyright (C) 2002 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Chris Dunlap <cdunlap@llnl.gov>.
+ *  UCRL-CODE-2002-040.
+ *  
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *  
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *  
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *  
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+\*****************************************************************************/
+
+/*
+ *  semaphore.c
+ *    by Chris Dunlap <cdunlap@llnl.gov>
+ *
+ *  Posix Semaphores implementation using System V Semaphores
+ *    (cf. Stevens' Unix Network Programming, v2, 2e, Section 10.16)
+ *
+ *  Id: semaphore.c,v 1.1.1.1 2000/10/20 21:56:06 dun Exp 
+ */
+
+
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ipc.h>
+#include <sys/sem.h>
+#include <stdio.h>
+#include "semaphore.h"
+
+
+#define MAX_TRIES 32
+
+
+sem_t * sem_open(const char *name, int oflag, ...)
+{
+    va_list ap;
+    mode_t mode;
+    unsigned int value;
+    int i, fd, errno_bak;
+    key_t key;
+    int semflag, semid;
+    union semun semarg;
+    struct sembuf semval;
+    struct semid_ds seminfo;
+    sem_t *sem;
+
+    if (!name) {			/* NOTE(review): errno is not set here */
+        return(SEM_FAILED);
+    }
+
+    semid = -1;				/* nothing for 'err' to remove yet */
+
+    /* Create a new semaphore; 'mode' and 'value' are read from the
+     *   variable arguments only when O_CREAT is set. */
+    if (oflag & O_CREAT) {
+        va_start(ap, oflag);
+        mode = va_arg(ap, mode_t);
+        value = va_arg(ap, unsigned int);
+        va_end(ap);
+
+        /* Create ancillary file and map pathname into SysV IPC key.
+         */
+        if ((fd = open(name, oflag, mode)) == -1) {
+            /*
+             * If the O_EXCL flag is specified and we return before the sem
+             *   is actually created in the following semget(), we create a
+             *   race-condition.
+             * This can present itself when two processes try to simultaneously
+             *   open the same sem.  Suppose the first process succeeds in
+             *   opening the file.  The second process will fail in its call to
+             *   open() because of the O_EXCL flag and sem_open() will return
+             *   SEM_FAILED.  Now suppose a subsequent sem_open() call is made
+             *   w/o the O_EXCL flag to open the (presumably existing) sem.
+             *   If the first process has not returned from semget() by now,
+             *   this sem_open() will return SEM_FAILED with errno=ENOENT since
+             *   the sem does not yet exist!
+             */
+            if ((errno == EEXIST) && (oflag & O_EXCL)) {
+                if ((key = ftok(name, 1)) == -1) {
+                    return(SEM_FAILED);
+                }
+                for (i=0; i<MAX_TRIES; i++) {
+                    if (((semget(key, 0, 0)) != -1) || (errno != ENOENT)) {
+                        break;
+                    }
+                    sleep(1);		/* poll once a second, MAX_TRIES times */
+                }
+                errno = EEXIST;		/* don't let semget() change errno */
+            }
+            return(SEM_FAILED);
+        }
+        close(fd);			/* only the pathname is used from here on */
+        if ((key = ftok(name, 1)) == -1) {
+            return(SEM_FAILED);
+        }
+
+        /* Convert Posix sem flag to SysV sem flag.
+         */
+        semflag = IPC_CREAT | (mode & 0777);
+        if (oflag & O_EXCL) {
+            semflag |= IPC_EXCL;
+        }
+
+        /* Create SysV semaphore set with one member.
+         * Note that semget() sets sem_otime to zero during sem creation.
+         */
+        if ((semid = semget(key, 1, semflag | IPC_EXCL)) >= 0) {
+            /*
+             * With IPC_EXCL, we're the first to create sem, so init to 0.
+             */
+            semarg.val = 0;
+            if (semctl(semid, 0, SETVAL, semarg) == -1) {
+                goto err;
+            }
+            /* SysV sems are normally stored as ushorts, so enforce max val.
+             */
+            if (value > SEMVMX) {
+                errno = EINVAL;
+                goto err;
+            }
+            /* Now increment sem by 'value' w/ semop() to set sem_otime nonzero.
+             */
+            semval.sem_num = 0;
+            semval.sem_op = value;
+            semval.sem_flg = 0;
+            if (semop(semid, &semval, 1) == -1) {
+                goto err;
+            }
+            goto end;
+        }
+        /* If the sem already exists and the caller does not specify O_EXCL,
+         *   this is NOT an error.  Instead, fall-thru to open existing sem.
+         */
+        else if ((errno != EEXIST) || ((semflag & IPC_EXCL) != 0)) {
+            goto err;
+        }
+    }
+
+    /* Open (presumably) existing semaphore.  Either O_CREAT was not specified,
+     *   or O_CREAT was specified w/o O_EXCL and the semaphore already exists.
+     */
+    if ((key = ftok(name, 1)) == -1) {
+        goto err;
+    }
+    if ((semid = semget(key, 0, 0)) == -1) {
+        goto err;
+    }
+
+    /* If sem_otime is 0, sem has not yet been initialized by its creator.
+     *   Spin up to MAX_TRIES before giving up.
+     *
+     * DANGER, WILL ROBINSON!
+     *   Unfortunately, semop() on a BSD system does not appear to update the
+     *   sem_otime member for some sick and twisted reason.  So we'll sleep,
+     *   cross our fingers, and hope for the best.
+     */
+#ifdef HAVE_BROKEN_SEM_OTIME
+    sleep(1);
+    goto end;
+#endif /* HAVE_BROKEN_SEM_OTIME */
+
+    semarg.buf = &seminfo;
+    for (i=0; i<MAX_TRIES; i++) {
+        if (semctl(semid, 0, IPC_STAT, semarg) == -1) {
+            goto err;
+        }
+        if (seminfo.sem_otime != 0) {
+            goto end;
+        }
+        sleep(1);
+    }
+    errno = ETIMEDOUT;			/* sem_otime still 0 after MAX_TRIES polls */
+    /* fall-thru to 'err' */
+    
+
+/* Clean up failed semaphore before returning.
+ * NOTE(review): this also removes a semaphore that this call merely opened.
+ */
+err:
+    errno_bak = errno;			/* don't let semctl() change errno */
+    if (semid != -1) {
+        semctl(semid, 0, IPC_RMID);
+    }
+    errno = errno_bak;
+    return(SEM_FAILED);
+
+/* SysV sem creation was successful, so create Posix sem wrapper.
+ */
+end:
+    if ((sem = malloc(sizeof(sem_t))) == NULL) {
+        goto err;
+    }
+    sem->id = semid;
+    return(sem);
+}
+
+
+int sem_close(sem_t *sem)
+{
+    if (sem->id >= 0) {
+        sem->id = -1;			/* mark closed, then drop wrapper */
+        free(sem);
+        return(0);			/* no semctl(): the SysV sem remains */
+    }
+    errno = EINVAL;			/* negative id: not an open wrapper */
+    return(-1);
+}
+
+
+int sem_unlink(const char *name)
+{
+    /* Remove the ancillary file, then the SysV semaphore keyed off it.
+     * The key is derived before unlink() removes the file it comes from.
+     */
+    int id;
+    key_t ipc_key;
+
+    if (name == NULL) {
+        return(-1);
+    }
+
+    ipc_key = ftok(name, 1);
+    if ((ipc_key == -1) || (unlink(name) == -1)) {
+        return(-1);
+    }
+    id = semget(ipc_key, 0, 0);
+    if ((id == -1) || (semctl(id, 0, IPC_RMID) == -1)) {
+        return(-1);
+    }
+    return(0);
+}
+
+
+int sem_wait(sem_t *sem)
+{
+    /* Decrement member 0 of the set by one through semop(); sem_flg is 0,
+     * so IPC_NOWAIT is not requested.
+     */
+    struct sembuf dec;
+
+    if (sem->id < 0) {
+        errno = EINVAL;
+        return(-1);
+    }
+    dec.sem_flg = 0;
+    dec.sem_op = -1;
+    dec.sem_num = 0;
+    return((semop(sem->id, &dec, 1) == -1) ? -1 : 0);
+}
+
+
+int sem_trywait(sem_t *sem)
+{
+    /* Same decrement as sem_wait(), but IPC_NOWAIT is passed in sem_flg.
+     * Any semop() failure is reported as -1 with errno left as semop set it.
+     */
+    struct sembuf dec;
+
+    if (sem->id < 0) {
+        errno = EINVAL;
+        return(-1);
+    }
+    dec.sem_flg = IPC_NOWAIT;
+    dec.sem_op = -1;
+    dec.sem_num = 0;
+    return((semop(sem->id, &dec, 1) == -1) ? -1 : 0);
+}
+
+
+int sem_post(sem_t *sem)
+{
+    /* Add one to the semaphore (member 0 of the set) through semop().
+     * A wrapper with a negative id is rejected with EINVAL.
+     */
+    struct sembuf inc;
+
+    if (sem->id < 0) {
+        errno = EINVAL;
+        return(-1);
+    }
+    inc.sem_flg = 0;
+    inc.sem_op = 1;
+    inc.sem_num = 0;
+    return((semop(sem->id, &inc, 1) == -1) ? -1 : 0);
+}
+
+
+int sem_getvalue(sem_t *sem, int *valp)
+{
+    int current;			/* count reported by GETVAL */
+
+    if (sem->id < 0) {
+        errno = EINVAL;
+        return(-1);
+    }
+    if ((current = semctl(sem->id, 0, GETVAL)) != -1) {
+        *valp = current;		/* only written on success */
+        return(0);
+    }
+    return(-1);
+}
diff --git a/src/slurmd/semaphore.h b/src/slurmd/semaphore.h
new file mode 100644
index 00000000000..084ef13f635
--- /dev/null
+++ b/src/slurmd/semaphore.h
@@ -0,0 +1,93 @@
+/*****************************************************************************\
+ * src/slurmd/semaphore.h - POSIX semaphore implementation via SysV semaphores
+ * $Id$
+ *****************************************************************************
+ *  Copyright (C) 2002 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Chris Dunlap <cdunlap@llnl.gov>.
+ *  UCRL-CODE-2002-040.
+ *  
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *  
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *  
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *  
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+\*****************************************************************************/
+
+/*
+ *  semaphore.h
+ *    by Chris Dunlap <cdunlap@llnl.gov>
+ *
+ *  Posix Semaphores implementation using System V Semaphores
+ *    (cf. Stevens' Unix Network Programming, v2, 2e, Section 10.16)
+ *
+ *  Id: semaphore.h,v 1.1.1.1 2000/10/02 20:56:53 dun Exp 
+ *
+ */
+
+
+#ifndef DUN_SEMAPHORE_H
+#define DUN_SEMAPHORE_H
+
+#include "config.h"
+
+#ifdef HAVE_POSIX_SEMS
+#include <semaphore.h>
+#else
+
+
+typedef struct {
+    int id;				/* SysV semaphore ID */
+} sem_t;
+
+#ifdef SEM_FAILED
+#undef SEM_FAILED
+#endif /* SEM_FAILED */
+#define SEM_FAILED ((sem_t *)(-1))	/* avoid compiler warnings */
+
+#ifndef SEMVMX
+#define SEMVMX 32767			/* historical SysV sem max value */
+#endif /* !SEMVMX */
+
+/* Default perms for new SysV semaphores.  NOTE(review): the S_I* macros
+ * need <sys/stat.h>, which this header does not include itself. */
+#define SYSV_SEM_DEF_MODE (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)
+
+#ifndef HAVE_SEMUN_UNION
+union semun {
+    int val;				/* value for SETVAL */
+    struct semid_ds *buf;		/* buffer for IPC_SET and IPC_STAT */
+    unsigned short int *array;		/* array for GETALL and SETALL */
+};
+#endif /* !HAVE_SEMUN_UNION */
+
+/* With O_CREAT, pass (mode_t mode, unsigned int value); SEM_FAILED on error. */
+sem_t * sem_open(const char *name, int oflag, ...);
+/* Free the wrapper from sem_open(); -1/EINVAL if its id is negative. */
+int sem_close(sem_t *sem);
+/* Remove the file 'name' and the SysV semaphore keyed off it; 0 or -1. */
+int sem_unlink(const char *name);
+/* Decrement the semaphore by one via semop(); 0 or -1. */
+int sem_wait(sem_t *sem);
+/* As sem_wait(), but semop() is called with IPC_NOWAIT. */
+int sem_trywait(sem_t *sem);
+/* Increment the semaphore by one via semop(); 0 or -1. */
+int sem_post(sem_t *sem);
+/* Store the current value (GETVAL) in *valp; 0 or -1. */
+int sem_getvalue(sem_t *sem, int *valp);
+
+
+#endif /* !HAVE_POSIX_SEMS */
+
+#endif /* !DUN_SEMAPHORE_H */
diff --git a/src/slurmd/setenvpf.c b/src/slurmd/setenvpf.c
index 1530c60d4a4..d016756534c 100644
--- a/src/slurmd/setenvpf.c
+++ b/src/slurmd/setenvpf.c
@@ -1,5 +1,6 @@
 /*****************************************************************************\
- * setenvpf.c - add an environment variable to environment vector
+ * src/slurmd/setenvpf.c - add an environment variable to environment vector
+ * $Id$
  *****************************************************************************
  *  Copyright (C) 2002 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -24,6 +25,10 @@
  *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
 \*****************************************************************************/
 
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif 
+
 #include <stdio.h>
 #include <stdarg.h>
 
@@ -34,7 +39,7 @@
  * xmalloc() extending *envp if necessary.
  *
  * envp		Pointer to environment array allocated with xmalloc()
- * envc		Pointer to current count of environment vars			 
+ * envc		Pointer to current count of environment vars
  * fmt		printf style format (e.g. "SLURM_NPROCS=%d")
  *
  */    
diff --git a/src/slurmd/setenvpf.h b/src/slurmd/setenvpf.h
index 140b16009de..765b3589c84 100644
--- a/src/slurmd/setenvpf.h
+++ b/src/slurmd/setenvpf.h
@@ -1,3 +1,28 @@
+/*****************************************************************************\
+ * src/slurmd/setenvpf.h - environment vector manipulation
+ *****************************************************************************
+ *  Copyright (C) 2002 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Mark Grondona <mgrondona@llnl.gov>.
+ *  UCRL-CODE-2002-040.
+ *  
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *  
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *  
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *  
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+\*****************************************************************************/
 #ifndef _SETENVPF_H
 #define _SETENVPF_H
 
diff --git a/src/slurmd/shm.c b/src/slurmd/shm.c
new file mode 100644
index 00000000000..d2e1c98ff40
--- /dev/null
+++ b/src/slurmd/shm.c
@@ -0,0 +1,763 @@
+/*****************************************************************************\
+ * src/slurmd/shm.c - slurmd shared memory routines 
+ * $Id$
+ *****************************************************************************
+ *  Copyright (C) 2002 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Mark Grondona <mgrondona@llnl.gov>.
+ *  UCRL-CODE-2002-040.
+ *  
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *  
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *  
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *  
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#if HAVE_SYS_IPC_H
+#  include <sys/ipc.h>
+#endif
+
+#if HAVE_SYS_SHM_H
+#  include <sys/shm.h>
+#endif
+
+#if HAVE_SYS_SEM_H
+#  include <sys/sem.h>
+#endif
+
+#if HAVE_STRING_H
+#  include <string.h>
+#endif
+
+#if HAVE_ERRNO_H
+#  include <errno.h>
+#endif
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#include <src/common/list.h>
+#include <src/common/log.h>
+#include <src/common/xmalloc.h>
+#include <src/common/xassert.h>
+#include <src/common/slurm_errno.h>
+
+#include <src/slurmd/shm.h>
+
+/* We use Chris Dunlap's POSIX semaphore implementation if necessary */
+#include <src/slurmd/test/semaphore.h>
+
+#define MAX_JOB_STEPS	16
+#define MAX_TASKS	1024
+
+#define SHM_LOCKNAME	"/.slurm.lock"
+
+/* Increment SHM_VERSION if format changes */
+#define SHM_VERSION	0x1001
+
+typedef struct shmem_struct {
+	int        version;	/* set to SHM_VERSION by _shm_initialize() */
+	int        users;	/* decremented in shm_fini(); 0 => destroy */
+	job_step_t step[MAX_JOB_STEPS];	/* SLURMD_JOB_UNUSED marks a free slot */
+	task_t     task[MAX_TASKS];	/* pool; 'used' false marks a free entry */
+} slurmd_shm_t;
+
+
+/* static variables: */
+static sem_t *shm_lock;
+static char  *lockname;
+static int shmid;
+static slurmd_shm_t *slurmd_shm;
+
+/* static function prototypes: */
+static int  _is_valid_ipc_name(const char *name);
+static char *_create_ipc_name(const char *name);
+static int _shm_unlink_lock(void);
+static int  _shm_lock_and_initialize(void);
+static void _shm_lock(void);
+static void _shm_unlock(void);
+static void _shm_initialize(void);
+static void _shm_prepend_task_to_step(job_step_t *, task_t *);
+static void _shm_task_copy(task_t *, task_t *);
+static void _shm_step_copy(job_step_t *, job_step_t *);
+static void _shm_clear_task(task_t *);
+static void _shm_clear_step(job_step_t *);
+static int  _shm_find_step(uint32_t, uint32_t);
+static task_t * _shm_alloc_task(void);
+static task_t * _shm_find_task_in_step(job_step_t *s, int taskid);
+
+
+/* Set up slurmd shared memory; the work itself is delegated to
+ * _shm_lock_and_initialize() and its return code is passed through.
+ */
+int
+shm_init(void)
+{
+	return (_shm_lock_and_initialize());
+}
+
+/* Detach from shared memory.  The last user (users drops to 0) also removes
+ * the segment and the lock semaphore.  Returns 0 on success, -1 on error.
+ */
+int
+shm_fini(void)
+{
+	int rc = 0, destroy = 0;
+	info("process %ld detaching from shm", (long) getpid());
+	xassert(slurmd_shm != NULL);
+	_shm_lock();
+	if (--slurmd_shm->users == 0)
+		destroy = 1;
+
+	/* detach segment from local memory */
+	if (shmdt(slurmd_shm) < 0) {
+		error("shmdt: %m");
+		rc = -1;
+	} else if (destroy && (shmctl(shmid, IPC_RMID, NULL) < 0)) {
+		error("shmctl: %m");
+		rc = -1;
+	}
+	_shm_unlock();	/* released on the error paths too */
+	/* _shm_unlink_lock() reports failure as 0, not as a negative value */
+	if ((rc == 0) && destroy && !_shm_unlink_lock()) {
+		error("_shm_unlink_lock: %m");
+		rc = -1;
+	}
+	return rc;
+}
+
+/* Forcibly remove the shm lock semaphore named by SHM_LOCKNAME; a failed
+ * sem_unlink() is logged, and nothing is done if the name can't be built.
+ */
+void
+shm_cleanup(void)
+{
+	char *path = _create_ipc_name(SHM_LOCKNAME);
+
+	if (!path)
+		return;
+	info("going to destroy shm lock `%s'", path);
+	if (sem_unlink(path) < 0)
+		error("sem_unlink: %m");
+	xfree(path);
+}
+
+static int	/* 1 if valid, 0 if NULL, else 2-6 naming the failed check */
+_is_valid_ipc_name(const char *name)
+{
+	size_t len = name ? strlen(name) : 0;
+
+	if (name == NULL)
+		return(0);
+	if (len <= 1)
+		return(2);
+	if (len >= PATH_MAX)
+		return(3);
+	if (!strcmp(name, "/."))
+		return(4);
+	if (!strcmp(name, "/.."))
+		return(5);
+	return((strrchr(name, '/') == name) ? 1 : 6);
+}
+
+static char *	/* xmalloc'd path for 'name', or NULL if the name is invalid */
+_create_ipc_name(const char *name)
+{
+	char *dst, *dir, *slash;
+	int rc;
+	/* _is_valid_ipc_name() returns 1 only for an acceptable name */
+	if ((rc = _is_valid_ipc_name(name)) != 1) {
+		error("invalid ipc name: `%s' %d", name, rc);
+		return NULL;
+	}
+	else if (!(dst = xmalloc(PATH_MAX)))
+		return NULL;
+	/* choose the directory prefix: compile-time, $TMPDIR, or /tmp */
+#if defined(POSIX_IPC_PREFIX) && defined(HAVE_POSIX_SEMS)
+	dir = POSIX_IPC_PREFIX;
+#else
+	if (!(dir = getenv("TMPDIR")) || !strlen(dir)) 
+		dir = "/tmp";
+#endif /* POSIX_IPC_PREFIX */
+	/* add a '/' separator only when 'dir' does not already end in one */
+	slash = (dir[strlen(dir) - 1] == '/') ? "" : "/";
+	/* name+1 drops the leading '/' of the IPC name */
+#ifdef HAVE_SNPRINTF
+	snprintf(dst, PATH_MAX, "%s%s%s", dir, slash, name+1);
+#else
+	sprintf(dst, "%s%s%s", dir, slash, name+1); /* NOTE(review): unbounded */
+#endif /* HAVE_SNPRINTF */
+
+	return(dst);
+}
+
+static int	/* returns 1 if the lock was removed, 0 if sem_unlink() failed */
+_shm_unlink_lock(void)
+{
+	debug3("process %ld removing shm lock", (long) getpid());
+	if (sem_unlink(lockname) == -1) 
+		return 0;	/* lockname is kept when the unlink fails */
+	xfree(lockname);
+	return 1;
+}
+
+/* sem_open() wrapper: maps 'name' to a filesystem path (kept in the
+ * file-scope 'lockname') and forwards mode/value only under O_CREAT. */
+static sem_t *
+_sem_open(const char *name, int oflag, ...)
+{
+	va_list args;
+	mode_t perms;
+	unsigned int initial;
+
+	lockname = _create_ipc_name(name);
+	if (lockname == NULL)
+		fatal("sem_open failed for [%s]: invalid IPC name", name);
+
+	if (!(oflag & O_CREAT))
+		return(sem_open(lockname, oflag));
+
+	/* the optional arguments are only read with O_CREAT */
+	va_start(args, oflag);
+	perms = va_arg(args, mode_t);
+	initial = va_arg(args, unsigned int);
+	va_end(args);
+	return(sem_open(lockname, oflag, perms, initial));
+}
+
+
+static void	/* zero the segment, mark every slot free, stamp the version */
+_shm_initialize()
+{
+	int n = MAX_TASKS, m = MAX_JOB_STEPS;
+	memset(slurmd_shm, 0, sizeof(*slurmd_shm));
+	while (n-- > 0)
+		slurmd_shm->task[n].used = false;
+	while (m-- > 0)
+		slurmd_shm->step[m].state = SLURMD_JOB_UNUSED;
+	slurmd_shm->version = SHM_VERSION;
+}
+
+/* Copy 'step' into the first unused slot of the shared step table.
+ * Sets errno EEXIST for a duplicate jobid/stepid, ENOSPC if no slot is free.
+ */
+int
+shm_insert_step(job_step_t *step)
+{
+	int slot = 0;
+	_shm_lock();
+	if (_shm_find_step(step->jobid, step->stepid) >= 0) {
+		_shm_unlock();
+		slurm_seterrno_ret(EEXIST);
+	}
+	while ((slot < MAX_JOB_STEPS)
+	       && (slurmd_shm->step[slot].state != SLURMD_JOB_UNUSED))
+		slot++;
+	if (slot >= MAX_JOB_STEPS) {
+		_shm_unlock();
+		slurm_seterrno_ret(ENOSPC);
+	}
+	_shm_step_copy(&slurmd_shm->step[slot], step);
+	_shm_unlock();
+	return SLURM_SUCCESS;
+}
+
+int	/* 0 once the step and its tasks are cleared; errno ESRCH if unknown */
+shm_delete_step(uint32_t jobid, uint32_t stepid)
+{
+	int idx;
+	_shm_lock();
+	idx = _shm_find_step(jobid, stepid);
+	if (idx >= 0)
+		_shm_clear_step(&slurmd_shm->step[idx]);
+	_shm_unlock();
+	if (idx < 0)
+		slurm_seterrno_ret(ESRCH);
+	return 0;
+}
+
+int	/* overwrite the stored copy of 'step'; -1 if no such step exists */
+shm_update_step(job_step_t *step)
+{
+	int found;
+
+	_shm_lock();
+	found = _shm_find_step(step->jobid, step->stepid);
+	if (found >= 0)
+		_shm_step_copy(&slurmd_shm->step[found], step);
+	_shm_unlock();
+	return (found >= 0) ? 0 : -1;
+}
+
+/* Send 'signal' to every task pid recorded for the step.
+ * Returns SLURM_SUCCESS, or sets errno (ESRCH if the step is unknown,
+ * else the errno of the last failed kill()) via slurm_seterrno_ret().
+ */
+int
+shm_signal_step(uint32_t jobid, uint32_t stepid, uint32_t signal)
+{
+	int         signo  = (int) signal;
+	int         retval = SLURM_SUCCESS;
+	int         i;
+	task_t     *t;
+
+	_shm_lock();
+	if ((i = _shm_find_step(jobid, stepid)) >= 0) {
+		for (t = slurmd_shm->step[i].task_list; t; t = t->next) {
+			if (t->pid > 0 && kill(t->pid, signo) < 0) {
+				retval = errno;	/* save before logging */
+				error("kill %u.%u pid %ld: %m", 
+				      jobid, stepid, (long)t->pid);
+			}
+		}	
+	} else
+		retval = ESRCH;
+	_shm_unlock();
+	if (retval > 0)
+		slurm_seterrno_ret(retval);
+	return SLURM_SUCCESS;
+}
+
+
+/* Return an xmalloc'd snapshot of the step and its tasks, or NULL if the
+ * step is not found; shm_free_step() releases such a snapshot. */
+job_step_t *
+shm_get_step(uint32_t jobid, uint32_t stepid)
+{
+	int idx;
+	task_t *in, *out;
+	job_step_t *copy = NULL;
+	_shm_lock();
+	if ((idx = _shm_find_step(jobid, stepid)) >= 0) {
+		copy = xmalloc(sizeof(*copy));
+		_shm_step_copy(copy, &slurmd_shm->step[idx]);
+		for (in = slurmd_shm->step[idx].task_list; in; in = in->next) {
+			out = xmalloc(sizeof(*out));
+			_shm_task_copy(out, in);
+			_shm_prepend_task_to_step(copy, out);
+		}
+	}
+	_shm_unlock();
+	return copy;
+}
+
+void	/* xfree() each task on the step's list, then the step itself */
+shm_free_step(job_step_t *step)
+{
+	task_t *cur = step->task_list;
+	task_t *following;
+	while (cur) {
+		following = cur->next;
+		xfree(cur);
+		cur = following;
+	}
+	xfree(step);
+}
+
+int	/* store 'sid' in the step; SLURM_FAILURE with errno ESRCH if unknown */
+shm_update_step_sid(uint32_t jobid, uint32_t stepid, int sid)
+{
+	int idx, rc = SLURM_FAILURE;
+	_shm_lock();
+	if ((idx = _shm_find_step(jobid, stepid)) < 0) {
+		slurm_seterrno(ESRCH);
+	} else {
+		slurmd_shm->step[idx].sid = sid;
+		rc = SLURM_SUCCESS;
+	}
+	_shm_unlock();
+	return rc;
+}
+
+int	/* the 'sid' stored for the step; SLURM_FAILURE/ESRCH if unknown */
+shm_step_sid(uint32_t jobid, uint32_t stepid)
+{
+	int idx, result = SLURM_FAILURE;
+
+	_shm_lock();
+	if ((idx = _shm_find_step(jobid, stepid)) < 0) {
+		slurm_seterrno(ESRCH);
+	} else {
+		result = slurmd_shm->step[idx].sid;
+	}
+	_shm_unlock();
+	return result;
+}
+
+
+int	/* set the step's state; SLURM_FAILURE with errno ESRCH if unknown */
+shm_update_step_state(uint32_t jobid, uint32_t stepid, job_state_t state)
+{
+	int slot, rc = SLURM_FAILURE;
+	_shm_lock();
+	if ((slot = _shm_find_step(jobid, stepid)) < 0) {
+		slurm_seterrno(ESRCH);
+	} else {
+		slurmd_shm->step[slot].state = state;
+		rc = SLURM_SUCCESS;
+	}
+	_shm_unlock();
+	return rc;
+}
+
+/* Lock shm and return a pointer to the step's state inside the segment.
+ * On success the lock stays held (see shm_unlock_step_state()); if the step
+ * is unknown, errno is set to ESRCH, the lock is dropped, NULL is returned. */
+job_state_t *
+shm_lock_step_state(uint32_t jobid, uint32_t stepid)
+{
+	int idx;
+	_shm_lock();
+	if ((idx = _shm_find_step(jobid, stepid)) < 0) {
+		slurm_seterrno(ESRCH);
+		_shm_unlock();
+		return NULL;
+	}
+	return &slurmd_shm->step[idx].state;
+}
+
+void	/* drops the shm lock, e.g. the one held after shm_lock_step_state() */
+shm_unlock_step_state(uint32_t jobid, uint32_t stepid)
+{
+	/* jobid/stepid are currently unused: they are kept in the
+	 * signature in case individual job locks are supported later.
+	 */
+	_shm_unlock();
+}
+
+
+/* Post new io/response addresses for a step and raise its io_update flag.
+ * Fails with errno EAGAIN while an earlier update is still pending (flag
+ * set), or ESRCH if the step does not exist.
+ */
+int
+shm_update_step_addrs(uint32_t jobid, uint32_t stepid,
+		      slurm_addr *ioaddr, slurm_addr *respaddr)
+{
+	int idx, rc = SLURM_FAILURE;
+	job_step_t *s;
+
+	_shm_lock();
+	if ((idx = _shm_find_step(jobid, stepid)) < 0) {
+		slurm_seterrno(ESRCH);
+	} else if ((s = &slurmd_shm->step[idx])->io_update) {
+		/* Only allow one addr update at a time */
+		slurm_seterrno(EAGAIN);
+	} else {
+		s->ioaddr = *ioaddr;
+		s->respaddr = *respaddr;
+		s->io_update = true;
+		rc = SLURM_SUCCESS;
+	}
+	_shm_unlock();
+	return rc;
+}
+
+/* Copy out the step's addresses, clear io_update; ESRCH if step unknown */
+int
+shm_step_addrs(uint32_t jobid, uint32_t stepid,
+	       slurm_addr *ioaddr, slurm_addr *respaddr)
+{
+	int idx, rc = SLURM_FAILURE;
+	xassert(ioaddr != NULL);
+	xassert(respaddr != NULL);
+	_shm_lock();
+	if ((idx = _shm_find_step(jobid, stepid)) < 0) {
+		slurm_seterrno(ESRCH);
+	} else {
+		*ioaddr   = slurmd_shm->step[idx].ioaddr;
+		*respaddr = slurmd_shm->step[idx].respaddr;
+		slurmd_shm->step[idx].io_update = false;
+		rc = SLURM_SUCCESS;
+	}
+	_shm_unlock();
+	return rc;
+}
+
+int	/* store 'newlim' in the step; SLURM_FAILURE/ESRCH if it is unknown */
+shm_update_step_timelimit(uint32_t jobid, uint32_t stepid, time_t newlim)
+{
+	int idx, rc = SLURM_FAILURE;
+	_shm_lock();
+	if ((idx = _shm_find_step(jobid, stepid)) < 0) {
+		slurm_seterrno(ESRCH);
+	} else {
+		slurmd_shm->step[idx].timelimit = newlim;
+		rc = SLURM_SUCCESS;
+	}
+	_shm_unlock();
+	return rc;
+}
+
+time_t	/* the step's timelimit, or (time_t) SLURM_FAILURE with errno ESRCH */
+shm_step_timelimit(uint32_t jobid, uint32_t stepid)
+{
+	int idx;
+	time_t limit = (time_t) SLURM_FAILURE;
+
+	_shm_lock();
+	if ((idx = _shm_find_step(jobid, stepid)) < 0) {
+		slurm_seterrno(ESRCH);
+	} else {
+		limit = slurmd_shm->step[idx].timelimit;
+	}
+	_shm_unlock();
+	return limit;
+}
+
+static int	/* slot index of the step matching jobid/stepid, or -1 */
+_shm_find_step(uint32_t jobid, uint32_t stepid)
+{
+	int idx = 0;	/* NOTE(review): zeroed slots match job 0, step 0 */
+	job_step_t *s = slurmd_shm->step;
+	for (; idx < MAX_JOB_STEPS; idx++, s++) {
+		if ((s->stepid == stepid) && (s->jobid == jobid))
+			return idx;
+	}
+	return -1;
+}
+
+/* Copy 'task' into a free pool slot and link it to the step.  Returns 0, or
+ * sets errno ESRCH (no step), EEXIST (duplicate id) or ENOMEM (pool full). */
+int
+shm_add_task(uint32_t jobid, uint32_t stepid, task_t *task)
+{
+	int i, err = 0;
+	job_step_t *s;
+	task_t *t;
+	xassert(task != NULL);
+	_shm_lock();
+	if ((i = _shm_find_step(jobid, stepid)) < 0)
+		err = ESRCH;
+	else if (_shm_find_task_in_step((s = &slurmd_shm->step[i]), task->id))
+		err = EEXIST;
+	else if (!(t = _shm_alloc_task()))
+		err = ENOMEM;
+	else {
+		_shm_task_copy(t, task);
+		t->used = true;	/* claim the slot, whatever the caller passed */
+		_shm_prepend_task_to_step(s, t);
+	}
+	_shm_unlock();
+	if (err)
+		slurm_seterrno_ret(err);
+	return 0;
+}
+
+static void
+_shm_prepend_task_to_step(job_step_t *s, task_t *task)
+{
+	task->job_step = s;		/* back pointer to the owning step */
+	task->next = s->task_list;	/* old head follows the new task */
+	s->task_list = task;		/* new task becomes the list head */
+}
+
+static task_t *
+_shm_find_task_in_step(job_step_t *s, int taskid)
+{
+	task_t *t;	/* returns the matching task, or NULL if none matches */
+	for (t = s->task_list; t && t->used; t = t->next) {
+		if (t->id == taskid)
+			return t;
+	}
+	return NULL;	/* not t: it may be an unused entry that ended the walk */
+}
+
+static task_t *
+_shm_alloc_task(void)
+{
+	task_t *slot, *end = slurmd_shm->task + MAX_TASKS;
+	for (slot = slurmd_shm->task; slot < end; slot++) {
+		if (!slot->used)	/* first free slot; its used flag is left as is */
+			return slot;
+	}
+	return NULL;	/* every task slot is in use */
+}
+
+static void
+_shm_task_copy(task_t *to, task_t *from)
+{
+	*to = *from;
+	to->used = 1;		/* claim the slot: _shm_alloc_task() reuses used == 0 */
+	to->next = NULL;	/* next and step are not valid for copying */
+	to->job_step = NULL;	/* set again when the task is linked to a step */
+}
+
+static void
+_shm_step_copy(job_step_t *to, job_step_t *from)
+{
+	/* Remember the destination's own task list across the struct copy. */
+	task_t *saved_tasks = to->task_list;
+
+	*to = *from;
+	to->task_list = saved_tasks; /* addition of tasks is another step */
+	to->state = SLURMD_JOB_ALLOCATED;
+}
+
+static void
+_shm_clear_task(task_t *t)
+{
+	memset(t, 0, sizeof(task_t));	/* zeroed slot: used == 0, no links */
+}
+
+static void
+_shm_clear_step(job_step_t *s)
+{
+	task_t *p, *t;
+	/* A step may own no tasks yet, so test t before dereferencing it */
+	for (t = s->task_list; t; t = p) {
+		p = t->next;	/* save the link: _shm_clear_task() zeroes *t */
+		_shm_clear_task(t);
+	}
+	memset(s, 0, sizeof(*s));	/* the step slot itself becomes all zero */
+}
+
+
+static int
+_shm_create()
+{
+	key_t key = ftok(".", 'a');
+	int create_flags = IPC_CREAT | IPC_EXCL | 0600;
+	/* Exclusive create: every failure here ends in an error return */
+	shmid = shmget(key, sizeof(slurmd_shm_t), create_flags);
+	if (shmid < 0) {
+		/* Log only when the segment cannot be looked up either */
+		if ((shmid = shmget(key, sizeof(slurmd_shm_t), 0600)) < 0)
+			error("shmget: %m");
+		return SLURM_ERROR;
+	}
+	/* shmat() reports failure as (void *)-1; NULL is rejected as well */
+	slurmd_shm = shmat(shmid, NULL, 0);
+	if (slurmd_shm == NULL || slurmd_shm == (void *)-1) {
+		error("shmat: %m");
+		return SLURM_ERROR;
+	}
+	_shm_initialize();
+	return 1;
+}
+
+static int
+_shm_attach()
+{
+	/* No creation flags are passed: the segment must already exist. */
+	key_t key = ftok(".", 'a');
+
+	shmid = shmget(key, sizeof(slurmd_shm_t), 0);
+	if (shmid < 0)
+		fatal("shm_attach: %m");
+	slurmd_shm = shmat(shmid, NULL, 0);
+	if (!slurmd_shm || slurmd_shm == (void *)-1)
+		fatal("shmat: %m");
+	/* The only value ever returned; failures go through fatal() above */
+	return 1;
+}
+
+/* 
+ * Create shared memory region if it doesn't exist, if it does exist,
+ * reinitialize it.
+ * Called by _shm_lock_and_initialize() when it has just created the lock.
+ */
+static int
+_shm_new()
+{
+	if ((_shm_create() < 0) && (_shm_attach() < 0)) {
+		error("shm_attach: %m");
+		return SLURM_FAILURE;
+	}
+	_shm_initialize();	/* runs on both the created and the attached path */
+	slurmd_shm->users = 1;
+	_shm_unlock();	/* first post: lock was opened with final arg 0 (presumably count 0) */
+	return SLURM_SUCCESS;
+}
+
+static int
+_shm_reopen()
+{
+	int retval = SLURM_SUCCESS;
+
+	if ((shm_lock = _sem_open(SHM_LOCKNAME, 0)) == SEM_FAILED) {
+		error("Unable to initialize semaphore: %m");
+		return SLURM_FAILURE;
+	}
+
+	/* Attach to shared memory region (_shm_attach() calls fatal() on error) */
+	_shm_attach();
+
+	/* Wait on the lock so the data is initialized, then register as a user */
+	_shm_lock();
+	if (slurmd_shm->version != SHM_VERSION) {
+		error("shm_init: Wrong version in shared memory");
+		retval = SLURM_FAILURE;	/* NOTE(review): segment stays attached here */
+	} else
+		slurmd_shm->users++;
+	_shm_unlock();
+
+	return retval;
+}
+
+
+/* Get and, if necessary, initialize the shm semaphore.
+ * If the lock did not exist, assume we need to initialize the shared region.
+ */
+static int
+_shm_lock_and_initialize()
+{
+	if (slurmd_shm && slurmd_shm->version == SHM_VERSION) {           
+		/* we've already opened shared memory */
+		_shm_lock();
+		slurmd_shm->users++;	/* only the user count changes on this path */
+		_shm_unlock();
+		return SLURM_SUCCESS;
+	}
+
+	shm_lock = _sem_open(SHM_LOCKNAME, O_CREAT|O_EXCL, S_IRUSR|S_IWUSR, 0);
+	/* NOTE(review): any SEM_FAILED result, whatever errno, is taken as "exists" */
+	if (shm_lock != SEM_FAILED) /* lock didn't exist. Create shmem      */
+		return _shm_new();
+	else                        /* lock exists. Attach to shared memory */
+		return _shm_reopen();
+}
+
+static void
+_shm_lock()
+{
+	/* Retry when a signal interrupts the wait; any other failure is fatal */
+	while (sem_wait(shm_lock) == -1) {
+		if (errno != EINTR) {
+			fatal("_shm_lock: %m");
+			break;
+		}
+	}
+}
+
+static void
+_shm_unlock()
+{
+	/* Post again if interrupted (EINTR); any other failure is fatal */
+	while (sem_post(shm_lock) == -1) {
+		if (errno != EINTR) {
+			fatal("_shm_unlock: %m");
+			break;
+		}
+	}
+}
diff --git a/src/slurmd/shm.h b/src/slurmd/shm.h
new file mode 100644
index 00000000000..74eb5788c0f
--- /dev/null
+++ b/src/slurmd/shm.h
@@ -0,0 +1,229 @@
+/*****************************************************************************\
+ * src/slurmd/shm.h - shared memory routines for slurmd
+ * $Id$
+ *****************************************************************************
+ *  Copyright (C) 2002 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Mark Grondona <mgrondona@llnl.gov>.
+ *  UCRL-CODE-2002-040.
+ *  
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *  
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *  
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *  
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+\*****************************************************************************/
+#ifndef _SHM_H
+#define _SHM_H
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif  
+
+#if HAVE_INTTYPES_H
+#  include <inttypes.h>
+#else
+#  if HAVE_STDINT_H
+#    include <stdint.h>
+#  endif
+#endif /* HAVE_INTTYPES_H */
+
+#if HAVE_SYS_TYPES_H
+#  include <sys/types.h>
+#endif
+
+#if HAVE_UNISTD_H
+#  include <unistd.h>
+#endif 
+
+#include <src/common/slurm_protocol_api.h>
+
+#include <src/slurmd/job.h>
+
+/* local job states */
+typedef enum job_state {
+	SLURMD_JOB_UNUSED = 0,	/* zero, so a memset-cleared step slot reads as unused */
+	SLURMD_JOB_ALLOCATED,	/* state assigned by _shm_step_copy() in shm.c */
+	SLURMD_JOB_STARTING,
+	SLURMD_JOB_STARTED,
+	SLURMD_JOB_ENDING,
+	SLURMD_JOB_COMPLETE
+} job_state_t;
+
+typedef struct task task_t;
+typedef struct job_step job_step_t;
+
+struct task {
+	int used;	/* slot in use; _shm_alloc_task() hands out slots where it is 0 */
+	int id;	        /* local task id              			*/
+	int global_id;  /* global task id             			*/
+	pid_t pid;	/* pid of user process        			*/
+	pid_t ppid;	/* parent pid of user process 			*/
+	pid_t mpid;	/* manager pid of this task   			*/
+	/* reverse pointer back to controlling job step */
+	job_step_t *job_step;
+	task_t *next;	/* next task in this job step			*/
+};
+
+struct job_step {
+	uid_t      uid;		/* presumably the job owner's uid -- TODO confirm */
+	uint32_t   jobid;	/* with stepid, the key _shm_find_step() matches on */
+	uint32_t   stepid;
+	uint32_t   sw_id;	/* Switch/Interconnect specific id  */
+	int        ntasks;	/* number of tasks in this job	    */
+	pid_t      sid;		/* Job session id 		    */
+
+	int        io_update;	/* srun addrs updated and not yet read back */
+	slurm_addr respaddr;	/* Addr to send messages to srun on */
+	slurm_addr ioaddr;	/* Addr to connect to initialize IO */
+	srun_key_t key;		/* last key from srun client	    */
+
+
+	job_state_t state;	/* Job step status 		    */
+	time_t      timelimit;	/* job time limit		    */
+	task_t     *task_list;	/* list of this step's tasks        */
+};
+
+
+/* 
+ * Attach to and initialize slurmd shared memory segment
+ * Returns -1 and sets errno on failure.
+ */
+int shm_init(void);
+
+/*
+ * Release slurmd shared memory segment. Deallocates segment if no
+ * other processes are currently attached.
+ */
+int shm_fini(void);
+
+/*
+ * Force cleanup of any stale shared memory locks
+ */
+void shm_cleanup(void);
+
+/*
+ * Insert a new step into shared memory, the step passed in by address
+ * should be filled in with the appropriate values, excepting the
+ * task_list pointer (see add_task below to add tasks to a job step)
+ * The resulting step will not be modified nor freed. The step information is
+ * *copied* into shared memory
+ *
+ * Failure modes are:
+ *   EEXIST: A step already exists in shared memory with that jobid,stepid
+ *   ENOSPC: No step slots remain in shared memory
+ */
+int shm_insert_step(job_step_t *step);
+
+/*
+ * Delete the job step record from shared memory, if it exists
+ *
+ * Returns SLURM_FAILURE and sets errno if job step cannot be deleted
+ *  ESRCH: Job step with jobid,stepid not found
+ */
+int shm_delete_step(uint32_t jobid, uint32_t stepid);
+
+/*
+ * Return a *copy* of the job step with jobid,stepid from shared
+ * memory. The copy must be released with shm_free_step()
+ *
+ * Returns NULL if job step is not found in shared memory.
+ */
+job_step_t *shm_get_step(uint32_t jobid, uint32_t stepid);
+
+/*
+ * Update an existing job step to match "step"
+ * returns SLURM_FAILURE if job step cannot be found
+ */
+int shm_update_step(job_step_t *step);
+
+/*
+ * Deallocate memory used by step struct returned from shm_get_step()
+ */
+void shm_free_step(job_step_t *step);
+
+/* 
+ * Lock shared memory and send `signal' to all tasks in step 
+ */
+int shm_signal_step(uint32_t jobid, uint32_t stepid, uint32_t signal);
+
+/* 
+ * Add a task record to a job step in memory
+ *
+ * Returns SLURM_FAILURE and following errnos if not successful:
+ *   ESRCH: Cannot find job step
+ *   EEXIST: A task with that id is already associated with job step
+ *   ENOMEM: No more task slots available in shared memory
+ */
+int shm_add_task(uint32_t jobid, uint32_t stepid, task_t *task);
+
+
+/*
+ *  update job step session id
+ */
+int shm_update_step_sid(uint32_t jobid, uint32_t stepid, int sid);
+
+
+/*
+ * update job step state 
+ */
+int shm_update_step_state(uint32_t jobid, uint32_t stepid, job_state_t state);
+
+
+/* 
+ * lock and return _pointer_ to step state in shared memory
+ * Caller must subsequently call shm_unlock_step_state() or shared memory
+ *  will be locked for everyone else.
+ * (Note: This function is different from most others in this module as
+ *  it returns a pointer into the shared memory region instead of a copy
+ *  of the data. Callers should remain cognizant of this fact. )
+ */
+job_state_t *shm_lock_step_state(uint32_t jobid, uint32_t stepid);
+
+/* unlock job step state
+ */
+void shm_unlock_step_state(uint32_t jobid, uint32_t stepid);
+
+/* 
+ * update job step ioaddr and respaddr (EAGAIN if an update is still pending)
+ */
+int shm_update_step_addrs(uint32_t jobid, uint32_t stepid, 
+		          slurm_addr *ioaddr, slurm_addr *respaddr);
+
+
+/* 
+ * Return true if ioaddr was updated
+ */
+bool shm_addr_updated(uint32_t jobid, uint32_t stepid);
+
+
+/* 
+ * Atomically return current ioaddr and respaddr and reset io_update to false
+ */
+int shm_step_addrs(uint32_t jobid, uint32_t stepid, 
+		   slurm_addr *ioaddr, slurm_addr *respaddr);
+
+
+/* 
+ * update job step timelimit
+ */
+int shm_update_step_timelimit(uint32_t jobid, uint32_t stepid, time_t newlim);
+
+
+/* 
+ * Return job step timelimit
+ */
+time_t shm_step_timelimit(uint32_t jobid, uint32_t stepid);
+
+#endif /* !_SHM_H */
diff --git a/src/slurmd/shmem_struct.c b/src/slurmd/shmem_struct.c
deleted file mode 100644
index fd7de3f644f..00000000000
--- a/src/slurmd/shmem_struct.c
+++ /dev/null
@@ -1,263 +0,0 @@
-/*****************************************************************************\
- *  shmem_struct.c - shared memory support functions
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <http://www.llnl.gov/linux/slurm/>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-\*****************************************************************************/
-
-#include <stdlib.h>
-#include <assert.h>
-#include <pthread.h>
-#include <sys/types.h>
-#include <sys/shm.h>
-#include <string.h>
-
-#include <src/common/slurm_errno.h>
-#include <src/common/log.h>
-#include <src/common/slurm_protocol_api.h>
-#include <src/slurmd/shmem_struct.h>
-
-extern int errno;
-static int shmem_gid;
-#define SHMEM_PERMS 0600
-
-/* function prototypes */
-static void clear_task(task_t * task);
-static void clear_job_step(job_step_t * job_step);
-static int prepend_task(slurmd_shmem_t * shmem, job_step_t * job_step,
-			task_t * task);
-
-/* gets a pointer to the slurmd shared memory segment
- * if it doesn't exist, one is created 
- * returns - a void * pointer to the shared memory segment
- */
-void *get_shmem()
-{
-	void *shmem_addr;
-	int key = ftok(".", 'a');
-
-	assert(key != SLURM_ERROR);
-
-	shmem_gid = shmget(key, sizeof(slurmd_shmem_t), IPC_CREAT | SHMEM_PERMS);
-
-	debug("shmget id = %i ", shmem_gid);
-	if (shmem_gid == SLURM_ERROR) 
-		fatal("can't get shared memory segment: %m ");
-
-	shmem_addr = shmat(shmem_gid, NULL, 0);
-	if (shmem_addr == (void *)SLURM_ERROR)
-		fatal("Unable to attach to shared memory: %m");
-
-	return shmem_addr;
-}
-
-int rel_shmem(void *shmem_addr)
-{
-	if ((shmdt(shmem_addr)) < 0)
-		error("unable to release shared memory: %m");
-	return shmctl(shmem_gid, IPC_RMID, NULL);
-}
-
-/* initializes the shared memory segment, this should only be called 
- * once by the master slurmd after the initial get_shmem call.
- *
- * shmem - pointer to the shared memory segment returned by get_shmem ( )
- */
-void init_shmem(slurmd_shmem_t * shmem)
-{
-	int i;
-
-	/* set everthing to zero */
-	memset(shmem, 0, sizeof(slurmd_shmem_t));
-
-	/* sanity check */
-	/* set all task objects to unused */
-	for (i = 0; i < MAX_TASKS; i++) {
-		clear_task(&shmem->tasks[i]);
-	}
-
-	/* set all job_step objects to unused */
-	for (i = 0; i < MAX_JOB_STEPS; i++) {
-		clear_job_step(&shmem->job_steps[i]);
-	}
-	pthread_mutex_init(&shmem->mutex, NULL);
-}
-
-/* runs through the job_step array looking for a unused job_step.
- * upon finding one the passed src job_step is copied into the shared mem job_step array
- * shmem - pointer to the shared memory segment returned by get_shmem ( )
- * job_step_t - src job_step to be added to the shared memory list
- * returns - the address of the assigned job_step in the shared mem job_step array or
- * the function dies on a fatal log call if the array is full
- */
-job_step_t *alloc_job_step(slurmd_shmem_t * shmem, int job_id,
-			   int job_step_id)
-{
-	int i;
-	pthread_mutex_lock(&shmem->mutex);
-	for (i = 0; i < MAX_JOB_STEPS; i++) {
-		if (shmem->job_steps[i].used == false) {
-			clear_job_step(&shmem->job_steps[i]);
-			shmem->job_steps[i].used = true;
-			shmem->job_steps[i].job_id = job_id;
-			shmem->job_steps[i].job_step_id = job_step_id;
-			pthread_mutex_unlock(&shmem->mutex);
-			return &shmem->job_steps[i];
-		}
-	}
-	pthread_mutex_unlock(&shmem->mutex);
-	error("No available job_step slots in shmem segment");
-	slurm_seterrno(ESLURMD_NO_AVAILABLE_JOB_STEP_SLOTS_IN_SHMEM);
-
-	return (void *) SLURM_ERROR;
-}
-
-/* runs through the task array looking for a unused task.
- * upon finding one the passed src task is copied into the shared mem task array
- * shmem - pointer to the shared memory segment returned by get_shmem ( )
- * new_task - src task to be added to the shared memory list
- * returns - the address of the assigned task in the shared mem task array
- * the function dies on a fatal log call if the array is full
- */
-task_t *alloc_task(slurmd_shmem_t * shmem, job_step_t * job_step)
-{
-	int i;
-	pthread_mutex_lock(&shmem->mutex);
-	for (i = 0; i < MAX_TASKS; i++) {
-		if (shmem->tasks[i].used == false) {
-			clear_task(&shmem->tasks[i]);
-			shmem->tasks[i].used = true;
-			prepend_task(shmem, job_step, &shmem->tasks[i]);
-			pthread_mutex_unlock(&shmem->mutex);
-			return &shmem->tasks[i];
-		}
-	}
-	pthread_mutex_unlock(&shmem->mutex);
-	error("No available task slots in shmem segment");
-	slurm_seterrno(ESLURMD_NO_AVAILABLE_TASK_SLOTS_IN_SHMEM);
-	return (void *) SLURM_ERROR;
-}
-
-
-/* prepends a new task onto the front of a list of tasks assocuated with a job_step.
- * it calls add_task which copies the passed task into a task array in shared memoery
- * sets pointers from the task to the corresponding job_step array 
- * note if the task array is full,  the add_task function will assert and exiti
- * shmem - pointer to the shared memory segment returned by get_shmem ( )
- * job_step - job_step to receive the new task
- * task - task to be prepended
- */
-static int prepend_task(slurmd_shmem_t * shmem, job_step_t * job_step,
-			task_t * task)
-{
-	/* newtask next pointer gets head of the jobstep task list */
-	task->next = job_step->head_task;
-
-	/* newtask pointer becomes the new head of the jobstep task list */
-	job_step->head_task = task;
-
-	/* set back pointer from task to job_step */
-	task->job_step = job_step;
-
-	return SLURM_SUCCESS;
-}
-
-/* clears a job_step and associated task list for future use */
-int deallocate_job_step(job_step_t * jobstep)
-{
-	task_t *task_ptr = jobstep->head_task;
-	task_t *task_temp_ptr;
-	while (task_ptr != NULL) {
-		task_temp_ptr = task_ptr->next;
-		clear_task(task_ptr);
-		task_ptr = task_temp_ptr;
-	}
-	clear_job_step(jobstep);
-	return SLURM_SUCCESS;
-}
-
-/* clears a task array member for future use 
- */
-static void clear_task(task_t * task)
-{
-	task->used = false;
-	task->job_step = NULL;
-	task->next = NULL;
-}
-
-/* clears a job_step array memeber for future use 
- */
-static void clear_job_step(job_step_t * job_step)
-{
-	job_step->used = false;
-	job_step->head_task = NULL;
-}
-
-/* api call for DPCS to return a job_id given a session_id 
- */
-int find_job_id_for_session(slurmd_shmem_t * shmem, int session_id)
-{
-	int i;
-	pthread_mutex_lock(&shmem->mutex);
-	for (i = 0; i < MAX_JOB_STEPS; i++) {
-		if (shmem->job_steps[i].used == true) {
-			if (shmem->job_steps[i].session_id == session_id)
-
-				pthread_mutex_unlock(&shmem->mutex);
-			return shmem->job_steps[i].job_id;
-		}
-	}
-	pthread_mutex_unlock(&shmem->mutex);
-	debug("No job_id found for session_id %i", session_id);
-	return SLURM_FAILURE;
-}
-
-job_step_t *find_job_step(slurmd_shmem_t * shmem, int job_id,
-			  int job_step_id)
-{
-	int i;
-	pthread_mutex_lock(&shmem->mutex);
-	for (i = 0; i < MAX_JOB_STEPS; i++) {
-		if (shmem->job_steps[i].used == true
-		    && shmem->job_steps[i].job_id == job_id
-		    && shmem->job_steps[i].job_step_id == job_step_id) {
-			debug3("found step %d.%d in slot %d", 
-			       job_id, job_step_id, i);
-			pthread_mutex_unlock(&shmem->mutex);
-			return &shmem->job_steps[i];
-		}
-	}
-	debug3("find_job_step: unable to find %d.%d", job_id, job_step_id);
-	pthread_mutex_unlock(&shmem->mutex);
-	return (void *) SLURM_ERROR;
-}
-
-task_t *find_task(job_step_t * job_step_ptr, int task_id)
-{
-	task_t *task_ptr = job_step_ptr->head_task;
-	while (task_ptr != NULL) {
-		if (task_ptr->task_id == task_id) {
-			return task_ptr;
-		}
-	}
-	return (void *) SLURM_ERROR;
-}
diff --git a/src/slurmd/shmem_struct.h b/src/slurmd/shmem_struct.h
deleted file mode 100644
index 7f82be6f7f8..00000000000
--- a/src/slurmd/shmem_struct.h
+++ /dev/null
@@ -1,82 +0,0 @@
-#ifndef _SHMEM_STRUCT_H
-#define _SHMEM_STRUCT_H
-
-#include <src/slurmd/task_mgr.h>
-
-#define MAX_TASKS 128
-#define MAX_JOB_STEPS 128
-
-typedef struct job_step job_step_t;
-typedef struct task task_t;
-/* represents a task running on a node */
-struct task {
-	uint32_t task_id;	 /* srun assigned globally unique taskid     */
-	task_start_t task_start; /* task_start_message see task_mgr.h 	     */
-
-	/* boolean type that is marked when this record is used 	     */
-	char used;		
-
-	job_step_t *job_step;	 /* reverse pointer to the cntrllng job_step */
-	task_t *next;		 /* next task pointer in the job_step 	     */
-};
-
-/* represents a job_step consisting of a list of tasks */
-struct job_step {
-	uint32_t job_id;	/* slurmctld assigned jobid 		*/
-	uint32_t job_step_id;	/* slurmctld assigned job_step id 	*/
-	uint32_t session_id;
-
-	/* boolean type that is marked when this record is used 	*/
-	char used;		
-
-	task_t *head_task;	/* fist task in the job_step 		*/
-};
-
-/* shared memory structure.  This structure is overlayed on top of the allocated shared ram */
-typedef struct slurmd_shmem {
-	pthread_mutex_t mutex;	             /* mutex to protect shared ram  */
-	task_t tasks[MAX_TASKS];             /* array of task objects 	     */
-	job_step_t job_steps[MAX_JOB_STEPS]; /* array of job_step objects    */
-} slurmd_shmem_t;
-
-/* gets shared memory segment, allocating it if needed 
- */
-void *get_shmem();
-
-/* should only be called once after allocation of shared ram
- * Marks all task and job_step objects as unused 
- */
-void init_shmem(slurmd_shmem_t * shmem);
-
-/* detaches from shared ram and deallocates shared ram if no other
- * attachments exist 
- */
-int rel_shmem(void *shmem_addr);
-
-/* allocates job step from shared memory array 
- */
-job_step_t *alloc_job_step(slurmd_shmem_t * shmem, int job_id,
-			   int job_step_id);
-
-/* allocates task from shared memory array 
- */
-task_t *alloc_task(slurmd_shmem_t * shmem, job_step_t * job_step);
-
-/* api call for DPCS to return a job_id given a session_id 
- */
-int find_job_id_for_session(slurmd_shmem_t * shmem, int session_id);
-
-/* clears a job_step and associated task list for future use 
- */
-int deallocate_job_step(job_step_t * jobstep);
-
-/* find a particular job_step 
- */
-job_step_t *find_job_step(slurmd_shmem_t * shmem, int job_id,
-			  int job_step_id);
-
-/* find a particular task 
- */
-task_t *find_task(job_step_t * job_step, int task_id);
-
-#endif /* _SHMEM_STRUCT_H */
diff --git a/src/slurmd/slurmd.c b/src/slurmd/slurmd.c
index f05d0aa3608..6c1cb963f25 100644
--- a/src/slurmd/slurmd.c
+++ b/src/slurmd/slurmd.c
@@ -20,7 +20,7 @@
  *  details.
  *  
  *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
  *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
 \*****************************************************************************/
 
@@ -44,14 +44,14 @@
 #include <src/common/xstring.h>
 #include <src/common/list.h>
 #include <src/common/slurm_protocol_api.h>
-#include <src/common/util_signals.h>
 #include <src/common/log.h>
+#include <src/common/fd.h>
 
 #include <src/slurmd/batch_mgr.h>
 #include <src/slurmd/get_mach_stat.h>
 #include <src/slurmd/slurmd.h>
-#include <src/slurmd/task_mgr.h>
-#include <src/slurmd/shmem_struct.h>
+#include <src/slurmd/mgr.h>
+#include <src/slurmd/shm.h>
 #include <src/common/signature_utils.h>
 #include <src/common/credential_utils.h>
 
@@ -64,6 +64,7 @@ typedef struct slurmd_config {
 	log_options_t log_opts;
 	char *slurm_conf;
 	int daemonize;
+	slurm_fd serverfd;
 } slurmd_config_t;
 
 typedef struct connection_arg {
@@ -73,7 +74,6 @@ typedef struct connection_arg {
 time_t init_time;
 pid_t slurmd_pid;
 time_t shutdown_time = (time_t) 0;
-slurmd_shmem_t *shmem_seg;
 char hostname[MAX_NAME_LEN];
 slurm_ssl_key_ctx_t verify_ctx;
 List credential_state_list;
@@ -138,13 +138,14 @@ int main(int argc, char *argv[])
 	/* send registration message to slurmctld */
 	send_node_registration_status_msg();
 
-	/* block SIGHUP, SIGTERM, and SIGINT in all threads */
-	/* block_some_signals(); */
-
 	/* create attached thread to handle signals */
-	if (pthread_create(&sigthr, NULL, &slurmd_handle_signals, 
-			   (void *)NULL) != 0)
-		fatal("pthread_create: %m");
+	{ /* XXX fix this properly */
+		pthread_attr_t attr;
+		pthread_attr_init(&attr);
+		if (pthread_create(&sigthr, &attr, &slurmd_handle_signals, 
+					(void *)NULL) != 0)
+			fatal("pthread_create: %m");
+	}
 
 	slurmd_msg_engine((void *)NULL);
 
@@ -201,8 +202,7 @@ void *slurmd_handle_signals(void *args)
 int slurmd_init()
 {
 	slurmd_pid = getpid();
-	shmem_seg = get_shmem();
-	init_shmem(shmem_seg);
+	shm_init();
 	slurm_ssl_init();
 	slurm_init_verifier(&verify_ctx, public_cert_filename());
 	initialize_credential_state_list(&credential_state_list);
@@ -262,7 +262,7 @@ static char *public_cert_filename()
 int slurmd_destroy()
 {
 	destroy_credential_state_list(credential_state_list);
-	rel_shmem(shmem_seg);
+	shm_fini();
 	slurm_destroy_ssl_key_ctx(&verify_ctx);
 	slurm_ssl_destroy();
 	return SLURM_SUCCESS;
@@ -296,15 +296,20 @@ fill_in_node_registration_status_msg(slurm_node_registration_status_msg_t *
 	/* fill in data structure */
 	node_reg_msg->timestamp = time(NULL);
 	node_reg_msg->node_name = xstrdup(hostname);
+
 	get_procs(&node_reg_msg->cpus);
 	get_memory(&node_reg_msg->real_memory_size);
 	get_tmp_disk(&node_reg_msg->temporary_disk_space);
-/* FIXME: Need to set correct count of currently running job stepss and their ID's below */
-/* This is needed to more reliably recover from restarts of daemons */
+
+	/* FIXME: Need to set the correct count of currently running job
+	 * steps and their IDs below. This is needed to recover more
+	 * reliably from restarts of daemons. */
+
 	node_reg_msg->job_count = 0;
 	node_reg_msg->job_id = NULL;
 	node_reg_msg->step_id = NULL;
-	info("Configuration name=%s cpus=%u real_memory=%u, tmp_disk=%u, job_count=%u",
+	info("Configuration name=%s cpus=%u real_memory=%u, "
+	     "tmp_disk=%u, job_count=%u",
 	     hostname, node_reg_msg->cpus,
 	     node_reg_msg->real_memory_size,
 	     node_reg_msg->temporary_disk_space,
@@ -330,6 +335,9 @@ void *slurmd_msg_engine(void *args)
 	    == SLURM_SOCKET_ERROR)
 		fatal("slurm_init_msg_engine_port: %m");
 
+	fd_set_close_on_exec((int) sockfd);
+	slurmd_conf.serverfd = sockfd;
+
 	if ((rc = pthread_attr_init(&thread_attr)))
 		error("pthread_attr_init returned %d", rc);
 
@@ -343,7 +351,8 @@ void *slurmd_msg_engine(void *args)
 		    xmalloc(sizeof(connection_arg_t));
 
 		/* accept needed for stream implementation 
-		 * is a no-op in mongo implementation that just passes sockfd to newsockfd
+		 * is a no-op in mongo implementation that just passes 
+		 * sockfd to newsockfd
 		 */
 		if ((newsockfd = slurm_accept_msg_conn(sockfd, &cli_addr)) == 
 				SLURM_SOCKET_ERROR) {
@@ -351,14 +360,15 @@ void *slurmd_msg_engine(void *args)
 			continue;
 		}
 
-		/* receive message call that must occur before thread spawn because in message 
-		 * implementation their is no connection and the message is the sign of a new connection */
+		/* receive message call that must occur before thread 
+		 * spawn because in message implementation there is no 
+		 * connection and the message is the sign of a new connection 
+		 */
 		conn_arg->newsockfd = newsockfd;
 
 		if (shutdown_time) {
 			service_connection((void *) conn_arg);
-			pthread_exit((void *) 0);
-
+			break;
 		}
 
 		if ((rc = pthread_create(&request_thread_id, 
@@ -369,6 +379,7 @@ void *slurmd_msg_engine(void *args)
 			error("slurmd_msg_engine: pthread_create: %m");
 			service_connection((void *) conn_arg);
 		}
+
 	}
 	slurm_shutdown_msg_engine(sockfd);
 	return NULL;
@@ -455,35 +466,60 @@ void slurmd_req(slurm_msg_t * msg)
 /* rpc methods */
 /******************************/
 
+static int _launch_tasks(launch_tasks_request_msg_t *req)
+{
+	pid_t pid;
+	/* Fork a child to run the job step; the parent only logs the new pid. */
+	switch ((pid = fork())) {
+	  case -1:
+		  error("launch_tasks: fork: %m");
+		  return SLURM_ERROR;
+	  case 0: /* child runs job */
+		  /* drop the listening socket and credential/SSL state first */
+		  slurm_shutdown_msg_engine(slurmd_conf.serverfd);
+		  destroy_credential_state_list(credential_state_list);
+		  slurm_destroy_ssl_key_ctx(&verify_ctx);
+		  slurm_ssl_destroy();
+		  mgr_launch_tasks(req);
+		  break; /* NOTE(review): reached only if mgr_launch_tasks() returns */
+	  default:
+		  verbose("created process %ld for job %d.%d", 
+				  (long) pid, req->job_id, req->job_step_id);
+		  break;
+	}
+
+	return SLURM_SUCCESS;
+}
+
 /* Launches tasks */
 void slurm_rpc_launch_tasks(slurm_msg_t * msg)
 {
 	/* init */
 	int rc = SLURM_SUCCESS;
 	clock_t start_time;
-	launch_tasks_request_msg_t *task_desc =
-	    (launch_tasks_request_msg_t *) msg->data;
 	slurm_msg_t resp_msg;
 	launch_tasks_response_msg_t task_resp;
+	launch_tasks_request_msg_t *req = 
+		(launch_tasks_request_msg_t *) msg->data;
 
 	start_time = clock();
 	info("slurmd_req: launch tasks message received");
-
-	slurm_print_launch_task_msg(task_desc);
+	slurm_print_launch_task_msg(req);
 
 	/* do RPC call */
 	/* test credentials */
-	/* rc =  */ verify_credential(&verify_ctx, task_desc->credential, 
+	/* rc =  */ verify_credential(&verify_ctx, req->credential, 
 			       credential_state_list);
 
+	if (rc == SLURM_SUCCESS)
+		rc = _launch_tasks(req);
 	task_resp.node_name = hostname;
-	task_resp.srun_node_id = task_desc->srun_node_id;
+	task_resp.srun_node_id = req->srun_node_id;
 
-	resp_msg.address = task_desc->response_addr;
+	resp_msg.address = req->response_addr;
 	resp_msg.data = &task_resp;
 	resp_msg.msg_type = RESPONSE_LAUNCH_TASKS;
 
-	
 	task_resp.return_code = rc; 
 
 	/* return result */
@@ -494,7 +530,6 @@ void slurm_rpc_launch_tasks(slurm_msg_t * msg)
 		info("slurmd_req: launch authorization completed "
 		     "successfully, time=%ld", (long) (clock() - start_time));
 		slurm_send_only_node_msg(&resp_msg);
-		launch_tasks(task_desc);
 	}
 }
 
@@ -507,23 +542,17 @@ void slurm_rpc_ping(slurm_msg_t * msg)
 /* Kills Launched Tasks */
 void slurm_rpc_kill_tasks(slurm_msg_t * msg)
 {
-	/* init */
-	int error_code;
-	clock_t start_time;
-	kill_tasks_msg_t *kill_tasks_msg = (kill_tasks_msg_t *) msg->data;
-
-	start_time = clock();
+	int rc;
+	kill_tasks_msg_t *req = (kill_tasks_msg_t *) msg->data;
 
-	/* do RPC call */
-	error_code = kill_tasks(kill_tasks_msg);
+	rc = shm_signal_step(req->job_id, req->job_step_id, req->signal);
 
 	/* return result */
-	if (error_code) {
-		error("slurmd_req: kill tasks error %d, time=%ld",
-		      error_code, (long) (clock() - start_time));
-		slurm_send_rc_msg(msg, error_code);
+	if (rc) {
+		error("slurmd_req: kill tasks error %d", rc);
+		slurm_send_rc_msg(msg, rc);
 	} else {
-		info("slurmd_req: kill tasks completed successfully, time=%ld", (long) (clock() - start_time));
+		verbose("slurmd_req: kill tasks completed");
 		slurm_send_rc_msg(msg, SLURM_SUCCESS);
 	}
 }
@@ -539,7 +568,7 @@ void slurm_rpc_reattach_tasks_streams(slurm_msg_t * msg)
 	start_time = clock();
 
 	/* do RPC call */
-	error_code = reattach_tasks_streams(reattach_tasks_steams_msg);
+	/* error_code = reattach_tasks_streams(reattach_tasks_steams_msg);*/
 
 	/* return result */
 	if (error_code) {
@@ -556,22 +585,19 @@ void slurm_rpc_reattach_tasks_streams(slurm_msg_t * msg)
 void slurm_rpc_revoke_credential(slurm_msg_t * msg)
 {
 	/* init */
-	int error_code = SLURM_SUCCESS;
+	int rc = SLURM_SUCCESS;
 	clock_t start_time;
-	revoke_credential_msg_t *revoke_credential_msg =
-	    (revoke_credential_msg_t *) msg->data;
+	revoke_credential_msg_t *req = (revoke_credential_msg_t *) msg->data;
 
 	start_time = clock();
 
 	/* do RPC call */
-	error_code =
-	    revoke_credential(revoke_credential_msg,
-			      credential_state_list);
+	rc = revoke_credential(req, credential_state_list);
 
 	/* return result */
-	if (error_code) {
+	if (rc) {
 		error("slurmd_req:  error %m errno %d, time=%ld",
-		      error_code, (long) (clock() - start_time));
+		      rc, (long) (clock() - start_time));
 		slurm_send_rc_msg(msg, errno);
 	} else {
 		info("slurmd_req:  completed successfully, time=%ld",
@@ -607,7 +633,7 @@ int slurmd_shutdown()
 	return_code_msg_t *slurm_rc_msg;
 	slurm_addr slurmd_addr;
 
-	kill_all_tasks();
+	/* kill_all_tasks();*/
 
 	/* init message connection for message communication with controller */
 	slurm_set_addr_char(&slurmd_addr, slurm_get_slurmd_port(),
@@ -647,24 +673,16 @@ int slurmd_shutdown()
 
 void slurm_rpc_launch_batch_job(slurm_msg_t * msg)
 {
-	/* init */
-	int error_code = SLURM_SUCCESS;
-	clock_t start_time;
-	batch_job_launch_msg_t *batch_job_launch_msg = ( batch_job_launch_msg_t * ) msg->data ;
-
-	start_time = clock();
+	int rc;
+	batch_job_launch_msg_t *req = (batch_job_launch_msg_t *) msg->data ;
 
-	/* do RPC call */
-	error_code = launch_batch_job(batch_job_launch_msg);
+	rc = SLURM_SUCCESS; /* launch_batch_job(req); */
 
-	/* return result */
-	if (error_code) {
-		error("slurmd_req:  error %d, time=%ld",
-		      error_code, (long) (clock() - start_time));
-		slurm_send_rc_msg(msg, error_code);
+	if (rc) {
+		error("slurmd_req:  error %d", rc);
+		slurm_send_rc_msg(msg, rc);
 	} else {
-		info("slurmd_req:  completed successfully, time=%ld",
-		     (long) (clock() - start_time));
+		info("slurmd_req:  completed successfully");
 		slurm_send_rc_msg(msg, SLURM_SUCCESS);
 	}
 }
@@ -728,7 +746,7 @@ int parse_commandline_args(int argc, char **argv,
 			{0, 0, 0, 0}
 		};
 
-		c = getopt_long(argc, argv, "de:hf:l:s:", long_options,
+		c = getopt_long(argc, argv, "cde:hf:l:s:", long_options,
 				&option_index);
 		if (c == -1)
 			break;
@@ -780,6 +798,9 @@ int parse_commandline_args(int argc, char **argv,
 			}
 			slurmd_config->log_opts.syslog_level = errlev;
 			break;
+		case 'c':
+			shm_cleanup();
+			break;
 		case 0:
 			info("option %s", long_options[option_index].name);
 			if (optarg) {
@@ -797,14 +818,8 @@ int parse_commandline_args(int argc, char **argv,
 			digit_optind = this_option_optind;
 			info("option %c\n", c);
 			break;
-		case '?':
-			info("?? getopt returned character code 0%o ??",
-			     c);
-			break;
-
 		default:
-			info("?? getopt returned character code 0%o ??",
-			     c);
+			info("unknown option %c", c);
 			usage(argv[0]);
 			exit(1);
 		}
@@ -835,7 +850,7 @@ reset_cwd(void)
 	else {
 		if (chdir (dir))
 			error ("chdir to %s error %m", dir);
-debug ("chdir %s", dir);
+		debug ("chdir %s", dir);
 		xfree (dir);
 	}
 }
diff --git a/src/slurmd/task_mgr.c b/src/slurmd/task_mgr.c
deleted file mode 100644
index 410ef936bb8..00000000000
--- a/src/slurmd/task_mgr.c
+++ /dev/null
@@ -1,384 +0,0 @@
-/*****************************************************************************\
- *  task_mgr.c - 
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <http://www.llnl.gov/linux/slurm/>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-\*****************************************************************************/
-
-#include <stdlib.h>
-#include <sys/types.h>
-#include <pwd.h>
-#include <grp.h>
-#include <sys/wait.h>
-#include <errno.h>
-#include <unistd.h>
-#include <string.h>
-#include <pthread.h>
-#include <unistd.h>
-
-#include <src/common/log.h>
-#include <src/common/list.h>
-#include <src/common/xmalloc.h>
-#include <src/common/slurm_protocol_api.h>
-#include <src/common/slurm_errno.h>
-#include <src/common/util_signals.h>
-
-#include <src/slurmd/task_mgr.h>
-#include <src/slurmd/shmem_struct.h>
-#include <src/slurmd/circular_buffer.h>
-#include <src/slurmd/pipes.h>
-#include <src/slurmd/io.h>
-#include <src/slurmd/interconnect.h>
-
-/* global variables */
-
-/* prototypes */
-int kill_task(task_t * task, int signal);
-extern pid_t getsid(pid_t pid);
-
-int send_task_exit_msg(int task_return_code, task_start_t * task_start);
-
-/******************************************************************
- *task launch method call hierarchy
- *
- *launch_tasks()
- *	interconnect_init()
- *		fan_out_task_launch() (pthread_create)
- *			task_exec_thread() (fork) for task exec
- *			task_exec_thread() (pthread_create) for io piping 
- ******************************************************************/
-
-int fan_out_task_launch(launch_tasks_request_msg_t * launch_msg)
-{
-	int i;
-	int session_id;
-
-	/* shmem work - see slurmd.c shmem_seg this is probably not needed */
-	slurmd_shmem_t *shmem_ptr = get_shmem();
-
-	/* alloc a job_step object in shmem for this launch_tasks request 
-	 * launch_tasks should really be named launch_job_step             
-	 */
-	job_step_t *curr_job_step = 
-		alloc_job_step(shmem_ptr, launch_msg->job_id, 
-			       launch_msg->job_step_id);
-
-	/* task pointer that will point to shmem task structures as they 
-	 * are allocated 
-	 */
-	task_t *curr_task = NULL;
-
-	/* array of pointers used in this function to point to the 
-	 * task_start structure for each task to be launched
-	 */
-	task_start_t *task_start[launch_msg->tasks_to_launch];
-
-	debug3("entered fan_out_task_launch()");
-	debug("msg->job_step_id = %d", launch_msg->job_step_id);
-
-	if ((session_id = setsid()) == SLURM_ERROR) {
-		error("set sid failed: %m");
-		if ((session_id = getsid(0)) == SLURM_ERROR) {
-			error("getsid also failed");
-		}
-	}
-
-	curr_job_step->session_id = session_id;
-
-
-	debug3("going to launch %d tasks", launch_msg->tasks_to_launch);
-	/* launch requested number of threads 
-	 */
-	for (i = 0; i < launch_msg->tasks_to_launch; i++) {
-		curr_task = alloc_task(shmem_ptr, curr_job_step);
-		task_start[i] = &curr_task->task_start;
-		curr_task->task_id = launch_msg->global_task_ids[i];
-
-		/* fill in task_start struct */
-		task_start[i]->launch_msg = launch_msg;
-		task_start[i]->local_task_id = i;
-		task_start[i]->io_streams_dest = launch_msg->streams;
-
-		debug("going to launch task %d", i);
-		if (launch_task(task_start[i])) {
-			error("launch_task error ");
-			goto kill_tasks_label;
-		}
-		debug("task %i launched", i);
-	}
-
-	/* wait for all the launched threads to finish 
-	 */
-	wait_for_tasks(launch_msg, task_start);
-
-	goto return_label;
-
-      kill_tasks_label:
-	/* kill_launched_tasks(launch_msg, task_start, i); */
-      return_label:
-	/* can't release if this is the same process as the main daemon ie threads
-	 * this is needed if we use forks
-	 * rel_shmem ( shmem_ptr ) ; */
-	deallocate_job_step(curr_job_step);
-	return SLURM_SUCCESS;
-}
-
-
-
-void *task_exec_thread(void *arg)
-{
-	task_start_t *task_start = (task_start_t *) arg;
-	launch_tasks_request_msg_t *launch_msg = task_start->launch_msg;
-	int *pipes = task_start->pipes;
-	int rc;
-	int cpid;
-	struct passwd *pwd;
-	int task_return_code;
-	int local_errno;
-	log_options_t log_opts_def = LOG_OPTS_STDERR_ONLY;
-
-	interconnect_set_capabilities(task_start);
-
-	/* create pipes to read child stdin, stdout, sterr */
-	init_parent_pipes(task_start->pipes);
-
-#define FORK_ERROR -1
-#define CHILD_PROCCESS 0
-	switch ((cpid = fork())) {
-	case FORK_ERROR:
-		break;
-
-	case CHILD_PROCCESS:
-		/* log init stuff */
-		log_init("slurmd", log_opts_def, 0, NULL);
-
-		unblock_all_signals();
-
-		posix_signal_ignore(SIGTTOU);	/* ignore tty output */
-		posix_signal_ignore(SIGTTIN);	/* ignore tty input */
-		posix_signal_ignore(SIGTSTP);	/* ignore user */
-
-		/* setup interconnect specific environment variables
-		 */
-		interconnect_env(&launch_msg->env, &launch_msg->envc, 
-				 launch_msg->srun_node_id,
-				 launch_msg->nnodes,
-				 launch_msg->global_task_ids[task_start->local_task_id],
-				 launch_msg->nprocs);
-
-		/* setup std stream pipes */
-		setup_child_pipes(pipes);
-
-		/* get passwd file info */
-		if ((pwd = getpwuid(launch_msg->uid)) == NULL) {
-			error("user id not found in passwd file");
-			_exit(SLURM_FAILURE);
-		}
-
-		/* setgid and uid */
-		if ((rc = setgid(pwd->pw_gid)) < 0) {
-			error("setgid failed: %m ");
-			_exit(SLURM_FAILURE);
-		}
-
-		/* initgroups */
-		if (( getuid() == (uid_t)0 ) &&
-		    ( initgroups(pwd->pw_name, pwd->pw_gid) ) < 0) {
-			error("initgroups() failed: %m");
-			//_exit(SLURM_FAILURE);
-		}
-
-		if ((rc = setuid(launch_msg->uid)) < 0) {
-			error("setuid() failed: %m");
-			_exit(SLURM_FAILURE);
-		}
-
-		/* run bash and cmdline */
-		if ((chdir(launch_msg->cwd)) < 0) {
-			error("cannot chdir to `%s,' going to /tmp instead",
-					launch_msg->cwd);
-			if ((chdir("/tmp")) < 0) {
-				error("couldn't chdir to `/tmp' either. dying.");
-				_exit(SLURM_FAILURE);
-			}
-		}
-
-		execve(launch_msg->argv[0], launch_msg->argv, launch_msg->env);
-
-		/* error if execve returns
-		 * clean up */
-		error("execve(): %s: %m", launch_msg->argv[0]);
-		close(STDIN_FILENO);
-		close(STDOUT_FILENO);
-		close(STDERR_FILENO);
-		local_errno = errno;
-		_exit(local_errno);
-		break;
-
-	default:		/*parent proccess */
-		debug("forked pid %ld", cpid);
-		task_start->exec_pid = cpid;
-		/* order below is very important 
-		 * deadlock can occur if you mess with it - ask me how I know :)
-		 */
-
-		debug3("calling setup_parent_pipes");
-		/* 1   */ setup_parent_pipes(task_start->pipes);
-		debug3("calling forward_io");
-		/* 1.5 */ forward_io(arg);
-		debug3("calling waitpid(%ld)", cpid);
-		/* 2   */ waitpid(cpid, &task_return_code, 0);
-		/* 3   */ wait_on_io_threads(task_start);
-
-		send_task_exit_msg(task_return_code, task_start);
-
-		break;
-	}
-	return (void *) SLURM_SUCCESS; /* XXX: I think this is wrong */
-}
-
-
-int send_task_exit_msg(int task_return_code, task_start_t * task_start)
-{
-	slurm_msg_t resp_msg;
-	task_exit_msg_t task_exit;
-
-	/* init task_exit_message */
-	task_exit.return_code = task_return_code;
-	task_exit.task_id =
-	    task_start->launch_msg->global_task_ids[task_start->local_task_id];
-
-	/* init slurm_msg_t */
-	resp_msg.address = task_start->launch_msg->response_addr;
-	resp_msg.data = &task_exit;
-	resp_msg.msg_type = MESSAGE_TASK_EXIT;
-
-	debug("sending task exit code %d", task_return_code);
-
-	/* send message */
-	return slurm_send_only_node_msg(&resp_msg);
-}
-
-int kill_tasks(kill_tasks_msg_t * kill_task_msg)
-{
-	int i = 0;
-	int error_code = SLURM_SUCCESS;
-
-	/* get shmemptr 
-	 */
-	slurmd_shmem_t *shmem_ptr = get_shmem();
-	
-	task_t *task_ptr;
-
-	/* find job step 
-	 */
-	job_step_t *job_step_ptr = 
-		find_job_step(shmem_ptr, kill_task_msg->job_id,
-			      kill_task_msg->job_step_id);
-
-	debug("request to kill step %d.%d with signal %d", 
-	      kill_task_msg->job_id,
-	      kill_task_msg->job_step_id, 
-	      kill_task_msg->signal);
-
-	if (job_step_ptr == (void *) SLURM_ERROR) 
-		slurm_seterrno_ret(ESLURMD_ERROR_FINDING_JOB_STEP_IN_SHMEM);
-
-	/* cycle through job_step and kill tasks */
-	task_ptr = job_step_ptr->head_task;
-
-	while (task_ptr != NULL) {
-		debug3("killing task %i of jobid %i , of job_step %i ", i,
-		       kill_task_msg->job_id, kill_task_msg->job_step_id);
-		kill_task(task_ptr, kill_task_msg->signal);
-		task_ptr = task_ptr->next;
-		i++;
-		debug3("next task_ptr %i ", task_ptr);
-	}
-	debug3("leaving kill_tasks");
-	return error_code;
-}
-
-int kill_all_tasks()
-{
-	int error_code = SLURM_SUCCESS;
-
-	/* get shmemptr */
-	slurmd_shmem_t *shmem = get_shmem();
-
-	int i;
-	pthread_mutex_lock(&shmem->mutex);
-	for (i = 0; i < MAX_JOB_STEPS; i++) {
-		if (shmem->job_steps[i].used == true) {
-			/* cycle through job_step and kill tasks */
-			task_t *task_ptr = shmem->job_steps[i].head_task;
-			while (task_ptr != NULL) {
-				kill_task(task_ptr, SIGKILL);
-				task_ptr = task_ptr->next;
-			}
-		}
-	}
-	pthread_mutex_unlock(&shmem->mutex);
-	return error_code;
-
-}
-
-int kill_task(task_t * task, int signal)
-{
-	debug3("killing proccess %i, with signal %i",
-	       task->task_start.exec_pid, signal);
-	return kill(task->task_start.exec_pid, signal);
-}
-
-int reattach_tasks_streams(reattach_tasks_streams_msg_t * req_msg)
-{
-	int i;
-	int error_code = SLURM_SUCCESS;
-	/* get shmemptr */
-	slurmd_shmem_t *shmem_ptr = get_shmem();
-
-	/* find job step */
-	job_step_t *job_step_ptr =
-	    find_job_step(shmem_ptr, req_msg->job_id,
-			  req_msg->job_step_id);
-
-	/* cycle through tasks and set streams address */
-	for (i = 0; i < req_msg->tasks_to_reattach; i++) {
-		task_t *task =
-		    find_task(job_step_ptr, req_msg->global_task_ids[i]);
-		if (task != NULL) {
-			task->task_start.io_streams_dest =
-			    req_msg->streams;
-		} else {
-			error("task id not found job_id %i "
-			      "job_step_id %i global_task_id %i",
-			      req_msg->job_id, req_msg->job_step_id,
-			      req_msg->global_task_ids[i]);
-		}
-	}
-	return error_code;
-}
-
-void pthread_fork_child_after(void)
-{
-	log_reinit();
-}
diff --git a/src/slurmd/task_mgr.h b/src/slurmd/task_mgr.h
deleted file mode 100644
index 306de7fe93c..00000000000
--- a/src/slurmd/task_mgr.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*****************************************************************************\
- *  task_mgr.h - 
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <http://www.llnl.gov/linux/slurm/>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-\*****************************************************************************/
-
-#ifndef _TASK_MGR_H
-#define _TASK_MGR_H
-
-#if HAVE_CONFIG_H
-#  include <config.h>
-#  if HAVE_INTTYPES_H
-#    include <inttypes.h>
-#  else
-#    if HAVE_STDINT_H
-#      include <stdint.h>
-#    endif
-#  endif			/* HAVE_INTTYPES_H */
-#else				/* !HAVE_CONFIG_H */
-#  include <inttypes.h>
-#endif				/*  HAVE_CONFIG_H */
-
-#include <src/common/slurm_protocol_api.h>
-
-#define STDIN_IO_THREAD 0
-#define STDOUT_IO_THREAD 1
-#define STDERR_IO_THREAD 2
-#define STDSIG_IO_THREAD 3
-#define SLURMD_NUMBER_OF_IO_THREADS 4
-#define SLURMD_IO_MAX_BUFFER_SIZE 4096
-
-/* function prototypes */
-/* launch_tasks
- * called by the rpc method to initiate task launch
- * IN launch_msg	- launch task messge
- * RET int		- return_code
- */
-int launch_tasks(launch_tasks_request_msg_t * launch_msg);
-
-/* kill_tasks
- * called by the rpc method to kill a job_step or set of task launches
- * IN 			- kill task message
- * RET int 		- return_code
- */
-int kill_tasks(kill_tasks_msg_t * kill_task_msg);
-
-/* kill_all_tasks
- * kills all the currently running tasks used by shutdown code 
- * RET 		- return_code
- */
-int kill_all_tasks();
-
-/* reattach_tasks_streams
- * called by the reattach tasks rpc method to change the shmem task structs to point to a new destination for streams
- * IN req_msg		- reattach tasks streams message
- */
-int reattach_tasks_streams(reattach_tasks_streams_msg_t * req_msg);
-
-void *task_exec_thread(void *arg);
-
-void pthread_fork_before(void);
-void pthread_fork_parent_after(void);
-void pthread_fork_child_after(void);
-
-typedef struct task_start {
-	/*task control thread id */
-	pthread_t pthread_id;
-	int thread_return;
-	/*actual exec thread id */
-	int exec_pid;
-	int exec_thread_return;
-	/*io threads ids */
-	pthread_t io_pthread_id[SLURMD_NUMBER_OF_IO_THREADS];
-	int io_thread_return[SLURMD_NUMBER_OF_IO_THREADS];
-	launch_tasks_request_msg_t *launch_msg;
-	int pipes[6];
-	int sockets[2];
-	int local_task_id;
-	char addr_update;
-	slurm_addr io_streams_dest;
-} task_start_t;
-#endif
diff --git a/src/slurmd/threaded_ctrl.c b/src/slurmd/threaded_ctrl.c
deleted file mode 100644
index 6759f430e5b..00000000000
--- a/src/slurmd/threaded_ctrl.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/*****************************************************************************\
- *  threaded_ctrl.c - 
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <http://www.llnl.gov/linux/slurm/>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-\*****************************************************************************/
-
-#include <stdlib.h>
-#include <sys/types.h>
-#include <pwd.h>
-#include <grp.h>
-#include <sys/wait.h>
-#include <errno.h>
-#include <unistd.h>
-#include <string.h>
-#include <pthread.h>
-
-#include <src/common/log.h>
-#include <src/common/list.h>
-#include <src/common/xmalloc.h>
-#include <src/common/slurm_protocol_api.h>
-#include <src/common/slurm_errno.h>
-#include <src/common/util_signals.h>
-
-#include <src/slurmd/task_mgr.h>
-#include <src/slurmd/shmem_struct.h>
-#include <src/slurmd/circular_buffer.h>
-#include <src/slurmd/io.h>
-#include <src/slurmd/pipes.h>
-#include <src/slurmd/reconnect_utils.h>
-
-/* global variables */
-
-/******************************************************************
- *task launch method call hierarchy
- *
- *launch_tasks()
- *	interconnect_init()
- *		fan_out_task_launch() (pthread_create)
- *			task_exec_thread() (fork) for task exec
- *			task_exec_thread() (pthread_create) for io piping 
- ******************************************************************/			
-int launch_task ( task_start_t * task_start )
-{
-		return pthread_create ( & task_start -> pthread_id , NULL , task_exec_thread , ( void * ) task_start ) ;
-}
-
-int wait_for_tasks ( launch_tasks_request_msg_t * launch_msg , task_start_t ** task_start )
-{
-	int i ;
-	int rc ;
-	for ( i = 0 ; i < launch_msg->tasks_to_launch ; i ++ )
-	{
-		rc = pthread_join( task_start[i]->pthread_id , NULL )  ;
-		debug3 ( "wait for tasks: thread %i pthread_id %i joined " , i , task_start[i]->pthread_id ) ;
-	}
-	return SLURM_SUCCESS ;
-}
-	
-int kill_launched_tasks ( launch_tasks_request_msg_t * launch_msg , task_start_t ** task_start , int i )
-{
-	/*
-	int rc ;
-	for (  i-- ; i >= 0  ; i -- )
-	{
-		rc = pthread_kill ( task_start[i]->pthread_id , SIGKILL ) ;
-	}
-	*/
-	return SLURM_SUCCESS ;
-}
diff --git a/src/slurmd/threaded_io.c b/src/slurmd/threaded_io.c
deleted file mode 100644
index 02bd0c05d49..00000000000
--- a/src/slurmd/threaded_io.c
+++ /dev/null
@@ -1,114 +0,0 @@
-/*****************************************************************************\
- *  threaded_io.c - 
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <tew1@llnl.gov> et. al.
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <http://www.llnl.gov/linux/slurm/>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-\*****************************************************************************/
-
-#include <stdlib.h>
-#include <sys/types.h>
-#include <pwd.h>
-#include <grp.h>
-#include <sys/wait.h>
-#include <errno.h>
-#include <unistd.h>
-#include <string.h>
-#include <pthread.h>
-
-#include <src/common/log.h>
-#include <src/common/list.h>
-#include <src/common/xmalloc.h>
-#include <src/common/slurm_protocol_api.h>
-#include <src/common/slurm_errno.h>
-#include <src/common/util_signals.h>
-
-#include <src/slurmd/task_mgr.h>
-#include <src/slurmd/shmem_struct.h>
-#include <src/slurmd/circular_buffer.h>
-#include <src/slurmd/io.h>
-#include <src/slurmd/pipes.h>
-#include <src/slurmd/reconnect_utils.h>
-
-/* global variables */
-
-/******************************************************************
- *task launch method call hierarchy
- *
- *launch_tasks()
- *	interconnect_init()
- *		fan_out_task_launch() (pthread_create)
- *			task_exec_thread() (fork) for task exec
- *			task_exec_thread() (pthread_create) for io piping 
- ******************************************************************/			
-int forward_io ( task_start_t * task_start ) 
-{
-	pthread_attr_t pthread_attr ;
-
-	//posix_signal_pipe_ignore ( ) ;
-
-	/* open stdout*/
-	connect_io_stream ( task_start , STDIN_OUT_SOCK ) ;
-	/* open stderr*/
-	connect_io_stream ( task_start , SIG_STDERR_SOCK ) ;
-	
-	/* spawn io pipe threads */
-	/* set detatch state */
-	pthread_attr_init( & pthread_attr ) ;
-	/*pthread_attr_setdetachstate ( & pthread_attr , PTHREAD_CREATE_DETACHED ) ;*/
-	if ( pthread_create ( & task_start->io_pthread_id[STDIN_FILENO] , NULL , stdin_io_pipe_thread , task_start ) )
-		goto return_label;
-	if ( pthread_create ( & task_start->io_pthread_id[STDOUT_FILENO] , NULL , stdout_io_pipe_thread , task_start ) )
-		goto kill_stdin_thread;
-	if ( pthread_create ( & task_start->io_pthread_id[STDERR_FILENO] , NULL , stderr_io_pipe_thread , task_start ) )
-		goto kill_stdout_thread;
-
-	
-	
-	goto return_label;
-
-	kill_stdout_thread:
-		pthread_kill ( task_start->io_pthread_id[STDOUT_FILENO] , SIGKILL );
-	kill_stdin_thread:
-		pthread_kill ( task_start->io_pthread_id[STDIN_FILENO] , SIGKILL );
-	return_label:
-	return SLURM_SUCCESS ;
-}
-
-int wait_on_io_threads ( task_start_t * task_start ) 
-{
-	/* threads have been detatched*/
-	pthread_join ( task_start->io_pthread_id[STDERR_FILENO] , NULL ) ;
-	info ( "%i: errexit" , task_start -> local_task_id ) ;
-	pthread_join ( task_start->io_pthread_id[STDOUT_FILENO] , NULL ) ;
-	info ( "%i: outexit" , task_start -> local_task_id ) ;
-	/*pthread_join ( task_start->io_pthread_id[STDIN_FILENO] , NULL ) ;*/
-	pthread_cancel ( task_start->io_pthread_id[STDIN_FILENO] );
-	pthread_join ( task_start->io_pthread_id[STDIN_FILENO] , NULL ) ;
-	info ( "%i: inexit" , task_start -> local_task_id ) ;
-	/* thread join on stderr or stdout signifies task termination we should kill the stdin thread */
-	return SLURM_SUCCESS ;
-}
-
-int iotype_init_pipes ( int * pipes )
-{
-	return SLURM_SUCCESS ;
-}
-- 
GitLab