From 0dd3f0cf93796c66e0f16031fb044cf3bd9dc26a Mon Sep 17 00:00:00 2001
From: Moe Jette <>
Date: Wed, 31 Dec 2003 21:48:21 +0000
Subject: [PATCH] Switch plugin module added. While many files were modified,
 these modifications were relatively minor - mostly changes in function names
 or arguments.

 NEWS                                    |    5 +                            |   63 +-
 doc/html/quickstart.html                |    4 +-
 doc/man/man5/slurm.conf.5               |   10 +
 etc/slurm.conf.example                  |   13 +
 slurm/                        |   20 +-
 src/api/config_info.c                   |    3 -
 src/common/                  |   17 +-
 src/common/elanhosts.c                  |  387 ---------
 src/common/elanhosts.h                  |  121 ---
 src/common/plugin.c                     |   12 +-
 src/common/qsw.c                        | 1058 -----------------------
 src/common/qsw.h                        |  100 ---
 src/{slurmd => common}/setenvpf.c       |    2 +-
 src/{slurmd => common}/setenvpf.h       |    2 +-
 src/common/slurm_protocol_defs.c        |   19 +-
 src/common/slurm_protocol_defs.h        |    9 +-
 src/common/slurm_protocol_pack.c        |   49 +-
 src/common/switch.c                     |  390 +++++++++
 src/common/switch.h                     |  228 +++++
 src/plugins/                 |    2 +-
 src/slurmctld/controller.c              |    5 +-
 src/slurmctld/job_mgr.c                 |   21 +-
 src/slurmctld/job_scheduler.c           |    4 +-
 src/slurmctld/node_mgr.c                |    7 +-
 src/slurmctld/proc_req.c                |   30 +-
 src/slurmctld/read_config.c             |  105 +--
 src/slurmctld/slurmctld.h               |    9 +-
 src/slurmctld/step_mgr.c                |   60 +-
 src/slurmd/                  |   18 +-
 src/slurmd/elan_interconnect.c          |  322 -------
 src/slurmd/interconnect.h               |  116 ---
 src/slurmd/job.c                        |    4 +-
 src/slurmd/job.h                        |    6 +-
 src/slurmd/mgr.c                        |   11 +-
 src/slurmd/no_interconnect.c            |   80 --
 src/slurmd/slurmd.c                     |    1 -
 src/slurmd/smgr.c                       |   14 +-
 src/slurmd/ulimits.c                    |    2 +-
 src/srun/allocate.c                     |    4 +-
 src/srun/job.h                          |    4 +-
 src/srun/launch.c                       |    5 +-
 src/srun/srun.c                         |   36 +-
 testsuite/slurm_unit/common/ |    2 +-
 44 files changed, 796 insertions(+), 2584 deletions(-)
 delete mode 100644 src/common/elanhosts.c
 delete mode 100644 src/common/elanhosts.h
 delete mode 100644 src/common/qsw.c
 delete mode 100644 src/common/qsw.h
 rename src/{slurmd => common}/setenvpf.c (98%)
 rename src/{slurmd => common}/setenvpf.h (96%)
 create mode 100644 src/common/switch.c
 create mode 100644 src/common/switch.h
 delete mode 100644 src/slurmd/elan_interconnect.c
 delete mode 100644 src/slurmd/interconnect.h
 delete mode 100644 src/slurmd/no_interconnect.c

diff --git a/NEWS b/NEWS
index 48c3ef1aff9..5fe2ff12293 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,11 @@
 This file describes changes in recent versions of SLURM. It primarily
 documents those changes that are of interest to users and admins. 
+* Changes in SLURM (NOT TAGGED YET)
+ -- Switch plugin added. Add "SwitchType=switch/elan" to slurm.conf for 
+    systems with Quadrics Elan3 or Elan4 switches.
 * Changes in SLURM
  -- Fixes for reported problems:
diff --git a/ b/
index f50f073bc04..7ad0de3a820 100644
--- a/
+++ b/
@@ -160,54 +160,32 @@ AC_SUBST(SLURMD_PORT)
 dnl check for whether to include Elan support
 AC_MSG_CHECKING(whether to include Elan support)
-  AC_HELP_STRING([--with-elan],[compile with Elan support]),
-  [ case "${withval}" in
-      yes) elan=yes ;;
-      no)  elan=no ;;
-      *) AC_MSG_ERROR([bad value ${enableval} for --with-elan]) ;;
-    esac
-  ]
+AC_CHECK_LIB([elanctrl], [elanctrl_open], 
+             [ have_elanctrl=yes
+               have_elan=yes
+               ELAN_LIBS="-lelanctrl"
+               AC_DEFINE(HAVE_LIBELANCTRL, 1, 
+                         [define if you have libelanctrl.]) ], 
+             [ have_elanctrl=no ]
-AM_CONDITIONAL(WITH_ELAN, test "x$with_elan" = "xyes")
-if test "$with_elan" = "yes"; then
-  savedLIBS="$LIBS"
-  AC_CHECK_LIB([elanctrl], [elanctrl_open], 
-               [ have_elanctrl=yes
-                 ELAN_LIBS="-lelanctrl"
-                 AC_DEFINE(HAVE_LIBELANCTRL, 1, 
-                           [define if you have libelanctrl.])
-               ], 
-               [ have_elanctrl=no
-               ]
-  )
-  AC_CHECK_LIB([elan3], [elan3_create], 
-               [ have_elan3=yes
-                 ELAN_LIBS="-lelan3"
-                 AC_DEFINE(HAVE_LIBELAN3, 1, 
-                           [define if you have libelan3.])
-               ], 
-               [ have_elan3=no
-               ]
+AC_CHECK_LIB([elan3], [elan3_create], 
+             [ have_elan3=yes
+               have_elan=yes
+               ELAN_LIBS="-lelan3"
+               AC_DEFINE(HAVE_LIBELAN3, 1, 
+                         [define if you have libelan3.]) ], 
+             [ have_elan3=no ]
-  if test "$have_elanctrl" = "no" -a "$have_elan3" = "no"; then
-      AC_MSG_ERROR([Unable to find libelan3 or libelanctrl for Elan support!])
-  fi
+AM_CONDITIONAL(HAVE_ELAN, test "x$have_elan" = "xyes")
+if test "x$have_elan" = "xyes"; then
+  AC_DEFINE(HAVE_ELAN, 1, [Define to enable Elan support.])
   AC_CHECK_LIB(rmscall, rms_prgcreate, 
-    [],
+    [ELAN_LIBS="$ELAN_LIBS -lrmscall"],
     [AC_MSG_ERROR([unable to find the RMS library needed for Elan support])],
-  AC_DEFINE(HAVE_ELAN, 1, [Define to enable Elan support.])
-  ELAN_LIBS="$ELAN_LIBS -lrmscall"
-  LIBS="$savedLIBS" 
@@ -316,6 +294,7 @@ AC_CONFIG_FILES([Makefile
+		 src/plugins/switch/Makefile
diff --git a/doc/html/quickstart.html b/doc/html/quickstart.html
index 962835d7bb6..da6bc12a06d 100644
--- a/doc/html/quickstart.html
+++ b/doc/html/quickstart.html
@@ -248,9 +248,6 @@ The most commonly used arguments to the <i>configure</i> command include:
 value is <i>/usr/local</i>
 <dd>Specify location of SLURM configuration file
-<dd>Ccompile with support for the Quadrics Elan switch (see
-<a href=""></a>)
 <dd>compile with support for the TotalView debugger (see 
 <a href=""></a>)
@@ -304,6 +301,7 @@ SlurmdPort=7003
 # Node Configurations
diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
index 7654a6ce7d9..2c292d826d6 100644
--- a/doc/man/man5/slurm.conf.5
+++ b/doc/man/man5/slurm.conf.5
@@ -267,6 +267,14 @@ The default value is "/tmp".
 If any slurm daemons terminate abnormally, their core files will also be written 
 into this directory.
+Identifies the type of switch or interconnect used for application communications. 
+Acceptable values include
+"switch/none" for switches not requiring special processing for job launch 
+or termination (Myrinet, Ethernet, and InfiniBand), and
+"switch/elan" for the Quadrics Elan 3 or Elan 4 interconnect.
+The default value is "switch/none".
 Fully qualified pathname of the file system available to user jobs for 
 temporary storage. This parameter is used in establishing a node's \fBTmpDisk\fR space. 
@@ -547,6 +555,8 @@ SlurmdSpoolDir=/usr/local/slurm/slurmd.spool
diff --git a/etc/slurm.conf.example b/etc/slurm.conf.example
index 0ca9ffed9a6..6e2f39cf907 100644
--- a/etc/slurm.conf.example
+++ b/etc/slurm.conf.example
@@ -157,6 +157,7 @@
 # SchedulerAuth=42
 # SchedulerPort=7321
 # o Define the job completion logging mechanism to be used
@@ -166,6 +167,18 @@
 # JobCompType=jobcomp/filetxt
+# o Define the switch or interconnect in use.
+# "SwitchType"        : the type of switch or interconnect.
+#     "switch/none"   : the default, supports all switches not requiring
+#                       special set-up for job launch including Myrinet, 
+#                       Ethernet, and InfiniBand.
+#     "switch/elan"   : Quadrics Elan 3 or Elan 4 interconnect.
+# SwitchType=switch/none
 # o Define location where job completion logs are to be written
 #   Interpretation of the parameter is dependent upon the logging 
diff --git a/slurm/ b/slurm/
index adfa83ba61e..d0ca94f116e 100644
--- a/slurm/
+++ b/slurm/
@@ -81,12 +81,10 @@ BEGIN_C_DECLS
    typedef struct slurm_job_credential * slurm_cred_t;
-/* Define qsw_jobinfo_t below to avoid including extraneous slurm headers */
-#ifdef	HAVE_ELAN
-#  ifndef __qsw_jobinfo_t_defined
-#    define  __qsw_jobinfo_t_defined
-     typedef struct qsw_jobinfo *qsw_jobinfo_t;	/* opaque data type */
-#  endif
+/* Define switch_jobinfo_t below to avoid including extraneous slurm headers */
+#ifndef __switch_jobinfo_t_defined
+#  define  __switch_jobinfo_t_defined
+   typedef struct switch_jobinfo *switch_jobinfo_t;	/* opaque data type */
@@ -265,9 +263,7 @@ typedef struct job_step_create_response_msg {
 	uint32_t job_step_id;	/* assigned job step id */
 	char *node_list;	/* list of allocated nodes */
 	slurm_cred_t cred;      /* slurm job credential */
-#ifdef	HAVE_ELAN
-	qsw_jobinfo_t qsw_job;	/* Elan3 switch context, opaque data structure */
+	switch_jobinfo_t switch_job;	/* switch context, opaque data structure */
 } job_step_create_response_msg_t;
 typedef struct {
@@ -356,9 +352,7 @@ typedef struct resource_allocation_and_run_response_msg {
 	uint32_t job_step_id;	/* assigned step id */
 	slurm_cred_t cred;      /* slurm job credential */
-#ifdef HAVE_ELAN
-	qsw_jobinfo_t qsw_job;	/* Elan3 switch context, opaque data type */
+	switch_jobinfo_t switch_job;	/* switch context, opaque data type */
 } resource_allocation_and_run_response_msg_t;
 typedef struct partition_info_msg {
@@ -409,12 +403,12 @@ typedef struct slurm_ctl_conf {
 	uint32_t slurmd_port;	/* default communications port to slurmd */
 	char *slurmd_spooldir;	/* where slurmd put temporary state info */
 	char *slurmd_pidfile;   /* where to put slurmd pidfile           */
-	char *switch_type;	/* switch or interconnect type */
 	uint16_t slurmd_timeout;/* how long slurmctld waits for slurmd before 
 				 * considering node DOWN */
 	char *slurm_conf;	/* pathname of slurm config file */
 	char *state_save_location;/* pathname of slurmctld state save
 				 * directory */
+	char *switch_type;	/* switch or interconnect type */
 	char *tmp_fs;		/* pathname of temporary file system */
 	uint16_t wait_time;	/* default job --wait time */
 	char *job_credential_private_key;	/* path to private key */
diff --git a/src/api/config_info.c b/src/api/config_info.c
index 44841599b57..b41504cac26 100644
--- a/src/api/config_info.c
+++ b/src/api/config_info.c
@@ -129,11 +129,8 @@ void slurm_print_ctl_conf ( FILE* out,
 	fprintf(out, "StateSaveLocation = %s\n", 
-#if 0
-Not quite ready to check in
 	fprintf(out, "SwitchType        = %s\n",
 	fprintf(out, "TmpFS             = %s\n", 
 	fprintf(out, "WaitTime          = %u\n", 
diff --git a/src/common/ b/src/common/
index fa75335cd20..6bc4a9d11e3 100644
--- a/src/common/
+++ b/src/common/
@@ -5,12 +5,6 @@ AUTOMAKE_OPTIONS = foreign
 INCLUDES     = -I$(top_srcdir) $(SSL_CPPFLAGS) 
-elan_sources = qsw.c qsw.h elanhosts.c elanhosts.h
-elan_sources = 
 noinst_LTLIBRARIES = 			\ 			\ 		\
@@ -34,6 +28,7 @@ libcommon_la_SOURCES = 			\
 	plugin.c plugin.h		\
 	plugrack.c plugrack.h		\
 	read_config.c read_config.h	\
+	setenvpf.c setenvpf.h		\
 	slurm_cred.h       		\
 	slurm_cred.c			\
 	slurm_errno.c			\
@@ -53,10 +48,10 @@ libcommon_la_SOURCES = 			\
 	util-net.c util-net.h		\
 	slurm_auth.c slurm_auth.h	\
 	slurm_jobcomp.c slurm_jobcomp.h	\
+	switch.c switch.h		\
 	arg_desc.c arg_desc.h		\
 	macros.h			\
-	hostlist.c hostlist.h		\
-	$(elan_sources) 
+	hostlist.c hostlist.h
 libdaemonize_la_SOURCES =  		\
 	daemonize.c       	 	\
@@ -67,9 +62,5 @@ libeio_la_SOURCES = 	   		\
 	eio.c eio.h	   		\
 	io_hdr.c io_hdr.h
-EXTRA_libcommon_la_SOURCES =    	\
-	qsw.c qsw.h          		\
-	elanhosts.c elanhosts.h
-libcommon_la_LIBADD   = $(SSL_LIBS) $(ELAN_LIBS) -ldl
+libcommon_la_LIBADD   = $(SSL_LIBS) -ldl
 libcommon_la_LDFLAGS  = $(SSL_LDFLAGS)	
diff --git a/src/common/elanhosts.c b/src/common/elanhosts.c
deleted file mode 100644
index 47fcda1c210..00000000000
--- a/src/common/elanhosts.c
+++ /dev/null
@@ -1,387 +0,0 @@
- *  $Id$
- *****************************************************************************
- *  Copyright (C) 2001-2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Mark Grondona <>.
- *  UCRL-CODE-2003-005.
- *  
- *  This file is part of Pdsh, a parallel remote shell program.
- *  For details, see <>.
- *  
- *  Pdsh is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  Pdsh is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with Pdsh; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-#if     HAVE_CONFIG_H
-#include "config.h"
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <assert.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include "src/common/list.h"
-#include "src/common/hostlist.h"
-#include "elanhosts.h"
-/* Default ElanId config file */
-#define ELANID_CONFIG_FILE "/etc/elanhosts"
- * Error strings for error codes returned by parse_elanid_config()
- */
-static char *errstr[] = 
-{ "No error", 
-  "Out of memory!",
-  "Parse error", 
-  "Number of ElanIds specified != number of hosts",
-  "Type must be \"eip\" \"eth\" or \"other\"",
- *  Container for converting hostnames to ElanIDs
- */
-struct elan_info {
-    elanhost_type_t type;     /* type of entry                              */
-    int             elanid;   /* ElanID corresponding to this hostname      */
-    char           *hostname; /* Resolveable hostname                       */
-struct elanhost_config {
-#ifndef NDEBUG
-    int magic;
-#   define ELANHOST_CONFIG_MAGIC 0xe100e100
-    int maxid;         /* Storage for max ElanID in config                   */
-	List elanid_list;  /* List of elan_info objects describing configuration */
-    char errstr[1024]; /* String describing last error from this object      */
- * Static Prototypes:
- */
-static elanhost_config_t _elanhost_config_alloc(void);
-static void _elanhost_err(elanhost_config_t ec, const char *fmt, ...);
-static int _find_host(struct elan_info *ei, char *key);
-static int _parse_elanid_config(elanhost_config_t ec, const char *path);
-static int _parse_elanid_line(elanhost_config_t ec, char *buf); 
-static struct elan_info * _elan_info_create(elanhost_type_t type, 
-                                            int elanid, char *hostname);
-static void _elan_info_destroy(struct elan_info *ei);
-elanhost_config_t elanhost_config_create()
-    return _elanhost_config_alloc();
-int elanhost_config_read(elanhost_config_t ec, const char *filename)
-    assert(ec != NULL);
-    assert(ec->magic == ELANHOST_CONFIG_MAGIC);
-    assert(ec->elanid_list != NULL);
-    if (filename == NULL)
-        filename = ELANID_CONFIG_FILE;
-    if (_parse_elanid_config(ec, filename) < 0) 
-        return(-1);
-    return(0);
-void elanhost_config_destroy(elanhost_config_t ec)
-    assert(ec != NULL);
-    assert(ec->magic == ELANHOST_CONFIG_MAGIC);
-    list_destroy(ec->elanid_list);
-    assert(ec->magic = ~ELANHOST_CONFIG_MAGIC);
-    free(ec);
-int elanhost_config_maxid(elanhost_config_t ec)
-    assert(ec != NULL);
-    assert(ec->magic == ELANHOST_CONFIG_MAGIC);
-    return ec->maxid;
-int elanhost_host2elanid(elanhost_config_t ec, char *host)
-    struct elan_info *ei;
-    assert(ec != NULL);
-    assert(host != NULL);
-    assert(ec->magic == ELANHOST_CONFIG_MAGIC);
-    ei = list_find_first(ec->elanid_list, (ListFindF) _find_host, host);
-    if (!ei) {
-        _elanhost_err(ec, "Unable to find host \"%s\" in configuration", host);
-        return -1;
-    }
-    return ei->elanid;
-const char *elanhost_config_err(elanhost_config_t ec)
-    return ec->errstr;
-struct elanid_find_arg {
-    elanhost_type_t type;
-    int elanid;
-static int _find_elanid(struct elan_info *ei, struct elanid_find_arg *arg)
-    if (ei->type != arg->type)
-        return 0;
-    if (ei->elanid != arg->elanid)
-        return 0;
-    return 1;
-char *elanhost_elanid2host(elanhost_config_t ec, elanhost_type_t type, int eid)
-    struct elan_info *ei;
-    struct elanid_find_arg arg;
-    assert(ec != NULL);
-    assert(eid >= 0);
-    assert(ec->magic == ELANHOST_CONFIG_MAGIC);
-    arg.type = type;
-    arg.elanid = eid;
-    ei = list_find_first(ec->elanid_list, (ListFindF) _find_elanid, &arg);
-    if (!ei) {
-        _elanhost_err(ec, "Unable to find host with type=%d elanid=%d", 
-                         type, eid);
-        return(NULL);
-    }
-    return ei->hostname;
-static elanhost_config_t _elanhost_config_alloc(void)
-    elanhost_config_t new = malloc(sizeof(*new));
-    new->maxid = -1;
-    new->elanid_list = list_create((ListDelF) _elan_info_destroy);
-    assert(new->magic = ELANHOST_CONFIG_MAGIC);
-    return new;
-static void _elanhost_err(elanhost_config_t ec, const char *fmt, ...)
-    va_list ap;
-    assert(ec != NULL);
-    assert(fmt != NULL);
-    va_start(ap, fmt);
-    vsnprintf(ec->errstr, 1024, fmt, ap);
-    va_end(ap);
-    return;
- * Parse the "elanhosts" config file which has the form
- * 
- *   ElanIds  Hostnames
- *   [n-m]    host_n,...,host_m
- *   [n-m]    host[n-m]
- *   etc.
- *
- * and which maps ElanIds to hostnames on the cluster.
- * The results are stored in the config object's elanid_list member. 
- *
- * Returns 0 on Success, and an error code < 0 on failure.
- */
-static int _parse_elanid_config(elanhost_config_t ec, const char *path)
-	char  buf[4096];
-	int   line;
-	FILE *fp;
-	if (!(fp = fopen(path, "r"))) {
-		_elanhost_err(ec, "failed to open %s\n",  path);
-		return -1;
-	}
-	line = 1;
-	while (fgets(buf, 4096, fp)) {
-		int rc;
-		if ((rc = _parse_elanid_line(ec, buf)) < 0) {
-			_elanhost_err(ec, "%s: line %d: %s", path, line, errstr[-rc]);
-			return -1;
-		}
-		line++;
-	}
-	if (fclose(fp) < 0)
-		_elanhost_err(ec, "close(%s): %m", path);
-	return 0;
- *  Translate type strings "eip," "eth," or "other" into their
- *   corresponding elanhost_type_t number
- */
-static elanhost_type_t _get_type_num(char *type)
-    if (strcasecmp(type, "eip") == 0)
-        return ELANHOST_EIP;
-    else if (strcasecmp(type, "eth") == 0)
-        return ELANHOST_ETH;
-    else if (strcasecmp(type, "other") == 0)
-        return ELANHOST_OTHER;
-    else
-        return -1;
- *  Parse one line of elanId list appending results to list "eil"
- *
- *  Returns -1 for parse error, -2 if the number of elanids specified
- *  doesn't equal the number of hosts.
- *
- *  Returns 0 on success
- */
-static int 
-_parse_elanid_line(elanhost_config_t ec, char *buf)
-	hostlist_t  el, hl;
-	const char *separators = " \t\n";
-    char       *type;
-	char       *elanids;
-	char       *hosts;
-	char       *sp, *s;
-	int         rc = 0;
-    int         typenum;
-	/* 
-	 *  Nullify any comments
-	 */
-	if ((s = strchr(buf, '#')))
-		*s = '\0';
-    if (!(type = strtok_r(buf, separators, &sp)))
-        return 0;
-	if (!(elanids = strtok_r(NULL, separators, &sp)))
-		return -1;
-	if (!(hosts = strtok_r(NULL, separators, &sp)))
-		return -2;
-	el = hostlist_create(NULL);
-	hl = hostlist_create(NULL);
-	if (!el || !hl) {
-		rc = -1;
-		goto done;
-	}
-	if (hostlist_push(el, elanids) != hostlist_push(hl, hosts)) {
-		rc = -3; 
-		goto done;
-	}
-    if ((typenum = _get_type_num(type)) < 0)
-        return -4;
-	while ((s = hostlist_shift(el))) {
-		char *eptr;
-		int   elanid = (int) strtoul(s, &eptr, 10);
-		if (*eptr != '\0') {
-			rc = -2;
-			goto done;
-		}
-		free(s);
-		if (!(s = hostlist_shift(hl))) {
-			rc = -1;
-			goto done;
-		}
-        if (elanid > ec->maxid)
-            ec->maxid = elanid;
-		list_append(ec->elanid_list, _elan_info_create(typenum, elanid, s));
-	}
-    done:
-	hostlist_destroy(el);
-	hostlist_destroy(hl);
-	return rc;
-static struct elan_info *
-_elan_info_create(elanhost_type_t type, int elanid, char *hostname)
-	struct elan_info *ei = (struct elan_info *) malloc(sizeof(*ei));
-    ei->type     = type;
-	ei->elanid   = elanid;
-	ei->hostname = hostname;
-	return ei;
-static void
-_elan_info_destroy(struct elan_info *ei)
-	if (ei->hostname)
-		free(ei->hostname);
-	free(ei);
- *  List Find function for mapping hostname to an ElanId
- */
-static int _find_host(struct elan_info *ei, char *key)
-    if (strcmp(ei->hostname, key) != 0)
-        return 0;
-    else
-        return 1;
- * vi:tabstop=4 shiftwidth=4 expandtab
- */
diff --git a/src/common/elanhosts.h b/src/common/elanhosts.h
deleted file mode 100644
index d5cb0bb6526..00000000000
--- a/src/common/elanhosts.h
+++ /dev/null
@@ -1,121 +0,0 @@
- *  $Id$
- *****************************************************************************
- *  Copyright (C) 2001-2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Mark Grondona <>.
- *  UCRL-CODE-2003-005.
- *  
- *  This file is part of Pdsh, a parallel remote shell program.
- *  For details, see <>.
- *  
- *  Pdsh is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  Pdsh is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with Pdsh; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-#ifndef _ELANHOSTS_H
-#define _ELANHOSTS_H
- * Type of Elan "hostname" 
- *   Hostname corresponds to the eip adapter, an ethernet adapter, or "other"
- */
-typedef enum {
-} elanhost_type_t;
-/*  Opaque type which holds the elanhost configuration
- */
-typedef struct elanhost_config * elanhost_config_t;
- *  Functions
- */
- * Create an empty Elanhost config object
- */
-elanhost_config_t elanhost_config_create(void);
- *  Read elanhosts configuration from `file'  
- *    (Default /etc/elanhosts)
- *
- *  Config file format is as follows:
- *
- *    Type  ElanIDs  Hostnames
- *
- *  The "type" field may be "eip" for eip interface,  "eth" for an
- *    ethernet interface, or "other" for anything else. ("eth" and
- *    "other" are equivalent at this time)
- *
- *  The "ElanIDs" field consists of a list of one or more ElanIDs in
- *    the form "[i-j,n-m,..]" or just "N" for a single ElanID.
- *
- *  The "Hostname" field consists of the hostnames which correspond
- *    to the ElanIDs. If the hostnames have a numeric suffix a bracketed
- *    hostlist is allowed (see hostlist.[ch]) 
- *
- *  For Example:
- *
- *    Type  ElanIDs  Hostnames
- *    eip   [0-10]   host[0-10]
- *    eth   [0-10]   ehost[0-10]
- *    eth   [0,1]    host0-eth1,host1-eth1
- *
- *  Returns 0 on succes, -1 for failure.
- *
- */
-int elanhost_config_read(elanhost_config_t ec, const char *filename);
- *  Destroy an elanhost configuration object.
- */
-void elanhost_config_destroy(elanhost_config_t conf);
- *  Given a hostname, return the corresponding ElanID
- *
- *  Returns the ElanId on success, -1 if no host matching "hostname"
- *    was found in the configuration.
- *
- */
-int elanhost_host2elanid(elanhost_config_t ec, char *host);
- *  Given an ElanId and adapter type, return the first matching hostname
- *    from the configuration.
- */
-char *elanhost_elanid2host(elanhost_config_t ec, 
-		           elanhost_type_t type, int elanid);
- *  Returns the max ElanID from the configuration
- */
-int elanhost_config_maxid(elanhost_config_t ec);
- *  Returns the last error string generated for the elan config obj `ec'
- */
-const char *elanhost_config_err(elanhost_config_t ec);
diff --git a/src/common/plugin.c b/src/common/plugin.c
index 56fa3d53ac4..a48b4773f26 100644
--- a/src/common/plugin.c
+++ b/src/common/plugin.c
@@ -60,7 +60,8 @@ plugin_peek( const char *fq_path,
 	} else {
 		dlclose( plug );
-		error( "%s: not a SLURM plugin", fq_path );
+		/* could be a vestigial library; log as verbose, not as an error */
+		verbose( "%s: not a SLURM plugin", fq_path );
 		return SLURM_ERROR;
 	if ( ( version = (uint32_t *) dlsym( plug, PLUGIN_VERSION ) ) != NULL ) {
@@ -69,7 +70,8 @@ plugin_peek( const char *fq_path,
 	} else {
 		dlclose( plug );
-		error( "%s: not a SLURM plugin", fq_path );
+		/* could be a vestigial library; log as verbose, not as an error */
+		verbose( "%s: not a SLURM plugin", fq_path );
 		return SLURM_ERROR;
@@ -93,7 +95,7 @@ plugin_load_from_file( const char *fq_path )
         plug = dlopen( fq_path, RTLD_NOW );
         if ( plug == NULL ) {
-		debug2( "plugin_load_from_file: dlopen(%s): %s",
+		debug( "plugin_load_from_file: dlopen(%s): %s",
 			dlerror() );
                 return PLUGIN_INVALID_HANDLE;
@@ -103,6 +105,7 @@ plugin_load_from_file( const char *fq_path )
         if ( ( dlsym( plug, PLUGIN_NAME ) == NULL ) ||
              ( dlsym( plug, PLUGIN_TYPE ) == NULL ) ||
              ( dlsym( plug, PLUGIN_VERSION ) == NULL ) ) {
+		debug( "plugin_load_from_file: invalid symbol");
                 /* slurm_seterrno( SLURM_PLUGIN_SYMBOLS ); */
                 return PLUGIN_INVALID_HANDLE;
@@ -113,7 +116,8 @@ plugin_load_from_file( const char *fq_path )
         if ( ( init = dlsym( plug, "init" ) ) != NULL ) {
                 if ( (*init)() != 0 ) {
-			debug( "plugin_load_from_file(%s): init() returned SLURM_ERROR", fq_path );
+			debug( "plugin_load_from_file(%s): init() returned SLURM_ERROR", 
+				fq_path );
                         (void) dlclose( plug );
                         return PLUGIN_INVALID_HANDLE;
diff --git a/src/common/qsw.c b/src/common/qsw.c
deleted file mode 100644
index e942834f531..00000000000
--- a/src/common/qsw.c
+++ /dev/null
@@ -1,1058 +0,0 @@
- *  qsw.c - Library routines for initiating jobs on QsNet. 
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Jim Garlick <>
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with SLURM; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-#if     HAVE_CONFIG_H
-#  include "config.h"
-#  include <pthread.h>
-#endif /* WITH_PTHREADS */
-#include <sys/param.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <syslog.h>
-#include <errno.h>
-#include <string.h>
-#include <paths.h>
-#include <stdarg.h>
-#include <ctype.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <limits.h>	/* INT_MAX */
-#include <stdio.h>
-# include <elan/elanctrl.h>
-# include <elan/capability.h>
-/* These are taken from elan3/elanvp.h, which we don't
- *  want to include here since we are using the new
- *  version-nonspecific libelanctrl.
- *  (XXX: What is the equivalent in libelanctrl?)
- */
-# define ELAN_USER_BASE_CONTEXT_NUM    0x020
-# define ELAN_USER_TOP_CONTEXT_NUM     0x7ff
-# define Version      cap_version
-# define HighNode     cap_highnode
-# define LowNode      cap_lownode
-# define HighContext  cap_highcontext
-# define LowContext   cap_lowcontext
-# define MyContext    cap_mycontext
-# define Bitmap       cap_bitmap
-# define Type         cap_type
-# define UserKey      cap_userkey
-# define RailMask     cap_railmask
-# define Values       key_values
-# include <elan3/elan3.h>
-# include <elan3/elanvp.h>
-# error "Must have either libelan3 or libelanctrl to compile this module!"
-#endif /* HAVE_LIBELANCTRL */
-#include <rms/rmscall.h>
-#include <slurm/slurm_errno.h>
-#include "src/common/elanhosts.h"
-#include "src/common/xassert.h"
-#include "src/common/strlcpy.h"
-#include "src/common/bitstring.h"
-#include "src/common/log.h"
-#include "src/common/pack.h"
-#include "src/common/qsw.h"
- * Definitions local to this module.
- */
-#define QSW_JOBINFO_MAGIC 	0xf00ff00e
-#define QSW_LIBSTATE_MAGIC 	0xf00ff00f
-/* we will allocate program descriptions in this range */
-/* XXX note: do not start at zero as libelan shifts to get unique shm id */
-#define QSW_PRG_START  		1
-#define QSW_PRG_END    		INT_MAX
-#define QSW_PRG_INVAL		(-1)
-/* we allocate elan hardware context numbers in this range */
-/* XXX: Temporary workaround for slurm/222 (qws sw-kernel/5478) 
- *      (sys_validate_cap does not allow ELAN_USER_TOP_CONTEXT_NUM)
- */
-#define QSW_CTX_INVAL		(-1)
- * We are going to some trouble to keep these defs private so slurm
- * hackers not interested in the interconnect details can just pass around
- * the opaque types.  All use of the data structure internals is local to this
- * module.
- */
-struct qsw_libstate {
-	int ls_magic;
-	int ls_prognum;
-	int ls_hwcontext;
-struct qsw_jobinfo {
-	int             j_magic;
-	int             j_prognum;
-/* Copy library state */
-#define _copy_libstate(dest, src) do { 			\
-	assert((src)->ls_magic == QSW_LIBSTATE_MAGIC); 	\
-	assert((dest)->ls_magic == QSW_LIBSTATE_MAGIC); \
-	memcpy(dest, src, sizeof(struct qsw_libstate));	\
-} while (0)
-/* Lock on library state */
-#define _lock_qsw() do {				\
-	int err;					\
-	err = pthread_mutex_lock(&qsw_lock);		\
-	assert(err == 0);				\
-} while (0)
-#define _unlock_qsw() do {				\
-	int err;					\
-	err = pthread_mutex_unlock(&qsw_lock);		\
-	assert(err == 0);				\
-} while (0)
- * Globals
- */
-static qsw_libstate_t qsw_internal_state = NULL;
-static pthread_mutex_t qsw_lock = PTHREAD_MUTEX_INITIALIZER;
-static elanhost_config_t elanconf = NULL;
- * Allocate a qsw_libstate_t.
- *   lsp (IN)		store pointer to new instantiation here
- *   RETURN		0 on success, -1 on failure (sets errno)
- */
-qsw_alloc_libstate(qsw_libstate_t *lsp)
-	qsw_libstate_t new;
-	assert(lsp != NULL);
-	new = (qsw_libstate_t)malloc(sizeof(struct qsw_libstate));
-	if (!new)
-		slurm_seterrno_ret(ENOMEM);
-	new->ls_magic = QSW_LIBSTATE_MAGIC;
-	*lsp = new;
-	return 0;
- * Free a qsw_libstate_t.
- *   ls (IN)		qsw_libstate_t to free
- */
-qsw_free_libstate(qsw_libstate_t ls)
-	assert(ls->ls_magic == QSW_LIBSTATE_MAGIC);
-	ls->ls_magic = 0;
-	free(ls);
- * Pack libstate structure in a format that can be shipped over the
- * network and unpacked on a different architecture.
- *   ls (IN)		libstate structure to be packed
- *   buffer (IN/OUT)	where to store packed data
- *   RETURN		#bytes unused in 'data'
- */
-qsw_pack_libstate(qsw_libstate_t ls, Buf buffer)
-	int offset;
-	assert(ls->ls_magic == QSW_LIBSTATE_MAGIC);
-	offset = get_buf_offset(buffer);
-	pack32(ls->ls_magic, buffer);
-	pack32(ls->ls_prognum, buffer);
-	pack32(ls->ls_hwcontext, buffer);
-	return (get_buf_offset(buffer) - offset);
- * Unpack libstate packed by qsw_pack_libstate.
- *   ls (IN/OUT)	where to put libstate structure
- *   buffer (IN/OUT)	where to get packed data
- *   RETURN		#bytes unused or -1 on error (sets errno)
- */
-qsw_unpack_libstate(qsw_libstate_t ls, Buf buffer)
-	int offset;
-	assert(ls->ls_magic == QSW_LIBSTATE_MAGIC);
-	offset = get_buf_offset(buffer);
-	safe_unpack32(&ls->ls_magic, buffer);
-	safe_unpack32(&ls->ls_prognum, buffer);
-	safe_unpack32(&ls->ls_hwcontext, buffer);
-	if (ls->ls_magic != QSW_LIBSTATE_MAGIC)
-		goto unpack_error;
-	return SLURM_SUCCESS; 
-    unpack_error:
-	slurm_seterrno_ret(EBADMAGIC_QSWLIBSTATE); /* corrupted libstate */
-	return SLURM_ERROR;
- * Seed the random number generator.  This can be called multiple times,
- * but srand48 will only be called once per program invocation.
- */
-static void
-	static int done = 0;
-	if (!done) {
-		srand48(getpid());
-		done = 1;
-	}
- * Initialize this library, optionally restoring a previously saved state.
- *   oldstate (IN)	old state retrieved from qsw_fini() or NULL
- *   RETURN		0 on success, -1 on failure (sets errno)
- */
-qsw_init(qsw_libstate_t oldstate)
-	qsw_libstate_t new;
-	assert(qsw_internal_state == NULL);
-	_srand_if_needed();
-	if (qsw_alloc_libstate(&new) < 0)
-		return -1; /* errno set by qsw_alloc_libstate */
-	if (oldstate)
-		_copy_libstate(new, oldstate);
-	else {
-		new->ls_prognum = QSW_PRG_START;
-		new->ls_hwcontext = QSW_CTX_START;
-	}
-	qsw_internal_state = new;
-	return 0;
- * Finalize use of this library.  If 'savestate' is non-NULL, final
- * state is copied there before it is destroyed.
- *   savestate (OUT)	place to put state
- */
-qsw_fini(qsw_libstate_t savestate)
-	assert(qsw_internal_state != NULL);
-	_lock_qsw();
-	if (savestate)
-		_copy_libstate(savestate, qsw_internal_state);
-	qsw_free_libstate(qsw_internal_state);
-	qsw_internal_state = NULL;
-	_unlock_qsw();
- * Allocate a qsw_jobinfo_t.
- *   jp (IN)		store pointer to new instantiation here
- *   RETURN		0 on success, -1 on failure (sets errno)
- */
-qsw_alloc_jobinfo(qsw_jobinfo_t *jp)
-	qsw_jobinfo_t new; 
-	assert(jp != NULL);
-	new = (qsw_jobinfo_t)malloc(sizeof(struct qsw_jobinfo));
-	if (!new)
-		slurm_seterrno_ret(ENOMEM);
-	new->j_magic = QSW_JOBINFO_MAGIC;
-	*jp = new;
-	return 0;
- * Make a copy of a qsw_jobinfo_t.
- *   j (IN)		qsw_jobinfo_t to be copied
- *   RETURN		qsw_jobinfo_t on success, NULL on failure
- */
-qsw_copy_jobinfo(qsw_jobinfo_t j)
-	qsw_jobinfo_t new; 
-	if (qsw_alloc_jobinfo(&new))
-		return NULL;
-	memcpy(new, j, sizeof(struct qsw_jobinfo));
-	return new;
- * Free a qsw_jobinfo_t.
- *   ls (IN)		qsw_jobinfo_t to free
- */
-qsw_free_jobinfo(qsw_jobinfo_t j)
-	if (j == NULL)
-		return;
-	assert(j->j_magic == QSW_JOBINFO_MAGIC);
-	j->j_magic = 0;
-	free(j);
- * Pack jobinfo structure in a format that can be shipped over the
- * network and unpacked on a different architecture.
- *   j (IN)		jobinfo structure to be packed
- *   buffer (OUT)		where to store packed data
- *   RETURN		#bytes unused in 'data' or -1 on error (sets errno)
- * NOTE: Keep in sync with QSW_PACK_SIZE above
- */
-qsw_pack_jobinfo(qsw_jobinfo_t j, Buf buffer)
-	int i, offset;
-	assert(j->j_magic == QSW_JOBINFO_MAGIC);
-	offset = get_buf_offset(buffer);
-	pack32(j->j_magic, 		buffer);
-	pack32(j->j_prognum, 		buffer);
-	for (i = 0; i < 4; i++)
-		pack32(j->j_cap.UserKey.Values[i], buffer);
-	pack16(j->j_cap.Type, 		buffer);
-#  ifdef ELAN_CAP_ELAN3
-	pack16(j->j_cap.cap_elan_type,  buffer);
-#  else
-	j->j_cap.cap_spare = ELAN_CAP_UNINITIALISED;
-	pack16(j->j_cap.cap_spare,      buffer);
-#  endif 
-	pack16(j->j_cap.padding, 	buffer);
-	pack32(j->j_cap.Version,	buffer);
-	pack32(j->j_cap.LowContext, 	buffer);
-	pack32(j->j_cap.HighContext, 	buffer);
-	pack32(j->j_cap.MyContext, 	buffer);
-	pack32(j->j_cap.LowNode, 	buffer);
-	pack32(j->j_cap.HighNode, 	buffer);
-	pack32(j->j_cap.Entries, 	buffer);
-	pack32(j->j_cap.RailMask, 	buffer);
-	for (i = 0; i < ELAN_BITMAPSIZE; i++)
-		pack32(j->j_cap.Bitmap[i], buffer);
-	return (get_buf_offset(buffer) - offset);
- * Unpack jobinfo structure packed by qsw_pack_jobinfo.
- *   j (IN/OUT)		where to store libstate structure
- *   buffer (OUT)		where to load packed data
- *   RETURN		#bytes unused in 'data' or -1 on error (sets errno)
- */
-qsw_unpack_jobinfo(qsw_jobinfo_t j, Buf buffer)
-	int i, offset;
-	assert(j->j_magic == QSW_JOBINFO_MAGIC);
-	offset = get_buf_offset(buffer);
-	safe_unpack32(&j->j_magic, 		buffer);
-	safe_unpack32(&j->j_prognum, 		buffer);
-	for (i = 0; i < 4; i++)
-		safe_unpack32(&j->j_cap.UserKey.Values[i], buffer);
-	safe_unpack16(&j->j_cap.Type,		buffer);
-#  ifdef ELAN_CAP_ELAN3
-	safe_unpack16(&j->j_cap.cap_elan_type,  buffer);
-#  else
-	safe_unpack16(&j->j_cap.cap_spare,      buffer);
-#  endif
-	safe_unpack16(&j->j_cap.padding, 	buffer);	    
-	safe_unpack32(&j->j_cap.Version,	buffer); 	    
-	safe_unpack32(&j->j_cap.LowContext, 	buffer);
-	safe_unpack32(&j->j_cap.HighContext,	buffer);
-	safe_unpack32(&j->j_cap.MyContext,	buffer);
-	safe_unpack32(&j->j_cap.LowNode, 	buffer);
-	safe_unpack32(&j->j_cap.HighNode,	buffer);
-	safe_unpack32(&j->j_cap.Entries, 	buffer);
-	safe_unpack32(&j->j_cap.RailMask, 	buffer);
-	for (i = 0; i < ELAN_BITMAPSIZE; i++)
-		safe_unpack32(&j->j_cap.Bitmap[i], buffer);
-	if (j->j_magic != QSW_JOBINFO_MAGIC)
-		goto unpack_error;
-    unpack_error:
-	slurm_seterrno_ret(EBADMAGIC_QSWJOBINFO);
-	return SLURM_ERROR;
- * Allocate a program description number.  Program descriptions, which are the
- * key abstraction maintained by the rms.o kernel module, must not be used
- * more than once simultaneously on a single node.  We allocate one to each
- * parallel job which more than meets this requirement.  A program description
- * can be compared to a process group, except there is no way for a process to
- * disassociate itself or its children from the program description.  
- * If the library is initialized, we allocate these consecutively, otherwise 
- * we generate a random one, assuming we are being called by a transient 
- * program like pdsh.  Ref: rms_prgcreate(3).
- */
-static int
-	int new;
-	if (qsw_internal_state) {
-		_lock_qsw();
-		new = qsw_internal_state->ls_prognum;
-		if (new == QSW_PRG_END)
-			qsw_internal_state->ls_prognum = QSW_PRG_START;
-		else
-			qsw_internal_state->ls_prognum++;
-		_unlock_qsw();
-	} else {
-		_srand_if_needed();
-		new = lrand48() % (QSW_PRG_END - QSW_PRG_START + 1);
-		new += QSW_PRG_START;
-	}
-	return new;
- * Elan hardware context numbers are an adapter resource that must not be used
- * more than once on a single node.  One is allocated to each process on the
- * node that will be communication over Elan.  In order for processes on the 
- * same node to communicate with one another and with other nodes across QsNet,
- * they must use contexts in the hi-lo range of a common capability.
- * If the library is initialized, we allocate these consecutively, otherwise 
- * we generate a random one, assuming we are being called by a transient 
- * program like pdsh.  Ref: rms_setcap(3).
- */
-static int
-_generate_hwcontext(int num)
-	int new;
-	if (qsw_internal_state) {
-		_lock_qsw();
-		if (qsw_internal_state->ls_hwcontext + num - 1 > QSW_CTX_END)
-			qsw_internal_state->ls_hwcontext = QSW_CTX_START;
-		new = qsw_internal_state->ls_hwcontext;
-		qsw_internal_state->ls_hwcontext += num;
-		_unlock_qsw();
-	} else {
-		_srand_if_needed();
-		new = lrand48() % 
-		      (QSW_CTX_END - (QSW_CTX_START + num - 1) - 1);
-		new +=  QSW_CTX_START;
-	}
-	return new;
- * Initialize the elan capability for this job.
- */
-static void
-_init_elan_capability(ELAN_CAPABILITY *cap, int nprocs, int nnodes,
-		bitstr_t *nodeset, int cyclic_alloc)
-	int i, node_num, full_node_cnt, min_procs_per_node, max_procs_per_node;
-	/* Task count may not be identical for all nodes */
-	full_node_cnt = nprocs % nnodes;
-	min_procs_per_node = nprocs / nnodes;
-	max_procs_per_node = (nprocs + nnodes - 1) / nnodes;
-	_srand_if_needed();
-	/* start with a clean slate */
-	elan_nullcap(cap);
-	elan3_nullcap(cap);
-	/* initialize for single rail and either block or cyclic allocation */
-	if (cyclic_alloc)
-		cap->Type = ELAN_CAP_TYPE_CYCLIC;
-	else
-		cap->Type = ELAN_CAP_TYPE_BLOCK;
-	cap->RailMask = 1;
-#  ifdef ELAN_CAP_ELAN3
-	cap->cap_elan_type = ELAN_CAP_ELAN3;
-#  else
-	cap->cap_spare = ELAN_CAP_UNINITIALISED;
-#  endif
-	/* UserKey is 128 bits of randomness which should be kept private */
-        for (i = 0; i < 4; i++)
-		cap->UserKey.Values[i] = lrand48();
-	/* set up hardware context range */
-	cap->LowContext = _generate_hwcontext(max_procs_per_node);
-	cap->HighContext = cap->LowContext + max_procs_per_node - 1;
-	/* Note: not necessary to initialize cap->MyContext */
-	/* set the range of nodes to be used and number of processes */
-	cap->LowNode = bit_ffs(nodeset);
-	assert(cap->LowNode != -1);
-	cap->HighNode = bit_fls(nodeset);
-	assert(cap->HighNode != -1);
-	cap->Entries = nprocs;
-	/* set the hw broadcast bit if consecutive nodes */
-	if (abs(cap->HighNode - cap->LowNode) == nnodes - 1)
-	/* set unconditionally per qsw gnat sw-elan/4334 */
-	/* only time we don't want this is unsupported rev A hardware */
-	/*
-	 * Set up cap->Bitmap, which describes the mapping of processes to 
-	 * the nodes in the range of cap->LowNode - cap->Highnode.
-	 * There are (nprocs * nnodes) significant bits in the mask, each 
- 	 * representing a process slot.  Bits are off for process slots 
-	 * corresponding to unallocated nodes.  For example, if nodes 4 and 6 
-	 * are running two processes per node, bits 0,1 (corresponding to the 
-	 * two processes on node 4) and bits 4,5 (corresponding to the two 
-	 * processes running on node 6) are set.  
-	 */
-	node_num = 0;
-	for (i = cap->LowNode; i <= cap->HighNode; i++) {
-		if (bit_test(nodeset, i)) {
-			int j, bit, task_cnt;
-			if (node_num++ < full_node_cnt)
-				task_cnt = max_procs_per_node;
-			else
-				task_cnt = min_procs_per_node;
-			for (j = 0; j < task_cnt; j++) {
-				if (cyclic_alloc)
-					bit = (i-cap->LowNode) + ( j * 
-					 (cap->HighNode - cap->LowNode + 1));
-				else
-					bit = ((i-cap->LowNode)
-					       * max_procs_per_node) + j;
-				assert(bit < (sizeof(cap->Bitmap) * 8));
-				BT_SET(cap->Bitmap, bit);
-			}
-		}
-	}
- * Create all the QsNet related information needed to set up a QsNet parallel
- * program and store it in the qsw_jobinfo struct.  
- * Call this on the "client" process, e.g. pdsh, srun, slurmctld, etc..
- */
-qsw_setup_jobinfo(qsw_jobinfo_t j, int nprocs, bitstr_t *nodeset, 
-		int cyclic_alloc)
-	int nnodes = bit_set_count(nodeset);
-	assert(j != NULL);
-	assert(j->j_magic == QSW_JOBINFO_MAGIC);
-	/* sanity check on args */
-	/* Note: ELAN_MAX_VPS is 512 on "old" Elan driver, 16384 on new. */
-	if ((nprocs <= 0) || (nprocs > ELAN_MAX_VPS) || (nnodes <= 0)) {
-		slurm_seterrno_ret(EINVAL);
-	}
-	/* initialize jobinfo */
-	j->j_prognum = _generate_prognum();
-	_init_elan_capability(&j->j_cap, nprocs, nnodes, nodeset, 
-	                      cyclic_alloc);
-	return 0;
- * Here are the necessary steps to set up to run an Elan MPI parallel program
- * (set of processes) on a node (possibly one of many allocated to the prog):
- *
- * Process 1	Process 2	|	Process 3
- * read args			|
- * fork	-------	rms_prgcreate	|
- * waitpid 	elan3_create	|
- * 		rms_prgaddcap	|
- *		fork N procs ---+------	rms_setcap
- *		wait all	|	setup RMS_ env	
- *				|	setuid, etc.
- *				|	exec mpi process
- *				|	
- *		exit		|
- * rms_prgdestroy		|
- * exit				|     (one pair of processes per mpi proc!)
- *
- * - The first fork is required because rms_prgdestroy can't occur in the 
- *   process that calls rms_prgcreate (since it is a member, ECHILD).
- * - The second fork is required when running multiple processes per node 
- *   because each process must announce its use of one of the hw contexts 
- *   in the range allocated in the capability.
- */
- * Process 1: issue the rms_prgdestroy for the job.
- */
-qsw_prgdestroy(qsw_jobinfo_t jobinfo)
-	if (rms_prgdestroy(jobinfo->j_prognum) < 0) {
-		/* translate errno values to more descriptive ones */
-		switch (errno) {
-			case ECHILD:
-				slurm_seterrno(ECHILD_PRGDESTROY);
-				break;
-			case EEXIST:
-				slurm_seterrno(EEXIST_PRGDESTROY);
-				break;
-			default:
-				break;
-		}
-		return -1;
-	}
-	return 0;
- * Process 2: Destroy the context after children are dead.
- */
-qsw_prog_fini(qsw_jobinfo_t jobinfo)
-	/* Do nothing... apparently this will be handled by
-	 *  callbacks in the kernel exit handlers ... 
-	 */
-#if 0
-	if (jobinfo->j_ctx) {
-		elan3_control_close(jobinfo->j_ctx);
-		jobinfo->j_ctx = NULL;
-	}
- * Process 2: Create the context and make capability available to children.
- */
-qsw_prog_init(qsw_jobinfo_t jobinfo, uid_t uid)
-	int err;
-	int i, nrails;
-	nrails = elan_nrails(&jobinfo->j_cap);
-	for (i = 0; i < nrails; i++) {
-		/*
-		 *  Open up the Elan control device so we can create
-		 *   a new capability.
-		 */
-		if (elanctrl_open(&handle) != 0) {
-			slurm_seterrno(EELAN3CONTROL);
-			goto fail;
-		}
-		/*  Push capability into device driver */
-		if (elanctrl_create_cap(handle, &jobinfo->j_cap) < 0) {
-			error("elanctrl_create_cap: %m");
-			slurm_seterrno(EELAN3CREATE);
-			goto fail;
-		}
-	}
-#else /* !HAVE_LIBELANCTRL */
-	nrails = elan3_nrails(&jobinfo->j_cap);
-	for (i = 0; i < nrails; i++) {
-		ELAN3_CTX *ctx;
-		/* see qsw gnat sw-elan/4334: elan3_control_open can ret -1 */
-		if ((ctx = elan3_control_open(i)) == NULL 
-				|| ctx == (void *)-1) {
-			slurm_seterrno(EELAN3CONTROL);
-			goto fail;
-		}
-		/* make cap known via rms_getcap/rms_ncaps to members 
-		 * of this prgnum */
-		if (elan3_create(ctx, &jobinfo->j_cap) < 0) {
-			/* XXX masking errno value better than not knowing 
-			 * which function failed? */
-		        error("elan3_create(%d): %m", i);
-			slurm_seterrno(EELAN3CREATE); 
-			goto fail;
-		}
-	}
-	/* associate this process and its children with prgnum */
-	if (rms_prgcreate(jobinfo->j_prognum, uid, 1) < 0) {
-		/* translate errno values to more descriptive ones */
-		switch (errno) {
-			case EINVAL:
-				slurm_seterrno(EINVAL_PRGCREATE);
-				break;
-			default:
-				break;
-		}
-		goto fail;
-	}
-	if (rms_prgaddcap(jobinfo->j_prognum, 0, &jobinfo->j_cap) < 0) {
-		/* translate errno values to more descriptive ones */
-		switch (errno) {
-			case ESRCH:
-				slurm_seterrno(ESRCH_PRGADDCAP);
-				break;
-			case EFAULT:
-				slurm_seterrno(EFAULT_PRGADDCAP);
-				break;
-			default:
-				break;
-		}
-		goto fail;
-	}
-	/* note: _elan3_fini() destroys context and makes capability unavail */
-	/* do it in qsw_prog_fini() after app terminates */
-	return 0;
-	err = errno; /* presrve errno in case _elan3_fini touches it */
-	qsw_prog_fini(jobinfo); 
-	slurm_seterrno(err);
-	return -1;
- * Process 3: Do the rms_setcap.
- */
-qsw_setcap(qsw_jobinfo_t jobinfo, int procnum)
-	/*
-	 * Assign elan hardware context to current process.
-	 * - arg1 (0 below) is an index into the kernel's list of caps for this 
-	 *   program desc (added by rms_prgaddcap).  There will be
-	 *   one per rail.
-	 * - arg2 indexes the hw ctxt range in the capability
-	 *   [cap->LowContext, cap->HighContext]
-	 */
-	if (rms_setcap(0, procnum) < 0) {
-		/* translate errno values to more descriptive ones */
-		switch (errno) {
-			case EINVAL:
-				slurm_seterrno(EINVAL_SETCAP);
-				break;
-			case EFAULT:
-				slurm_seterrno(EFAULT_SETCAP);
-				break;
-			default:
-				break;
-		}
-		return -1;
-	}
-	return 0;
- * Return the local elan address (for rail 0) or -1 on failure.
- */
-	int nodeid = -1;
-	ELAN_DEV_IDX    devidx = 0;
-	ELAN_POSITION   position;
-	if (elanctrl_open(&handle) != 0) 
-		slurm_seterrno_ret(EGETNODEID);
-	if (elanctrl_get_position(handle, devidx, &position) != 0)
-		slurm_seterrno_ret(EGETNODEID);
-	nodeid = position.pos_nodeid;
-	ELAN3_CTX *ctx = _elan3_init(0); /* rail 0 */
-	if (ctx) {
-		nodeid = ctx->devinfo.Position.NodeId;
-		elan3_control_close(ctx);
-	}
-	if (nodeid == -1)
-		slurm_seterrno(EGETNODEID);
-	return nodeid;
-static int 
-_read_elanhost_config (void)
-	int rc;
-	if (!(elanconf = elanhost_config_create ()))
-		return (-1);
-	if ((rc = elanhost_config_read (elanconf, NULL)) < 0) {
-		error ("Unable to read Elan config: %s", 
-		       elanhost_config_err (elanconf));
-		elanhost_config_destroy (elanconf);
-		elanconf = NULL;
-		return (-1);
-	}
-	return (0);
-	int maxid = -1;
-	_lock_qsw();
-	if (!elanconf && (_read_elanhost_config() < 0))
-		goto done;
-	maxid = elanhost_config_maxid (elanconf);
-    done:
-	_unlock_qsw();
-	return maxid;
- * Given a hostname, return the elanid or -1 on error.  
- *  Initializes the elanconfig from the default /etc/elanhosts
- *  config file.
- */
-qsw_getnodeid_byhost(char *host)
-	int id = -1;
-	if (host == NULL)
-		return (-1);
-	_lock_qsw();
-	if (!elanconf && (_read_elanhost_config() < 0))
-		goto done;
-	xassert (elanconf != NULL);
-	id = elanhost_host2elanid (elanconf, host);
-    done:
-	_unlock_qsw();
-	return id;
- * Given an elanid, determine the hostname.  Returns -1 on error or the number
- * of characters copied on success.  
- * XXX - assumes RMS style hostnames (see above)
- */
-qsw_gethost_bynodeid(char *buf, int len, int id)
-	int rc = -1;
-	char *hostp;
-	if (id < 0) slurm_seterrno_ret(EGETHOST_BYNODEID);
-	_lock_qsw();
-	if (!elanconf && (_read_elanhost_config() < 0))
-		goto done;
-	if (!(hostp = elanhost_elanid2host (elanconf, ELANHOST_EIP, id))) {
-		slurm_seterrno (EGETHOST_BYNODEID);
-		goto done;
-	}
-	rc = strlcpy (buf, hostp, len);
-    done:
-	_unlock_qsw();
-	return (rc);
- * Send the specified signal to all members of a program description.
- * Returns -1 on failure and sets errno.  Ref: rms_prgsignal(3).
- */
-qsw_prgsignal(qsw_jobinfo_t jobinfo, int signum)
-	if (rms_prgsignal(jobinfo->j_prognum, signum) < 0) {
-		/* translate errno values to more descriptive ones */
-		switch (errno) {
-			case EINVAL:
-				slurm_seterrno(EINVAL_PRGSIGNAL);
-				break;
-			case ESRCH:
-				slurm_seterrno(ESRCH_PRGSIGNAL);
-				break;
-			default:
-				break;
-		}
-		return -1;
-	}
-	return 0;
-#define TRUNC_BITMAP 1
-static void
-_print_capbitmap(FILE *fp, ELAN_CAPABILITY *cap)
-	int bit_max = sizeof(cap->Bitmap)*8 - 1;
-	int bit;
-	bit_max = bit_max >= 64 ? 64 : bit_max;
-	for (bit = bit_max; bit >= 0; bit--)
-		fprintf(fp, "%c", BT_TEST(cap->Bitmap, bit) ? '1' : '0');
-	fprintf(fp, "\n");
-char *
-qsw_capability_string(struct qsw_jobinfo *j, char *buf, size_t size)
-	assert(buf != NULL);
-	assert(j->j_magic == QSW_JOBINFO_MAGIC);
-	cap = &j->j_cap;
-	snprintf(buf, size, "prg=%d ctx=%x.%x nodes=%d.%d",
-	         j->j_prognum, cap->LowContext, cap->HighContext, 
-		 cap->LowNode, cap->HighNode);
-	snprintf(buf, size, "prg=%d ctx=%x.%x nodes=%d.%d entries=%d",
-	         j->j_prognum, cap->LowContext, cap->HighContext, 
-		 cap->LowNode, cap->HighNode, 
-	         cap->Entries);
-	return buf;
-qsw_print_jobinfo(FILE *fp, struct qsw_jobinfo *jobinfo)
-	char str[8192];
-	assert(jobinfo->j_magic == QSW_JOBINFO_MAGIC);
-	fprintf(fp, "__________________\n");
-	fprintf(fp, "prognum=%d\n", jobinfo->j_prognum);
-	cap = &jobinfo->j_cap;
-	/* use elan3_capability_string as a shorter alternative for now */
-	fprintf(fp, "%s\n", elan_capability_string(cap, str));
-#  else
-	fprintf(fp, "%s\n", elan3_capability_string(cap, str));
-#  endif
-	fprintf(fp, "cap.UserKey=%8.8x.%8.8x.%8.8x.%8.8x\n",
-			cap->UserKey.Values[0], cap->UserKey.Values[1],
-			cap->UserKey.Values[2], cap->UserKey.Values[3]);
-	/*fprintf(fp, "cap.Version=%d\n", cap->Version);*/
-	fprintf(fp, "cap.Type=0x%hx\n", cap->Type);
-	fprintf(fp, "cap.LowContext=%d\n", cap->LowContext);
-	fprintf(fp, "cap.HighContext=%d\n", cap->HighContext);
-	fprintf(fp, "cap.MyContext=%d\n", cap->MyContext);
-	fprintf(fp, "cap.LowNode=%d\n", cap->LowNode);
-	fprintf(fp, "cap.HighNode=%d\n", cap->HighNode);
-	fprintf(fp, "cap.padding=%hd\n", cap->padding);
-	fprintf(fp, "cap.Entries=%d\n", cap->Entries);
-	fprintf(fp, "cap.Railmask=0x%x\n", cap->RailMask);
-	fprintf(fp, "cap.Bitmap=");
-	_print_capbitmap(fp, cap);
-	fprintf(fp, "\n------------------\n");
diff --git a/src/common/qsw.h b/src/common/qsw.h
deleted file mode 100644
index 21f722d5839..00000000000
--- a/src/common/qsw.h
+++ /dev/null
@@ -1,100 +0,0 @@
- *  qsw.h - Library routines for initiating jobs on QsNet. 
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Jim Garlick <>
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with SLURM; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-#include <stdio.h>
-#include <sys/types.h>
-#  include "config.h"
-#include "src/common/bitstring.h"
-#include "src/common/pack.h"
-#ifndef _QSW_INCLUDED
-#define _QSW_INCLUDED
-# include <elan/capability.h>
-# include <elan3/elanvp.h>
-# error "Don't have either libelanctrl or libelan3!"
-/* opaque data structures - no peeking! */
-typedef struct qsw_libstate 	*qsw_libstate_t;
-#ifndef __qsw_jobinfo_t_defined
-#  define  __qsw_jobinfo_t_defined
-   typedef struct qsw_jobinfo *qsw_jobinfo_t;	/* opaque data type */
-#define QSW_PACK_SIZE		(4 * (2+4+1+8+ELAN_BITMAPSIZE))
-int		qsw_alloc_libstate(qsw_libstate_t *lsp);
-void		qsw_free_libstate(qsw_libstate_t ls);
-int		qsw_pack_libstate(qsw_libstate_t ls, Buf buffer);
-int		qsw_unpack_libstate(qsw_libstate_t ls, Buf buffer);
-int 		qsw_init(qsw_libstate_t restorestate);
-void 		qsw_fini(qsw_libstate_t savestate);
-int		qsw_alloc_jobinfo(qsw_jobinfo_t *jp);
-qsw_jobinfo_t	qsw_copy_jobinfo(qsw_jobinfo_t j);
-void		qsw_free_jobinfo(qsw_jobinfo_t j);
-int		qsw_pack_jobinfo(qsw_jobinfo_t j, Buf buffer);
-int		qsw_unpack_jobinfo(qsw_jobinfo_t j, Buf buffer);
-int 		qsw_setup_jobinfo(qsw_jobinfo_t j, int nprocs, 
-			bitstr_t *nodeset, int cyclic_alloc);
-int 		qsw_prog_init(qsw_jobinfo_t jobinfo, uid_t uid);
-void 		qsw_prog_fini(qsw_jobinfo_t jobinfo);
-int 		qsw_prgdestroy(qsw_jobinfo_t jobinfo); /* was qsw_prog_reap */
-int 		qsw_setcap(qsw_jobinfo_t jobinfo, int procnum); 
-		/* was qsw_attach */
-int		qsw_prgsignal(qsw_jobinfo_t jobinfo, int signum); 
-		/* was qsw_signal_job */
-		/* return max ElanID in configuration */
-int             qsw_maxnodeid(void);
-int		qsw_getnodeid(void);
-int		qsw_getnodeid_byhost(char *host);
-int		qsw_gethost_bynodeid(char *host, int len, int elanid);
-char *		qsw_capability_string(qsw_jobinfo_t j, char *buf, size_t len);
-void		qsw_print_jobinfo(FILE *fp, struct qsw_jobinfo *jobinfo);
-#endif /* _QSW_INCLUDED */
diff --git a/src/slurmd/setenvpf.c b/src/common/setenvpf.c
similarity index 98%
rename from src/slurmd/setenvpf.c
rename to src/common/setenvpf.c
index de4ea20916d..ccbea5672a3 100644
--- a/src/slurmd/setenvpf.c
+++ b/src/common/setenvpf.c
@@ -1,5 +1,5 @@
- * src/slurmd/setenvpf.c - add an environment variable to environment vector
+ * src/common/setenvpf.c - add an environment variable to environment vector
  * $Id$
  *  Copyright (C) 2002 The Regents of the University of California.
diff --git a/src/slurmd/setenvpf.h b/src/common/setenvpf.h
similarity index 96%
rename from src/slurmd/setenvpf.h
rename to src/common/setenvpf.h
index c8667d936d7..8a9060a30c3 100644
--- a/src/slurmd/setenvpf.h
+++ b/src/common/setenvpf.h
@@ -1,5 +1,5 @@
- * src/slurmd/setenvpf.h - environment vector manipulation
+ * src/common/setenvpf.h - environment vector manipulation
  *  Copyright (C) 2002 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c
index 7f5a3e82e36..0b61fcee0fe 100644
--- a/src/common/slurm_protocol_defs.c
+++ b/src/common/slurm_protocol_defs.c
@@ -39,6 +39,7 @@
 #include "src/common/log.h"
 #include "src/common/slurm_cred.h"
 #include "src/common/slurm_protocol_defs.h"
+#include "src/common/switch.h"
 #include "src/common/xmalloc.h"
 static void _free_all_job_info (job_info_msg_t *msg);
@@ -282,9 +283,8 @@ void slurm_free_launch_tasks_request_msg(launch_tasks_request_msg_t * msg)
-#	ifdef HAVE_ELAN
-	qsw_free_jobinfo(msg->qsw_job);
-#	endif
+	if (msg->switch_job)
+		switch_free_jobinfo(msg->switch_job);
@@ -473,10 +473,8 @@ void slurm_free_resource_allocation_and_run_response_msg (
-#		ifdef HAVE_LIBELAN3
-		if (msg->qsw_job)
-			qsw_free_jobinfo(msg->qsw_job);
-#		endif
+		if (msg->switch_job)
+			switch_free_jobinfo(msg->switch_job);
@@ -494,10 +492,8 @@ void slurm_free_job_step_create_response_msg(
 	if (msg) {
-#		ifdef HAVE_LIBELAN3
-		if (msg->qsw_job)
-			qsw_free_jobinfo(msg->qsw_job);
-#		endif
+		if (msg->switch_job)
+			switch_free_jobinfo(msg->switch_job);
@@ -546,7 +542,6 @@ void slurm_free_ctl_conf(slurm_ctl_conf_info_msg_t * config_ptr)
-		xfree(config_ptr->switch_type);
diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h
index b4ae235cb5a..2b47baec4bc 100644
--- a/src/common/slurm_protocol_defs.h
+++ b/src/common/slurm_protocol_defs.h
@@ -36,9 +36,6 @@
 #      include <stdint.h>
 #    endif
 #  endif			/* HAVE_INTTYPES_H */
-#  if HAVE_ELAN
-#    include <src/common/qsw.h>
-#  endif
 #else				/* !HAVE_CONFIG_H */
 #  include <inttypes.h>
 #endif				/*  HAVE_CONFIG_H */
@@ -47,6 +44,7 @@
 #include "src/common/macros.h"
 #include "src/common/slurm_protocol_common.h"
+#include "src/common/switch.h"
 #include "src/common/xassert.h"
@@ -258,10 +256,7 @@ typedef struct launch_tasks_request_msg {
 	int32_t   slurmd_debug; /* remote slurmd debug level */
 	slurm_cred_t cred;	/* job credential            */
-#ifdef HAVE_ELAN
-	qsw_jobinfo_t qsw_job;	/* Elan3 switch context */
+	switch_jobinfo_t switch_job;	/* switch credential for the job */
 } launch_tasks_request_msg_t;
 typedef struct launch_tasks_response_msg {
diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c
index 4cd95d0e354..468174eb1f6 100644
--- a/src/common/slurm_protocol_pack.c
+++ b/src/common/slurm_protocol_pack.c
@@ -28,9 +28,9 @@
 #  include "config.h"
+#include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <errno.h>
 #include <string.h>
 #include "src/common/bitstring.h"
@@ -41,12 +41,9 @@
 #include "src/common/slurm_protocol_api.h"
 #include "src/common/slurm_protocol_defs.h"
 #include "src/common/slurm_protocol_pack.h"
+#include "src/common/switch.h"
 #include "src/common/xmalloc.h"
-#  include "src/common/qsw.h"
 #define _pack_job_info_msg(msg,buf)		_pack_buffer_msg(msg,buf)
 #define _pack_job_step_info_msg(msg,buf)	_pack_buffer_msg(msg,buf)
@@ -890,9 +887,7 @@ static void
 	_pack_slurm_addr_array(msg->node_addr, msg->node_cnt, buffer);
 	slurm_cred_pack(msg->cred, buffer);
-#ifdef HAVE_ELAN
-	qsw_pack_jobinfo(msg->qsw_job, buffer);
+	switch_pack_jobinfo(msg->switch_job, buffer);
 static int
@@ -941,14 +936,12 @@ static int
 	if (!(tmp_ptr->cred = slurm_cred_unpack(buffer)))
 		goto unpack_error;
-#ifdef HAVE_ELAN
-	qsw_alloc_jobinfo(&tmp_ptr->qsw_job);
-	if (qsw_unpack_jobinfo(tmp_ptr->qsw_job, buffer) < 0) {
-		error("qsw_unpack_jobinfo: %m");
-		qsw_free_jobinfo(tmp_ptr->qsw_job);
+	switch_alloc_jobinfo(&tmp_ptr->switch_job);
+	if (switch_unpack_jobinfo(tmp_ptr->switch_job, buffer) < 0) {
+		error("switch_unpack_jobinfo: %m");
+		switch_free_jobinfo(tmp_ptr->switch_job);
 		goto unpack_error;
@@ -1286,9 +1279,7 @@ _pack_job_step_create_response_msg(job_step_create_response_msg_t * msg,
 	pack32(msg->job_step_id, buffer);
 	packstr(msg->node_list, buffer);
 	slurm_cred_pack(msg->cred, buffer);
-#ifdef HAVE_ELAN
-	qsw_pack_jobinfo(msg->qsw_job, buffer);
+	switch_pack_jobinfo(msg->switch_job, buffer);
@@ -1309,14 +1300,12 @@ _unpack_job_step_create_response_msg(job_step_create_response_msg_t ** msg,
 	if (!(tmp_ptr->cred = slurm_cred_unpack(buffer)))
 		goto unpack_error;
-#ifdef HAVE_ELAN
-	qsw_alloc_jobinfo(&tmp_ptr->qsw_job);
-	if (qsw_unpack_jobinfo(tmp_ptr->qsw_job, buffer)) {
-		error("qsw_unpack_jobinfo: %m");
-		qsw_free_jobinfo(tmp_ptr->qsw_job);
+	switch_alloc_jobinfo(&tmp_ptr->switch_job);
+	if (switch_unpack_jobinfo(tmp_ptr->switch_job, buffer)) {
+		error("switch_unpack_jobinfo: %m");
+		switch_free_jobinfo(tmp_ptr->switch_job);
 		goto unpack_error;
@@ -2139,9 +2128,7 @@ _pack_launch_tasks_request_msg(launch_tasks_request_msg_t * msg, Buf buffer)
 	pack32(msg->slurmd_debug, buffer);
 		     msg->tasks_to_launch, buffer);
-#ifdef HAVE_ELAN
-	qsw_pack_jobinfo(msg->qsw_job, buffer);
+	switch_pack_jobinfo(msg->switch_job, buffer);
 static int
@@ -2179,13 +2166,13 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t **
 	if (msg->tasks_to_launch != uint32_tmp)
 		goto unpack_error;
-#ifdef HAVE_ELAN
-	qsw_alloc_jobinfo(&msg->qsw_job);
-	if (qsw_unpack_jobinfo(msg->qsw_job, buffer) < 0) {
-		error("qsw_unpack_jobinfo: %m");
+	switch_alloc_jobinfo(&msg->switch_job);
+	if (switch_unpack_jobinfo(msg->switch_job, buffer) < 0) {
+		error("switch_unpack_jobinfo: %m");
+		switch_free_jobinfo(msg->switch_job);
 		goto unpack_error;
diff --git a/src/common/switch.c b/src/common/switch.c
new file mode 100644
index 00000000000..f2499251d71
--- /dev/null
+++ b/src/common/switch.c
@@ -0,0 +1,390 @@
+ * src/common/switch.c - Generic switch (interconnect) for slurm
+ *****************************************************************************
+ *  Copyright (C) 2002 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Moe Jette <>.
+ *  UCRL-CODE-2002-040.
+ *  
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <>.
+ *  
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *  
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *  
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "src/common/macros.h"
+#include "src/common/plugin.h"
+#include "src/common/plugrack.h"
+#include "src/common/slurm_protocol_api.h"
+#include "src/common/switch.h"
+#include "src/common/xmalloc.h"
+#include "src/common/xstring.h"
+ * WARNING:  Do not change the order of these fields or add additional
+ * fields at the beginning of the structure.  If you do, switch
+ * plugins will stop working.  If you need to add fields, add them
+ * at the end of the structure.
+ */
+typedef struct slurm_switch_ops {
+	int          (*state_save)        ( char *dir_name );
+	int          (*state_restore)     ( char *dir_name );
+	bool         (*no_frag)           ( void );
+	int          (*alloc_jobinfo)     ( switch_jobinfo_t *jobinfo );
+	int          (*build_jobinfo)     ( switch_jobinfo_t jobinfo,
+						char *nodelist, int nprocs, 
+						int cyclic_alloc);
+	switch_jobinfo_t (*copy_jobinfo)  ( switch_jobinfo_t jobinfo );
+	void         (*free_jobinfo)      ( switch_jobinfo_t jobinfo );
+	int          (*pack_jobinfo)      ( switch_jobinfo_t jobinfo, 
+						Buf buffer );
+	int          (*unpack_jobinfo)    ( switch_jobinfo_t jobinfo, 
+						Buf buffer );
+	void         (*print_jobinfo)     ( FILE *fp, 
+						switch_jobinfo_t jobinfo );
+	char *       (*string_jobinfo)    ( switch_jobinfo_t jobinfo, 
+						char *buf, size_t size);
+	int          (*node_init)         ( void );
+	int          (*node_fini)         ( void );
+	int          (*job_preinit)       ( switch_jobinfo_t jobinfo );
+	int          (*job_init)          ( switch_jobinfo_t jobinfo,
+						uid_t uid );
+	int          (*job_fini)          ( switch_jobinfo_t jobinfo );
+	int          (*job_postfini)      ( switch_jobinfo_t jobinfo,
+						uid_t pgid, 
+						uint32_t job_id, 
+						uint32_t step_id );
+	int          (*job_attach)        ( switch_jobinfo_t jobinfo, 
+						char ***env, int nodeid, 
+						int procid, int nnodes, 
+						int nprocs, gid_t gid);
+} slurm_switch_ops_t;
+struct slurm_switch_context {
+	char *			switch_type;
+	plugrack_t              plugin_list;
+	plugin_handle_t         cur_plugin;
+	int                     switch_errno;
+	slurm_switch_ops_t	ops;
+static slurm_switch_context_t g_context = NULL;
+static pthread_mutex_t      context_lock = PTHREAD_MUTEX_INITIALIZER;
+static slurm_switch_context_t
+_slurm_switch_context_create( const char *switch_type)
+	slurm_switch_context_t c;
+	if ( switch_type == NULL ) {
+		debug3( "_slurm_switch_context_create: no switch type" );
+		return NULL;
+	}
+	c = xmalloc( sizeof( struct slurm_switch_context ) );
+	c->switch_errno = SLURM_SUCCESS;
+	/* Copy the switch type. */
+	c->switch_type = xstrdup( switch_type );
+	if (c->switch_type == NULL ) {
+		debug3( "can't make local copy of switch type" );
+		xfree( c );
+		return NULL;
+	}
+	/* Plugin rack is demand-loaded on first reference. */
+	c->plugin_list = NULL;
+	c->cur_plugin = PLUGIN_INVALID_HANDLE;
+	return c;
+static int
+_slurm_switch_context_destroy( slurm_switch_context_t c )
+	/*
+	 * Must check return code here because plugins might still
+	 * be loaded and active.
+	 */
+	if ( c->plugin_list ) {
+		if ( plugrack_destroy( c->plugin_list ) != SLURM_SUCCESS ) {
+			return SLURM_ERROR;
+		}
+	}
+	xfree( c->switch_type );
+	xfree( c );
+ * Resolve the operations from the plugin.
+ */
+static slurm_switch_ops_t * 
+_slurm_switch_get_ops( slurm_switch_context_t c )
+	/*
+	 * These strings must be kept in the same order as the fields
+	 * declared for slurm_switch_ops_t.
+	 */
+	static const char *syms[] = {
+		"switch_p_libstate_save",
+		"switch_p_libstate_restore",
+		"switch_p_no_frag",
+		"switch_p_alloc_jobinfo",
+		"switch_p_build_jobinfo",
+		"switch_p_copy_jobinfo",
+		"switch_p_free_jobinfo",
+		"switch_p_pack_jobinfo",
+		"switch_p_unpack_jobinfo",
+		"switch_p_print_jobinfo",
+		"switch_p_sprint_jobinfo",
+		"switch_p_node_init",
+		"switch_p_node_fini",
+		"switch_p_job_preinit",
+		"switch_p_job_init",
+		"switch_p_job_fini",
+		"switch_p_job_postfini",
+		"switch_p_job_attach"
+	};
+	int n_syms = sizeof( syms ) / sizeof( char * );
+	/* Get the plugin list, if needed. */
+	if ( c->plugin_list == NULL ) {
+		char *plugin_dir;
+		c->plugin_list = plugrack_create();
+		if ( c->plugin_list == NULL ) {
+			verbose( "Unable to create a plugin manager" );
+			return NULL; 
+		}
+		plugrack_set_major_type( c->plugin_list, "switch" );
+		plugrack_set_paranoia( c->plugin_list,
+				 0 );
+		plugin_dir = slurm_get_plugin_dir();
+		plugrack_read_dir( c->plugin_list, plugin_dir );
+		xfree(plugin_dir);
+	}
+	/* Find the correct plugin. */
+	c->cur_plugin =
+		plugrack_use_by_type( c->plugin_list, c->switch_type );
+	if ( c->cur_plugin == PLUGIN_INVALID_HANDLE ) {
+		verbose( "can't find a plugin for type %s", c->switch_type );
+		return NULL;
+	}
+	/* Dereference the API. */
+	if ( plugin_get_syms( c->cur_plugin,
+				n_syms,
+				syms,
+				(void **) &c->ops ) < n_syms ) {
+		verbose( "incomplete switch plugin detected" );
+		return NULL;
+	}
+	return &c->ops;
+extern int switch_init( void )
+	int retval = SLURM_SUCCESS;
+	char *switch_type = NULL;
+	slurm_mutex_lock( &context_lock );
+	if ( g_context )
+		goto done;
+	switch_type = slurm_get_switch_type();
+	g_context = _slurm_switch_context_create( switch_type );
+	if ( g_context == NULL ) {
+		error( "cannot create a context for %s", switch_type );
+		retval = SLURM_ERROR;
+		goto done;
+	}
+	if ( _slurm_switch_get_ops( g_context ) == NULL ) {
+		error( "cannot resolve plugin operations for %s", switch_type );
+		_slurm_switch_context_destroy( g_context );
+		g_context = NULL;
+		retval = SLURM_ERROR;
+	}
+      done:
+	slurm_mutex_unlock( &context_lock );
+	xfree(switch_type);
+	return retval;
+extern int  switch_save(char *dir_name)
+	if ( switch_init() < 0 )
+		return SLURM_ERROR;
+	return (*(g_context->ops.state_save))( dir_name );
+extern int  switch_restore(char *dir_name)
+	if ( switch_init() < 0 )
+		return SLURM_ERROR;
+	return (*(g_context->ops.state_restore))( dir_name );
+extern bool switch_no_frag(void)
+	if ( switch_init() < 0 )
+		return false;	/* no plugin: bool result, not SLURM_ERROR (would read as true) */
+	return (*(g_context->ops.no_frag))( );
+extern int  switch_alloc_jobinfo(switch_jobinfo_t *jobinfo)
+	if ( switch_init() < 0 )
+		return SLURM_ERROR;
+	return (*(g_context->ops.alloc_jobinfo))( jobinfo );
+extern int  switch_build_jobinfo(switch_jobinfo_t jobinfo, 
+		char *nodelist, int nprocs, int cyclic_alloc)
+	if ( switch_init() < 0 )
+		return SLURM_ERROR;
+	return (*(g_context->ops.build_jobinfo))( jobinfo, nodelist, 
+			nprocs, cyclic_alloc );
+extern switch_jobinfo_t switch_copy_jobinfo(switch_jobinfo_t jobinfo)
+	if ( switch_init() < 0 )
+		return NULL;
+	return (*(g_context->ops.copy_jobinfo))( jobinfo );
+extern void switch_free_jobinfo(switch_jobinfo_t jobinfo)
+	if ( switch_init() < 0 )
+		return;
+	(*(g_context->ops.free_jobinfo))( jobinfo );
+extern int switch_pack_jobinfo(switch_jobinfo_t jobinfo, Buf buffer)
+	if ( switch_init() < 0 )
+		return SLURM_ERROR;
+	return (*(g_context->ops.pack_jobinfo))( jobinfo, buffer );
+extern int switch_unpack_jobinfo(switch_jobinfo_t jobinfo, Buf buffer)
+	if ( switch_init() < 0 )
+		return SLURM_ERROR;
+	return (*(g_context->ops.unpack_jobinfo))( jobinfo, buffer );
+extern void switch_print_jobinfo(FILE *fp, switch_jobinfo_t jobinfo)
+	if ( switch_init() < 0 )
+		return;
+	(*(g_context->ops.print_jobinfo)) (fp, jobinfo);
+extern char *switch_sprint_jobinfo( switch_jobinfo_t jobinfo,
+	       char *buf, size_t size)
+	if ( switch_init() < 0 )
+		return NULL;
+	return (*(g_context->ops.string_jobinfo)) (jobinfo, buf, size);
+extern int interconnect_node_init(void)
+	if ( switch_init() < 0 )
+		return SLURM_ERROR;
+	return (*(g_context->ops.node_init)) ();
+extern int interconnect_node_fini(void)
+	if ( switch_init() < 0 )
+		return SLURM_ERROR;
+	return (*(g_context->ops.node_fini)) ();
+extern int interconnect_preinit(switch_jobinfo_t jobinfo)
+	if ( switch_init() < 0 )
+		return SLURM_ERROR;
+	return (*(g_context->ops.job_preinit)) (jobinfo);
+extern int interconnect_init(switch_jobinfo_t jobinfo, uid_t uid)
+	if ( switch_init() < 0 )
+		return SLURM_ERROR;
+	return (*(g_context->ops.job_init)) (jobinfo, uid);
+extern int interconnect_fini(switch_jobinfo_t jobinfo)
+	if ( switch_init() < 0 )
+		return SLURM_ERROR;
+	return (*(g_context->ops.job_fini)) (jobinfo);
+extern int interconnect_postfini(switch_jobinfo_t jobinfo, uid_t pgid, 
+				uint32_t job_id, uint32_t step_id )
+	if ( switch_init() < 0 )
+		return SLURM_ERROR;
+	return (*(g_context->ops.job_postfini)) (jobinfo, pgid, 
+		job_id, step_id);
+extern int interconnect_attach(switch_jobinfo_t jobinfo, char ***env,
+		int nodeid, int procid, int nnodes, int nprocs,
+		gid_t gid)
+	if ( switch_init() < 0 )
+		return SLURM_ERROR;
+	return (*(g_context->ops.job_attach)) (jobinfo, env,
+		nodeid, procid, nnodes, nprocs, gid);
diff --git a/src/common/switch.h b/src/common/switch.h
new file mode 100644
index 00000000000..b70047402cd
--- /dev/null
+++ b/src/common/switch.h
@@ -0,0 +1,228 @@
+ * src/common/switch.h - Generic switch (interconnect) info for slurm
+ *****************************************************************************
+ *  Copyright (C) 2002 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Moe Jette <>.
+ *  UCRL-CODE-2002-040.
+ *  
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <>.
+ *  
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *  
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *  
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+#ifndef _SWITCH_H
+#define _SWITCH_H 	1
+#  include "config.h"
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include "src/common/macros.h"
+#include "src/common/pack.h"
+/* opaque data structures - no peeking! */
+#ifndef __switch_jobinfo_t_defined
+#  define __switch_jobinfo_t_defined
+   typedef struct switch_jobinfo *switch_jobinfo_t;
+typedef struct slurm_switch_context * slurm_switch_context_t;
+\ *****************************************/
+/* initialize the switch plugin */
+extern int  switch_init   (void);
+/* save any global switch state to a file within the specified directory
+ * the actual file name used is plugin specific
+ * IN dir_name - directory into which switch state is saved
+ * RET         - slurm error code
+ */
+extern int  switch_save   (char *dir_name);
+/* restore any global switch state from a file within the specified directory
+ * the actual file name used is plugin specific
+ * IN dir_name - directory from which switch state is restored or NULL for
+ *               switch restart with no state restored
+ * RET         - slurm error code
+ */
+extern int  switch_restore(char *dir_name);
+/* report if resource fragmentation is important. if so, delay scheduling a 
+ * new job while another is in the process of terminating.
+ * RET          - true if fragmentation is important
+ */
+extern bool switch_no_frag(void);
+/* allocate storage for a switch job credential
+ * OUT jobinfo - storage for a switch job credential
+ * RET         - slurm error code
+ * NOTE: storage must be freed using switch_free_jobinfo
+ */
+extern int  switch_alloc_jobinfo (switch_jobinfo_t *jobinfo);
+/* fill a job's switch credential
+ * OUT jobinfo  - storage for a switch job credential
+ * IN  nodelist - list of nodes to be used by the job
+ * IN  nprocs   - count of tasks in the job
+ * IN  cyclic_alloc - task distribution pattern, 1=cyclic, 0=block
+ * NOTE: storage must be freed using switch_free_jobinfo
+ */
+extern int  switch_build_jobinfo (switch_jobinfo_t jobinfo, 
+		char *nodelist, int nprocs, int cyclic_alloc);
+/* copy a switch job credential
+ * IN jobinfo - the switch job credential to be copied
+ * RET        - the copy
+ * NOTE: returned value must be freed using switch_free_jobinfo
+ */
+extern switch_jobinfo_t switch_copy_jobinfo(switch_jobinfo_t jobinfo);
+/* free storage previously allocated for a switch job credential
+ * IN jobinfo  - the switch job credential to be freed
+ */
+extern void switch_free_jobinfo  (switch_jobinfo_t jobinfo);
+/* pack a switch job credential into a buffer in machine independent form
+ * IN jobinfo  - the switch job credential to be saved
+ * OUT buffer  - buffer with switch credential appended
+ * RET         - slurm error code
+ */
+extern int  switch_pack_jobinfo  (switch_jobinfo_t jobinfo, Buf buffer);
+/* unpack a switch job credential from a buffer
+ * OUT jobinfo - the switch job credential read
+ * IN  buffer  - buffer with switch credential read from current pointer loc
+ * RET         - slurm error code
+ * NOTE: returned value must be freed using switch_free_jobinfo
+ */
+extern int  switch_unpack_jobinfo(switch_jobinfo_t jobinfo, Buf buffer);
+/* write job credential string representation to a file
+ * IN fp      - an open file pointer
+ * IN jobinfo - a switch job credential
+ */
+extern void switch_print_jobinfo(FILE *fp, switch_jobinfo_t jobinfo);
+/* write job credential to a string
+ * IN jobinfo - a switch job credential
+ * OUT buf    - location to write job credential contents
+ * IN size    - byte size of buf
+ * RET        - the string, same as buf
+ */
+extern char *switch_sprint_jobinfo( switch_jobinfo_t jobinfo,
+			char *buf, size_t size);
+ * Setup node for interconnect use.
+ *
+ * This function is run from the top level slurmd only once per
+ * slurmd run. It may be used, for instance, to perform some one-time
+ * interconnect setup or spawn an error handling thread.
+ *
+ */
+extern int interconnect_node_init(void);
+ * Finalize interconnect on node. 
+ *
+ * This function is called once as slurmd exits (slurmd will wait for
+ * this function to return before continuing the exit process)
+ */
+extern int interconnect_node_fini(void);
+ * Notes on job related interconnect functions:
+ *
+ * Interconnect functions are run within slurmd in the following way:
+ * (Diagram courtesy of Jim Garlick [see qsw.c] )
+ *
+ *  Process 1 (root)        Process 2 (root, user)  |  Process 3 (user task)
+ *                                                  |
+ *  interconnect_preinit                            |
+ *  fork ------------------ interconnect_init       |
+ *  waitpid                 setuid, chdir, etc.     |
+ *                          fork N procs -----------+--- interconnect_attach
+ *                          wait all                |    exec mpi process
+ *                          interconnect_fini*      |
+ *  interconnect_postfini                           |    
+ *                                                  |
+ *
+ * [ *Note: interconnect_fini() is run as the uid of the job owner, not root ]
+ */
+ * Prepare node for job. 
+ *
+ * interconnect_preinit() is run as root in the first slurmd process, the so called job
+ * manager. This function can be used to perform any initialization
+ * that needs to be performed in the same process as interconnect_postfini()
+ * 
+ */
+extern int interconnect_preinit(switch_jobinfo_t jobinfo);
+ * initialize interconnect on node for job. This function is run from the 
+ * 2nd slurmd process (some interconnect implementations may require
+ * interconnect init functions to be executed from a separate process
+ * than the process executing interconnect_postfini() [e.g. QsNet])
+ *
+ */
+extern int interconnect_init(switch_jobinfo_t jobinfo, uid_t uid);
+ * This function is run from the same process as interconnect_init()
+ * after all job tasks have exited. It is *not* run as root, because
+ * the process in question has already setuid to the job owner.
+ *
+ */
+extern int interconnect_fini(switch_jobinfo_t jobinfo);
+ * Finalize interconnect on node.
+ *
+ * This function is run from the initial slurmd process (same process
+ * as interconnect_preinit()), and is run as root. Any cleanup routines
+ * that need to be run with root privileges should be run from this
+ * function.
+ */
+extern int interconnect_postfini(switch_jobinfo_t jobinfo, uid_t pgid, 
+				uint32_t job_id, uint32_t step_id );
+ * attach process to interconnect
+ * (Called from within the process, so it is appropriate to set 
+ * interconnect specific environment variables here)
+ */
+extern int interconnect_attach(switch_jobinfo_t jobinfo, char ***env,
+		int nodeid, int procid, int nnodes, int nprocs,
+		gid_t gid);
+#endif /* _SWITCH_H */
diff --git a/src/plugins/ b/src/plugins/
index 6841e92f357..b1f24f9f6b0 100644
--- a/src/plugins/
+++ b/src/plugins/
@@ -1,3 +1,3 @@
 # $Id$
-SUBDIRS = auth jobcomp sched
+SUBDIRS = auth jobcomp sched switch
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index df07f580ea6..bb624ffebea 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -56,13 +56,10 @@
 #include "src/common/slurm_auth.h"
 #include "src/common/slurm_jobcomp.h"
 #include "src/common/slurm_protocol_api.h"
+#include "src/common/switch.h"
 #include "src/common/xsignal.h"
 #include "src/common/xstring.h"
-#  include "src/common/qsw.h"
 #include "src/slurmctld/agent.h"
 #include "src/slurmctld/locks.h"
 #include "src/slurmctld/ping_nodes.h"
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 58cc688244d..33dbf7329a7 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -41,15 +41,12 @@
 #include <string.h>
 #include <sys/stat.h>
-#ifdef HAVE_ELAN
-#  include "src/common/qsw.h"
 #include <slurm/slurm_errno.h>
 #include "src/common/bitstring.h"
 #include "src/common/hostlist.h"
 #include "src/common/slurm_jobcomp.h"
+#include "src/common/switch.h"
 #include "src/common/xassert.h"
 #include "src/common/xstring.h"
 #include "src/slurmctld/agent.h"
@@ -707,9 +704,7 @@ static void _dump_job_step_state(struct step_record *step_ptr, Buf buffer)
 	pack_time(step_ptr->start_time, buffer);
 	packstr(step_ptr->host,  buffer);
 	packstr(step_ptr->step_node_list,  buffer);
-#ifdef HAVE_ELAN
-	qsw_pack_jobinfo(step_ptr->qsw_job, buffer);
+	switch_pack_jobinfo(step_ptr->switch_job, buffer);
 /* Unpack job step state information from a buffer */
@@ -756,13 +751,11 @@ static int _load_step_state(struct job_record *job_ptr, Buf buffer)
 	step_ptr->step_node_list = step_node_list;
 	step_node_list = NULL;	/* re-used, nothing left to free */
 	step_ptr->time_last_active = time(NULL);
-#ifdef HAVE_ELAN
-	qsw_alloc_jobinfo(&step_ptr->qsw_job);
-	if (qsw_unpack_jobinfo(step_ptr->qsw_job, buffer)) {
-		qsw_free_jobinfo(step_ptr->qsw_job);
+	switch_alloc_jobinfo(&step_ptr->switch_job);
+	if (switch_unpack_jobinfo(step_ptr->switch_job, buffer)) {
+		switch_free_jobinfo(step_ptr->switch_job);
 		goto unpack_error;
 	info("recovered job step %u.%u", job_ptr->job_id, step_id);
@@ -1174,11 +1167,9 @@ int job_allocate(job_desc_msg_t * job_specs, uint32_t * new_job_id,
 	top_prio = _top_priority(job_ptr);
-#ifdef HAVE_ELAN
 	/* Avoid resource fragmentation if important */
-	if (top_prio && job_is_completing())
+	if (switch_no_frag() && top_prio && job_is_completing())
 		top_prio = false;	/* Don't scheduled job right now */
 	if (immediate && (!top_prio)) {
 		job_ptr->job_state  = JOB_FAILED;
 		job_ptr->start_time = 0;
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index 3ffbef73ca1..d794b02b057 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -146,13 +146,11 @@ int schedule(void)
-#ifdef HAVE_ELAN
 	/* Avoid resource fragmentation if important */
-	if (job_is_completing()) {
+	if (switch_no_frag() && job_is_completing()) {
 		return SLURM_SUCCESS;
 	debug("Running job scheduler");
 	job_queue_size = _build_job_queue(&job_queue);
 	if (job_queue_size == 0) {
diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c
index 7922451a786..c0c5c6e4ac9 100644
--- a/src/slurmctld/node_mgr.c
+++ b/src/slurmctld/node_mgr.c
@@ -1155,16 +1155,15 @@ validate_node_specs (char *node_name, uint32_t cpus,
 		node_ptr->cpus = cpus;
 		node_ptr->real_memory = real_memory;
 		node_ptr->tmp_disk = tmp_disk;
-#ifdef 		HAVE_ELAN
 		/* Every node in a given partition must have the same 
 		 * processor count at present */
-		if ((slurmctld_conf.fast_schedule == 0) &&
-		    (node_ptr->config_ptr->cpus != cpus)) {
+		if ((slurmctld_conf.fast_schedule == 0)  &&
+		    (node_ptr->config_ptr->cpus != cpus) && 
+		    (strcmp(slurmctld_conf.switch_type, "switch/elan") == 0)) {
 			error ("Node %s processor count inconsistent with rest "
 			       "of partition", node_name);
 			return EINVAL;		/* leave node down */
 		if (node_ptr->node_state == NODE_STATE_UNKNOWN) {
 			last_node_update = time (NULL);
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index 8c8330715ec..707c9d9a185 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -50,12 +50,9 @@
 #include "src/common/slurm_auth.h"
 #include "src/common/slurm_cred.h"
 #include "src/common/slurm_protocol_api.h"
+#include "src/common/switch.h"
 #include "src/common/xstring.h"
-#  include "src/common/qsw.h"
 #include "src/slurmctld/locks.h"
 #include "src/slurmctld/proc_req.h"
 #include "src/slurmctld/read_config.h"
@@ -275,8 +272,8 @@ void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr)
 	conf_ptr->slurmd_spooldir     = slurmctld_conf.slurmd_spooldir;
 	conf_ptr->slurmd_timeout      = slurmctld_conf.slurmd_timeout;
 	conf_ptr->slurm_conf          = slurmctld_conf.slurm_conf;
-	conf_ptr->switch_type         = slurmctld_conf.switch_type;
 	conf_ptr->state_save_location = slurmctld_conf.state_save_location;
+	conf_ptr->switch_type         = slurmctld_conf.switch_type;
 	conf_ptr->tmp_fs              = slurmctld_conf.tmp_fs;
 	conf_ptr->wait_time           = slurmctld_conf.wait_time;
@@ -510,9 +507,8 @@ static void _slurm_rpc_allocate_and_run(slurm_msg_t * msg)
 		alloc_msg.node_cnt       = node_cnt;
 		alloc_msg.node_addr      = node_addr;
 		alloc_msg.cred           = slurm_cred;
-#ifdef HAVE_ELAN
-		alloc_msg.qsw_job = qsw_copy_jobinfo(step_rec->qsw_job);
+		alloc_msg.switch_job     = switch_copy_jobinfo(
+						step_rec->switch_job);
 		response_msg.msg_type = RESPONSE_ALLOCATION_AND_RUN_JOB_STEP; = &alloc_msg;
@@ -520,9 +516,7 @@ static void _slurm_rpc_allocate_and_run(slurm_msg_t * msg)
 		if (slurm_send_node_msg(msg->conn_fd, &response_msg) < 0)
-#ifdef HAVE_ELAN
-		qsw_free_jobinfo(alloc_msg.qsw_job);
+		switch_free_jobinfo(alloc_msg.switch_job);
 		(void) dump_all_job_state();	/* Has its own locks */
@@ -952,10 +946,8 @@ static void _slurm_rpc_job_step_create(slurm_msg_t * msg)
 		job_step_resp.job_step_id = step_rec->step_id;
 		job_step_resp.node_list   = xstrdup(step_rec->step_node_list);
 		job_step_resp.cred        = slurm_cred;
-#ifdef HAVE_ELAN
-		job_step_resp.qsw_job =  qsw_copy_jobinfo(step_rec->qsw_job);
+		job_step_resp.switch_job  = switch_copy_jobinfo(
+						step_rec->switch_job);
 		resp.address = msg->address;
 		resp.msg_type = RESPONSE_JOB_STEP_CREATE;
@@ -964,9 +956,7 @@ static void _slurm_rpc_job_step_create(slurm_msg_t * msg)
 		slurm_send_node_msg(msg->conn_fd, &resp);
-#ifdef HAVE_ELAN
-		qsw_free_jobinfo(job_step_resp.qsw_job);
+		switch_free_jobinfo(job_step_resp.switch_job);
 		(void) dump_all_job_state();	/* Sets own locks */
@@ -1353,8 +1343,8 @@ static void _slurm_rpc_shutdown_controller(slurm_msg_t * msg)
 		/* save_all_state();	performed by _slurmctld_background */
 	slurm_send_rc_msg(msg, error_code);
-	if ((error_code == SLURM_SUCCESS) && core_arg && 
-	    (slurmctld_config.thread_id_sig)) 
+	if ((error_code == SLURM_SUCCESS) && core_arg &&
+	    (slurmctld_config.thread_id_sig))
 		pthread_kill(slurmctld_config.thread_id_sig, SIGABRT);
diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c
index 0c6f8b3393b..1092a415fca 100644
--- a/src/slurmctld/read_config.c
+++ b/src/slurmctld/read_config.c
@@ -41,11 +41,12 @@
 #include <unistd.h>
 #include "src/common/hostlist.h"
-#include "src/common/slurm_jobcomp.h"
 #include "src/common/list.h"
 #include "src/common/macros.h"
 #include "src/common/parse_spec.h"
 #include "src/common/read_config.h"
+#include "src/common/slurm_jobcomp.h"
+#include "src/common/switch.h"
 #include "src/common/xstring.h"
 #include "src/slurmctld/locks.h"
 #include "src/slurmctld/proc_req.h"
@@ -273,7 +274,7 @@ static int _init_all_slurm_conf(void)
 static int _parse_node_spec(char *in_line)
 	char *node_addr = NULL, *node_name = NULL, *state = NULL;
-        char *feature = NULL, *reason = NULL;
+	char *feature = NULL, *reason = NULL;
 	char *this_node_addr, *this_node_name;
 	int error_code, first, i;
 	int state_val, cpus_val, real_memory_val, tmp_disk_val, weight_val;
@@ -767,6 +768,7 @@ int read_slurm_conf(int recover)
+	switch_init();
 	if (default_part_loc == NULL)
 		error("read_slurm_conf: default partition not set.");
@@ -1019,61 +1021,10 @@ static void _validate_node_proc_count(void)
 int switch_state_begin(int recover)
-	int error_code = SLURM_SUCCESS;
-#ifdef HAVE_ELAN
-	qsw_libstate_t old_state = NULL;
-	Buf buffer = NULL;
-	char *qsw_state_file = NULL, *data = NULL;
-	int state_fd, data_allocated, data_read= 0, data_size = 0;
-	if (recover) {
-		/* Read state from file into buffer */
-		qsw_state_file = xstrdup (slurmctld_conf.state_save_location);
-		xstrcat (qsw_state_file, "/qsw_state");
-		state_fd = open (qsw_state_file, O_RDONLY);
-		if (state_fd >= 0) {
-			data_allocated = BUF_SIZE;
-			data = xmalloc(data_allocated);
-			while ((data_read = 
-					read (state_fd, &data[data_size], 
-					BUF_SIZE)) == BUF_SIZE) {
-				data_size += data_read;
-				data_allocated += BUF_SIZE;
-				xrealloc(data, data_allocated);
-			}
-			data_size += data_read;
-			if (data_read < 0) {
-				error ("Read error on %s, %m", qsw_state_file);
-				error_code = SLURM_ERROR;
-				data_size = 0;
-			}
-			close (state_fd);
-		} else
-			info("No %s file to recover QSW state from", 
-				qsw_state_file);
-		xfree(qsw_state_file);
-		if ((error_code == SLURM_SUCCESS) && data_size) {
-			if (qsw_alloc_libstate(&old_state)) {
-				error_code = SLURM_ERROR;
-			} else {
-				buffer = create_buf (data, data_size);
-				if (qsw_unpack_libstate(old_state, buffer) < 0)
-					error_code = errno;
-			}
-		}
-		if (buffer)
-			free_buf(buffer);
-		else if (data)
-			xfree(data);
-	}
-	if (error_code == SLURM_SUCCESS)
-		error_code = qsw_init(old_state);
-	if (old_state)
-		qsw_free_libstate(old_state);
-#endif				/* HAVE_ELAN */
-	return error_code;
+	if (recover)
+		return switch_restore(slurmctld_conf.state_save_location);
+	else
+		return switch_restore(NULL); 
@@ -1082,42 +1033,6 @@ int switch_state_begin(int recover)
 int switch_state_fini(void)
-	int error_code = SLURM_SUCCESS;
-#ifdef HAVE_ELAN
-	qsw_libstate_t old_state = NULL;
-	Buf buffer = NULL;
-	char *qsw_state_file = NULL;
-	int state_fd;
-	if (qsw_alloc_libstate(&old_state))
-		return errno;
-	qsw_fini(old_state);
-	buffer = init_buf(1024);
-	error_code = qsw_pack_libstate(old_state, buffer);
-	qsw_state_file = xstrdup (slurmctld_conf.state_save_location);
-	xstrcat (qsw_state_file, "/qsw_state");
-	(void) unlink (qsw_state_file);
-	state_fd = creat (qsw_state_file, 0600);
-	if (state_fd == 0) {
-		error ("Can't save state, error creating file %s %m", 
-		       qsw_state_file);
-		error_code = errno;
-	}
-	else {
-		if (write (state_fd, get_buf_data(buffer), 
-				get_buf_offset(buffer)) != 
-				get_buf_offset(buffer)) {
-			error ("Can't save state, error writing file %s %m", 
-			       qsw_state_file);
-			error_code = errno;
-		}
-		close (state_fd);
-	}
-	xfree (qsw_state_file);
-	if (buffer)
-		free_buf(buffer);
-	if (old_state)
-		qsw_free_libstate(old_state);
-#endif				/* HAVE_ELAN */
-	return error_code;
+	return switch_save(slurmctld_conf.state_save_location);
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index 8bc9e9a7e77..00120d60f97 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -45,10 +45,6 @@
 #include <sys/types.h>
 #include <unistd.h>
-#ifdef HAVE_ELAN
-#  include "src/common/qsw.h"
-#endif				/* HAVE_ELAN */
 #  include <pthread.h>
 #endif				/* WITH_PTHREADS */
@@ -62,6 +58,7 @@
 #include "src/common/pack.h"
 #include "src/common/slurm_cred.h"
 #include "src/common/slurm_protocol_api.h"
+#include "src/common/switch.h"
 #include "src/common/xmalloc.h"
 #define FREE_NULL_BITMAP(_X)		\
@@ -314,9 +311,7 @@ struct 	step_record {
 	time_t time_last_active;	/* time of last job activity */
 	uint16_t port;			/* port for srun communications */
 	char *host;			/* host for srun communications */
-#ifdef HAVE_ELAN
-	qsw_jobinfo_t qsw_job;		/* Elan3 switch context, opaque */
+	switch_jobinfo_t switch_job;	/* switch context, opaque */
 typedef struct job_step_specs step_specs; 
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index 310efebc103..304521085cf 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -37,13 +37,10 @@
 #include <string.h>
 #include <unistd.h>
-#ifdef HAVE_ELAN
-#  include "src/common/qsw.h"
 #include <slurm/slurm_errno.h>
 #include "src/common/bitstring.h"
+#include "src/common/switch.h"
 #include "src/common/xstring.h"
 #include "src/slurmctld/agent.h"
 #include "src/slurmctld/locks.h"
@@ -97,9 +94,7 @@ delete_all_step_records (struct job_record *job_ptr)
 	last_job_update = time(NULL);
 	while ((step_ptr = (struct step_record *) list_next (step_iterator))) {
 		list_remove (step_iterator);
-#ifdef HAVE_ELAN
-		qsw_free_jobinfo (step_ptr->qsw_job);
+		switch_free_jobinfo(step_ptr->switch_job);
@@ -132,9 +127,7 @@ delete_step_record (struct job_record *job_ptr, uint32_t step_id)
 	while ((step_ptr = (struct step_record *) list_next (step_iterator))) {
 		if (step_ptr->step_id == step_id) {
 			list_remove (step_iterator);
-#ifdef HAVE_ELAN
-			qsw_free_jobinfo (step_ptr->qsw_job);
+			switch_free_jobinfo (step_ptr->switch_job);
@@ -495,10 +488,6 @@ step_create ( step_specs *step_specs, struct step_record** new_step_record,
 	bitstr_t *nodeset;
 	int node_count;
 	time_t now = time(NULL);
-#ifdef HAVE_ELAN
-	int first, last, i, node_id;
-	int node_set_size = QSW_MAX_TASKS; /* overkill but safe */
 	*new_step_record = NULL;
 	job_ptr = find_job_record (step_specs->job_id);
@@ -516,11 +505,9 @@ step_create ( step_specs *step_specs, struct step_record** new_step_record,
 	    (job_ptr->end_time <= time(NULL)))
-#ifdef HAVE_ELAN
 	if ((step_specs->task_dist != SLURM_DIST_CYCLIC) &&
 	    (step_specs->task_dist != SLURM_DIST_BLOCK))
 		return ESLURM_BAD_DIST;
 	if (job_ptr->kill_on_step_done)
 		/* Don't start more steps, job already being cancelled */
@@ -542,10 +529,6 @@ step_create ( step_specs *step_specs, struct step_record** new_step_record,
 	if ((step_specs->num_tasks < 1) ||
 	    (step_specs->num_tasks > (node_count*MAX_TASKS_PER_NODE)))
-#ifdef HAVE_ELAN
-	if (step_specs->num_tasks > node_set_size)
 	step_ptr = create_step_record (job_ptr);
 	if (step_ptr == NULL)
@@ -561,39 +544,16 @@ step_create ( step_specs *step_specs, struct step_record** new_step_record,
 	step_ptr->port = step_specs->port;
 	step_ptr->host = xstrdup(step_specs->host);
-#ifdef HAVE_ELAN
-	if (qsw_alloc_jobinfo (&step_ptr->qsw_job) < 0)
-		fatal ("step_create: qsw_alloc_jobinfo error");
-	first = bit_ffs (step_ptr->step_node_bitmap);
-	last  = bit_fls (step_ptr->step_node_bitmap);
-	nodeset = bit_alloc (node_set_size);
-	if (nodeset == NULL)
-		fatal ("step_create: bit_alloc error");
-	for (i = first; i <= last; i++) {
-		if (bit_test (step_ptr->step_node_bitmap, i)) {
-			node_id = qsw_getnodeid_byhost (
-					node_record_table_ptr[i].name);
-			if (node_id >= 0)	/* no lookup error */
-				bit_set(nodeset, node_id);
-			else {
-				error ("qsw_getnodeid_byhost lookup failure on %s",
-				       node_record_table_ptr[i].name);
-				delete_step_record (job_ptr, 
-							step_ptr->step_id);
-				bit_free (nodeset);
-			}
-		}
-	}
-	if (qsw_setup_jobinfo (step_ptr->qsw_job, step_specs->num_tasks, 
-				nodeset, step_ptr->cyclic_alloc) < 0) {
-		error ("step_create: qsw_setup_jobinfo error %m");
+	if (switch_alloc_jobinfo (&step_ptr->switch_job) < 0)
+		fatal ("step_create: switch_alloc_jobinfo error");
+	if (switch_build_jobinfo(step_ptr->switch_job, 
+				step_ptr->step_node_list,
+				step_specs->num_tasks, 
+				step_ptr->cyclic_alloc) < 0) {
+		error("switch_build_jobinfo: %m");
 		delete_step_record (job_ptr, step_ptr->step_id);
-		bit_free (nodeset);
-	bit_free (nodeset);
 	*new_step_record = step_ptr;
diff --git a/src/slurmd/ b/src/slurmd/
index 6c0e827f352..a2f0169f163 100644
--- a/src/slurmd/
+++ b/src/slurmd/
@@ -8,12 +8,6 @@ sbin_PROGRAMS = slurmd
 INCLUDES = -I$(top_srcdir) $(SSL_CPPFLAGS)
-interconnect_sources = elan_interconnect.c 
-interconnect_sources = no_interconnect.c
 slurmd_LDADD = 					   \
         $(top_builddir)/src/common/    \
 	$(top_builddir)/src/common/ \
@@ -21,7 +15,7 @@ slurmd_LDADD = 					   \
-common_sources = 	        	\
+slurmd_SOURCES = 	        	\
 	slurmd.c slurmd.h       	\
 	req.c req.h			\
 	mgr.c mgr.h			\
@@ -35,18 +29,10 @@ common_sources = 	        	\
 	shm.c shm.h			\
 	fname.c fname.h			\
 	ulimits.c ulimits.h     	\
-	setenvpf.c setenvpf.h   	\
-	setproctitle.c setproctitle.h 	\
-	interconnect.h
-slurmd_SOURCES = $(common_sources) $(interconnect_sources)
+	setproctitle.c setproctitle.h
 slurmd_LDFLAGS = -export-dynamic
-EXTRA_slurmd_SOURCES = \
-	no_interconnect.c 	\
-	elan_interconnect.c	
 $(slurmd_LDADD) : force
 	@cd `dirname $@` && $(MAKE) `basename $@`
diff --git a/src/slurmd/elan_interconnect.c b/src/slurmd/elan_interconnect.c
deleted file mode 100644
index ecf503ef675..00000000000
--- a/src/slurmd/elan_interconnect.c
+++ /dev/null
@@ -1,322 +0,0 @@
- *  src/slurmd/elan_interconnect.c Elan interconnect implementation
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <> 
- *         and Mark Grondona <>
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with SLURM; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#include <pthread.h>
-#include <slurm/slurm_errno.h>
-#include "src/common/xmalloc.h"
-#include "src/common/xstring.h"
-#include "src/common/bitstring.h"
-#include "src/common/log.h"
-#include "src/common/list.h"
-#include "src/common/hostlist.h"
-#include "src/common/qsw.h"
-#include "src/common/slurm_protocol_api.h"
-#include "src/common/elanhosts.h"
-#include "src/slurmd/interconnect.h"
-#include "src/slurmd/setenvpf.h"
-#include <elan3/elan3.h>
- * Static prototypes for network error resolver creation:
- */
-static int   set_elan_ids(void);
-static void *neterr_thr(void *arg);
-static int             neterr_retval = 0;
-static pthread_t       neterr_tid;
-static pthread_mutex_t neterr_mutex = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t  neterr_cond  = PTHREAD_COND_INITIALIZER;
-#endif /* HAVE_LIBELAN3 */
-/*  Initialize node for use of the Elan interconnect by loading 
- *   elanid/hostname pairs then spawning the Elan network error
- *   resover thread.
- *
- *  Main thread waits for neterr thread to successfully start before
- *   continuing.
- */
-int interconnect_node_init(void)
-	int err = 0;
-	pthread_attr_t attr;
-        /* 
-         *  We only know how to do this for Elan3 right now
-         */
-	/*
-	 *  Load neterr elanid/hostname values into kernel 
-	 */
-	if (set_elan_ids() < 0)
-		return SLURM_FAILURE;
-	if ((err = pthread_attr_init(&attr)))
-		error("pthread_attr_init: %s", slurm_strerror(err));
-	err = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
-	if (err)
-		error("pthread_attr_setdetachstate: %s", slurm_strerror(err));
-	slurm_mutex_lock(&neterr_mutex);
-	if ((err = pthread_create(&neterr_tid, &attr, neterr_thr, NULL)))
-		return SLURM_FAILURE;
-	/*
-	 *  Wait for successful startup of neterr thread before
-	 *   returning control to slurmd.
-	 */
-	pthread_cond_wait(&neterr_cond, &neterr_mutex);
-	pthread_mutex_unlock(&neterr_mutex);
-	return neterr_retval;
-#else  /* !HAVE_LIBELAN3 */
-        return SLURM_SUCCESS;
-#endif /*  HAVE_LIBELAN3 */
-static void *neterr_thr(void *arg)
-	debug3("Starting Elan network error resolver thread");
-	if (!elan3_init_neterr_svc(0)) {
-		error("elan3_init_neterr_svc: %m");
-		goto fail;
-	}
-	/* 
-	 *  Attempt to register the neterr svc thread. If the address 
-	 *   cannot be bound, then there is already a thread running, and
-	 *   we should just exit with success.
-	 */
-	if (!elan3_register_neterr_svc()) {
-		if (errno != EADDRINUSE) {
-			error("elan3_register_neterr_svc: %m");
-			goto fail;
-		}
-		info("Warning: Elan error resolver thread already running");
-	}
-	/* 
-	 *  Signal main thread that we've successfully initialized
-	 */
-	slurm_mutex_lock(&neterr_mutex);
-	neterr_retval = 0;
-	pthread_cond_signal(&neterr_cond);
-	slurm_mutex_unlock(&neterr_mutex);
-	/*
-	 *  Run the network error resolver thread. This should
-	 *   never return. If it does, there's not much we can do
-	 *   about it.
-	 */
-	elan3_run_neterr_svc();
-	return NULL;
-   fail:
-	slurm_mutex_lock(&neterr_mutex);
-	neterr_retval = SLURM_FAILURE;
-	pthread_cond_signal(&neterr_cond);
-	slurm_mutex_unlock(&neterr_mutex);
-	return NULL;
-#endif /* HAVE_LIBELAN3 */
- *  Called from slurmd just before termination.
- *   We don't really need to do anything special for Elan, but
- *   we'll call pthread_cancel() on the neterr resolver thread anyhow.
- */
-	int err = pthread_cancel(neterr_tid);
-	if (err == 0) 
-		return SLURM_SUCCESS;
-	error("Unable to cancel neterr thread: %s", slurm_strerror(err));
-#endif /* HAVE_LIBELAN3 */
-        return SLURM_SUCCESS;
-static int 
-_wait_and_destroy_prg(qsw_jobinfo_t qsw_job)
-	int i = 0;
-	int sleeptime = 1;
-	debug("going to destroy program description...");
-	while((qsw_prgdestroy(qsw_job) < 0) && (errno == ECHILD_PRGDESTROY)) {
-		debug("qsw_prgdestroy: %m");
-		i++;
-		if (i == 1) {
-			debug("sending SIGTERM to remaining tasks");
-			qsw_prgsignal(qsw_job, SIGTERM);
-		} else {
-			debug("sending SIGKILL to remaining tasks");
-			qsw_prgsignal(qsw_job, SIGKILL);
-		}
-		debug("sleeping for %d sec ...", sleeptime);
-		sleep(sleeptime*=2);
-	}
-	debug("destroyed program description");
-interconnect_preinit(slurmd_job_t *job)
- * prepare node for interconnect use
- */
-interconnect_init(slurmd_job_t *job)
-	char buf[4096];
-	debug2("calling interconnect_init from process %lu", 
-		(unsigned long) getpid());
-	verbose("ELAN: %s", qsw_capability_string(job->qsw_job, buf, 4096));
-	if (qsw_prog_init(job->qsw_job, job->uid) < 0) {
-		/*
-		 * Check for EBADF, which probably means the rms
-		 *  kernel module is not loaded.
-		 */
-		if (errno == EBADF)
-			error("Initializing interconnect: "
-			      "is the rms kernel module loaded?");
-		else
-			error ("elan_interconnect_init: %m");
-		qsw_print_jobinfo(log_fp(), job->qsw_job);
-		return SLURM_ERROR;
-	}
-	return SLURM_SUCCESS; 
-interconnect_fini(slurmd_job_t *job)
-	qsw_prog_fini(job->qsw_job); 
-interconnect_postfini(slurmd_job_t *job)
-	_wait_and_destroy_prg(job->qsw_job);
-interconnect_attach(slurmd_job_t *job, int procid)
-	int nodeid, nnodes, nprocs; 
-	int rank = job->task[procid]->gid; 
-	nodeid = job->nodeid;
-	nnodes = job->nnodes;
-	nprocs = job->nprocs;
-	debug3("nodeid=%d nnodes=%d procid=%d nprocs=%d", 
-	       nodeid, nnodes, procid, nprocs);
-	debug3("setting capability in process %lu", (unsigned long) getpid());
-	if (qsw_setcap(job->qsw_job, procid) < 0) {
-		error("qsw_setcap: %m");
-		return SLURM_ERROR;
-	}
-	if (setenvpf(&job->env, "RMS_RANK",   "%d",   rank       ) < 0)
-		return -1;
-	if (setenvpf(&job->env, "RMS_NODEID", "%d", job->nodeid) < 0)
-		return -1;
-	if (setenvpf(&job->env, "RMS_PROCID", "%d", rank       ) < 0)
-		return -1;
-	if (setenvpf(&job->env, "RMS_NNODES", "%d", job->nnodes) < 0)
-		return -1;
-	if (setenvpf(&job->env, "RMS_NPROCS", "%d", job->nprocs) < 0)
-		return -1;
-static int
-	int i;
-	for (i = 0; i <= qsw_maxnodeid(); i++) {
-		char host[256]; 
-		if (qsw_gethost_bynodeid(host, 256, i) < 0)
-			continue;
-		if (elan3_load_neterr_svc(i, host) < 0)
-			error("elan3_load_neterr_svc(%d, %s): %m", i, host);
-	}
-	return 0;
diff --git a/src/slurmd/interconnect.h b/src/slurmd/interconnect.h
deleted file mode 100644
index c5461df7573..00000000000
--- a/src/slurmd/interconnect.h
+++ /dev/null
@@ -1,116 +0,0 @@
- *  src/slurmd/interconnect.h - general interconnect routines for slurmd
- *  $Id$
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <> 
- *          modified by Mark Grondona <>
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with SLURM; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-#include "src/slurmd/job.h"
- * Setup node for interconnect use.
- *
- * This function is run from the top level slurmd only once per
- * slurmd run. It may be used, for instance, to perform some one-time
- * interconnect setup or spawn an error handling thread.
- *
- */
-int interconnect_node_init(void);
- * Finalize interconnect on node. 
- *
- * This function is called once as slurmd exits (slurmd will wait for
- * this function to return before continuing the exit process)
- */
-int interconnect_node_fini(void);
- * Notes on job related interconnect functions:
- *
- * Interconnect functions are run within slurmd in the following way:
- * (Diagram courtesy of Jim Garlick [see qsw.c] )
- *
- *  Process 1 (root)        Process 2 (root, user)  |  Process 3 (user task)
- *                                                  |
- *  interconnect_preinit                            |
- *  fork ------------------ interconnect_init       |
- *  waitpid                 setuid, chdir, etc.     |
- *                          fork N procs -----------+--- interconnect_attach
- *                          wait all                |    exec mpi process
- *                          interconnect_fini*      |
- *  interconnect_postfini                           |    
- *                                                  |
- *
- * [ *Note: interconnect_fini() is run as the uid of the job owner, not root ]
- */
- * Prepare node for job. 
- *
- * pre is run as root in the first slurmd process, the so called job
- * manager. This function can be used to perform any initialization
- * that needs to be performed in the same process as interconnect_fini()
- * 
- */
-int interconnect_preinit(slurmd_job_t *job);
- * initialize interconnect on node for job. This function is run from the 
- * 2nd slurmd process (some interconnect implementations may require
- * interconnect init functions to be executed from a separate process
- * than the process executing initerconnect_fini() [e.g. QsNet])
- *
- */
-int interconnect_init(slurmd_job_t *job);
- * This function is run from the same process as interconnect_init()
- * after all job tasks have exited. It is *not* run as root, because
- * the process in question has already setuid to the job owner.
- *
- */
-int interconnect_fini(slurmd_job_t *job);
- * Finalize interconnect on node.
- *
- * This function is run from the initial slurmd process (same process
- * as interconnect_preinit()), and is run as root. Any cleanup routines
- * that need to be run with root privileges should be run from this
- * function.
- */
-int interconnect_postfini(slurmd_job_t *job);
- * attach process to interconnect
- * (Called from within the process, so it is appropriate to set 
- * interconnect specific environment variables here)
- */
-int interconnect_attach(slurmd_job_t *job, int taskid);
-#endif /* _INTERCONNECT_H */
diff --git a/src/slurmd/job.c b/src/slurmd/job.c
index 047d2116a47..0f0efaac024 100644
--- a/src/slurmd/job.c
+++ b/src/slurmd/job.c
@@ -137,9 +137,7 @@ job_create(launch_tasks_request_msg_t *msg, slurm_addr *cli_addr)
 	memcpy(&io_addr,   cli_addr, sizeof(slurm_addr));
 	slurm_set_addr(&io_addr,   msg->io_port,   NULL); 
-#ifdef HAVE_ELAN
-	job->qsw_job = msg->qsw_job;
+	job->switch_job = msg->switch_job;
 	job->objs    = list_create((ListDelF) io_obj_destroy);
 	job->eio     = eio_handle_create();
diff --git a/src/slurmd/job.h b/src/slurmd/job.h
index 280ed30b4f8..baff2684613 100644
--- a/src/slurmd/job.h
+++ b/src/slurmd/job.h
@@ -38,7 +38,7 @@
 #include "src/common/slurm_protocol_api.h"
 #include "src/common/list.h"
 #include "src/common/eio.h"
+#include "src/common/switch.h"
@@ -110,9 +110,7 @@ typedef struct slurmd_job {
 	char         **env;    /* job environment                           */
 	char         **argv;   /* job argument vector                       */
 	char          *cwd;    /* path to current working directory         */
-#ifdef HAVE_ELAN
-	qsw_jobinfo_t qsw_job; /* Elan-specific job information             */
+	switch_jobinfo_t switch_job; /* switch-specific job information     */
 	uid_t         uid;     /* user id for job                           */
 	gid_t         gid;     /* group ID for job                          */
diff --git a/src/slurmd/mgr.c b/src/slurmd/mgr.c
index 47db96942c3..dc6ffa919b4 100644
--- a/src/slurmd/mgr.c
+++ b/src/slurmd/mgr.c
@@ -58,6 +58,8 @@
 #include "src/common/log.h"
 #include "src/common/fd.h"
 #include "src/common/safeopen.h"
+#include "src/common/setenvpf.h"
+#include "src/common/switch.h"
 #include "src/common/xsignal.h"
 #include "src/common/xstring.h"
 #include "src/common/xmalloc.h"
@@ -66,12 +68,10 @@
 #include "src/slurmd/mgr.h"
 #include "src/slurmd/slurmd.h"
-#include "src/slurmd/setenvpf.h"
 #include "src/slurmd/setproctitle.h"
 #include "src/slurmd/smgr.h"
 #include "src/slurmd/io.h"
 #include "src/slurmd/shm.h"
-#include "src/slurmd/interconnect.h"
@@ -428,7 +428,8 @@ _job_mgr(slurmd_job_t *job)
 		goto fail0;
-	if (!job->batch && (interconnect_preinit(job) < 0)) {
+	if (!job->batch && 
+	    (interconnect_preinit(job->switch_job) < 0)) {
 		goto fail1;
@@ -482,7 +483,9 @@ _job_mgr(slurmd_job_t *job)
 	 *    is moved behind wait_for_io(), we may block waiting for IO
 	 *    on a hung process.
-	if (!job->batch && (interconnect_postfini(job) < 0))
+	if (!job->batch && 
+	    (interconnect_postfini(job->switch_job, job->smgr_pid,
+			job->jobid, job->stepid) < 0))
 		error("interconnect_postfini: %m");
diff --git a/src/slurmd/no_interconnect.c b/src/slurmd/no_interconnect.c
deleted file mode 100644
index f55f9116be8..00000000000
--- a/src/slurmd/no_interconnect.c
+++ /dev/null
@@ -1,80 +0,0 @@
- *  no_interconnect.c - Manage user task communications without an high-speed
- *	interconnect
- *****************************************************************************
- *  Copyright (C) 2002 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Kevin Tew <> et. al.
- *  UCRL-CODE-2002-040.
- *  
- *  This file is part of SLURM, a resource management program.
- *  For details, see <>.
- *  
- *  SLURM is free software; you can redistribute it and/or modify it under
- *  the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *  
- *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- *  details.
- *  
- *  You should have received a copy of the GNU General Public License along
- *  with ConMan; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
-#include <sys/types.h>
-#include <signal.h>
-#include <src/slurmd/interconnect.h>
-#include <src/slurmd/setenvpf.h>
-int interconnect_node_init (void)
-int interconnect_node_fini (void)
-int interconnect_preinit (slurmd_job_t *job)
-int interconnect_init (slurmd_job_t *job)
-int interconnect_attach (slurmd_job_t *job, int taskid) 
-int interconnect_fini (slurmd_job_t *job)
-int interconnect_postfini (slurmd_job_t *job)
-	/*
-	 *  Kill all processes in the job's session
-	 */
-	if (job->smgr_pid) {
-		debug2("Sending SIGKILL to pgid %lu", 
-			(unsigned long) job->smgr_pid); 
-		kill(-job->smgr_pid, SIGKILL);
-	} else
-		debug("Job %u.%u: Bad pid valud %lu", job->jobid, 
-		      job->stepid, (unsigned long) job->smgr_pid);
diff --git a/src/slurmd/slurmd.c b/src/slurmd/slurmd.c
index 4b91c61e468..450df193d69 100644
--- a/src/slurmd/slurmd.c
+++ b/src/slurmd/slurmd.c
@@ -60,7 +60,6 @@
 #include "src/slurmd/req.h"
 #include "src/slurmd/shm.h"
 #include "src/slurmd/setproctitle.h"
-#include "src/slurmd/interconnect.h"
 #include "src/slurmd/get_mach_stat.h"
 #define GETOPT_ARGS	"L:f:Dvhc"
diff --git a/src/slurmd/smgr.c b/src/slurmd/smgr.c
index 59562ca7fdb..6f0884d0c5a 100644
--- a/src/slurmd/smgr.c
+++ b/src/slurmd/smgr.c
@@ -51,12 +51,12 @@
 #include "src/common/fd.h"
 #include "src/common/log.h"
+#include "src/common/setenvpf.h"
+#include "src/common/switch.h"
 #include "src/common/xsignal.h"
 #include "src/slurmd/smgr.h"
 #include "src/slurmd/ulimits.h"
-#include "src/slurmd/interconnect.h"
-#include "src/slurmd/setenvpf.h"
 #include "src/slurmd/io.h"
@@ -143,7 +143,8 @@ _session_mgr(slurmd_job_t *job)
 	 * Call interconnect_init() before becoming user
-	if (!job->batch && (interconnect_init(job) < 0)) {
+	if (!job->batch && 
+	    (interconnect_init(job->switch_job, job->uid) < 0)) {
 		/* error("interconnect_init: %m"); already logged */
@@ -187,7 +188,8 @@ _session_mgr(slurmd_job_t *job)
-	if (!job->batch && (interconnect_fini(job) < 0)) {
+	if (!job->batch && 
+	    (interconnect_fini(job->switch_job) < 0)) {
 		error("interconnect_fini: %m");
@@ -297,7 +299,9 @@ _exec_task(slurmd_job_t *job, int i)
 	if (!job->batch) {
-		if (interconnect_attach(job, i) < 0) {
+		if (interconnect_attach(job->switch_job, &job->env,
+				job->nodeid, i, job->nnodes,
+				job->nprocs, job->task[i]->gid) < 0) {
 			error("Unable to attach to interconnect: %m");
diff --git a/src/slurmd/ulimits.c b/src/slurmd/ulimits.c
index b8e6a0622f7..3feb5f196b2 100644
--- a/src/slurmd/ulimits.c
+++ b/src/slurmd/ulimits.c
@@ -35,10 +35,10 @@
 #include <string.h>
 #include "src/common/log.h"
+#include "src/common/setenvpf.h" /* For unsetenvp() */
 #include "src/common/xmalloc.h"
 #include "src/slurmd/job.h"
-#include "src/slurmd/setenvpf.h" /* For unsetenvp() */
 struct userlim {
 	char *var;
diff --git a/src/srun/allocate.c b/src/srun/allocate.c
index 2f382fc47b5..94c6273cf3f 100644
--- a/src/srun/allocate.c
+++ b/src/srun/allocate.c
@@ -328,9 +328,7 @@ create_job_step(job_t *job)
 	job->stepid  = resp->job_step_id;
 	job->cred    = resp->cred;
-#ifdef HAVE_ELAN
-	job->qsw_job = resp->qsw_job;
+	job->switch_job = resp->switch_job;
 	 * Recreate filenames which may depend upon step id
diff --git a/src/srun/job.h b/src/srun/job.h
index 177f6c2b6b9..b3972c40d5e 100644
--- a/src/srun/job.h
+++ b/src/srun/job.h
@@ -128,9 +128,7 @@ typedef struct srun_job {
 	task_state_t *task_state; /* ntask task states */
 	pthread_mutex_t task_mutex;
-#ifdef HAVE_ELAN
-	qsw_jobinfo_t qsw_job;
+	switch_jobinfo_t switch_job;
 	io_filename_t *ifname;
 	io_filename_t *ofname;
 	io_filename_t *efname;
diff --git a/src/srun/launch.c b/src/srun/launch.c
index 449f8ebcb74..2975bc0d6d5 100644
--- a/src/srun/launch.c
+++ b/src/srun/launch.c
@@ -237,10 +237,7 @@ launch(void *arg)
 		m->msg_type        = REQUEST_LAUNCH_TASKS;
 		m->data            = &msg_array_ptr[i];
 		memcpy(&m->address, &job->slurmd_addr[i], sizeof(slurm_addr));
-#ifdef HAVE_ELAN
-		r->qsw_job = job->qsw_job;
+		r->switch_job = job->switch_job;
 		if (opt.totalview)
diff --git a/src/srun/srun.c b/src/srun/srun.c
index 958341c2d1a..ea0db0fc960 100644
--- a/src/srun/srun.c
+++ b/src/srun/srun.c
@@ -51,6 +51,7 @@
 #include "src/common/fd.h"
 #include "src/common/log.h"
 #include "src/common/slurm_protocol_api.h"
+#include "src/common/switch.h"
 #include "src/common/xmalloc.h"
 #include "src/common/xsignal.h"
 #include "src/common/xstring.h"
@@ -92,12 +93,7 @@ static int   _run_batch_job (void);
 static void  _run_job_script(job_t *job);
 static int   _set_batch_script_env(job_t *job);
 static int   _set_rlimit_env(void);
-#ifdef HAVE_ELAN
-#  include "src/common/qsw.h"
-   static void _qsw_standalone(job_t *job);
+static void  _switch_standalone(job_t *job);
 int srun(int ac, char **av)
@@ -143,9 +139,7 @@ int main(int ac, char **av)
 		info("do not allocate resources");
 		job = job_create_noalloc(); 
-#ifdef HAVE_ELAN
-		_qsw_standalone(job);
+		_switch_standalone(job);
 	} else if ( (resp = existing_allocation()) ) {
 		if (opt.allocate) {
@@ -257,27 +251,17 @@ int main(int ac, char **av)
-#ifdef HAVE_ELAN
 static void
-_qsw_standalone(job_t *job)
+_switch_standalone(job_t *job)
-	int i;
-	bitstr_t bit_decl(nodeset, QSW_MAX_TASKS);
-	bool cyclic = (opt.distribution == SRUN_DIST_CYCLIC);
-	for (i = 0; i < job->nhosts; i++) {
-		int nodeid;
-		if ((nodeid = qsw_getnodeid_byhost(job->host[i])) < 0)
-			fatal("qsw_getnodeid_byhost: %m");
-		bit_set(nodeset, nodeid);
-	}
+	int cyclic = (opt.distribution == SRUN_DIST_CYCLIC);
-	if (qsw_alloc_jobinfo(&job->qsw_job) < 0)
-		fatal("qsw_alloc_jobinfo: %m");
-	if (qsw_setup_jobinfo(job->qsw_job, opt.nprocs, nodeset, cyclic) < 0)
-		fatal("qsw_setup_jobinfo: %m");
+	if (switch_alloc_jobinfo(&job->switch_job) < 0)
+		fatal("switch_alloc_jobinfo: %m");
+	if (switch_build_jobinfo(job->switch_job, job->nodelist, opt.nprocs, 
+				cyclic) < 0)
+		fatal("switch_build_jobinfo: %m");
-#endif /* HAVE_ELAN */
 static void 
diff --git a/testsuite/slurm_unit/common/ b/testsuite/slurm_unit/common/
index 1f212adac6e..33ae8950a66 100644
--- a/testsuite/slurm_unit/common/
+++ b/testsuite/slurm_unit/common/
@@ -1,6 +1,6 @@
 elan_testprogs = runqsw
 elan_testprogs =