diff --git a/src/common/qsw.c b/src/common/qsw.c
index 93911e481c1e80a87dfb262ec5e4814c602e401c..600b2cf209e533ee3857057eb2205099ed23ed6b 100644
--- a/src/common/qsw.c
+++ b/src/common/qsw.c
@@ -61,6 +61,9 @@
 
 struct qsw_libstate *qsw_internal_state = NULL;
 
+/*
+ * Seed the rng once per program invocation.
+ */
 static void
 _srand_if_needed(void)
 {
@@ -120,11 +123,14 @@ qsw_fini(struct qsw_libstate *savestate)
 
 /*
  * Allocate a program description number.  Program descriptions, which are the
- * key abstraction maintained by the rms.o kernel module,  must be unique
- * per node.  It is like an inescapable process group.  If the library is 
- * initialized, we allocate these consecutively, otherwise we generate a 
- * random one, assuming we are being called by a transient program like pdsh.  
- * Ref: rms_prgcreate(3).
+ * key abstraction maintained by the rms.o kernel module, must not be used
+ * more than once simultaneously on a single node.  We allocate one to each
+ * parallel job, which more than meets this requirement.  A program description
+ * can be compared to a process group, except there is no way for a process to
+ * disassociate itself or its children from the program description.  
+ * If the library is initialized, we allocate these consecutively, otherwise 
+ * we generate a random one, assuming we are being called by a transient 
+ * program like pdsh.  Ref: rms_prgcreate(3).
  */
 static int
 _generate_prognum(void)
@@ -146,10 +152,11 @@ _generate_prognum(void)
 }
 
 /*
- * Elan hardware context numbers must be unique per node.  One is allocated 
- * to each parallel process.  In order for processes on the same node to 
- * communicate, they must use contexts in the hi-lo range of a common 
- * capability.
+ * Elan hardware context numbers are an adapter resource that must not be used
+ * more than once on a single node.  One is allocated to each process on the
+ * node that will be communicating over Elan.  In order for processes on the
+ * same node to communicate with one another and with other nodes across QsNet,
+ * they must use contexts in the hi-lo range of a common capability.
  * If the library is initialized, we allocate these consecutively, otherwise 
  * we generate a random one, assuming we are being called by a transient 
  * program like pdsh.  Ref: rms_setcap(3).
@@ -213,7 +220,7 @@ _init_elan_capability(ELAN_CAPABILITY *cap, int nprocs, int nnodes,
 	cap->Entries = nprocs;
 
 	/* set the hw broadcast bit if consecutive nodes */
-	if (abs(cap->HighNode - cap->LowNode) == nnodes)
+	if (abs(cap->HighNode - cap->LowNode) == nnodes - 1)
 		cap->Type |= ELAN_CAP_TYPE_BROADCASTABLE;
 
 	/*
@@ -254,7 +261,8 @@ qsw_create_jobinfo(struct qsw_jobinfo **jp, int nprocs, bitstr_t *nodeset,
 	assert(jp != NULL);
 
 	/* sanity check on args */
-	if (nprocs <= 0 || nprocs > ELAN_MAX_VPS || nnodes == 0 
+	/* Note: ELAN_MAX_VPS is 512 on "old" Elan driver, 16384 on new. */
+	if (nprocs <= 0 || nprocs > ELAN_MAX_VPS || nnodes <= 0 
 			|| (nprocs % nnodes) != 0) {
 		errno = EINVAL;
 		return -1;
@@ -289,31 +297,113 @@ qsw_destroy_jobinfo(struct qsw_jobinfo *jobinfo)
 	free(jobinfo);
 }
 
+/* Call this in a forked child.  Parent will call qsw_destroy_prg().
+ * Returns 0, or -1 with errno set by the failing call.  NOTE(review): the
+ * Elan context obtained here is never released in this function - confirm. */
 int
-qsw_create_prg(struct qsw_jobinfo *jobinfo)
+qsw_create_prg(struct qsw_jobinfo *jobinfo, uid_t uid)
 {
+	ELAN3_CTX *ctx;
+
+	/* obtain an Elan context to use in call to elan3_create */
+	if ((ctx = _elan3_init(0)) == NULL) {
+		/* _elan3_init sets errno */
+		return -1;
+	}
+
+	/* associate this process and its children with prgnum */
+	if (rms_prgcreate(jobinfo->j_prognum, uid, 1) < 0) {
+		/* rms_prgcreate sets errno */
+		return -1;
+	}
+
+      	/* make cap known via rms_getcap/rms_ncaps to members of this prgnum */
+	if (elan3_create(ctx, &jobinfo->j_cap) < 0) {
+		/* elan3_create sets errno */
+		return -1;
+	}
+	if (rms_prgaddcap(jobinfo->j_prognum, 0, &jobinfo->j_cap) < 0) {
+		/* rms_prgaddcap sets errno */
+		return -1;
+	}
+
 	return 0;
 }
 
+/*
+ * Destroy the program description.  Call this in the parent of the
+ * process that calls qsw_create_prg.  Returns 0 on success, or -1 with
+ * errno set; errno ECHILD means processes in the program group are active.
+ */
 int
 qsw_destroy_prg(struct qsw_jobinfo *jobinfo)
 {
+	if (rms_prgdestroy(jobinfo->j_prognum) < 0) {
+		/* rms_prgdestroy sets errno */
+		return -1;
+	}
 	return 0;
 }
 
 int
 qsw_attach(struct qsw_jobinfo *jobinfo, int procnum)
 {
+	/*
+	 * Assign elan hardware context to current process.
+	 * - arg1 (0 below) is an index into the kernel's list of caps for this
+	 *   program desc (added by rms_prgaddcap).  There will be one per rail.
+	 * - arg2 indexes the hw ctxt range in the capability
+	 *   [cap->LowContext, cap->HighContext]
+	 * Returns 0 on success, -1 with errno set.  jobinfo is not used here.
+	 */
+	if (rms_setcap(0, procnum) < 0) {
+		/* rms_setcap sets errno */
+		return -1;
+	}
 	return 0;
 }
 
 #ifdef DEBUG_MODULE
+#define TRUNC_BITMAP 1	/* nonzero: print only the low 64 bits of the bitmap */
+static void
+_dump_capbitmap(ELAN_CAPABILITY *cap)	/* print cap->Bitmap, high bit first */
+{
+	int bit_max = sizeof(cap->Bitmap)*8 - 1;	/* index of highest bit */
+	int bit;
+#if TRUNC_BITMAP
+	bit_max = bit_max > 63 ? 63 : bit_max;	/* 64 bits = indices 63..0 */
+#endif
+	for (bit = bit_max; bit >= 0; bit--)
+		printf("%c", BT_TEST(cap->Bitmap, bit) ? '1' : '0');
+	printf("\n");
+}
+
 static void
 _dump_jobinfo(struct qsw_jobinfo *jobinfo)
 {
+	ELAN_CAPABILITY *cap;	/* points at jobinfo->j_cap */
+
 	assert(jobinfo->j_magic == QSW_JOBINFO_MAGIC);
+
 	printf("__________________\n");
-	printf("jobinfo.prognum=%d\n", jobinfo->j_prognum);
+	printf("prognum=%d\n", jobinfo->j_prognum);
+
+	cap = &jobinfo->j_cap;
+	printf("cap.UserKey=%8.8x.%8.8x.%8.8x.%8.8x\n",
+			cap->UserKey.Values[0], cap->UserKey.Values[1],
+			cap->UserKey.Values[2], cap->UserKey.Values[3]);
+	printf("cap.Version=%d\n", cap->Version);
+	printf("cap.Type=0x%x\n", cap->Type);
+	printf("cap.Generation=%d\n", cap->Generation);
+	printf("cap.LowContext=%d\n", cap->LowContext);
+	printf("cap.HighContext=%d\n", cap->HighContext);
+	printf("cap.MyContext=%d\n", cap->MyContext);
+	printf("cap.LowNode=%d\n", cap->LowNode);
+	printf("cap.HighNode=%d\n", cap->HighNode);
+	printf("cap.Entries=%d\n", cap->Entries);
+	printf("cap.Railmask=0x%x\n", cap->RailMask);
+	printf("cap.Bitmap=");	/* bitmap digits follow on the same line */
+	_dump_capbitmap(cap);
 	printf("------------------\n");
 }
 
@@ -336,7 +426,7 @@ main(int argc, char *argv[])
 
 	bit_nset(nodeset, 4, 7);
 
-	_safe_mkjob(&job, 4, nodeset, 0);
+	_safe_mkjob(&job, 8, nodeset, 0);
 	_dump_jobinfo(job);
 	qsw_destroy_jobinfo(job);
 	
@@ -362,6 +452,14 @@ main(int argc, char *argv[])
 	_dump_jobinfo(job);
 	qsw_destroy_jobinfo(job);
 
+	_safe_mkjob(&job, 12, nodeset, 1);
+	_dump_jobinfo(job);
+	qsw_destroy_jobinfo(job);
+
+	_safe_mkjob(&job, 513, nodeset, 1);
+	_dump_jobinfo(job);
+	qsw_destroy_jobinfo(job);
+
 	qsw_fini(NULL);
 
 	exit(0);
diff --git a/src/common/qsw.h b/src/common/qsw.h
index 0b8411d367b8db0d4f4859c59a5f6cdef70ecd8a..70b6ada7e6301e59cdf4063a9c329d8bcab2812a 100644
--- a/src/common/qsw.h
+++ b/src/common/qsw.h
@@ -20,11 +20,11 @@ struct qsw_jobinfo {
 	ELAN_CAPABILITY j_cap;
 };
 
-int qsw_init(struct qsw_libstate *ls);
-void qsw_fini(struct qsw_libstate *ls);
-int qsw_create_jobinfo(struct qsw_jobinfo **jp, int nprocs, bitstr_t *nodeset, 
-			int cyclic_alloc);
-void qsw_destroy_jobinfo(struct qsw_jobinfo *jp);
+int 	qsw_init(struct qsw_libstate *ls);
+void 	qsw_fini(struct qsw_libstate *ls);
+int 	qsw_create_jobinfo(struct qsw_jobinfo **jp, int nprocs, 
+		bitstr_t *nodeset, int cyclic_alloc);
+void 	qsw_destroy_jobinfo(struct qsw_jobinfo *jp);
 
 
 #endif /* _QSW_INCLUDED */