diff --git a/src/common/bitstring.c b/src/common/bitstring.c
index 5b17ad989ee6f2f46cb3f26e31e5050b7b7843b5..d8fe992a81a8a6bad06d66faf536c4aa856c51a9 100644
--- a/src/common/bitstring.c
+++ b/src/common/bitstring.c
@@ -59,7 +59,7 @@ bit_realloc(bitstr_t *b, bitoff_t nbits)
 
 /*
  * Free a bitstr.
- * bp (IN/OUT) bitstr to be freed
+ * b (IN/OUT) bitstr to be freed
  */
 void
 bit_free(bitstr_t *b)
@@ -69,7 +69,19 @@ bit_free(bitstr_t *b)
 	_bitstr_magic(b) = 0;
 	free(b);
 }
-
+
+/*
+ * Return the number of possible bits in a bitstring.
+ * b (IN) bitstring to check
+ * RETURN number of bits allocated
+ */
+bitoff_t
+bit_size(bitstr_t *b)
+{
+	_assert_bitstr_valid(b);
+	return _bitstr_bits(b);
+}
+
 /*
  * Is bit N of bitstring b set?
  * b (IN) bitstring to test
@@ -338,19 +350,19 @@ bit_or(bitstr_t *b1, bitstr_t *b2) {
 bitstr_t *
 bit_copy(bitstr_t *b)
 {
-	bitoff_t bit;
 	bitstr_t *new;
+	size_t newsize;
 
 	_assert_bitstr_valid(b);
 
-	new = (bitstr_t *)malloc(_bitstr_words(_bitstr_bits(b)) * sizeof(bitstr_t));
-	if (new) {
-		_bitstr_magic(new) = BITSTR_MAGIC;
-		_bitstr_bits(new) = _bitstr_bits(b);
-
-		memcpy(&new[_bit_word(0)],
-			&b[_bit_word(0)], _bitstr_bits(b)/8);
-	}
+	/* copy the header words (magic, bit count) and the bits in one go */
+	newsize = _bitstr_words(_bitstr_bits(b)) * sizeof(bitstr_t);
+	new = (bitstr_t *)malloc(newsize);
+	if (new) {
+		memcpy(new, b, newsize);
+		/* malloc'ed copy: tag it BITSTR_MAGIC even if 'b' is BITSTR_MAGIC_STACK */
+		_bitstr_magic(new) = BITSTR_MAGIC;
+	}
 
 	return new;
 }
diff --git a/src/common/bitstring.h b/src/common/bitstring.h
index 3a75196408dd59f54cc5c8c80d792eeba8222db2..1c0d306dfc4d9a1a66496b3819475d95478ef03d 100644
--- a/src/common/bitstring.h
+++ b/src/common/bitstring.h
@@ -91,7 +91,7 @@ typedef bitstr_t bitoff_t;
 	(name)[_bitstr_words(nbits)] = { BITSTR_MAGIC_STACK, (nbits) }
 
 /* compat with Vixie macros */
-bitstr_t *bit_allocbit_alloc(bitoff_t nbits);
+bitstr_t *bit_alloc(bitoff_t nbits);
 int bit_test(bitstr_t *b, bitoff_t bit);
 void bit_set(bitstr_t *b, bitoff_t bit);
 void bit_clear(bitstr_t *b, bitoff_t bit);
@@ -105,6 +105,7 @@ bitoff_t bit_ffs(bitstr_t *b);
 /* new */
 void bit_free(bitstr_t *b);
 bitstr_t *bit_realloc(bitstr_t *b, bitoff_t nbits);
+bitoff_t bit_size(bitstr_t *b);
 void bit_and(bitstr_t *b1, bitstr_t *b2);
 void bit_or(bitstr_t *b1, bitstr_t *b2);
 int bit_set_count(bitstr_t *b);
diff --git a/src/common/qsw.c b/src/common/qsw.c
new file mode 100644
index 0000000000000000000000000000000000000000..393bff542306df1a268e9510ba3cf797efad9935
--- /dev/null
+++ b/src/common/qsw.c
@@ -0,0 +1,434 @@
+/*
+ * $Id$
+ *
+ * Library routines for initiating jobs on QsNet.
+ */
+
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <syslog.h>
+#include <errno.h>
+#include <string.h>
+#include <paths.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <limits.h>	/* INT_MAX */
+#include <stdio.h>
+#include <elan3/elan3.h>
+#include <elan3/elanvp.h>
+#include <rms/rmscall.h>
+
+#include "bitstring.h"
+#include "qsw.h"
+
+/*
+ * Definitions local to this module.
+ */
+
+#define QSW_JOBINFO_MAGIC	0xf00ff00e
+#define QSW_CKPT_MAGIC		0xf00ff00f
+
+/* we will allocate program descriptions in this range */
+/* XXX note: do not start at zero as libelan shifts to get unique shm id */
+#define QSW_PRG_START	1
+#define QSW_PRG_END	INT_MAX
+
+/*
+ * Macros
+ */
+
+/* Copy library state */
+#define _copy_libstate(dest, src) do { \
+	assert((src)->ls_magic == QSW_CKPT_MAGIC); \
+	memcpy(dest, src, sizeof(struct qsw_libstate)); \
+} while (0)
+
+/*
+ * Globals
+ */
+
+struct qsw_libstate *qsw_internal_state = NULL;
+
+/*
+ * Seed the lrand48() generator with our pid, on the first call only.
+ */
+static void
+_srand_if_needed(void)
+{
+	static int done = 0;
+
+	if (!done) {
+		srand48(getpid());
+		done = 1;
+	}
+}
+
+/*
+ * Initialize this library. If called, qsw_create_jobinfo() and
+ * qsw_destroy_jobinfo() will use consecutive integers for program
+ * descriptions. If not called, those functions will use random numbers.
+ * Internal state is initialized from 'oldstate' if non-null.
+ * oldstate (IN) state saved by qsw_fini(), or NULL for defaults
+ * RETURN 0 on success, -1 with errno set to ENOMEM on failure
+ */
+int
+qsw_init(struct qsw_libstate *oldstate)
+{
+	struct qsw_libstate *new;
+
+	_srand_if_needed();
+
+	assert(qsw_internal_state == NULL);
+
+	new = (struct qsw_libstate *)malloc(sizeof(*new));
+	if (!new) {
+		errno = ENOMEM;
+		return -1;
+	}
+	if (oldstate)
+		_copy_libstate(new, oldstate);
+	else {
+		new->ls_magic = QSW_CKPT_MAGIC;
+		new->ls_prognum = QSW_PRG_START;
+		new->ls_hwcontext = ELAN_USER_BASE_CONTEXT_NUM;
+	}
+	qsw_internal_state = new;
+	return 0;
+}
+
+/*
+ * Finalize use of this library. If 'savestate' is non-NULL, final
+ * state is copied there before it is destroyed.
+ * savestate (OUT) receives the final library state, may be NULL
+ */
+void
+qsw_fini(struct qsw_libstate *savestate)
+{
+	assert(qsw_internal_state != NULL);
+	if (savestate)
+		_copy_libstate(savestate, qsw_internal_state);
+	qsw_internal_state->ls_magic = 0;
+	free(qsw_internal_state);
+	qsw_internal_state = NULL;
+}
+
+/*
+ * Set the bits in cap->Bitmap that correspond to allocated process slots.
+ * Node N owns the procs_per_node consecutive bits starting at bit
+ * (N - cap->LowNode) * procs_per_node; no bits are set here for process
+ * slots on unallocated nodes. For example, if nodes 4 and 6 are
+ * running two processes per node, bits 0,1 (corresponding to the two
+ * processes on node 4) and bits 4,5 (corresponding to the two processes
+ * running on node 6) are set.
+ * cap (IN/OUT) capability to update, cap->LowNode must already be set
+ * procs_per_node (IN) number of process slots on each node
+ * nodeset (IN) bitstring with a bit set for each allocated node
+ */
+static void
+_setcapbitmap(ELAN_CAPABILITY *cap, int procs_per_node, bitstr_t *nodeset)
+{
+	int i, j, proc0;
+
+	for (i = 0; i < bit_size(nodeset); i++) {
+		if (!bit_test(nodeset, i))
+			continue;
+		/* first slot of node i does not depend on j: compute it once */
+		proc0 = (i - cap->LowNode) * procs_per_node;
+		for (j = 0; j < procs_per_node; j++) {
+			/* explicit cast: a negative slot must trip the assert too */
+			assert((size_t)(proc0 + j) < sizeof(cap->Bitmap) * 8);
+			BT_SET(cap->Bitmap, proc0 + j);
+		}
+	}
+}
+
+/*
+ * Allocate a program description number. The program description is the key
+ * abstraction maintained by the rms.o kernel module. It is like an
+ * inescapable process group. If the library is initialized, we allocate
+ * these consecutively, otherwise we generate a random one, assuming we are
+ * being called by a transient program like pdsh. Ref: rms_prgcreate(3).
+ */
+static int
+_generate_prognum(void)
+{
+	int new;
+
+	if (qsw_internal_state) {
+		new = qsw_internal_state->ls_prognum;
+		if (new == QSW_PRG_END)
+			qsw_internal_state->ls_prognum = QSW_PRG_START;
+		else
+			qsw_internal_state->ls_prognum++;
+	} else {
+		_srand_if_needed();
+		new = lrand48() % (QSW_PRG_END - QSW_PRG_START + 1);
+		new += QSW_PRG_START;
+	}
+	return new;
+}
+
+/*
+ * Elan hardware context numbers must be unique per node.
+ * One is allocated to each parallel process. In order for processes
+ * on the same node to communicate, they must use contexts in the
+ * hi-lo range of a common capability.
+ * If the library is initialized, we allocate these consecutively, otherwise
+ * we generate a random one, assuming we are being called by a transient
+ * program like pdsh. Ref: rms_setcap(3).
+ */
+static int
+_generate_hwcontext(void)
+{
+	int new;
+
+	if (qsw_internal_state) {
+		new = qsw_internal_state->ls_hwcontext;
+		if (new == ELAN_USER_TOP_CONTEXT_NUM)
+			qsw_internal_state->ls_hwcontext = ELAN_USER_BASE_CONTEXT_NUM;
+		else
+			qsw_internal_state->ls_hwcontext++;
+	} else {
+		_srand_if_needed();
+		new = lrand48() % (ELAN_USER_TOP_CONTEXT_NUM - ELAN_USER_BASE_CONTEXT_NUM + 1);
+		new += ELAN_USER_BASE_CONTEXT_NUM;
+	}
+	return new;
+}
+
+/*
+ * UserKey is 128 bits of randomness which should be kept private.
+ * NOTE(review): lrand48() yields 31 bits per call and is seeded from the
+ * pid by _srand_if_needed(), so the key is weaker than 128 bits; consider
+ * a stronger entropy source if the key must resist guessing.
+ * key (OUT) receives the generated key
+ */
+static void
+_generate_capkey(ELAN_USERKEY *key)
+{
+	int i;
+
+	_srand_if_needed();
+	for (i = 0; i < 4; i++)
+		key->Values[i] = lrand48();
+}
+
+/*
+ * Fill in an Elan capability describing a job.
+ * cap (OUT) capability to initialize
+ * nprocs (IN) total number of processes in the job
+ * nnodes (IN) number of allocated nodes, used to derive procs per node
+ * nodeset (IN) bitstring with a bit set for each allocated node
+ * cyclic_alloc (IN) nonzero for cyclic, zero for block process allocation
+ */
+static void
+_init_elan_capability(ELAN_CAPABILITY *cap, int nprocs, int nnodes,
+		bitstr_t *nodeset, int cyclic_alloc)
+{
+	int procs_per_node = nprocs / nnodes;
+
+	/*
+	 * Initialize for single rail and either block or cyclic allocation.
+	 * Set ELAN_CAP_TYPE_BROADCASTABLE later if appropriate.
+	 */
+	elan3_nullcap(cap);
+	if (cyclic_alloc)
+		cap->Type = ELAN_CAP_TYPE_CYCLIC;
+	else
+		cap->Type = ELAN_CAP_TYPE_BLOCK;
+	cap->Type |= ELAN_CAP_TYPE_MULTI_RAIL;
+	cap->RailMask = 1;
+
+	_generate_capkey(&cap->UserKey);
+
+	cap->LowContext = _generate_hwcontext();
+	cap->HighContext = cap->LowContext + procs_per_node - 1;
+	/* not necessary to initialize cap->MyContext */
+
+	cap->LowNode = bit_ffs(nodeset);
+	assert(cap->LowNode != -1);
+	cap->HighNode = bit_fls(nodeset);
+	assert(cap->HighNode != -1);
+
+	/* set up cap->Bitmap to describe the mapping of processes to nodes */
+	_setcapbitmap(cap, procs_per_node, nodeset);
+
+	/*
+	 * Set cap->Entries and add broadcast bit to cap->type based on
+	 * cap->HighNode and cap->LowNode values set above.
+	 * NOTE(review): the node span is compared against the process count;
+	 * confirm this is the intended test for a broadcastable range.
+	 */
+	cap->Entries = nprocs;
+	if (abs(cap->HighNode - cap->LowNode) == cap->Entries)
+		cap->Type |= ELAN_CAP_TYPE_BROADCASTABLE;
+}
+
+/*
+ * Create all the QsNet related information needed to set up a QsNet parallel
+ * program and store it in the qsw_jobinfo struct.
+ * jp (OUT) receives the new jobinfo, free with qsw_destroy_jobinfo()
+ * nprocs (IN) total number of processes, a multiple of the node count
+ * nodeset (IN) bitstring with a bit set for each allocated node
+ * cyclic_alloc (IN) nonzero for cyclic, zero for block process allocation
+ * RETURN 0 on success, -1 with errno set to EINVAL or ENOMEM on failure
+ */
+int
+qsw_create_jobinfo(struct qsw_jobinfo **jp, int nprocs, bitstr_t *nodeset,
+		int cyclic_alloc)
+{
+	struct qsw_jobinfo *new;
+	int nnodes = bit_set_count(nodeset);
+
+	assert(jp != NULL);
+
+	/* sanity check on args */
+	if (nprocs <= 0 || nprocs > ELAN_MAX_VPS
+			|| nnodes == 0 || nprocs % nnodes != 0) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	/* allocate space */
+	new = (struct qsw_jobinfo *)malloc(sizeof(*new));
+	if (!new) {
+		errno = ENOMEM;
+		return -1;
+	}
+
+	/* take a private copy of the nodeset before consuming a prognum */
+	new->j_nodeset = bit_copy(nodeset);
+	if (!new->j_nodeset) {
+		free(new);
+		errno = ENOMEM;
+		return -1;
+	}
+
+	new->j_magic = QSW_JOBINFO_MAGIC;
+	new->j_nprocs = nprocs;
+	new->j_cyclic_alloc = cyclic_alloc;
+	new->j_prognum = _generate_prognum();
+	_init_elan_capability(&new->j_cap, nprocs, nnodes, nodeset,
+			cyclic_alloc);
+
+	/* success! */
+	*jp = new;
+	return 0;
+}
+
+/*
+ * Destroy a jobinfo_t structure and free associated storage.
+ * jobinfo (IN/OUT) jobinfo to be destroyed
+ */
+void
+qsw_destroy_jobinfo(struct qsw_jobinfo *jobinfo)
+{
+	assert(jobinfo->j_magic == QSW_JOBINFO_MAGIC);
+	bit_free(jobinfo->j_nodeset);
+	jobinfo->j_magic = 0;
+	free(jobinfo);
+}
+
+/* Stub: currently does nothing and returns 0. */
+int
+qsw_create_prg(struct qsw_jobinfo *jobinfo)
+{
+	return 0;
+}
+
+/* Stub: currently does nothing and returns 0. */
+int
+qsw_destroy_prg(struct qsw_jobinfo *jobinfo)
+{
+	return 0;
+}
+
+/* Stub: currently does nothing and returns 0. */
+int
+qsw_attach(struct qsw_jobinfo *jobinfo, int procnum)
+{
+	return 0;
+}
+
+#ifdef DEBUG_MODULE
+static void
+_dump_jobinfo(struct qsw_jobinfo *jobinfo)
+{
+	char tmpstr[1024];
+
+	assert(jobinfo->j_magic == QSW_JOBINFO_MAGIC);
+	printf("__________________\n");
+	printf("jobinfo.prognum=%d\n", jobinfo->j_prognum);
+	printf("jobinfo.nprocs=%d\n", jobinfo->j_nprocs);
+	bit_fmt(tmpstr, sizeof(tmpstr), jobinfo->j_nodeset);
+	printf("jobinfo.nodeset=[%s]\n", tmpstr);
+	printf("------------------\n");
+}
+
+static void
+_safe_mkjob(struct qsw_jobinfo **jp, int nprocs, bitstr_t *nodeset,
+		int cyclic_alloc)
+{
+	if (qsw_create_jobinfo(jp, nprocs, nodeset, cyclic_alloc) < 0) {
+		perror("qsw_create_jobinfo");
+		exit(1);
+	}
+}
+
+int
+main(int argc, char *argv[])
+{
+	struct qsw_libstate libstate;
+	struct qsw_jobinfo *job;
+	bitstr_t *nodeset = bit_alloc(42);
+
+	if (!nodeset) {
+		fprintf(stderr, "bit_alloc failed\n");
+		exit(1);
+	}
+	bit_nset(nodeset, 4, 7);
+
+	/* library not initialized: random program description */
+	_safe_mkjob(&job, 4, nodeset, 0);
+	_dump_jobinfo(job);
+	qsw_destroy_jobinfo(job);
+
+	qsw_init(NULL);
+
+	/* library initialized with defaults */
+	_safe_mkjob(&job, 4, nodeset, 0);
+	_dump_jobinfo(job);
+	qsw_destroy_jobinfo(job);
+
+	qsw_fini(NULL);
+
+	qsw_init(NULL);
+
+	/* fresh defaults again; final state is saved this time */
+	_safe_mkjob(&job, 4, nodeset, 0);
+	_dump_jobinfo(job);
+	qsw_destroy_jobinfo(job);
+
+	qsw_fini(&libstate);
+
+	qsw_init(&libstate);
+
+	/* state restored from the previous run */
+	_safe_mkjob(&job, 4, nodeset, 0);
+	_dump_jobinfo(job);
+	qsw_destroy_jobinfo(job);
+
+	qsw_fini(NULL);
+
+	bit_free(nodeset);
+	exit(0);
+}
+#endif /* DEBUG_MODULE */
diff --git a/src/common/qsw.h b/src/common/qsw.h
new file mode 100644
index 0000000000000000000000000000000000000000..86fed5ec16235d0b2350362c1bdb56572cc22a48
--- /dev/null
+++ b/src/common/qsw.h
@@ -0,0 +1,36 @@
+/*
+ * $Id$
+ *
+ * Copyright (C) 2001-2002 Regents of the University of California
+ * See ./DISCLAIMER
+ */
+
+#ifndef _QSW_INCLUDED
+#define _QSW_INCLUDED
+
+struct qsw_libstate {
+	int ls_magic;
+	int ls_prognum;
+	int ls_hwcontext;
+};
+
+struct qsw_jobinfo {
+	int j_magic;
+	int j_prognum;
+	bitstr_t *j_nodeset;
+	int j_nprocs;
+	int j_cyclic_alloc;
+	ELAN_CAPABILITY j_cap;
+};
+
+int qsw_init(struct qsw_libstate *ls);
+void qsw_fini(struct qsw_libstate *ls);
+int qsw_create_jobinfo(struct qsw_jobinfo **jp, int nprocs, bitstr_t *nodeset,
+		int cyclic_alloc);
+void qsw_destroy_jobinfo(struct qsw_jobinfo *jp);
+int qsw_create_prg(struct qsw_jobinfo *jobinfo);
+int qsw_destroy_prg(struct qsw_jobinfo *jobinfo);
+int qsw_attach(struct qsw_jobinfo *jobinfo, int procnum);
+
+
+#endif /* _QSW_INCLUDED */