diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index 5da59bf2736011392d6687cadf0dbce7bee0a645..8f421529665c7bfad95ce7824b90df4ccbf46d5e 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -747,22 +747,32 @@ All options are case in\-sensitive. Supported values include: .RS .TP 12 -\fBBULK_XFER\fR [=<\fIresources\fR>] +\fBBULK_XFER\fR[=<\fIresources\fR>] Enable bulk transfer of data using Remote Direct-Memory Access (RDMA). The optional \fIresources\fR specification is a numeric value which can have a suffix of "k", "K", "m", "M", "g" or "G" for kilobytes, megabytes or gigabytes. .TP -\fBDEVNAME\fR =<\fIname\fR> +\fBCAU\fR=<\fIcount\fR> +Number of Collective Acceleration Units (CAU) required. +Applies only to IBM Power7 processors. +Default value is zero. +.TP +\fBDEVNAME\fR=<\fIname\fR> Specify the device name to use for communications (e.g. "eth0" or "mlx4_0"). .TP -\fBDEVTYPE\fR =<\fItype\fR> +\fBDEVTYPE\fR=<\fItype\fR> Specify the device type to use for communications. The supported values of \fItype\fR are: "IB" (InfiniBand), "HFI" (P7 Host Fabric Interface), "IPONLY" (IP-Only interfaces), "HPCE" (HPC Ethernet), and "KMUX" (Kernel Emulation of HPCE). .TP +\fBIMMED\fR=<\fIcount\fR> +Number of immediate send slots per window required. +Applies only to IBM Power7 processors. +Default value is zero. +.TP \fBINSTANCES\fR =<\fIcount\fR> Specify number of network connections for each task on each network connection. The default instance count is 1. diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1 index 639584a42692d880f4ec0fc87fe50833ee555f26..6cb363827e2004153d50502fe225458da6c598ba 100644 --- a/doc/man/man1/sbatch.1 +++ b/doc/man/man1/sbatch.1 @@ -810,23 +810,33 @@ All options are case in\-sensitive. Supported values include: .RS .TP 12 -\fBBULK_XFER\fR [=<\fIresources\fR>] +\fBBULK_XFER\fR[=<\fIresources\fR>] Enable bulk transfer of data using Remote Direct-Memory Access (RDMA). 
The optional \fIresources\fR specification is a numeric value which can have a suffix of "k", "K", "m", "M", "g" or "G" for kilobytes, megabytes or gigabytes. .TP -\fBDEVNAME\fR =<\fIname\fR> +\fBCAU\fR=<\fIcount\fR> +Number of Collective Acceleration Units (CAU) required. +Applies only to IBM Power7 processors. +Default value is zero. +.TP +\fBDEVNAME\fR=<\fIname\fR> Specify the device name to use for communications (e.g. "eth0" or "mlx4_0"). .TP -\fBDEVTYPE\fR =<\fItype\fR> +\fBDEVTYPE\fR=<\fItype\fR> Specify the device type to use for communications. The supported values of \fItype\fR are: "IB" (InfiniBand), "HFI" (P7 Host Fabric Interface), "IPONLY" (IP-Only interfaces), "HPCE" (HPC Ethernet), and "KMUX" (Kernel Emulation of HPCE). .TP -\fBINSTANCES\fR =<\fIcount\fR> +\fBIMMED\fR=<\fIcount\fR> +Number of immediate send slots per window required. +Applies only to IBM Power7 processors. +Default value is zero. +.TP +\fBINSTANCES\fR=<\fIcount\fR> Specify number of network connections for each task on each network connection. The default instance count is 1. .TP diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index aaf4a9faee74f61b0d7182eba83e77edb8e3d8bf..59fa01e89bb14a4ae7c19ffbede8992e180a0d7a 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -849,23 +849,33 @@ All options are case in\-sensitive. Supported values include: .RS .TP 12 -\fBBULK_XFER\fR [=<\fIresources\fR>] +\fBBULK_XFER\fR[=<\fIresources\fR>] Enable bulk transfer of data using Remote Direct-Memory Access (RDMA). The optional \fIresources\fR specification is a numeric value which can have a suffix of "k", "K", "m", "M", "g" or "G" for kilobytes, megabytes or gigabytes. .TP -\fBDEVNAME\fR =<\fIname\fR> +\fBCAU\fR=<\fIcount\fR> +Number of Collective Acceleration Units (CAU) required. +Applies only to IBM Power7 processors. +Default value is zero. +.TP +\fBDEVNAME\fR=<\fIname\fR> Specify the device name to use for communications (e.g. "eth0" or "mlx4_0"). 
.TP -\fBDEVTYPE\fR =<\fItype\fR> +\fBDEVTYPE\fR=<\fItype\fR> Specify the device type to use for communications. The supported values of \fItype\fR are: "IB" (InfiniBand), "HFI" (P7 Host Fabric Interface), "IPONLY" (IP-Only interfaces), "HPCE" (HPC Ethernet), and "KMUX" (Kernel Emulation of HPCE). .TP -\fBINSTANCES\fR =<\fIcount\fR> +\fBIMMED\fR=<\fIcount\fR> +Number of immediate send slots per window required. +Applies only to IBM Power7 processors. +Default value is zero. +.TP +\fBINSTANCES\fR=<\fIcount\fR> Specify number of network connections for each task on each network connection. The default instance count is 1. .TP diff --git a/src/plugins/switch/nrt/nrt.c b/src/plugins/switch/nrt/nrt.c index 8610bb98da1940e7f7121a7c4f6a00d31613ce53..43dbfaf3e4663cccf44d69d492d4d0efffe8a390 100644 --- a/src/plugins/switch/nrt/nrt.c +++ b/src/plugins/switch/nrt/nrt.c @@ -2400,7 +2400,8 @@ nrt_build_jobinfo(slurm_nrt_jobinfo_t *jp, hostlist_t hl, uint16_t *tasks_per_node, uint32_t **tids, bool sn_all, char *adapter_name, nrt_adapter_t dev_type, bool bulk_xfer, uint32_t bulk_xfer_resources, - bool ip_v4, bool user_space, char *protocol, int instances) + bool ip_v4, bool user_space, char *protocol, int instances, + int cau, int immed) { int nnodes, nprocs = 0; hostlist_iterator_t hi; @@ -2412,7 +2413,7 @@ nrt_build_jobinfo(slurm_nrt_jobinfo_t *jp, hostlist_t hl, int network_id = -1; int adapter_type_count = 0; nrt_protocol_table_t *protocol_table = NULL; - +error("cau=%d immed=%d", cau, immed); assert(jp); assert(jp->magic == NRT_JOBINFO_MAGIC); assert(tasks_per_node); diff --git a/src/plugins/switch/nrt/slurm_nrt.h b/src/plugins/switch/nrt/slurm_nrt.h index 662ba5f413c1f1942f61512693bb8d1d79065003..e507953980705c23782fc24f30b0c41b23688cd1 100644 --- a/src/plugins/switch/nrt/slurm_nrt.h +++ b/src/plugins/switch/nrt/slurm_nrt.h @@ -102,7 +102,7 @@ extern int nrt_build_jobinfo(slurm_nrt_jobinfo_t *jp, hostlist_t hl, char *adapter_name, nrt_adapter_t dev_type, bool bulk_xfer, 
uint32_t bulk_xfer_resources, bool ip_v4, bool user_space, char *protocol, - int instances); + int instances, int cau, int immed); extern int nrt_pack_jobinfo(slurm_nrt_jobinfo_t *jp, Buf buf); extern int nrt_unpack_jobinfo(slurm_nrt_jobinfo_t *jp, Buf buf); extern slurm_nrt_jobinfo_t *nrt_copy_jobinfo(slurm_nrt_jobinfo_t *jp); diff --git a/src/plugins/switch/nrt/switch_nrt.c b/src/plugins/switch/nrt/switch_nrt.c index c717f99ce998e31d9f78738ad58f10eb7cfce529..21d1271f60037bc77eaff347d8298bfef07d5960 100644 --- a/src/plugins/switch/nrt/switch_nrt.c +++ b/src/plugins/switch/nrt/switch_nrt.c @@ -407,7 +407,7 @@ extern int switch_p_build_jobinfo(switch_jobinfo_t *switch_job, char *nodelist, bool bulk_xfer = false, ip_v4 = true, user_space = false; uint32_t bulk_xfer_resources = 0; bool sn_all = true; /* default to sn_all */ - int instances = 1; + int cau = 0, immed = 0, instances = 1; int dev_type = NRT_MAX_ADAPTER_TYPES; int err = SLURM_SUCCESS; char *adapter_name = NULL; @@ -522,6 +522,34 @@ extern int switch_p_build_jobinfo(switch_jobinfo_t *switch_job, char *nodelist, } else if (!strcasecmp(token, "sn_single")) { sn_all = false; + /* Collective Acceleration Units (CAU) */ + } else if (!strncasecmp(token, "cau=", 4)) { + long int count; + char *end_ptr = NULL; + count = strtol(token+4, &end_ptr, 10); + if ((end_ptr[0] == 'k') || (end_ptr[0] == 'K')) + count *= 1024; + if (count >= 0) + cau = count; + else { + info("switch/nrt: invalid option: %s", token); + err = SLURM_ERROR; + } + + /* Immediate Send Slots Per Window */ + } else if (!strncasecmp(token, "immed=", 6)) { + long int count; + char *end_ptr = NULL; + count = strtol(token+6, &end_ptr, 10); + if ((end_ptr[0] == 'k') || (end_ptr[0] == 'K')) + count *= 1024; + if (count >= 0) + immed = count; + else { + info("switch/nrt: invalid option: %s", token); + err = SLURM_ERROR; + } + /* other */ } else { info("switch/nrt: invalid option: %s", token); @@ -544,7 +572,7 @@ extern int 
switch_p_build_jobinfo(switch_jobinfo_t *switch_job, char *nodelist, adapter_name, dev_type, bulk_xfer, bulk_xfer_resources, ip_v4, user_space, protocol, - instances); + instances, cau, immed); } nrt_need_state_save = true;