diff --git a/NEWS b/NEWS index 936ea31e56d4f4eb99c5c13c157632a91be51dc9..ae679b8be85626be4206bde244817a9103ca6667 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,8 @@ documents those changes that are of interest to users and admins. are expecting to get back. No srun_node_id anymore passed around in a slurm_msg_t -- Remove sched/wiki plugin (use sched/wiki2 for now) + -- Disable pthread_create() for PMI_send when TotalView is running for + better performance. * Changes in SLURM 1.2.0-pre2 ============================= diff --git a/src/api/pmi_server.c b/src/api/pmi_server.c index 374e5ca0056a3b24da093e17b58d689784500dd6..12c797116d0d98f3d7b5586a4c6cdaf44f19ae79 100644 --- a/src/api/pmi_server.c +++ b/src/api/pmi_server.c @@ -187,7 +187,14 @@ static void *_agent(void *x) msg_args = xmalloc(sizeof(struct msg_arg)); msg_args->bar_ptr = &args->barrier_xmit_ptr[j]; msg_args->kvs_ptr = &kvs_set; - if (pthread_create(&msg_id, &attr, _msg_thread, + if (agent_max_cnt == 1) { + /* TotalView slows down a great deal for + * pthread_create() calls, so just send the + * messages inline when TotalView is in use + * or for some other reason we only want + * one pthread. */ + _msg_thread((void *) msg_args); + } else if (pthread_create(&msg_id, &attr, _msg_thread, (void *) msg_args)) { fatal("pthread_create: %m"); } diff --git a/src/slaunch/opt.c b/src/slaunch/opt.c index 0b6f8c8e7ed776a20c3c83683e479bd7f4e346db..8e8b43b16ae48cff4c993225b58dadefaf7e0720 100644 --- a/src/slaunch/opt.c +++ b/src/slaunch/opt.c @@ -1102,6 +1102,7 @@ void set_options(const int argc, char **argv) * is really attached */ opt.parallel_debug = true; MPIR_being_debugged = 1; + pmi_server_max_threads(1); break; case LONG_OPT_USAGE: _usage(); diff --git a/testsuite/expect/test7.2 b/testsuite/expect/test7.2 index 898ac19e5e16390180d3c6de19e11a97fcdbe78d..a2e75f893707ffc1d7181ed2c978961c9d3baafd 100755 --- a/testsuite/expect/test7.2 +++ b/testsuite/expect/test7.2 @@ -67,7 +67,7 @@ if { [test_bluegene] } { # Adjust time limits as needed for large task counts */ # times are here vv set timeout [expr $max_job_delay + 60] -spawn $srun -l -N$node_cnt -n8 -O -t1 --pmi-threads=3 $file_prog_get +spawn $srun -l -N$node_cnt -n8 -O -t1 --threads=1 $file_prog_get expect { -re "FAILURE" { send_user "\nFAILURE: some error occured\n"