diff --git a/src/srun/pmi.c b/src/srun/pmi.c index 609b87c9d8b67c5f1e145dfb4f34591e9bf3727a..4ee9a179ac9a2421370277b6e62ee804dea09391 100644 --- a/src/srun/pmi.c +++ b/src/srun/pmi.c @@ -2,7 +2,7 @@ * pmi.c - Global PMI data as maintained within srun * $Id$ ***************************************************************************** - * Copyright (C) 2005 The Regents of the University of California. + * Copyright (C) 2005-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov> * UCRL-CODE-217948. @@ -116,8 +116,8 @@ static void _kvs_xmit_tasks(void) static void *_msg_thread(void *x) { struct msg_arg *msg_arg_ptr = (struct msg_arg *) x; - int rc, success = 0;//, task_fd; - slurm_msg_t msg_send;//, msg_rcv; + int rc, success = 0; + slurm_msg_t msg_send; debug2("KVS_Barrier msg to %s:%u", @@ -129,33 +129,9 @@ static void *_msg_thread(void *x) msg_arg_ptr->bar_ptr->port, msg_arg_ptr->bar_ptr->hostname); - slurm_send_recv_rc_msg_only_one(&msg_send, &rc, 0); - /* if ((task_fd = slurm_open_msg_conn(&msg_send.address)) < 0) { */ -/* error("slurm_init_msg_engine_port: %m"); */ -/* goto fini; */ -/* } */ -/* if ((rc = slurm_send_node_msg(task_fd, &msg_send)) < 0) { */ -/* error("KVS_Barrier send data fail to %s", */ -/* msg_arg_ptr->bar_ptr->hostname); */ -/* (void) slurm_shutdown_msg_conn(task_fd); */ -/* goto fini; */ -/* } */ -/* rc = slurm_receive_msg(task_fd, &msg_rcv, 0); */ -/* (void) slurm_shutdown_msg_conn(task_fd); */ -/* if (rc < 0) { */ -/* error("KVS_Barrier confirm fail from %s", */ -/* msg_arg_ptr->bar_ptr->hostname); */ -/* goto fini; */ -/* } */ -/* if (msg_rcv.msg_type != RESPONSE_SLURM_RC) { */ -/* error("KVS_Barrier confirm type %d from %s", */ -/* msg_rcv.msg_type, */ -/* msg_arg_ptr->bar_ptr->hostname); */ -/* goto fini; */ -/* } */ -/* rc = ((return_code_msg_t *) msg_rcv.data)->return_code; */ -/* slurm_free_return_code_msg((return_code_msg_t *) msg_rcv.data); */ - if (rc != SLURM_SUCCESS) { + if (slurm_send_recv_rc_msg_only_one(&msg_send, &rc, 0) < 0) { + error("slurm_send_recv_rc_msg_only_one: %m"); + } else if (rc != SLURM_SUCCESS) { error("KVS_Barrier confirm from %s, rc=%d", msg_arg_ptr->bar_ptr->hostname, rc); } else { @@ -163,7 +139,6 @@ static void *_msg_thread(void *x) success = 1; } -/* fini: */ slurm_mutex_lock(&agent_mutex); agent_cnt--; if (success) @@ -210,6 +185,7 @@ static void *_agent(void *x) } while (agent_cnt > 0) pthread_cond_wait(&agent_cond, &agent_mutex); + slurm_mutex_unlock(&agent_mutex); } /* Release allocated memory */ diff --git a/testsuite/expect/test7.2 b/testsuite/expect/test7.2 index 95879756ef8a9653fc874005767d7d62b4812f3b..850eab00ef4aec9f829c5a2ae14de2f0947ca7e8 100755 --- a/testsuite/expect/test7.2 +++ b/testsuite/expect/test7.2 @@ -7,7 +7,7 @@ # "FAILURE: ..." otherwise with an explanation of the failure, OR # anything else indicates a failure mode that must be investigated. ############################################################################ -# Copyright (C) 2005 The Regents of the University of California. +# Copyright (C) 2005-2006 The Regents of the University of California. # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). # Written by Morris Jette <jette1@llnl.gov> # UCRL-CODE-217948. @@ -58,11 +58,11 @@ if { [test_bluegene] } { if { [test_xcpu] } { set node_cnt 1-1 } else { - set node_cnt 1-2 + set node_cnt 1-4 } } -spawn $srun -l -N$node_cnt -n6 -O -t1 $file_prog_get +spawn $srun -l -N$node_cnt -n4 -O -t1 $file_prog_get expect { -re "FAILURE" { send_user "\nFAILURE: some error occured\n"