diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index 41d9d3b9fee7b1f06d89f9ffa3d12dab0cec4e0c..544ec49c515884af8652a6da1b2245653f9f3e37 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -322,7 +322,8 @@ typedef enum { SRUN_EXEC, SRUN_STEP_MISSING, SRUN_REQUEST_SUSPEND, - SRUN_STEP_SIGNAL, /* BluegeneQ: srun forwards signal to runjob */ + SRUN_STEP_SIGNAL, /* for launch plugins aprun, poe and runjob, + * srun forwards signal to the launch command */ PMI_KVS_PUT_REQ = 7201, PMI_KVS_PUT_RESP, diff --git a/src/slurmd/slurmstepd/req.c b/src/slurmd/slurmstepd/req.c index f70c0fab6bac3a692da68a0f30a927294b1b7360..818136b7462964df70f3447717e686923d714109 100644 --- a/src/slurmd/slurmstepd/req.c +++ b/src/slurmd/slurmstepd/req.c @@ -675,13 +675,11 @@ _handle_signal_task_local(int fd, slurmd_job_t *job, uid_t uid) int signal; int ltaskid; /* local task index */ - debug("_handle_signal_task_local for job %u.%u", - job->jobid, job->stepid); - safe_read(fd, &signal, sizeof(int)); safe_read(fd, &ltaskid, sizeof(int)); + debug("_handle_signal_task_local for step=%u.%u uid=%d signal=%d", + job->jobid, job->stepid, (int) uid, signal); - debug3(" uid = %d", uid); if (uid != job->uid && !_slurm_authorized_user(uid)) { debug("kill req from uid %ld for job %u.%u owned by uid %ld", (long)uid, job->jobid, job->stepid, (long)job->uid); @@ -751,12 +749,9 @@ _handle_signal_container(int fd, slurmd_job_t *job, uid_t uid) int sig; static int msg_sent = 0; - debug("_handle_signal_container for job %u.%u", - job->jobid, job->stepid); - safe_read(fd, &sig, sizeof(int)); - - debug3(" uid = %d", uid); + debug("_handle_signal_container for step=%u.%u uid=%d signal=%d", + job->jobid, job->stepid, (int) uid, sig); if (uid != job->uid && !_slurm_authorized_user(uid)) { debug("kill container req from uid %ld for job %u.%u " "owned by uid %ld", @@ -985,11 +980,10 @@ _handle_terminate(int fd, slurmd_job_t *job, uid_t uid) int rc = 
SLURM_SUCCESS; int errnum = 0; - debug("_handle_terminate for job %u.%u", - job->jobid, job->stepid); + debug("_handle_terminate for step=%u.%u uid=%d", + job->jobid, job->stepid, uid); step_terminate_monitor_start(job->jobid, job->stepid); - debug3(" uid = %d", uid); if (uid != job->uid && !_slurm_authorized_user(uid)) { debug("terminate req from uid %ld for job %u.%u " "owned by uid %ld", @@ -1159,8 +1153,8 @@ _handle_suspend(int fd, slurmd_job_t *job, uid_t uid) int rc = SLURM_SUCCESS; int errnum = 0; - debug("_handle_suspend for job %u.%u", job->jobid, job->stepid); - debug3(" uid = %d", uid); + debug("_handle_suspend for step=%u.%u uid=%d", + job->jobid, job->stepid, (int) uid); if (!_slurm_authorized_user(uid)) { debug("job step suspend request from uid %ld for job %u.%u ", (long)uid, job->jobid, job->stepid); diff --git a/testsuite/expect/test3.7 b/testsuite/expect/test3.7 index 1cd3dec1bab2c85f6b7f8326ba711c9fbc4787da..1018a4bd4dcb03910172c0ed5806348908ccb2db 100755 --- a/testsuite/expect/test3.7 +++ b/testsuite/expect/test3.7 @@ -126,7 +126,8 @@ exec $bin_cc -o $file_prog ${file_prog}.c exec $bin_chmod 700 $file_prog # -# Submit two jobs to the same node +# Submit two jobs to the same node. +# The first job includes srun, second only the application # set srun_pid [spawn $sbatch -N1 -t2 --output=$file_out1 $file_prog_sh1] expect { @@ -249,6 +250,7 @@ if {$exit_code == 0} { exec $bin_rm -f $file_out1 $file_out2 $file_prog $file_prog_sh1 $file_prog_sh2 send_user "\nSUCCESS\n" } else { - send_user "\nFAILURE: May be due to use of gang scheduler, a race conditions, or the ProcTrack plugin not identifying the application as part of the job\n" + send_user "\nFAILURE: May be due to use of gang scheduler, a race conditions, or the ProcTrack plugin not identifying the application as part of the job.\n" + send_user "\nFAILURE: launch/poe and proctrack/pgid are incompatible.\n" } exit $exit_code