Skip to content
Snippets Groups Projects
Commit 0acd414e authored by Morris Jette's avatar Morris Jette
Browse files

Minor improvements to logging

parent 48dddc23
No related branches found
No related tags found
No related merge requests found
...@@ -322,7 +322,8 @@ typedef enum { ...@@ -322,7 +322,8 @@ typedef enum {
SRUN_EXEC, SRUN_EXEC,
SRUN_STEP_MISSING, SRUN_STEP_MISSING,
SRUN_REQUEST_SUSPEND, SRUN_REQUEST_SUSPEND,
SRUN_STEP_SIGNAL, /* BluegeneQ: srun forwards signal to runjob */ SRUN_STEP_SIGNAL, /* for launch plugins aprun, poe and runjob,
* srun forwards signal to the launch command */
PMI_KVS_PUT_REQ = 7201, PMI_KVS_PUT_REQ = 7201,
PMI_KVS_PUT_RESP, PMI_KVS_PUT_RESP,
......
...@@ -675,13 +675,11 @@ _handle_signal_task_local(int fd, slurmd_job_t *job, uid_t uid) ...@@ -675,13 +675,11 @@ _handle_signal_task_local(int fd, slurmd_job_t *job, uid_t uid)
int signal; int signal;
int ltaskid; /* local task index */ int ltaskid; /* local task index */
debug("_handle_signal_task_local for job %u.%u",
job->jobid, job->stepid);
safe_read(fd, &signal, sizeof(int)); safe_read(fd, &signal, sizeof(int));
safe_read(fd, &ltaskid, sizeof(int)); safe_read(fd, &ltaskid, sizeof(int));
debug("_handle_signal_task_local for step=%u.%u uid=%d signal=%d",
job->jobid, job->stepid, (int) uid, signal);
debug3(" uid = %d", uid);
if (uid != job->uid && !_slurm_authorized_user(uid)) { if (uid != job->uid && !_slurm_authorized_user(uid)) {
debug("kill req from uid %ld for job %u.%u owned by uid %ld", debug("kill req from uid %ld for job %u.%u owned by uid %ld",
(long)uid, job->jobid, job->stepid, (long)job->uid); (long)uid, job->jobid, job->stepid, (long)job->uid);
...@@ -751,12 +749,9 @@ _handle_signal_container(int fd, slurmd_job_t *job, uid_t uid) ...@@ -751,12 +749,9 @@ _handle_signal_container(int fd, slurmd_job_t *job, uid_t uid)
int sig; int sig;
static int msg_sent = 0; static int msg_sent = 0;
debug("_handle_signal_container for job %u.%u",
job->jobid, job->stepid);
safe_read(fd, &sig, sizeof(int)); safe_read(fd, &sig, sizeof(int));
debug("_handle_signal_container for step=%u.%u uid=%d signal=%d",
debug3(" uid = %d", uid); job->jobid, job->stepid, (int) uid, sig);
if (uid != job->uid && !_slurm_authorized_user(uid)) { if (uid != job->uid && !_slurm_authorized_user(uid)) {
debug("kill container req from uid %ld for job %u.%u " debug("kill container req from uid %ld for job %u.%u "
"owned by uid %ld", "owned by uid %ld",
...@@ -985,11 +980,10 @@ _handle_terminate(int fd, slurmd_job_t *job, uid_t uid) ...@@ -985,11 +980,10 @@ _handle_terminate(int fd, slurmd_job_t *job, uid_t uid)
int rc = SLURM_SUCCESS; int rc = SLURM_SUCCESS;
int errnum = 0; int errnum = 0;
debug("_handle_terminate for job %u.%u", debug("_handle_terminate for step=%u.%u uid=%d",
job->jobid, job->stepid); job->jobid, job->stepid, uid);
step_terminate_monitor_start(job->jobid, job->stepid); step_terminate_monitor_start(job->jobid, job->stepid);
debug3(" uid = %d", uid);
if (uid != job->uid && !_slurm_authorized_user(uid)) { if (uid != job->uid && !_slurm_authorized_user(uid)) {
debug("terminate req from uid %ld for job %u.%u " debug("terminate req from uid %ld for job %u.%u "
"owned by uid %ld", "owned by uid %ld",
...@@ -1159,8 +1153,8 @@ _handle_suspend(int fd, slurmd_job_t *job, uid_t uid) ...@@ -1159,8 +1153,8 @@ _handle_suspend(int fd, slurmd_job_t *job, uid_t uid)
int rc = SLURM_SUCCESS; int rc = SLURM_SUCCESS;
int errnum = 0; int errnum = 0;
debug("_handle_suspend for job %u.%u", job->jobid, job->stepid); debug("_handle_suspend for step=%u.%u uid=%d",
debug3(" uid = %d", uid); job->jobid, job->stepi, (int) uid);
if (!_slurm_authorized_user(uid)) { if (!_slurm_authorized_user(uid)) {
debug("job step suspend request from uid %ld for job %u.%u ", debug("job step suspend request from uid %ld for job %u.%u ",
(long)uid, job->jobid, job->stepid); (long)uid, job->jobid, job->stepid);
......
...@@ -126,7 +126,8 @@ exec $bin_cc -o $file_prog ${file_prog}.c ...@@ -126,7 +126,8 @@ exec $bin_cc -o $file_prog ${file_prog}.c
exec $bin_chmod 700 $file_prog exec $bin_chmod 700 $file_prog
# #
# Submit two jobs to the same node # Submit two jobs to the same node.
# The first job includes srun, second only the application
# #
set srun_pid [spawn $sbatch -N1 -t2 --output=$file_out1 $file_prog_sh1] set srun_pid [spawn $sbatch -N1 -t2 --output=$file_out1 $file_prog_sh1]
expect { expect {
...@@ -249,6 +250,7 @@ if {$exit_code == 0} { ...@@ -249,6 +250,7 @@ if {$exit_code == 0} {
exec $bin_rm -f $file_out1 $file_out2 $file_prog $file_prog_sh1 $file_prog_sh2 exec $bin_rm -f $file_out1 $file_out2 $file_prog $file_prog_sh1 $file_prog_sh2
send_user "\nSUCCESS\n" send_user "\nSUCCESS\n"
} else { } else {
send_user "\nFAILURE: May be due to use of gang scheduler, a race conditions, or the ProcTrack plugin not identifying the application as part of the job\n" send_user "\nFAILURE: May be due to use of gang scheduler, a race conditions, or the ProcTrack plugin not identifying the application as part of the job.\n"
send_user "\nFAILURE: launch/poe and proctrack/pgid are incompatible.\n"
} }
exit $exit_code exit $exit_code
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment