From db36b0f0adabe232c0e68649ac8c97ca161effd4 Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Tue, 3 Dec 2013 08:30:20 -0800
Subject: [PATCH] Make sure batch complete RPC has node name

This is a correction in the logic of commit
3f4b2d51fcdc2178d67739fbfbcebc5ed1a75872 on launch failures.
---
 src/slurmctld/proc_req.c | 2 +-
 src/slurmd/slurmd/req.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index 0f0cbd732ca..c112c17b801 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -1703,7 +1703,7 @@ static void _slurm_rpc_complete_batch_script(slurm_msg_t * msg)
 	lock_slurmctld(job_write_lock);
 	job_ptr = find_job_record(comp_msg->job_id);
 
-	if (job_ptr && job_ptr->batch_host &&
+	if (job_ptr && job_ptr->batch_host && comp_msg->node_name &&
 	    strcmp(job_ptr->batch_host, comp_msg->node_name)) {
 		/* This can be the result of the slurmd on the batch_host
 		 * failing, but the slurmstepd continuing to run. Then the
diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c
index 5c95d9465ee..b5d4b1b84e5 100644
--- a/src/slurmd/slurmd/req.c
+++ b/src/slurmd/slurmd/req.c
@@ -1637,7 +1637,7 @@ _abort_job(uint32_t job_id, uint32_t slurm_rc)
 	resp.job_id       = job_id;
 	resp.job_rc       = 1;
 	resp.slurm_rc     = slurm_rc;
-	resp.node_name    = NULL;	/* unused */
+	resp.node_name    = conf->node_name;
 	resp.jobacct      = NULL;	/* unused */
 	resp_msg.msg_type = REQUEST_COMPLETE_BATCH_SCRIPT;
 	resp_msg.data     = &resp;
-- 
GitLab