From 0908fc1e3c615d2f8a319dac8f9d520409660504 Mon Sep 17 00:00:00 2001 From: Danny Auble <da@llnl.gov> Date: Thu, 10 Dec 2009 22:29:52 +0000 Subject: [PATCH] patch from Matthew to avoid a NULL task being handled incorrectly --- src/common/jobacct_common.c | 2 +- src/common/plugstack.c | 14 -------------- src/slurmd/slurmstepd/mgr.c | 25 +++++++++---------------- src/slurmd/slurmstepd/slurmstepd_job.c | 1 + src/slurmd/slurmstepd/slurmstepd_job.h | 16 ++++++++++++++++ 5 files changed, 27 insertions(+), 31 deletions(-) diff --git a/src/common/jobacct_common.c b/src/common/jobacct_common.c index cb352144d71..c27a7ab1390 100644 --- a/src/common/jobacct_common.c +++ b/src/common/jobacct_common.c @@ -1115,7 +1115,7 @@ extern struct jobacctinfo *jobacct_common_remove_task(pid_t pid, List task_list) debug2("removing task %u pid %d from jobacct", jobacct->max_vsize_id.taskid, jobacct->pid); } else { - error("pid(%d) not being watched in jobacct!", pid); + debug2("pid(%d) not being watched in jobacct!", pid); } error: slurm_mutex_unlock(&jobacct_lock); diff --git a/src/common/plugstack.c b/src/common/plugstack.c index 88ecfed246d..d8a56bcb725 100644 --- a/src/common/plugstack.c +++ b/src/common/plugstack.c @@ -1384,20 +1384,6 @@ int spank_get_remote_options(job_options_t opts) return (0); } -/* - * Return a task info structure corresponding to pid. 
- */ -static slurmd_task_info_t * job_task_info_by_pid (slurmd_job_t *job, pid_t pid) -{ - slurmd_task_info_t *task = NULL; - int i; - for (i = 0; i < job->ntasks; i++) { - if (job->task[i]->pid == pid) - task = job->task[i]; - } - return (task); -} - static int tasks_execd (spank_t spank) { return ( (spank->phase == STEP_TASK_POST_FORK) diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index c3aa6eb88e0..92f05490b69 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -1332,7 +1332,6 @@ static int _wait_for_any_task(slurmd_job_t *job, bool waitflag) { slurmd_task_info_t *t = NULL; - int i; int status; pid_t pid; int completed = 0; @@ -1367,22 +1366,15 @@ _wait_for_any_task(slurmd_job_t *job, bool waitflag) } /*********************************************/ - /* See if the pid matches that of one of the tasks */ - for (i = 0; i < job->ntasks; i++) { - if (job->task[i]->pid == pid) { - t = job->task[i]; - completed++; - break; - } - } - if (t != NULL) { - _log_task_exit(job->task[i]->gtid, pid, status); + if ((t = job_task_info_by_pid(job, pid))) { + completed++; + _log_task_exit(t->gtid, pid, status); t->exited = true; t->estatus = status; job->envtp->env = job->env; - job->envtp->procid = job->task[i]->gtid; - job->envtp->localid = job->task[i]->id; + job->envtp->procid = t->gtid; + job->envtp->localid = t->id; job->envtp->distribution = -1; job->envtp->batch_flag = job->batch; @@ -1403,10 +1395,11 @@ _wait_for_any_task(slurmd_job_t *job, bool waitflag) job, -1, job->env); xfree(my_epilog); } - job->envtp->procid = i; + job->envtp->procid = t->id; - if (spank_task_exit (job, i) < 0) - error ("Unable to spank task %d at exit", i); + if (spank_task_exit (job, t->id) < 0) + error ("Unable to spank task %d at exit", + t->id); post_term(job); } diff --git a/src/slurmd/slurmstepd/slurmstepd_job.c b/src/slurmd/slurmstepd/slurmstepd_job.c index 2f17aeb0dfb..92c67e6f619 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.c +++ 
b/src/slurmd/slurmstepd/slurmstepd_job.c @@ -638,3 +638,4 @@ _task_info_destroy(slurmd_task_info_t *t, uint16_t multi_prog) } /* otherwise, t->argv is a pointer to job->argv */ xfree(t); } + diff --git a/src/slurmd/slurmstepd/slurmstepd_job.h b/src/slurmd/slurmstepd/slurmstepd_job.h index 0a58865faaa..0cafdade1a5 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.h +++ b/src/slurmd/slurmstepd/slurmstepd_job.h @@ -220,4 +220,20 @@ void srun_info_destroy(struct srun_info *srun); slurmd_task_info_t * task_info_create(int taskid, int gtaskid, char *ifname, char *ofname, char *efname); +/* + * Return the first task info in job whose pid equals pid, or NULL if none. + * Defined static inline in this header so that src/common/plugstack.c can + * use it without undefined symbol warnings. + */ +static inline slurmd_task_info_t * +job_task_info_by_pid (slurmd_job_t *job, pid_t pid) +{ + int i; + for (i = 0; i < job->ntasks; i++) { + if (job->task[i]->pid == pid) + return (job->task[i]); + } + return (NULL); +} + #endif /* !_SLURMSTEPD_JOB_H */ -- GitLab