From cd0d28a83a787bd766b92c746e4fc537d3ccf45a Mon Sep 17 00:00:00 2001 From: Mark Grondona <mgrondona@llnl.gov> Date: Wed, 22 Jan 2003 00:39:45 +0000 Subject: [PATCH] o new list of job states in job.h o Only terminate IO thread if IO is done or job is in FORCETERM state --- src/srun/io.c | 5 +---- src/srun/job.c | 2 +- src/srun/job.h | 19 +++++++++++-------- src/srun/msg.c | 15 +++------------ src/srun/reattach.c | 7 +------ src/srun/signals.c | 8 ++------ src/srun/srun.c | 7 +------ 7 files changed, 20 insertions(+), 43 deletions(-) diff --git a/src/srun/io.c b/src/srun/io.c index e7774200ac7..901df8f0e9d 100644 --- a/src/srun/io.c +++ b/src/srun/io.c @@ -418,10 +418,7 @@ static char *_host_state_name(host_state_t state_inx) static bool _job_io_done(job_t *job) { - if (job->state == SRUN_JOB_DETACHED) - return true; - else - return false; + return (job->state >= SRUN_JOB_FORCETERM); } void report_task_status(job_t *job) diff --git a/src/srun/job.c b/src/srun/job.c index f55de1377da..17fd3c11293 100644 --- a/src/srun/job.c +++ b/src/srun/job.c @@ -303,7 +303,7 @@ job_force_termination(job_t *job) update_job_state(job, SRUN_JOB_DETACHED); } else { info ("forcing job termination"); - update_job_state(job, SRUN_JOB_OVERDONE); + update_job_state(job, SRUN_JOB_FORCETERM); } pthread_kill(job->ioid, SIGHUP); diff --git a/src/srun/job.h b/src/srun/job.h index 2cfe4e400d8..59ba5a92f6d 100644 --- a/src/srun/job.h +++ b/src/srun/job.h @@ -17,14 +17,17 @@ #include "src/srun/fname.h" typedef enum { - SRUN_JOB_INIT = 0, - SRUN_JOB_LAUNCHING, - SRUN_JOB_STARTING, - SRUN_JOB_RUNNING, - SRUN_JOB_FAILED, - SRUN_JOB_TERMINATING, - SRUN_JOB_OVERDONE, - SRUN_JOB_DETACHED + SRUN_JOB_INIT = 0, /* Job's initial state */ + SRUN_JOB_LAUNCHING, /* Launch thread is running */ + SRUN_JOB_STARTING, /* Launch thread is complete */ + SRUN_JOB_RUNNING, /* Launch thread complete */ + SRUN_JOB_TERMINATING, /* Once first task terminates */ + SRUN_JOB_TERMINATED, /* All tasks terminated (may have IO) */ + SRUN_JOB_WAITING_ON_IO, /* All tasks terminated; waiting for IO */ + SRUN_JOB_FORCETERM, /* Forced termination of IO thread */ + SRUN_JOB_DONE, /* tasks and IO complete */ + SRUN_JOB_DETACHED, /* Detached IO from job (Not used now) */ + SRUN_JOB_FAILED, /* Job failed for some reason */ } job_state_t; typedef enum { diff --git a/src/srun/msg.c b/src/srun/msg.c index 08c99323e09..c8a20b94281 100644 --- a/src/srun/msg.c +++ b/src/srun/msg.c @@ -171,11 +171,7 @@ void MPIR_Breakpoint(void) static bool _job_msg_done(job_t *job) { - if ((job->state == SRUN_JOB_DETACHED) || - (job->state == SRUN_JOB_OVERDONE)) - return true; - else - return false; + return (job->state >= SRUN_JOB_TERMINATED); } static void @@ -233,7 +229,7 @@ update_failed_tasks(job_t *job, uint32_t nodeid) if (tasks_exited == opt.nprocs) { debug2("all tasks exited"); - update_job_state(job, SRUN_JOB_OVERDONE); + update_job_state(job, SRUN_JOB_TERMINATED); } } @@ -336,11 +332,6 @@ _reattach_handler(job_t *job, slurm_msg_t *msg) update_running_tasks(job, resp->srun_node_id); - /* - if (job->stepid == NO_VAL) - update_job_state(job, SRUN_JOB_OVERDONE); - */ - } static void @@ -380,7 +371,7 @@ _exit_handler(job_t *job, slurm_msg_t *exit_msg) tasks_exited++; if (tasks_exited == opt.nprocs) { debug2("All tasks exited"); - update_job_state(job, SRUN_JOB_OVERDONE); + update_job_state(job, SRUN_JOB_TERMINATED); } } } diff --git a/src/srun/reattach.c b/src/srun/reattach.c index d7aa9a17c34..86490eda973 100644 --- a/src/srun/reattach.c +++ b/src/srun/reattach.c @@ -512,11 +512,6 @@ int reattach() static bool _job_all_done(job_t *job) { - if ((job->state == SRUN_JOB_DETACHED ) || - (job->state == SRUN_JOB_FAILED ) || - (job->state == SRUN_JOB_OVERDONE)) - return true; - else - return false; + return (job->state == SRUN_JOB_DETACHED); } diff --git a/src/srun/signals.c b/src/srun/signals.c index bdeb26e40f0..606f9b90f99 100644 --- a/src/srun/signals.c +++ b/src/srun/signals.c @@ -81,11 +81,7 @@ static void * _p_signal_task(void *); static bool _job_sig_done(job_t *job) { - if ((job->state == SRUN_JOB_DETACHED) || - (job->state == SRUN_JOB_OVERDONE)) - return true; - else - return false; + return (job->state >= SRUN_JOB_DONE); } int @@ -182,7 +178,7 @@ _handle_intr(job_t *job, time_t *last_intr, time_t *last_intr_sent) } else { /* second Ctrl-C in half as many seconds */ /* terminate job */ - if (job->state != SRUN_JOB_OVERDONE) { + if (job->state < SRUN_JOB_FORCETERM) { info("sending Ctrl-C to job"); *last_intr = time(NULL); diff --git a/src/srun/srun.c b/src/srun/srun.c index 38cb0265246..eed374c29d4 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -352,12 +352,7 @@ _sig_kill_alloc(int signum) static bool _job_all_done(job_t *job) { - if ((job->state == SRUN_JOB_DETACHED ) || - (job->state == SRUN_JOB_FAILED ) || - (job->state == SRUN_JOB_OVERDONE)) - return true; - else - return false; + return (job->state >= SRUN_JOB_TERMINATED); } -- GitLab