diff --git a/NEWS b/NEWS index 45bbb3b2975b8a7377f0b056a36e88c325c9c4a0..b0c0011debae7a72e26b5c174798ee769246e217 100644 --- a/NEWS +++ b/NEWS @@ -81,6 +81,7 @@ documents those changes that are of interest to users and admins. -- Make sched/backfill properly schedule jobs with constraints having node counts. NOTE: Backfill of jobs with constraings having exclusive OR operators are not fully supported. + -- If srun is cancelled by SIGINT, the job is marked as cancelled. * Changes in SLURM 2.0.1 ======================== diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index adb396913abaeaa910fa212f00067012a3d556e5..6ecce218e3d8199d6f0ec6d528a2abfb398f6db7 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -358,9 +358,21 @@ relinquish: } else if (WIFSIGNALED(status)) { verbose("Command \"%s\" was terminated by signal %d", command_argv[0], WTERMSIG(status)); + /* if we get these signals we return a normal + exit since this was most likely sent from the + user */ + switch(WTERMSIG(status)) { + case SIGHUP: + case SIGINT: + case SIGQUIT: + case SIGKILL: + rc = 0; + break; + default: + break; + } } } - return rc; } diff --git a/src/srun/srun.c b/src/srun/srun.c index f03264440ca63df6498754031680b2fe04f26659..13c5f81e5e2d7b50ca9df1ddf567ac8810416fba 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -469,7 +469,7 @@ int srun(int ac, char **av) if (create_job_step(job, true) < 0) exit(1); } else { - if (create_job_step(job, true) < 0) + if (create_job_step(job, false) < 0) exit(1); } task_state_destroy(task_state); @@ -479,8 +479,14 @@ int srun(int ac, char **av) cleanup: if(got_alloc) { cleanup_allocation(); - slurm_complete_job(job->jobid, global_rc); + + /* tell the controller we were cancelled */ + if (job->state >= SRUN_JOB_CANCELLED) + slurm_complete_job(job->jobid, NO_VAL); + else + slurm_complete_job(job->jobid, global_rc); } + _run_srun_epilog(job); slurm_step_ctx_destroy(job->step_ctx); mpir_cleanup(); @@ -1188,13 +1194,9 @@ static void
_handle_intr() { static time_t last_intr = 0; static time_t last_intr_sent = 0; - if (opt.quit_on_intr) { - job_force_termination(job); - slurm_step_launch_abort(job->step_ctx); - return; - } - if (((time(NULL) - last_intr) > 1) && !opt.disable_status) { + if (!opt.quit_on_intr && + (((time(NULL) - last_intr) > 1) && !opt.disable_status)) { if (job->state < SRUN_JOB_FORCETERM) info("interrupt (one more within 1 sec to abort)"); else diff --git a/testsuite/expect/test1.38 b/testsuite/expect/test1.38 index 58e9a206e4ec51100f9ee3ced95a5dff90aee2ab..8a4d25d06a652d343f84ddc480d6b2c8b9eb3c4a 100755 --- a/testsuite/expect/test1.38 +++ b/testsuite/expect/test1.38 @@ -142,7 +142,7 @@ expect { set match_run 999 exp_continue } - -re "forcing job termination" { + -re "Job step aborted" { set match_term 1 exp_continue }