diff --git a/NEWS b/NEWS index bd9a2d18289a8ea5831e8420c0a2ff1887e42654..ff510d3c12a0d2554428c6215b4716df1bd9fbb6 100644 --- a/NEWS +++ b/NEWS @@ -214,6 +214,10 @@ documents those changes that are of interest to users and admins. previous it was always -2. -- If step exitcode hasn't been set display with sacct the -2 instead of acting like it is a signal and exitcode. + -- Send the calculated step_rc for the batch step, as is done for normal + steps, instead of the raw status. + -- If a job times out, record an exit code of 1 in accounting instead of + signal 1. * Changes in Slurm 14.03.6 ========================== diff --git a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c index 52b8917dd39a6355954e0142ee3a4abb15823410..e3950abbef08f24df99d6683ef017b123a76d8c4 100644 --- a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c +++ b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c @@ -650,6 +650,7 @@ extern int jobacct_storage_p_job_complete(void *db_conn, char buf[BUFFER_SIZE]; uint16_t job_state; int duration; + uint32_t exit_code; if (!storage_init) { debug("jobacct init was not called or it failed"); @@ -675,11 +676,19 @@ extern int jobacct_storage_p_job_complete(void *db_conn, duration = job_ptr->end_time - job_ptr->start_time; } + exit_code = job_ptr->exit_code; + if (exit_code == 1) { + /* This wasn't signalled, it was set by Slurm so don't + * treat it like a signal. 
+ */ + exit_code = 256; + } + /* leave the requid as a %d since we want to see if it is -1 in stats */ snprintf(buf, BUFFER_SIZE, "%d %d %u %u %u", JOB_TERMINATED, duration, - job_state, job_ptr->requid, job_ptr->exit_code); + job_state, job_ptr->requid, exit_code); return _print_record(job_ptr, job_ptr->end_time, buf); } diff --git a/src/plugins/accounting_storage/mysql/as_mysql_job.c b/src/plugins/accounting_storage/mysql/as_mysql_job.c index 764b42f57269d94d9b7036902ae4c339f4fc0803..dc747b4e8a9a03a33e2f091dfa68ae4665a5b6e6 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_job.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_job.c @@ -725,6 +725,7 @@ extern int as_mysql_job_complete(mysql_conn_t *mysql_conn, char *query = NULL; int rc = SLURM_SUCCESS, job_state; time_t submit_time, end_time; + uint32_t exit_code = 0; if (!job_ptr->db_index && ((!job_ptr->details || !job_ptr->details->submit_time) @@ -825,9 +826,17 @@ extern int as_mysql_job_complete(mysql_conn_t *mysql_conn, xfree(comment); } + exit_code = job_ptr->exit_code; + if (exit_code == 1) { + /* This wasn't signalled, it was set by Slurm so don't + * treat it like a signal. 
+ */ + exit_code = 256; + } + xstrfmtcat(query, ", exit_code=%d, kill_requid=%d where job_db_inx=%d;", - job_ptr->exit_code, job_ptr->requid, + exit_code, job_ptr->requid, job_ptr->db_index); if (debug_flags & DEBUG_FLAG_DB_JOB) diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index 9679007ade30241000334c172aa19a6c59b5f72a..3ba9f554edcc99aed2c9f2b9306d0f5c4403d042 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -400,7 +400,7 @@ batch_finish(stepd_step_rec_t *job, int rc) (job->stepid == SLURM_BATCH_SCRIPT)) { verbose("job %u completed with slurm_rc = %d, job_rc = %d", job->jobid, rc, step_complete.step_rc); - _send_complete_batch_script_msg(job, rc, job->task[0]->estatus); + _send_complete_batch_script_msg(job, rc, step_complete.step_rc); } else { _wait_for_children_slurmstepd(job); verbose("job %u.%u completed with slurm_rc = %d, job_rc = %d",