From 49d483dbae704047f876f3b2c840eafe7f1f2279 Mon Sep 17 00:00:00 2001 From: Morris Jette <jette@schedmd.com> Date: Thu, 14 Apr 2016 09:39:27 -0700 Subject: [PATCH] Set burst buffer reason for job If a job fails stage-in, set its reason to BurstBufferOperation with a string describing what happened. Previously the reason was set to AdminHeld on stage-in failure. --- src/plugins/burst_buffer/cray/burst_buffer_cray.c | 2 +- src/slurmctld/job_mgr.c | 1 + src/slurmctld/job_scheduler.c | 1 + src/slurmctld/node_scheduler.c | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/plugins/burst_buffer/cray/burst_buffer_cray.c b/src/plugins/burst_buffer/cray/burst_buffer_cray.c index 25605301121..151867b05cc 100644 --- a/src/plugins/burst_buffer/cray/burst_buffer_cray.c +++ b/src/plugins/burst_buffer/cray/burst_buffer_cray.c @@ -3231,7 +3231,7 @@ static void _kill_job(struct job_record *job_ptr) job_ptr->priority = 0; /* Hold job */ build_cg_bitmap(job_ptr); job_ptr->exit_code = 1; - job_ptr->state_reason = WAIT_HELD; + job_ptr->state_reason = FAIL_BURST_BUFFER_OP; xfree(job_ptr->state_desc); job_ptr->state_desc = xstrdup("Burst buffer pre_run error"); job_completion_logger(job_ptr, false); diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 5e599a28d3f..4e00fefe687 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -9570,6 +9570,7 @@ static bool _top_priority(struct job_record *job_ptr) if ((!top) && detail_ptr) { /* not top prio */ if (job_ptr->priority == 0) { /* user/admin hold */ if (job_ptr->state_reason != FAIL_BAD_CONSTRAINTS + && (job_ptr->state_reason != FAIL_BURST_BUFFER_OP) && (job_ptr->state_reason != WAIT_HELD) && (job_ptr->state_reason != WAIT_HELD_USER) && job_ptr->state_reason != WAIT_MAX_REQUEUE) { diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index b7b12c96d8a..12997e8dfec 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -225,6 +225,7 @@ static 
bool _job_runnable_test1(struct job_record *job_ptr, bool sched_plugin) job_ptr->start_time = (time_t) 0; if (job_ptr->priority == 0) { /* held */ if (job_ptr->state_reason != FAIL_BAD_CONSTRAINTS + && (job_ptr->state_reason != FAIL_BURST_BUFFER_OP) && (job_ptr->state_reason != WAIT_HELD) && (job_ptr->state_reason != WAIT_HELD_USER) && job_ptr->state_reason != WAIT_MAX_REQUEUE) { diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index d81393526b5..026de5ff337 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -1855,6 +1855,7 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only, if (job_ptr->priority == 0) { /* user/admin hold */ if (job_ptr->state_reason != FAIL_BAD_CONSTRAINTS + && (job_ptr->state_reason != FAIL_BURST_BUFFER_OP) && (job_ptr->state_reason != WAIT_HELD) && (job_ptr->state_reason != WAIT_HELD_USER) && job_ptr->state_reason != WAIT_MAX_REQUEUE) { -- GitLab