From 49d483dbae704047f876f3b2c840eafe7f1f2279 Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Thu, 14 Apr 2016 09:39:27 -0700
Subject: [PATCH] Set burst buffer reason for job

If a job fails stage-in, set its reason to BurstBufferOperation
with a string describing what happened. Previously the reason was
set to AdminHeld on stage-in failure.
---
 src/plugins/burst_buffer/cray/burst_buffer_cray.c | 2 +-
 src/slurmctld/job_mgr.c                           | 1 +
 src/slurmctld/job_scheduler.c                     | 1 +
 src/slurmctld/node_scheduler.c                    | 1 +
 4 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/plugins/burst_buffer/cray/burst_buffer_cray.c b/src/plugins/burst_buffer/cray/burst_buffer_cray.c
index 25605301121..151867b05cc 100644
--- a/src/plugins/burst_buffer/cray/burst_buffer_cray.c
+++ b/src/plugins/burst_buffer/cray/burst_buffer_cray.c
@@ -3231,7 +3231,7 @@ static void _kill_job(struct job_record *job_ptr)
 	job_ptr->priority = 0;	/* Hold job */
 	build_cg_bitmap(job_ptr);
 	job_ptr->exit_code = 1;
-	job_ptr->state_reason = WAIT_HELD;
+	job_ptr->state_reason = FAIL_BURST_BUFFER_OP;
 	xfree(job_ptr->state_desc);
 	job_ptr->state_desc = xstrdup("Burst buffer pre_run error");
 	job_completion_logger(job_ptr, false);
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 5e599a28d3f..4e00fefe687 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -9570,6 +9570,7 @@ static bool _top_priority(struct job_record *job_ptr)
 	if ((!top) && detail_ptr) {	/* not top prio */
 		if (job_ptr->priority == 0) {		/* user/admin hold */
 			if (job_ptr->state_reason != FAIL_BAD_CONSTRAINTS
+			    && (job_ptr->state_reason != FAIL_BURST_BUFFER_OP)
 			    && (job_ptr->state_reason != WAIT_HELD)
 			    && (job_ptr->state_reason != WAIT_HELD_USER)
 			    && job_ptr->state_reason != WAIT_MAX_REQUEUE) {
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index b7b12c96d8a..12997e8dfec 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -225,6 +225,7 @@ static bool _job_runnable_test1(struct job_record *job_ptr, bool sched_plugin)
 		job_ptr->start_time = (time_t) 0;
 	if (job_ptr->priority == 0)	{ /* held */
 		if (job_ptr->state_reason != FAIL_BAD_CONSTRAINTS
+		    && (job_ptr->state_reason != FAIL_BURST_BUFFER_OP)
 		    && (job_ptr->state_reason != WAIT_HELD)
 		    && (job_ptr->state_reason != WAIT_HELD_USER)
 		    && job_ptr->state_reason != WAIT_MAX_REQUEUE) {
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index d81393526b5..026de5ff337 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -1855,6 +1855,7 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only,
 
 	if (job_ptr->priority == 0) {	/* user/admin hold */
 		if (job_ptr->state_reason != FAIL_BAD_CONSTRAINTS
+		    && (job_ptr->state_reason != FAIL_BURST_BUFFER_OP)
 		    && (job_ptr->state_reason != WAIT_HELD)
 		    && (job_ptr->state_reason != WAIT_HELD_USER)
 		    && job_ptr->state_reason != WAIT_MAX_REQUEUE) {
-- 
GitLab