From 91bc07b8878049cf3a1444c306b4a3ca52760f77 Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Fri, 15 Jul 2016 09:09:52 -0700
Subject: [PATCH] burst_buffer/cray race condition

This hardens the code with respect to a race condition if the
  slurmctld restarts and a burst buffer creation for a job is
  in progress. Eliminates the possibility of a duplicate job
  allocation record.
---
 .../burst_buffer/cray/burst_buffer_cray.c       | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/plugins/burst_buffer/cray/burst_buffer_cray.c b/src/plugins/burst_buffer/cray/burst_buffer_cray.c
index 62ef7fbf865..1a6c78a451d 100644
--- a/src/plugins/burst_buffer/cray/burst_buffer_cray.c
+++ b/src/plugins/burst_buffer/cray/burst_buffer_cray.c
@@ -1508,14 +1508,19 @@ static void *_start_stage_in(void *x)
 		} else if (!bb_job) {
 			error("%s: unable to find bb_job record for job %u",
 			      __func__, stage_args->job_id);
-		} else if (bb_job->total_size) {
-			bb_job->state = BB_STATE_STAGING_IN;
-			bb_alloc = bb_alloc_job(&bb_state, job_ptr, bb_job);
-			bb_limit_add(stage_args->user_id, bb_job->total_size,
-				     stage_args->pool, &bb_state);
-			bb_alloc->create_time = time(NULL);
 		} else {
 			bb_job->state = BB_STATE_STAGING_IN;
+			bb_alloc = bb_find_alloc_rec(&bb_state, job_ptr);
+			if (!bb_alloc && bb_job->total_size) {
+				/* Not found (from restart race condition) and
+				 * job buffer has non-zero size */
+				bb_alloc = bb_alloc_job(&bb_state, job_ptr,
+							bb_job);
+				bb_limit_add(stage_args->user_id,
+					     bb_job->total_size,
+					     stage_args->pool, &bb_state);
+				bb_alloc->create_time = time(NULL);
+			}
 		}
 	}
 	slurm_mutex_unlock(&bb_state.bb_mutex);
-- 
GitLab