From 91bc07b8878049cf3a1444c306b4a3ca52760f77 Mon Sep 17 00:00:00 2001 From: Morris Jette <jette@schedmd.com> Date: Fri, 15 Jul 2016 09:09:52 -0700 Subject: [PATCH] burst_buffer/cray race condition This hardens the code with respect to a race condition if the slurmctld restarts and a burst buffer creation for a job is in progress. Eliminate the possibility of a duplicate job allocation record. --- .../burst_buffer/cray/burst_buffer_cray.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/plugins/burst_buffer/cray/burst_buffer_cray.c b/src/plugins/burst_buffer/cray/burst_buffer_cray.c index 62ef7fbf865..1a6c78a451d 100644 --- a/src/plugins/burst_buffer/cray/burst_buffer_cray.c +++ b/src/plugins/burst_buffer/cray/burst_buffer_cray.c @@ -1508,14 +1508,19 @@ static void *_start_stage_in(void *x) } else if (!bb_job) { error("%s: unable to find bb_job record for job %u", __func__, stage_args->job_id); - } else if (bb_job->total_size) { - bb_job->state = BB_STATE_STAGING_IN; - bb_alloc = bb_alloc_job(&bb_state, job_ptr, bb_job); - bb_limit_add(stage_args->user_id, bb_job->total_size, - stage_args->pool, &bb_state); - bb_alloc->create_time = time(NULL); } else { bb_job->state = BB_STATE_STAGING_IN; + bb_alloc = bb_find_alloc_rec(&bb_state, job_ptr); + if (!bb_alloc && bb_job->total_size) { + /* Not found (from restart race condtion) and + * job buffer has non-zero size */ + bb_alloc = bb_alloc_job(&bb_state, job_ptr, + bb_job); + bb_limit_add(stage_args->user_id, + bb_job->total_size, + stage_args->pool, &bb_state); + bb_alloc->create_time = time(NULL); + } } } slurm_mutex_unlock(&bb_state.bb_mutex); -- GitLab