From 72f7c1fda7e298b9dd160f4aea65aef950dc5a64 Mon Sep 17 00:00:00 2001 From: Danny Auble <da@schedmd.com> Date: Tue, 10 May 2016 14:56:29 -0700 Subject: [PATCH] If running on cached information and the database loses all TRES information, make sure we handle it correctly when the database comes back up. --- src/slurmctld/controller.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index 73b9da4bb94..211ddaaa2b4 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -2676,6 +2676,18 @@ static void *_assoc_cache_mgr(void *no_data) assoc_mgr_refresh_lists(acct_db_conn, 0); if (running_cache) unlock_slurmctld(job_write_lock); + else if (g_tres_count != slurmctld_tres_cnt) { + /* This has to be done outside of the job write lock. + * This should only happen in very rare situations + * where we have state, but the database somehow has + * changed out from under us. */ + unlock_slurmctld(job_write_lock); + info("TRES in database does not match cache " + "(%u != %u). Updating...", + g_tres_count, slurmctld_tres_cnt); + _init_tres(); + lock_slurmctld(job_write_lock); + } slurm_mutex_unlock(&assoc_cache_mutex); } -- GitLab