From 90e2347b54c5b257a5d0b48faa2e27ea14339fe3 Mon Sep 17 00:00:00 2001
From: Danny Auble <da@schedmd.com>
Date: Fri, 28 Oct 2016 15:37:28 -0600
Subject: [PATCH] Add on to commit be924b88eb3cd.  This completely fixes the
 situation where a job could be accounted for more than it should be in the
 _decay_thread inside the priority/multifactor plugin.  Previously,
 end_time_exp, which is what is used to determine whether the job was already
 processed, wasn't stored for the job.  In 16.05 we were able to mostly fix
 this, but the TRES numbers could still get accounted for multiple times.
 Since a pack was needed to fix this, we had to wait until 17.02.

---
 .../priority/multifactor/priority_multifactor.c        |  6 ++++++
 src/slurmctld/job_mgr.c                                | 10 ++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/plugins/priority/multifactor/priority_multifactor.c b/src/plugins/priority/multifactor/priority_multifactor.c
index 75fc0a4797f..eed42a74268 100644
--- a/src/plugins/priority/multifactor/priority_multifactor.c
+++ b/src/plugins/priority/multifactor/priority_multifactor.c
@@ -928,6 +928,12 @@ static void _init_grp_used_cpu_run_secs(time_t last_ran)
 		if (priority_debug)
 			debug2("job: %u", job_ptr->job_id);
 
+		/* If end_time_exp is NO_VAL we have already run the end for
+		 * this job.  We don't want to do it again, so skip this job.
+		 */
+		if (job_ptr->end_time_exp == (time_t)NO_VAL)
+			continue;
+
 		if (!IS_JOB_RUNNING(job_ptr))
 			continue;
 
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 4a855bbf549..e2bc4c2e1d2 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -1158,6 +1158,7 @@ static void _dump_job_state(struct job_record *dump_job_ptr, Buf buffer)
 	pack_time(dump_job_ptr->preempt_time, buffer);
 	pack_time(dump_job_ptr->start_time, buffer);
 	pack_time(dump_job_ptr->end_time, buffer);
+	pack_time(dump_job_ptr->end_time_exp, buffer);
 	pack_time(dump_job_ptr->suspend_time, buffer);
 	pack_time(dump_job_ptr->pre_sus_time, buffer);
 	pack_time(dump_job_ptr->resize_time, buffer);
@@ -1274,7 +1275,8 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version)
 	uint32_t array_job_id = 0, req_switch = 0, wait4switch = 0;
 	uint32_t profile = ACCT_GATHER_PROFILE_NOT_SET;
 	uint32_t job_state, local_job_id = 0, delay_boot = 0;
-	time_t start_time, end_time, suspend_time, pre_sus_time, tot_sus_time;
+	time_t start_time, end_time, end_time_exp, suspend_time,
+		pre_sus_time, tot_sus_time;
 	time_t preempt_time = 0, deadline = 0;
 	time_t resize_time = 0, now = time(NULL);
 	uint8_t reboot = 0, power_flags = 0;
@@ -1384,6 +1386,7 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version)
 		safe_unpack_time(&preempt_time, buffer);
 		safe_unpack_time(&start_time, buffer);
 		safe_unpack_time(&end_time, buffer);
+		safe_unpack_time(&end_time_exp, buffer);
 		safe_unpack_time(&suspend_time, buffer);
 		safe_unpack_time(&pre_sus_time, buffer);
 		safe_unpack_time(&resize_time, buffer);
@@ -1582,6 +1585,7 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version)
 		safe_unpack_time(&preempt_time, buffer);
 		safe_unpack_time(&start_time, buffer);
 		safe_unpack_time(&end_time, buffer);
+		end_time_exp = end_time;
 		safe_unpack_time(&suspend_time, buffer);
 		safe_unpack_time(&pre_sus_time, buffer);
 		safe_unpack_time(&resize_time, buffer);
@@ -1775,6 +1779,7 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version)
 		safe_unpack_time(&preempt_time, buffer);
 		safe_unpack_time(&start_time, buffer);
 		safe_unpack_time(&end_time, buffer);
+		end_time_exp = end_time;
 		safe_unpack_time(&suspend_time, buffer);
 		safe_unpack_time(&pre_sus_time, buffer);
 		safe_unpack_time(&resize_time, buffer);
@@ -1988,7 +1993,8 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version)
 	job_ptr->direct_set_prio = direct_set_prio;
 	job_ptr->db_index     = db_index;
 	job_ptr->derived_ec   = derived_ec;
-	job_ptr->end_time_exp = job_ptr->end_time = end_time;
+	job_ptr->end_time_exp = end_time_exp;
+	job_ptr->end_time     = end_time;
 	job_ptr->exit_code    = exit_code;
 	job_ptr->group_id     = group_id;
 	job_ptr->job_state    = job_state;
-- 
GitLab