From 26fa811ba4fd2cf7c795c50ad628a3d837feb43b Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Fri, 10 Jul 2009 18:03:29 +0000
Subject: [PATCH] svn merge -r18102:18114
 https://eris.llnl.gov/svn/slurm/branches/slurm-2.0

---
 NEWS                                          |  1 +
 .../mysql/accounting_storage_mysql.c          | 18 ++++++------
 src/slurmctld/job_mgr.c                       | 28 +++++++++++++------
 3 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/NEWS b/NEWS
index 8ff16356734..40ad95536ac 100644
--- a/NEWS
+++ b/NEWS
@@ -116,6 +116,7 @@ documents those changes that are of interest to users and admins.
     to properly handle user names that contain all digits. Return error code 
     from uid_from_string() and gid_from_string() functions rather than a uid of
     -1, which might be a valid uid or gid on some systems.
+ -- Fix in re-calculation of job priorities due to DOWN or DRAINED nodes.
 
 * Changes in SLURM 2.0.3
 ========================
diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
index 49b017d5122..4d3e55d72cd 100644
--- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
+++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
@@ -3032,14 +3032,14 @@ static int _mysql_acct_check_tables(MYSQL *db_conn)
 		{ "user_usec", "int unsigned default 0 not null" },
 		{ "sys_sec", "int unsigned default 0 not null" },
 		{ "sys_usec", "int unsigned default 0 not null" },
-		{ "max_vsize", "int unsigned default 0 not null" },
+		{ "max_vsize", "bigint unsigned default 0 not null" },
 		{ "max_vsize_task", "smallint unsigned default 0 not null" },
 		{ "max_vsize_node", "int unsigned default 0 not null" },
-		{ "ave_vsize", "float default 0.0 not null" },
-		{ "max_rss", "int unsigned default 0 not null" },
+		{ "ave_vsize", "double default 0.0 not null" },
+		{ "max_rss", "bigint unsigned default 0 not null" },
 		{ "max_rss_task", "smallint unsigned default 0 not null" },
 		{ "max_rss_node", "int unsigned default 0 not null" },
-		{ "ave_rss", "float default 0.0 not null" },
+		{ "ave_rss", "double default 0.0 not null" },
 		{ "max_pages", "int unsigned default 0 not null" },
 		{ "max_pages_task", "smallint unsigned default 0 not null" },
 		{ "max_pages_node", "int unsigned default 0 not null" },
@@ -10972,13 +10972,13 @@ extern int jobacct_storage_p_step_complete(mysql_conn_t *mysql_conn,
 		"user_sec=%ld, user_usec=%ld, "
 		"sys_sec=%ld, sys_usec=%ld, "
 		"max_vsize=%u, max_vsize_task=%u, "
-		"max_vsize_node=%u, ave_vsize=%.2f, "
+		"max_vsize_node=%u, ave_vsize=%f, "
 		"max_rss=%u, max_rss_task=%u, "
-		"max_rss_node=%u, ave_rss=%.2f, "
+		"max_rss_node=%u, ave_rss=%f, "
 		"max_pages=%u, max_pages_task=%u, "
-		"max_pages_node=%u, ave_pages=%.2f, "
-		"min_cpu=%.2f, min_cpu_task=%u, "
-		"min_cpu_node=%u, ave_cpu=%.2f "
+		"max_pages_node=%u, ave_pages=%f, "
+		"min_cpu=%f, min_cpu_task=%u, "
+		"min_cpu_node=%u, ave_cpu=%f "
 		"where id=%d and stepid=%u",
 		step_table, (int)now,
 		comp_status,
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index a35dee4ec66..986485edc7c 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -2592,8 +2592,8 @@ static int _job_create(job_desc_msg_t * job_desc, int allocate, int will_run,
 	
 	/* already confirmed submit_uid==0 */
 	/* If the priority isn't given we will figure it out later
-	   after we see if the job is eligible or not. So we want
-	   NO_VAL if not set. */
+	 * after we see if the job is eligible or not. So we want
+	 * NO_VAL if not set. */
 	job_ptr->priority = job_desc->priority;
 
 	if (update_job_dependency(job_ptr, job_desc->dependency)) {
@@ -3541,7 +3541,18 @@ void job_time_limit(void)
 			}
 		}
 
+		/* This needs to be near the top of the loop, checks every 
+		 * running, suspended and pending job */
 		resv_status = job_resv_check(job_ptr);
+
+		if ((job_ptr->priority == 1) && (!IS_JOB_FINISHED(job_ptr))) {
+			/* Rather than resetting job priorities whenever a 
+			 * DOWN, DRAINED or non-responsive node is returned to 
+			 * service, we pick them up here. There will be a small
+			 * delay in resetting a job's priority, but the code is 
+			 * a lot cleaner this way. */
+			_set_job_prio(job_ptr);
+		}
 		if (!IS_JOB_RUNNING(job_ptr))
 			continue;
 
@@ -5260,6 +5271,9 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid)
 		if (IS_JOB_PENDING(job_ptr) && detail_ptr) {
 			detail_ptr->begin_time = job_specs->begin_time;
 			update_accounting = true;
+			if ((job_ptr->priority == 1) &&
+			    (detail_ptr->begin_time <= now))
+				_set_job_prio(job_ptr);
 		} else
 			error_code = ESLURM_DISABLED;
 	}
@@ -6905,9 +6919,9 @@ extern int update_job_account(char *module, struct job_record *job_ptr,
 		  !job_ptr->assoc_ptr          &&
 		  !(accounting_enforce & ACCOUNTING_ENFORCE_ASSOCS)) {
 		/* if not enforcing associations we want to look for
-		   the default account and use it to avoid getting
-		   trash in the accounting records.
-		*/
+		 * the default account and use it to avoid getting
+		 * trash in the accounting records.
+		 */
 		assoc_rec.acct = NULL;
 		assoc_mgr_fill_in_assoc(acct_db_conn, &assoc_rec,
 					accounting_enforce, 
@@ -7308,10 +7322,6 @@ _copy_job_record_to_job_desc(struct job_record *job_ptr)
 
 	/* construct a job_desc_msg_t from job */
 	job_desc = xmalloc(sizeof(job_desc_msg_t));
-	if (!job_desc) {
-		error("_pack_job_for_ckpt: memory exhausted");
-		return NULL;
-	}
 
 	job_desc->account           = xstrdup(job_ptr->account);
 	job_desc->acctg_freq        = details->acctg_freq;
-- 
GitLab