From 494086b5847e0b5a61db553f1572d079fa5c3df6 Mon Sep 17 00:00:00 2001
From: Danny Auble <da@llnl.gov>
Date: Fri, 23 May 2008 21:12:21 +0000
Subject: [PATCH] added suspend time to the table along with flushing jobs at
 cold start.  Suspend time isn't calculated yet, but we will figure it out

---
 src/common/assoc_mgr.c                        |  28 +-
 src/common/parse_time.c                       |   3 +-
 src/common/slurm_accounting_storage.c         |  71 ++-
 src/common/slurm_accounting_storage.h         |  11 +
 src/common/slurmdbd_defs.c                    |   2 +
 src/common/slurmdbd_defs.h                    |   2 +
 src/database/mysql_common.c                   |   7 +
 src/database/mysql_common.h                   |   1 +
 .../filetxt/accounting_storage_filetxt.c      |   7 +
 .../mysql/accounting_storage_mysql.c          | 477 ++++++++++++++----
 .../accounting_storage/mysql/mysql_rollup.c   | 322 ++++++++++++
 .../accounting_storage/mysql/mysql_rollup.h   |   8 +
 .../none/accounting_storage_none.c            |   6 +
 .../pgsql/accounting_storage_pgsql.c          |  11 +
 .../slurmdbd/accounting_storage_slurmdbd.c    |  24 +-
 src/sacctmgr/account_functions.c              |  34 +-
 src/sacctmgr/cluster_functions.c              | 169 ++++---
 src/sacctmgr/sacctmgr.c                       |   2 +-
 src/sacctmgr/user_functions.c                 |  34 +-
 src/slurmctld/controller.c                    |   7 +
 src/slurmctld/job_mgr.c                       |   2 +-
 src/slurmctld/node_mgr.c                      |   5 +-
 src/slurmdbd/proc_req.c                       |  42 +-
 23 files changed, 1041 insertions(+), 234 deletions(-)

diff --git a/src/common/assoc_mgr.c b/src/common/assoc_mgr.c
index a4af3636e24..1a35ca43221 100644
--- a/src/common/assoc_mgr.c
+++ b/src/common/assoc_mgr.c
@@ -113,11 +113,13 @@ static int _get_local_association_list(void *db_conn, int enforce)
 				}
 				list_iterator_reset(itr2);
 			}
-			if(!assoc->user)
+			if(!assoc->user) {
 				continue;
+			}
 			passwd_ptr = getpwnam(assoc->user);
 			if(passwd_ptr) 
 				assoc->uid = passwd_ptr->pw_uid;
+			//log_assoc_rec(assoc);
 		}
 		list_iterator_destroy(itr2);
 		list_iterator_destroy(itr);
@@ -232,8 +234,10 @@ extern int assoc_mgr_fill_in_assoc(void *db_conn, acct_association_rec_t *assoc,
 		if(!assoc->cluster)
 			assoc->cluster = local_cluster_name;
 	}
-/* 	info("looking for assoc of user=%u, acct=%s, cluster=%s, partition=%s", */
-/* 	     assoc->uid, assoc->acct, assoc->cluster, assoc->partition); */
+/* 	info("looking for assoc of user=%s(%u), acct=%s, " */
+/* 	     "cluster=%s, partition=%s", */
+/* 	     assoc->user, assoc->uid, assoc->acct,  */
+/* 	     assoc->cluster, assoc->partition); */
 	slurm_mutex_lock(&local_association_lock);
 	itr = list_iterator_create(local_association_list);
 	while((found_assoc = list_next(itr))) {
@@ -244,12 +248,13 @@ extern int assoc_mgr_fill_in_assoc(void *db_conn, acct_association_rec_t *assoc,
 			}
 			continue;
 		} else {
-			if(!assoc->user && found_assoc->user) {
+			if(!assoc->uid && found_assoc->uid) {
 				debug3("we are looking for a "
 				       "nonuser association");
 				continue;
 			} else if(assoc->uid != found_assoc->uid) {
-				debug3("not the right user");
+				debug3("not the right user %u != %u",
+				       assoc->uid, found_assoc->uid);
 				continue;
 			}
 			
@@ -300,11 +305,17 @@ extern int assoc_mgr_fill_in_assoc(void *db_conn, acct_association_rec_t *assoc,
 		assoc->cluster = ret_assoc->cluster;
 	if(!assoc->partition)
 		assoc->partition = ret_assoc->partition;
+	assoc->fairshare                 = ret_assoc->fairshare;
 	assoc->max_cpu_secs_per_job      = ret_assoc->max_cpu_secs_per_job;
+	assoc->max_jobs                  = ret_assoc->max_jobs;
 	assoc->max_nodes_per_job         = ret_assoc->max_nodes_per_job;
 	assoc->max_wall_duration_per_job = ret_assoc->max_wall_duration_per_job;
-	/* The other fields are not relevant to the specific job,
-	 * for example max_jobs */
+	assoc->parent_acct_ptr           = ret_assoc->parent_acct_ptr;
+	if(assoc->parent_acct) {
+		xfree(assoc->parent_acct);
+		assoc->parent_acct       = xstrdup(ret_assoc->parent_acct);
+	} else 
+		assoc->parent_acct       = ret_assoc->parent_acct;
 	slurm_mutex_unlock(&local_association_lock);
 
 	return SLURM_SUCCESS;
@@ -481,7 +492,7 @@ extern int assoc_mgr_update_local_assocs(acct_update_object_t *update)
 				rc = SLURM_ERROR;
 				break;
 			}
-			debug("updating the assocs here on %u", rec->id);
+			debug("updating assoc %u", rec->id);
 			if(object->fairshare != (uint32_t)NO_VAL) {
 				rec->fairshare = object->fairshare;
 			}
@@ -517,6 +528,7 @@ extern int assoc_mgr_update_local_assocs(acct_update_object_t *update)
 				parents_changed = 1;
 				
 			}
+			log_assoc_rec(rec);
 			/* FIX ME: do more updates here */
 			break;
 		case ACCT_ADD_ASSOC:
diff --git a/src/common/parse_time.c b/src/common/parse_time.c
index 1a7f06ee32e..e72f2c93e45 100644
--- a/src/common/parse_time.c
+++ b/src/common/parse_time.c
@@ -471,7 +471,8 @@ extern int time_str2mins(char *string)
 
 	if ((string == NULL) || (string[0] == '\0'))
 		return -1;	/* invalid input */
-	if ((!strcasecmp(string, "INFINITE")) ||
+	if ((!strcasecmp(string, "-1")) ||
+	    (!strcasecmp(string, "INFINITE")) ||
 	    (!strcasecmp(string, "UNLIMITED"))) {
 		return INFINITE;
 	}
diff --git a/src/common/slurm_accounting_storage.c b/src/common/slurm_accounting_storage.c
index 9d0bbe8858d..59879e61592 100644
--- a/src/common/slurm_accounting_storage.c
+++ b/src/common/slurm_accounting_storage.c
@@ -142,6 +142,9 @@ typedef struct slurm_acct_storage_ops {
 				    List selected_parts, void *params);	
 	int (*update_shares_used)  (void *db_conn,
 				    List shares_used);
+	int (*flush_jobs)          (void *db_conn,
+				    char *cluster,
+				    time_t event_time);
 } slurm_acct_storage_ops_t;
 
 typedef struct slurm_acct_storage_context {
@@ -211,7 +214,8 @@ static slurm_acct_storage_ops_t * _acct_storage_get_ops(
 		"jobacct_storage_p_suspend",
 		"jobacct_storage_p_get_jobs",
 		"jobacct_storage_p_archive",
-		"acct_storage_p_update_shares_used"
+		"acct_storage_p_update_shares_used",
+		"acct_storage_p_flush_jobs_on_cluster"
 	};
 	int n_syms = sizeof( syms ) / sizeof( char * );
 
@@ -1561,20 +1565,39 @@ extern acct_admin_level_t str_2_acct_admin_level(char *level)
 
 extern void log_assoc_rec(acct_association_rec_t *assoc_ptr)
 {
-	info("association rec id: %u", assoc_ptr->id);
-	info("  acct :            %s", assoc_ptr->acct);
-	info("  cluster :         %s", assoc_ptr->cluster);
-	info("  fairshare :       %u", assoc_ptr->fairshare);
-	info("  max_cpu_secs_per_job : %u", assoc_ptr->max_cpu_secs_per_job);
-	info("  max_jobs :        %u", assoc_ptr->max_jobs);
-	info("  max_nodes_per_job : %u", assoc_ptr->max_nodes_per_job);
-	info("  max_wall_duration_per_job : %u", 
-	     assoc_ptr->max_wall_duration_per_job);
-	info("  parent_acct :     %s", assoc_ptr->parent_acct);
-	info("  partition :       %s", assoc_ptr->partition);
-	info("  user :            %s(%u)", assoc_ptr->user, assoc_ptr->uid);
-	info("  used_jobs :       %u", assoc_ptr->used_jobs);
-	info("  used_share :      %u", assoc_ptr->used_share);
+	debug("association rec id          : %u", assoc_ptr->id);
+	debug("  acct                      : %s", assoc_ptr->acct);
+	debug("  cluster                   : %s", assoc_ptr->cluster);
+	if(assoc_ptr->fairshare == INFINITE)	/* INFINITE is logged as NONE */
+		debug("  fairshare                 : NONE");
+	else
+		debug("  fairshare                 : %u",
+		       assoc_ptr->fairshare);
+	if(assoc_ptr->max_cpu_secs_per_job == INFINITE)
+		debug("  max_cpu_secs_per_job      : NONE");
+	else	/* limits are unsigned: print with %u like the other fields */
+		debug("  max_cpu_secs_per_job      : %u",
+		     assoc_ptr->max_cpu_secs_per_job);
+	if(assoc_ptr->max_jobs == INFINITE)
+		debug("  max_jobs                  : NONE");
+	else
+		debug("  max_jobs                  : %u", assoc_ptr->max_jobs);
+	if(assoc_ptr->max_nodes_per_job == INFINITE)
+		debug("  max_nodes_per_job         : NONE");
+	else
+		debug("  max_nodes_per_job         : %u",
+		     assoc_ptr->max_nodes_per_job);
+	if(assoc_ptr->max_wall_duration_per_job == INFINITE)
+		debug("  max_wall_duration_per_job : NONE");
+	else
+		debug("  max_wall_duration_per_job : %u", 
+		     assoc_ptr->max_wall_duration_per_job);
+	debug("  parent_acct               : %s", assoc_ptr->parent_acct);
+	debug("  partition                 : %s", assoc_ptr->partition);
+	debug("  user                      : %s(%u)",
+	      assoc_ptr->user, assoc_ptr->uid);
+	debug("  used_jobs                 : %u", assoc_ptr->used_jobs);
+	debug("  used_share                : %u", assoc_ptr->used_share);
 }
 
 /*
@@ -1984,3 +2007,21 @@ extern int acct_storage_g_update_shares_used(void *db_conn, List acct_list)
 								   acct_list);
 }
 
+/* 
+ * This should be called when a cluster does a cold start to flush out
+ * any jobs that were running during the restart, so we don't have any
+ * jobs in the database "running" forever; otherwise no end time would
+ * ever be recorded for them.  Calls the storage context's flush_jobs op.
+ * IN:  db_conn, cluster (cluster name), event_time: handed to the op as is
+ * RET: SLURM_ERROR if storage init fails, else the flush_jobs op's result
+ */
+extern int acct_storage_g_flush_jobs_on_cluster(
+	void *db_conn, char *cluster, time_t event_time)
+{
+	if (slurm_acct_storage_init(NULL) < 0)
+		return SLURM_ERROR;
+ 	return (*(g_acct_storage_context->ops.flush_jobs))
+		(db_conn, cluster, event_time);
+
+}
+
diff --git a/src/common/slurm_accounting_storage.h b/src/common/slurm_accounting_storage.h
index e737bd35ae3..1aec1b9f619 100644
--- a/src/common/slurm_accounting_storage.h
+++ b/src/common/slurm_accounting_storage.h
@@ -468,6 +468,17 @@ extern int acct_storage_g_roll_usage(void *db_conn);
  */
 extern int acct_storage_g_update_shares_used(void *db_conn, List acct_list);
 
+/* 
+ * This should be called when a cluster does a cold start to flush out
+ * any jobs that were running during the restart, so we don't have any
+ * jobs in the database "running" forever; otherwise no end time would
+ * ever be recorded for them.
+ * IN:  char * = cluster name
+ * RET: SLURM_SUCCESS on success SLURM_ERROR else
+ */
+extern int acct_storage_g_flush_jobs_on_cluster(
+	void *db_conn, char *cluster, time_t event_time);
+
 /*********************** CLUSTER ACCOUNTING STORAGE **************************/
 
 extern int clusteracct_storage_g_node_down(void *db_conn, 
diff --git a/src/common/slurmdbd_defs.c b/src/common/slurmdbd_defs.c
index 0fcd2eec408..9b61450b182 100644
--- a/src/common/slurmdbd_defs.c
+++ b/src/common/slurmdbd_defs.c
@@ -353,6 +353,7 @@ extern Buf pack_slurmdbd_msg(slurmdbd_msg_t *req)
 					     buffer);
 		break;
 	case DBD_CLUSTER_PROCS:
+	case DBD_FLUSH_JOBS:
 		slurmdbd_pack_cluster_procs_msg(
 			(dbd_cluster_procs_msg_t *)req->data, buffer);
 		break;
@@ -467,6 +468,7 @@ extern int unpack_slurmdbd_msg(slurmdbd_msg_t *resp, Buf buffer)
 			(dbd_acct_coord_msg_t **)&resp->data, buffer);
 		break;
 	case DBD_CLUSTER_PROCS:
+	case DBD_FLUSH_JOBS:
 		rc = slurmdbd_unpack_cluster_procs_msg(
 			(dbd_cluster_procs_msg_t **)&resp->data, buffer);
 		break;
diff --git a/src/common/slurmdbd_defs.h b/src/common/slurmdbd_defs.h
index 0acd5435c43..f11647fd2a2 100644
--- a/src/common/slurmdbd_defs.h
+++ b/src/common/slurmdbd_defs.h
@@ -70,6 +70,8 @@ typedef enum {
 	DBD_ADD_CLUSTERS,       /* Add new cluster to the mix           */
 	DBD_ADD_USERS,          /* Add new user to the mix              */
 	DBD_CLUSTER_PROCS,	/* Record total processors on cluster	*/
+	DBD_FLUSH_JOBS, 	/* End jobs that are still running
+				 * when a controller is restarted.	*/
 	DBD_GET_ACCOUNTS,	/* Get account information		*/
 	DBD_GET_ASSOCS,         /* Get assocation information   	*/
 	DBD_GET_ASSOC_USAGE,  	/* Get assoc usage information  	*/
diff --git a/src/database/mysql_common.c b/src/database/mysql_common.c
index 2a159c980c4..24e4de9ec8f 100644
--- a/src/database/mysql_common.c
+++ b/src/database/mysql_common.c
@@ -304,6 +304,13 @@ extern int mysql_db_query(MYSQL *mysql_db, char *query)
 	return SLURM_SUCCESS;
 }
 
+extern int mysql_db_ping(MYSQL *mysql_db)
+{
+	/* drop old results to avoid error 2014 (commands out of sync) */
+	_clear_results(mysql_db);		
+	return mysql_ping(mysql_db);	/* nonzero: callers reconnect */
+}
+
 extern int mysql_db_commit(MYSQL *mysql_db)
 {
 	//slurm_mutex_lock(&mysql_lock);
diff --git a/src/database/mysql_common.h b/src/database/mysql_common.h
index 0f2ca83d9c8..69acb54c499 100644
--- a/src/database/mysql_common.h
+++ b/src/database/mysql_common.h
@@ -81,6 +81,7 @@ extern int mysql_get_db_connection(MYSQL **mysql_db, char *db_name,
 extern int mysql_close_db_connection(MYSQL **mysql_db);
 extern int mysql_cleanup();
 extern int mysql_db_query(MYSQL *mysql_db, char *query);
+extern int mysql_db_ping(MYSQL *mysql_db);
 extern int mysql_db_commit(MYSQL *mysql_db);
 extern int mysql_db_rollback(MYSQL *mysql_db);
 
diff --git a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c
index 1be52a9415e..458305e457d 100644
--- a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c
+++ b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c
@@ -820,3 +820,10 @@ extern int acct_storage_p_update_shares_used(void *db_conn,
 {
 	return SLURM_SUCCESS;
 }
+
+extern int acct_storage_p_flush_jobs_on_cluster(
+	void *db_conn, char *cluster, time_t event_time)
+{
+	/* currently a no-op; intended to put end times for a clean start */
+	return SLURM_SUCCESS;
+}
diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
index 490216118f9..b367d1df1f8 100644
--- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
+++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
@@ -110,6 +110,7 @@ char *step_table = "step_table";
 char *txn_table = "txn_table";
 char *user_table = "user_table";
 char *last_ran_table = "last_ran_table";
+char *suspend_table = "suspend_table";
 
 extern int acct_storage_p_commit(mysql_conn_t *mysql_conn, bool commit);
 
@@ -875,7 +876,6 @@ static int _mysql_acct_check_tables(MYSQL *acct_mysql_db)
 		{ "deleted", "tinyint default 0" },
 		{ "id", "int not null" },
 		{ "period_start", "int unsigned not null" },
-		{ "cpu_count", "int unsigned default 0" },
 		{ "alloc_cpu_secs", "bigint default 0" },
 		{ NULL, NULL}		
 	};
@@ -983,6 +983,14 @@ static int _mysql_acct_check_tables(MYSQL *acct_mysql_db)
 		{ NULL, NULL}
 	};
 
+	storage_field_t suspend_table_fields[] = {
+		{ "id", "int not null" },
+		{ "associd", "mediumint not null" },
+		{ "start", "int unsigned default 0 not null" },
+		{ "end", "int unsigned default 0 not null" },
+		{ NULL, NULL}		
+	};
+
 	storage_field_t txn_table_fields[] = {
 		{ "id", "int not null auto_increment" },
 		{ "timestamp", "int unsigned default 0 not null" },
@@ -1124,6 +1132,11 @@ static int _mysql_acct_check_tables(MYSQL *acct_mysql_db)
 				 ", primary key (id, stepid))") == SLURM_ERROR)
 		return SLURM_ERROR;
 
+	if(mysql_db_create_table(acct_mysql_db, suspend_table,
+				 suspend_table_fields, 
+				 ")") == SLURM_ERROR)
+		return SLURM_ERROR;
+
 	if(mysql_db_create_table(acct_mysql_db, txn_table, txn_table_fields,
 				 ", primary key (id))") == SLURM_ERROR)
 		return SLURM_ERROR;
@@ -1259,12 +1272,21 @@ extern int acct_storage_p_close_connection(mysql_conn_t **mysql_conn)
 extern int acct_storage_p_commit(mysql_conn_t *mysql_conn, bool commit)
 {
 #ifdef HAVE_MYSQL
-
-	if(!mysql_conn) 
+	
+	if(!mysql_conn) {
+		error("We need a connection to run this");
 		return SLURM_ERROR;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return SLURM_ERROR;
+		}
+	}
 
 	debug4("got %d commits", list_count(mysql_conn->update_list));
-	
+
 	if(mysql_conn->rollback) {
 		if(!commit) {
 			if(mysql_db_rollback(mysql_conn->acct_mysql_db))
@@ -1302,7 +1324,7 @@ extern int acct_storage_p_commit(mysql_conn_t *mysql_conn, bool commit)
 		}
 		xfree(query);
 		while((row = mysql_fetch_row(result))) {
-			//info("sending to %s(%s)", row[0], row[1]);
+			info("sending to %s(%s)", row[0], row[1]);
 			slurm_set_addr_char(&req.address, atoi(row[1]), row[0]);
 			req.msg_type = ACCOUNTING_UPDATE_MSG;
 			req.flags = SLURM_GLOBAL_AUTH_KEY;
@@ -1390,6 +1412,18 @@ extern int acct_storage_p_add_users(mysql_conn_t *mysql_conn, uint32_t uid,
 	int affect_rows = 0;
 	List assoc_list = list_create(destroy_acct_association_rec);
 
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return SLURM_ERROR;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return SLURM_ERROR;
+		}
+	}
+
 	if((pw=getpwuid(uid))) {
 		user = pw->pw_name;
 	}
@@ -1521,6 +1555,18 @@ extern int acct_storage_p_add_accts(mysql_conn_t *mysql_conn, uint32_t uid,
 	int affect_rows = 0;
 	List assoc_list = list_create(destroy_acct_association_rec);
 
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return SLURM_ERROR;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return SLURM_ERROR;
+		}
+	}
+
 	if((pw=getpwuid(uid))) {
 		user = pw->pw_name;
 	}
@@ -1638,6 +1684,18 @@ extern int acct_storage_p_add_clusters(mysql_conn_t *mysql_conn, uint32_t uid,
 	char *user = NULL;
 	int affect_rows = 0;
 
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return SLURM_ERROR;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return SLURM_ERROR;
+		}
+	}
+
 	if((pw=getpwuid(uid))) {
 		user = pw->pw_name;
 	}
@@ -1833,6 +1891,18 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn,
 		return SLURM_ERROR;
 	}
 
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return SLURM_ERROR;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return SLURM_ERROR;
+		}
+	}
+
 	if((pw=getpwuid(uid))) {
 		user = pw->pw_name;
 	}
@@ -2199,6 +2269,16 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid,
 		return NULL;
 	}
 
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return NULL;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info))
+			return NULL;
+	}
+
 	if((pw=getpwuid(uid))) {
 		user_name = pw->pw_name;
 	}
@@ -2322,6 +2402,18 @@ extern List acct_storage_p_modify_accts(mysql_conn_t *mysql_conn, uint32_t uid,
 		return NULL;
 	}
 
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return NULL;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return NULL;
+		}
+	}
+
 	if((pw=getpwuid(uid))) {
 		user = pw->pw_name;
 	}
@@ -2391,6 +2483,7 @@ extern List acct_storage_p_modify_accts(mysql_conn_t *mysql_conn, uint32_t uid,
 	if(!(result = mysql_db_query_ret(
 		     mysql_conn->acct_mysql_db, query, 0))) {
 		xfree(query);
+		xfree(vals);
 		return NULL;
 	}
 	xfree(query);
@@ -2444,7 +2537,7 @@ extern List acct_storage_p_modify_clusters(mysql_conn_t *mysql_conn,
 	List ret_list = NULL;
 	int rc = SLURM_SUCCESS;
 	char *object = NULL;
-	char *vals = NULL, *assoc_vals = NULL, *extra = NULL, *query = NULL,
+	char *vals = NULL, *extra = NULL, *query = NULL,
 		*name_char = NULL, *assoc_char= NULL, *send_char = NULL;
 	time_t now = time(NULL);
 	struct passwd *pw = NULL;
@@ -2453,11 +2546,28 @@ extern List acct_storage_p_modify_clusters(mysql_conn_t *mysql_conn,
 	MYSQL_RES *result = NULL;
 	MYSQL_ROW row;
 
+	/* If you need to alter the default values of the cluster use
+	 * modify_associations since this is used only for registering
+	 * the controller when it loads 
+	 */
+
 	if(!cluster_q) {
 		error("we need something to change");
 		return NULL;
 	}
 
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return NULL;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return NULL;
+		}
+	}
+
 	if((pw=getpwuid(uid))) {
 		user = pw->pw_name;
 	}
@@ -2485,40 +2595,7 @@ extern List acct_storage_p_modify_clusters(mysql_conn_t *mysql_conn,
 		xstrfmtcat(vals, ", control_port=%u", cluster->control_port);
 	}
 
-	if((int)cluster->default_fairshare >= 0) {
-		xstrfmtcat(assoc_vals, ", fairshare=%u",
-			   cluster->default_fairshare);
-	} else if((int)cluster->default_fairshare == -1) 
-		xstrfmtcat(assoc_vals, ", fairshare=1");
-
-	if((int)cluster->default_max_cpu_secs_per_job >= 0) {
-		xstrfmtcat(assoc_vals, ", max_cpu_secs_per_job=%u",
-			   cluster->default_max_cpu_secs_per_job);
-	} else if((int)cluster->default_max_cpu_secs_per_job == -1) 
-		xstrfmtcat(assoc_vals, ", max_cpu_secs_per_job=NULL");
-
-	if((int)cluster->default_max_jobs >= 0) {
-		xstrfmtcat(assoc_vals, ", max_jobs=%u",
-			   cluster->default_max_jobs);
-	} else if((int)cluster->default_max_jobs == -1)
-		xstrfmtcat(assoc_vals, ", max_jobs=NULL");
-	
-
-	if((int)cluster->default_max_nodes_per_job >= 0) {
-		xstrfmtcat(assoc_vals, ", max_nodes_per_job=%u",
-			   cluster->default_max_nodes_per_job);
-	} else if((int)cluster->default_max_nodes_per_job == -1)
-		xstrfmtcat(assoc_vals, ", max_nodes_per_job=NULL");
-
-
-	if((int)cluster->default_max_wall_duration_per_job >= 0) {
-		xstrfmtcat(assoc_vals, ", max_wall_duration_per_job=%u",
-			   cluster->default_max_wall_duration_per_job);
-	} else if((int)cluster->default_max_wall_duration_per_job == -1) 
-		xstrfmtcat(assoc_vals, ", max_wall_duration_per_job=NULL");
-	
-
-	if(!vals && !assoc_vals) {
+	if(!vals) {
 		error("Nothing to change");
 		return NULL;
 	}
@@ -2530,7 +2607,6 @@ extern List acct_storage_p_modify_clusters(mysql_conn_t *mysql_conn,
 		     mysql_conn->acct_mysql_db, query, 0))) {
 		xfree(query);
 		xfree(vals);
-		xfree(assoc_vals);
 		error("no result given for %s", extra);
 		return NULL;
 	}
@@ -2539,42 +2615,19 @@ extern List acct_storage_p_modify_clusters(mysql_conn_t *mysql_conn,
 	rc = 0;
 	ret_list = list_create(slurm_destroy_char);
 	while((row = mysql_fetch_row(result))) {
-		acct_association_rec_t *assoc = NULL;
-
 		object = xstrdup(row[0]);
 		list_append(ret_list, object);
 		if(!rc) {
 			xstrfmtcat(name_char, "name='%s'", object);
-			xstrfmtcat(assoc_char, "cluster='%s'", object);
 			rc = 1;
 		} else  {
 			xstrfmtcat(name_char, " || name='%s'", object);
-			xstrfmtcat(assoc_char, " || cluster='%s'", object);
-		}
-		if(assoc_vals) {
-			assoc = xmalloc(sizeof(acct_association_rec_t));
-			assoc->cluster = xstrdup(object);
-			assoc->acct = xstrdup("root");
-			assoc->fairshare = cluster->default_fairshare;
-			assoc->max_jobs = cluster->default_max_jobs;
-			assoc->max_nodes_per_job =
-				cluster->default_max_nodes_per_job;
-			assoc->max_wall_duration_per_job = 
-				cluster->default_max_wall_duration_per_job;
-			assoc->max_cpu_secs_per_job = 
-				cluster->default_max_cpu_secs_per_job;
-			
-			if(_addto_update_list(mysql_conn->update_list, 
-					      ACCT_MODIFY_ASSOC,
-					      assoc) != SLURM_SUCCESS) 
-				error("couldn't add to the update list");
 		}
 	}
 	mysql_free_result(result);
 
 	if(!list_count(ret_list)) {
 		debug3("didn't effect anything");
-		xfree(assoc_vals);
 		xfree(vals);
 		return ret_list;
 	}
@@ -2591,23 +2644,10 @@ extern List acct_storage_p_modify_clusters(mysql_conn_t *mysql_conn,
 		}
 	}
 
-	if(assoc_vals) {
-		send_char = xstrdup_printf("acct='root' && (%s)",
-					   assoc_char);
-		if(_modify_common(mysql_conn, DBD_MODIFY_CLUSTERS, now,
-				  user, assoc_table, send_char, assoc_vals)
-		   == SLURM_ERROR) {
-			error("Couldn't modify cluster");
-			list_destroy(ret_list);
-			ret_list = NULL;
-			goto end_it;
-		}
-	}
 end_it:
 	xfree(name_char);
 	xfree(assoc_char);
 	xfree(vals);
-	xfree(assoc_vals);
 	xfree(send_char);
 
 	return ret_list;
@@ -2661,6 +2701,18 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn,
 		return NULL;
 	}
 
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return NULL;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return NULL;
+		}
+	}
+
 	if((pw=getpwuid(uid))) {
 		user = pw->pw_name;
 	}
@@ -2934,6 +2986,18 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid,
 		return NULL;
 	}
 
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return NULL;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return NULL;
+		}
+	}
+
 	if((pw=getpwuid(uid))) {
 		user_name = pw->pw_name;
 	}
@@ -3033,7 +3097,7 @@ extern List acct_storage_p_remove_coord(mysql_conn_t *mysql_conn, uint32_t uid,
 					char *acct, acct_user_cond_t *user_q)
 {
 #ifdef HAVE_MYSQL
-	return SLURM_SUCCESS;
+	return NULL;
 #else
 	return NULL;
 #endif
@@ -3065,6 +3129,18 @@ extern List acct_storage_p_remove_accts(mysql_conn_t *mysql_conn, uint32_t uid,
 		user_name = pw->pw_name;
 	}
 
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return NULL;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return NULL;
+		}
+	}
+
 	xstrcat(extra, "where deleted=0");
 	if(acct_q->acct_list && list_count(acct_q->acct_list)) {
 		set = 0;
@@ -3188,6 +3264,18 @@ extern List acct_storage_p_remove_clusters(mysql_conn_t *mysql_conn,
 		return NULL;
 	}
 
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return NULL;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return NULL;
+		}
+	}
+
 	if((pw=getpwuid(uid))) {
 		user_name = pw->pw_name;
 	}
@@ -3336,6 +3424,18 @@ extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn,
 		return NULL;
 	}
 
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return NULL;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return NULL;
+		}
+	}
+
 	xstrcat(extra, "where id>0 && deleted=0");
 
 	if((pw=getpwuid(uid))) {
@@ -3547,6 +3647,18 @@ extern List acct_storage_p_get_users(mysql_conn_t *mysql_conn,
 		USER_REQ_COUNT
 	};
 
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return NULL;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return NULL;
+		}
+	}
+
 	xstrcat(extra, "where deleted=0");
 
 	if(!user_q) 
@@ -3712,6 +3824,18 @@ extern List acct_storage_p_get_accts(mysql_conn_t *mysql_conn,
 		ACCT_REQ_COUNT
 	};
 
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return NULL;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return NULL;
+		}
+	}
+
 	xstrcat(extra, "where deleted=0");
 	if(!acct_q) 
 		goto empty;
@@ -3885,6 +4009,18 @@ extern List acct_storage_p_get_clusters(mysql_conn_t *mysql_conn,
 		ASSOC_REQ_COUNT
 	};
 
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return NULL;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return NULL;
+		}
+	}
+
 	xstrcat(extra, "where deleted=0");
 		
 	if(!cluster_q) 
@@ -4053,6 +4189,18 @@ extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn,
 		ASSOC2_REQ_MCPJ
 	};
 
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return NULL;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return NULL;
+		}
+	}
+
 	xstrcat(extra, "where deleted=0");
 	if(!assoc_q) 
 		goto empty;
@@ -4307,6 +4455,18 @@ extern int acct_storage_p_roll_usage(mysql_conn_t *mysql_conn)
 		UPDATE_COUNT
 	};
 
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return SLURM_ERROR;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return SLURM_ERROR;
+		}
+	}
+
 	i=0;
 	xstrfmtcat(tmp, "%s", update_req_inx[i]);
 	for(i=1; i<UPDATE_COUNT; i++) {
@@ -4344,10 +4504,13 @@ extern int acct_storage_p_roll_usage(mysql_conn_t *mysql_conn)
 		if(rc == SLURM_ERROR) 
 			return rc;
 	}
-	last_hour = 1211403599;
+	last_hour = 1211475599;
+	last_day = 1211475599;
+	last_month = 1211475599;
+//	last_hour = 1211403599;
 	//	last_hour = 1206946800;
-	last_day = 1207033199;
-	last_month = 1204358399;
+//	last_day = 1207033199;
+//	last_month = 1204358399;
 
 	if(!localtime_r(&last_hour, &start_tm)) {
 		error("Couldn't get localtime from hour start %d", last_hour);
@@ -4469,6 +4632,18 @@ extern int clusteracct_storage_p_node_down(mysql_conn_t *mysql_conn,
 	char *query = NULL;
 	char *my_reason;
 
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return SLURM_ERROR;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return SLURM_ERROR;
+		}
+	}
+
 	if (slurmctld_conf.fast_schedule && !slurmdbd_conf)
 		cpus = node_ptr->config_ptr->cpus;
 	else
@@ -4508,6 +4683,18 @@ extern int clusteracct_storage_p_node_up(mysql_conn_t *mysql_conn,
 	char* query;
 	int rc = SLURM_SUCCESS;
 
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return SLURM_ERROR;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return SLURM_ERROR;
+		}
+	}
+
 	query = xstrdup_printf(
 		"update %s set period_end=%d where cluster='%s' "
 		"and period_end=0 and node_name='%s';",
@@ -4545,6 +4732,18 @@ extern int clusteracct_storage_p_cluster_procs(mysql_conn_t *mysql_conn,
 	}
 	last_procs = procs;
 
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return SLURM_ERROR;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return SLURM_ERROR;
+		}
+	}
+
 	/* Record the processor count */
 	query = xstrdup_printf(
 		"select cpu_count from %s where cluster='%s' "
@@ -4631,13 +4830,14 @@ extern int jobacct_storage_p_job_start(mysql_conn_t *mysql_conn,
 		error("We need a connection to run this");
 		return SLURM_ERROR;
 	} else if(!mysql_conn->acct_mysql_db
-		  || mysql_ping(mysql_conn->acct_mysql_db) != 0) {
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
 		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
-					    mysql_db_name, mysql_db_info))
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
 			return SLURM_ERROR;
+		}
 	}
 	
-	
 	debug2("mysql_jobacct_job_start() called");
 	priority = (job_ptr->priority == NO_VAL) ?
 		-1L : (long) job_ptr->priority;
@@ -4758,10 +4958,12 @@ extern int jobacct_storage_p_job_complete(mysql_conn_t *mysql_conn,
 		error("We need a connection to run this");
 		return SLURM_ERROR;
 	} else if(!mysql_conn->acct_mysql_db
-		  || mysql_ping(mysql_conn->acct_mysql_db) != 0) {
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
 		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
-					    mysql_db_name, mysql_db_info))
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
 			return SLURM_ERROR;
+		}
 	}
 	debug2("mysql_jobacct_job_complete() called");
 	if (job_ptr->end_time == 0) {
@@ -4826,10 +5028,12 @@ extern int jobacct_storage_p_step_start(mysql_conn_t *mysql_conn,
 		error("We need a connection to run this");
 		return SLURM_ERROR;
 	} else if(!mysql_conn->acct_mysql_db
-		  || mysql_ping(mysql_conn->acct_mysql_db) != 0) {
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
 		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
-					    mysql_db_name, mysql_db_info))
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
 			return SLURM_ERROR;
+		}
 	}
 	if(slurmdbd_conf) {
 		cpus = step_ptr->job_ptr->total_procs;
@@ -4931,10 +5135,12 @@ extern int jobacct_storage_p_step_complete(mysql_conn_t *mysql_conn,
 		error("We need a connection to run this");
 		return SLURM_ERROR;
 	} else if(!mysql_conn->acct_mysql_db
-		  || mysql_ping(mysql_conn->acct_mysql_db) != 0) {
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
 		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
-					    mysql_db_name, mysql_db_info))
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
 			return SLURM_ERROR;
+		}
 	}
 
 	if(slurmdbd_conf) {
@@ -5048,17 +5254,20 @@ extern int jobacct_storage_p_suspend(mysql_conn_t *mysql_conn,
 				     struct job_record *job_ptr)
 {
 #ifdef HAVE_MYSQL
-	char query[1024];
+	char *query = NULL;
 	int rc = SLURM_SUCCESS;
-	
+	bool suspended = false;
+
 	if(!mysql_conn) {
 		error("We need a connection to run this");
 		return SLURM_ERROR;
 	} else if(!mysql_conn->acct_mysql_db
-		  || mysql_ping(mysql_conn->acct_mysql_db) != 0) {
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
 		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
-					    mysql_db_name, mysql_db_info))
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
 			return SLURM_ERROR;
+		}
 	}
 	if(!job_ptr->db_index) {
 		job_ptr->db_index = _get_db_index(mysql_conn->acct_mysql_db,
@@ -5069,20 +5278,38 @@ extern int jobacct_storage_p_suspend(mysql_conn_t *mysql_conn,
 			return SLURM_ERROR;
 	}
 
-	snprintf(query, sizeof(query),
-		 "update %s set suspended=%u-suspended, state=%d "
-		 "where id=%u",
-		 job_table, (int)job_ptr->suspend_time, 
-		 job_ptr->job_state & (~JOB_COMPLETING),
-		 job_ptr->db_index);
+	if (job_ptr->job_state == JOB_SUSPENDED)
+		suspended = true;
+
+	xstrfmtcat(query,
+		   "update %s set suspended=%d-suspended, state=%d "
+		   "where id=%u;",
+		   job_table, (int)job_ptr->suspend_time,
+		   job_ptr->job_state & (~JOB_COMPLETING),
+		   job_ptr->db_index);
+	/* suspend_table.id holds the job's db_index (the update below matches
+	 * on it), so db_index must come first and assoc_id second */
+	if(suspended)
+		xstrfmtcat(query,
+			   "insert into %s (id, associd, start, end) "
+			   "values (%u, %u, %d, 0);",
+			   suspend_table, job_ptr->db_index, job_ptr->assoc_id,
+			   (int)job_ptr->suspend_time);
+	else
+		xstrfmtcat(query,
+			   "update %s set end=%d where id=%u && end=0;",
+			   suspend_table, (int)job_ptr->suspend_time,
+			   job_ptr->db_index);
 	rc = mysql_db_query(mysql_conn->acct_mysql_db, query);
+
+	xfree(query);
 	if(rc != SLURM_ERROR) {
-		snprintf(query, sizeof(query),
-			 "update %s set suspended=%u-suspended, "
-			 "state=%d where id=%u and end=0",
-			 step_table, (int)job_ptr->suspend_time, 
-			 job_ptr->job_state, job_ptr->db_index);
+		xstrfmtcat(query,
+			   "update %s set suspended=%u-suspended, "
+			   "state=%d where id=%u and end=0",
+			   step_table, (int)job_ptr->suspend_time, 
+			   job_ptr->job_state, job_ptr->db_index);
 		rc = mysql_db_query(mysql_conn->acct_mysql_db, query);
+		xfree(query);
 	}
 	
 	return rc;
@@ -5107,10 +5334,12 @@ extern List jobacct_storage_p_get_jobs(mysql_conn_t *mysql_conn,
 		error("We need a connection to run this");
 		return NULL;
 	} else if(!mysql_conn->acct_mysql_db
-		  || mysql_ping(mysql_conn->acct_mysql_db) != 0) {
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
 		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
-					    mysql_db_name, mysql_db_info))
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
 			return NULL;
+		}
 	}
 	job_list = mysql_jobacct_process_get_jobs(mysql_conn,
 						  selected_steps,
@@ -5132,7 +5361,7 @@ extern void jobacct_storage_p_archive(mysql_conn_t *mysql_conn,
 		error("We need a connection to run this");
 		return;
 	} else if(!mysql_conn->acct_mysql_db
-		  || mysql_ping(mysql_conn->acct_mysql_db) != 0) {
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
 		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
 					    mysql_db_name, mysql_db_info))
 			return;
@@ -5143,10 +5372,40 @@ extern void jobacct_storage_p_archive(mysql_conn_t *mysql_conn,
 	return;
 }
 
-extern int acct_storage_p_update_shares_used(void *db_conn,
+extern int acct_storage_p_update_shares_used(mysql_conn_t *mysql_conn, 
 					     List shares_used)
 {
 	/* This definitely needs to be fleshed out.
 	 * Go through the list of shares_used_object_t objects and store them */
 	return SLURM_SUCCESS;
 }
+/* Stamp end=event_time on every job of 'cluster' still recorded with end=0. */
+extern int acct_storage_p_flush_jobs_on_cluster(
+	mysql_conn_t *mysql_conn, char *cluster, time_t event_time)
+{
+	/* put end times for a clean start */
+	char *query = NULL;
+	int rc = SLURM_SUCCESS;
+
+	if(!mysql_conn) {
+		error("We need a connection to run this");
+		return SLURM_ERROR;
+	} else if(!mysql_conn->acct_mysql_db
+		  || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) {
+		if(!mysql_get_db_connection(&mysql_conn->acct_mysql_db,
+					    mysql_db_name, mysql_db_info)) {
+			error("unable to re-connect to mysql database");
+			return SLURM_ERROR;
+		}
+	}
+
+	/* time_t is cast to int so the argument really matches its %d */
+	query = xstrdup_printf("update %s as t1, %s as t2 set t1.end=%d where "
+			       "t2.id=t1.associd and t2.cluster='%s' "
+			       "&& t1.end=0;", job_table, assoc_table,
+			       (int)event_time, cluster);
+	rc = mysql_db_query(mysql_conn->acct_mysql_db, query);
+	xfree(query);
+
+	return rc;
+}
diff --git a/src/plugins/accounting_storage/mysql/mysql_rollup.c b/src/plugins/accounting_storage/mysql/mysql_rollup.c
index aeda3153dd9..452d1bb9aa2 100644
--- a/src/plugins/accounting_storage/mysql/mysql_rollup.c
+++ b/src/plugins/accounting_storage/mysql/mysql_rollup.c
@@ -42,21 +42,343 @@
 
 #ifdef HAVE_MYSQL
 
+typedef struct {
+	int assoc_id;	/* associd of the job rows summed into this record */
+	int a_cpu;	/* allocated cpu-seconds: (end - start) * alloc_cpus */
+} local_assoc_usage_t;
+
+typedef struct {
+	char *name;	/* cluster name, xstrdup'ed from the event row */
+	int a_cpu;	/* cpu-seconds allocated to jobs on this cluster */
+	int cpu_count;	/* cpu_count column of the cluster's event row */
+	int d_cpu;	/* cpu-seconds from node event rows (logged as "down") */
+	int i_cpu;	/* idle cpu-seconds: total minus a_cpu, d_cpu and r_cpu */
+	int r_cpu;	/* reserved cpu-seconds: (start - eligible) * req_cpus */
+	time_t start;	/* event row start, raised to the hour start if earlier */
+	time_t end;	/* event row end; the hour end when the row is open */
+} local_cluster_usage_t;
+
+/* List destructor for local_assoc_usage_t elements; NULL is ignored. */
+extern void _destroy_local_assoc_usage(void *object)
+{
+	local_assoc_usage_t *usage = object;
+	if(!usage)
+		return;
+	xfree(usage);
+}
+/* List destructor for local_cluster_usage_t: frees the name, then the record. */
+extern void _destroy_local_cluster_usage(void *object)
+{
+	local_cluster_usage_t *usage = object;
+	if(!usage)
+		return;
+	xfree(usage->name);
+	xfree(usage);
+}
+/*
+ * Walk [start, end) one hour at a time: read the event and job tables,
+ * total down/allocated/reserved/idle cpu-seconds per cluster and log them.
+ * Returns SLURM_ERROR if a query fails (all local state is still freed).
+ */
 extern int mysql_hourly_rollup(mysql_conn_t *mysql_conn,
 			       time_t start, time_t end)
 {
 	int add_sec = 3599;
+	int i=0, rc = SLURM_SUCCESS;
 	time_t curr_start = start;
 	time_t curr_end = curr_start + add_sec;
+	char *query = NULL;
+	MYSQL_RES *result = NULL;
+	MYSQL_ROW row;
+	ListIterator a_itr = NULL;
+	ListIterator c_itr = NULL;
+	List assoc_usage_list = list_create(_destroy_local_assoc_usage);
+	List cluster_usage_list = list_create(_destroy_local_cluster_usage);
+	char *event_req_inx[] = {
+		"node_name",
+		"cluster",
+		"cpu_count",
+		"period_start",
+		"period_end"
+	};
+	char *event_str = NULL;
+	enum {
+		EVENT_REQ_NAME,
+		EVENT_REQ_CLUSTER,
+		EVENT_REQ_CPU,
+		EVENT_REQ_START,
+		EVENT_REQ_END,
+		EVENT_REQ_COUNT
+	};
+	char *job_req_inx[] = {
+		"jobid",
+		"associd",
+		"cluster",
+		"eligible",
+		"start",
+		"end",
+		"suspended",
+		"alloc_cpus",
+		"req_cpus"
+	};
+	char *job_str = NULL;
+	enum {
+		JOB_REQ_JOBID,
+		JOB_REQ_ASSOCID,
+		JOB_REQ_CLUSTER,
+		JOB_REQ_ELG,
+		JOB_REQ_START,
+		JOB_REQ_END,
+		JOB_REQ_SUSPENDED,
+		JOB_REQ_ACPU,
+		JOB_REQ_RCPU,
+		JOB_REQ_COUNT
+	};
+
+	i=0;
+	xstrfmtcat(event_str, "%s", event_req_inx[i]);
+	for(i=1; i<EVENT_REQ_COUNT; i++) {
+		xstrfmtcat(event_str, ", %s", event_req_inx[i]);
+	}
+
+	i=0;
+	xstrfmtcat(job_str, "%s", job_req_inx[i]);
+	for(i=1; i<JOB_REQ_COUNT; i++) {
+		xstrfmtcat(job_str, ", %s", job_req_inx[i]);
+	}
 
 /* 	info("begin start %s", ctime(&curr_start)); */
 /* 	info("begin end %s", ctime(&curr_end)); */
+	a_itr = list_iterator_create(assoc_usage_list);
+	c_itr = list_iterator_create(cluster_usage_list);
 	while(curr_start < end) {
+		int last_id = 0;
+		local_cluster_usage_t *c_usage = NULL;
+		local_assoc_usage_t *a_usage = NULL;
+
+		// first get the events during this time
+		query = xstrdup_printf("select %s from %s where "
+				       "(period_start <= %d "
+				       "&& period_end >= %d) "
+				       "|| period_end = 0 "
+				       "order by node_name, period_start",
+				       event_str, event_table,
+				       (int)curr_end, (int)curr_start);
+
+		debug3("%d query\n%s", mysql_conn->conn, query);
+		result = mysql_db_query_ret(
+			mysql_conn->acct_mysql_db, query, 0);
+		xfree(query);
+		if(!result) {
+			rc = SLURM_ERROR; /* leave via the cleanup below */
+			break;
+		}
+		while((row = mysql_fetch_row(result))) {
+			int row_start = atoi(row[EVENT_REQ_START]);
+			int row_end = atoi(row[EVENT_REQ_END]);
+			int row_cpu = atoi(row[EVENT_REQ_CPU]);
+
+			if(row_start < curr_start)
+				row_start = curr_start;
+
+			if(!row_end)
+				row_end = curr_end;
 
+			if(!row[EVENT_REQ_NAME][0]) {
+				c_usage =
+					xmalloc(sizeof(local_cluster_usage_t));
+				c_usage->name = xstrdup(row[EVENT_REQ_CLUSTER]);
+				c_usage->cpu_count = row_cpu;
+				c_usage->start = row_start;
+				c_usage->end = row_end;
+				list_append(cluster_usage_list, c_usage);
+				continue;
+			}
+
+			list_iterator_reset(c_itr);
+			while((c_usage = list_next(c_itr))) {
+				if(!strcmp(c_usage->name,
+					   row[EVENT_REQ_CLUSTER])) {
+					int local_start = row_start;
+					int local_end = row_end;
+					if(c_usage->start > local_start)
+						local_start = c_usage->start;
+					if(c_usage->end < local_end)
+						local_end = c_usage->end;
+
+					if((local_end - local_start) < 1)
+						continue;
+
+					info("node %s adds (%d)(%d-%d) * %d = %d "
+					     "to %d",
+					     row[EVENT_REQ_NAME],
+					     (local_end - local_start)+1,
+					     local_end, local_start,
+					     row_cpu,
+					     ((local_end - local_start)+1)
+					     * row_cpu,
+					     row_cpu);
+					/* need to add 1 sec to the
+					   subtraction to get the
+					   total time */
+					c_usage->d_cpu +=
+						((local_end - local_start) + 1)
+						* row_cpu;
+					/* don't break here just
+					   incase the cpu count changed during
+					   this time period.
+					*/
+				}
+			}
+		}
+		mysql_free_result(result);
+
+		query = xstrdup_printf("select %s from %s, %s as t2 where "
+				       "((eligible <= %d && end >= %d) "
+				       "|| end = 0 || start = 0) "
+				       "&& associd=t2.id "
+				       "order by associd, eligible",
+				       job_str, job_table, assoc_table,
+				       (int)curr_end, (int)curr_start);
+
+		debug3("%d query\n%s", mysql_conn->conn, query);
+		result = mysql_db_query_ret(
+			mysql_conn->acct_mysql_db, query, 0);
+		xfree(query);
+		if(!result) {
+			rc = SLURM_ERROR; /* leave via the cleanup below */
+			break;
+		}
+		while((row = mysql_fetch_row(result))) {
+			int job_id = atoi(row[JOB_REQ_JOBID]);
+			int assoc_id = atoi(row[JOB_REQ_ASSOCID]);
+			int row_eligible = atoi(row[JOB_REQ_ELG]);
+			int row_start = atoi(row[JOB_REQ_START]);
+			int row_end = atoi(row[JOB_REQ_END]);
+			int row_acpu = atoi(row[JOB_REQ_ACPU]);
+			int row_rcpu = atoi(row[JOB_REQ_RCPU]);
+
+			if(row_start && (row_start < curr_start))
+				row_start = curr_start;
+			if(!row_start && row_end)
+				row_start = row_end;
+			if(!row_end)
+				row_end = curr_end;
+			if(!a_usage || last_id != assoc_id) {
+				a_usage =
+					xmalloc(sizeof(local_assoc_usage_t));
+				a_usage->assoc_id = assoc_id;
+				list_append(assoc_usage_list, a_usage);
+				last_id = assoc_id;
+			}
+
+			if(row_start) {
+				a_usage->a_cpu +=
+					(row_end - row_start)
+					* row_acpu;
+			}
+			list_iterator_reset(c_itr);
+			while((c_usage = list_next(c_itr))) {
+				if(!strcmp(c_usage->name,
+					   row[JOB_REQ_CLUSTER])) {
+					int local_start = row_start;
+					int local_end = row_end;
+					if(!local_start)
+						goto calc_resv;
+
+					if(c_usage->start > local_start)
+						local_start = c_usage->start;
+					if(c_usage->end < local_end)
+						local_end = c_usage->end;
+
+					if((local_end - local_start) < 1)
+						goto calc_resv;
+					info("%d assoc %d adds (%d)(%d-%d) * %d = %d "
+					     "to %d",
+					     job_id,
+					     assoc_id,
+					     local_end - local_start,
+					     local_end, local_start,
+					     row_acpu,
+					     (local_end - local_start)
+					     * row_acpu,
+					     row_acpu);
+					c_usage->a_cpu +=
+						(local_end - local_start)
+						* row_acpu;
+				calc_resv:
+					/* now reserved time */
+					if(row_start < c_usage->start)
+						continue;
+					local_start = row_eligible;
+					local_end = row_start;
+					if(c_usage->start > local_start)
+						local_start = c_usage->start;
+					if(c_usage->end < local_end)
+						local_end = c_usage->end;
+
+					if((local_end - local_start) < 1)
+						continue;
+					info("%d assoc %d reserved (%d)(%d-%d) * %d = %d "
+					     "to %d",
+					     job_id,
+					     assoc_id,
+					     (local_end - local_start),
+					     local_end, local_start,
+					     row_rcpu,
+					     (local_end - local_start)
+					     * row_rcpu,
+					     row_rcpu);
+					c_usage->r_cpu +=
+						(local_end - local_start)
+						* row_rcpu;
+
+					/* don't break here just
+					   incase the cpu count changed during
+					   this time period.
+					*/
+				}
+			}
+		}
+		mysql_free_result(result);
+
+		list_iterator_reset(c_itr);
+		while((c_usage = list_next(c_itr))) {
+			int total_time = ((curr_end - curr_start) + 1)
+				* c_usage->cpu_count;
+
+			c_usage->i_cpu = total_time - c_usage->a_cpu -
+				c_usage->d_cpu - c_usage->r_cpu;
+			if(c_usage->i_cpu < 0) {
+				c_usage->r_cpu += c_usage->i_cpu;
+				c_usage->i_cpu = 0;
+				if(c_usage->r_cpu < 0)
+					c_usage->r_cpu = 0;
+			}
+
+			info("cluster %s(%u) down %u alloc %u "
+			     "resv %u idle %u total= %u from %s", c_usage->name,
+			     c_usage->cpu_count, c_usage->d_cpu, c_usage->a_cpu,
+			     c_usage->r_cpu, c_usage->i_cpu,
+			     c_usage->d_cpu + c_usage->a_cpu +
+			     c_usage->r_cpu + c_usage->i_cpu,
+			     ctime(&c_usage->start));
+			info("to %s", ctime(&c_usage->end));
+		}
+		list_flush(assoc_usage_list);
+		list_flush(cluster_usage_list);
 		curr_start = curr_end+1;
 		curr_end = curr_start + add_sec;
 		debug3("curr hour is now %d-%d", curr_start, curr_end);
 	}
+	xfree(event_str);
+	xfree(job_str);
+	list_iterator_destroy(a_itr);
+	list_iterator_destroy(c_itr);
+	list_destroy(assoc_usage_list);
+	list_destroy(cluster_usage_list);
+	if(rc != SLURM_SUCCESS)
+		return rc;
 /* 	info("stop start %s", ctime(&curr_start)); */
 /* 	info("stop end %s", ctime(&curr_end)); */
 	return SLURM_SUCCESS;
diff --git a/src/plugins/accounting_storage/mysql/mysql_rollup.h b/src/plugins/accounting_storage/mysql/mysql_rollup.h
index d04db2387b1..199e30d802b 100644
--- a/src/plugins/accounting_storage/mysql/mysql_rollup.h
+++ b/src/plugins/accounting_storage/mysql/mysql_rollup.h
@@ -44,6 +44,14 @@
 #include "mysql_jobacct_process.h"
 
 #ifdef HAVE_MYSQL
+extern char *assoc_table;
+extern char *assoc_day_table;
+extern char *assoc_hour_table;
+extern char *assoc_month_table;
+extern char *cluster_day_table;
+extern char *cluster_hour_table;
+extern char *cluster_month_table;
+extern char *event_table;
 
 extern int mysql_hourly_rollup(mysql_conn_t *mysql_conn,
 			       time_t start, time_t end);
diff --git a/src/plugins/accounting_storage/none/accounting_storage_none.c b/src/plugins/accounting_storage/none/accounting_storage_none.c
index 29deeafbe2e..24de0dd251d 100644
--- a/src/plugins/accounting_storage/none/accounting_storage_none.c
+++ b/src/plugins/accounting_storage/none/accounting_storage_none.c
@@ -340,3 +340,9 @@ extern int acct_storage_p_update_shares_used(void *db_conn,
 {
 	return SLURM_SUCCESS;
 }
+/* No-op stub for this plugin: the arguments are unused, success is returned. */
+extern int acct_storage_p_flush_jobs_on_cluster(
+	void *db_conn, char *cluster, time_t event_time)
+{
+	return SLURM_SUCCESS;
+}
diff --git a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c
index 6b732283a28..0c7c390ab16 100644
--- a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c
+++ b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c
@@ -1498,5 +1498,16 @@ extern int acct_storage_p_update_shares_used(void *db_conn,
 {
 	/* This definitely needs to be fleshed out.
 	 * Go through the list of shares_used_object_t objects and store them */
+	return SLURM_SUCCESS;
+}
+
+extern int acct_storage_p_flush_jobs_on_cluster(
+	void *db_conn, char *cluster, time_t event_time)
+{
+	/*
+	 * Meant to put end times on jobs for a clean start.
+	 * Nothing is implemented here yet: no record is touched
+	 * and success is always returned.
+	 */
 	return SLURM_SUCCESS;
 }
diff --git a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c
index b8dce12a15a..2ba67e2f42d 100644
--- a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c
+++ b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c
@@ -357,8 +357,8 @@ extern List acct_storage_p_modify_clusters(void *db_conn, uint32_t uid,
 }
 
 extern List acct_storage_p_modify_associations(void *db_conn, uint32_t uid,
-					      acct_association_cond_t *assoc_q,
-					      acct_association_rec_t *assoc)
+					       acct_association_cond_t *assoc_q,
+					       acct_association_rec_t *assoc)
 {
 	slurmdbd_msg_t req;
 	dbd_modify_msg_t get_msg;
@@ -1203,3 +1203,23 @@ extern int acct_storage_p_update_shares_used(void *db_conn,
 
 	return rc;
 }
+/* Ask slurmdbd to end accounting records of jobs left open on 'cluster'. */
+extern int acct_storage_p_flush_jobs_on_cluster(void *db_conn, char *cluster,
+						time_t event_time)
+{
+	dbd_cluster_procs_msg_t flush_req;
+	slurmdbd_msg_t dbd_msg;
+
+	info("Ending any jobs in accounting that were running when controller "
+	     "went down on cluster %s", cluster);
+
+	/* the flush request carries a cluster-procs payload with no procs */
+	flush_req.cluster_name = cluster;
+	flush_req.proc_count   = 0;
+	flush_req.event_time   = event_time;
+	dbd_msg.msg_type = DBD_FLUSH_JOBS;
+	dbd_msg.data     = &flush_req;
+
+	return (slurm_send_slurmdbd_msg(&dbd_msg) < 0) ?
+		SLURM_ERROR : SLURM_SUCCESS;
+}
diff --git a/src/sacctmgr/account_functions.c b/src/sacctmgr/account_functions.c
index a09912fe1f8..ed0e3d916b9 100644
--- a/src/sacctmgr/account_functions.c
+++ b/src/sacctmgr/account_functions.c
@@ -142,13 +142,10 @@ static int _set_rec(int *start, int argc, char *argv[],
 				a_set = 1;
 		} else if (strncasecmp (argv[i], "MaxWall", 4) == 0) {
 			mins = time_str2mins(argv[i]+end);
-			if (mins >= 0) {
+			if (mins != NO_VAL) {
 				assoc->max_wall_duration_per_job 
 					= (uint32_t) mins;
 				a_set = 1;
-			} else if (strcmp(argv[i]+end, "-1") == 0) {
-				assoc->max_wall_duration_per_job = -1;
-				a_set = 1;
 			} else {
 				printf(" Bad MaxWall time format: %s\n", 
 					argv[i]);
@@ -297,12 +294,9 @@ extern int sacctmgr_add_account(int argc, char *argv[])
 				limit_set = 1;
 		} else if (strncasecmp (argv[i], "MaxWall", 4) == 0) {
 			mins = time_str2mins(argv[i]+end);
-			if (mins >= 0) {
+			if (mins != NO_VAL) {
 				max_wall_duration_per_job = (uint32_t) mins;
 				limit_set = 1;
-			} else if (strcmp(argv[i]+end, "-1") == 0) {
-				max_wall_duration_per_job = -1;
-				limit_set = 1;
 			} else {
 				printf(" Bad MaxWall time format: %s\n", 
 					argv[i]);
@@ -561,16 +555,30 @@ extern int sacctmgr_add_account(int argc, char *argv[])
 
 	if(limit_set) {
 		printf(" Settings\n");
-		if(fairshare != NO_VAL)
+		if(fairshare == INFINITE)
+			printf("  Fairshare       = NONE\n");
+		else if(fairshare != NO_VAL) 
 			printf("  Fairshare       = %u\n", fairshare);
-		if(max_cpu_secs_per_job != NO_VAL)
+		
+		if(max_cpu_secs_per_job == INFINITE)
+			printf("  MaxCPUSecs      = NONE\n");
+		else if(max_cpu_secs_per_job != NO_VAL) 
 			printf("  MaxCPUSecs      = %u\n",
 			       max_cpu_secs_per_job);
-		if(max_jobs != NO_VAL)
+		
+		if(max_jobs == INFINITE) 
+			printf("  MaxJobs         = NONE\n");
+		else if(max_jobs != NO_VAL) 
 			printf("  MaxJobs         = %u\n", max_jobs);
-		if(max_nodes_per_job != NO_VAL)
+		
+		if(max_nodes_per_job == INFINITE)
+			printf("  MaxNodes        = NONE\n");
+		else if(max_nodes_per_job != NO_VAL)
 			printf("  MaxNodes        = %u\n", max_nodes_per_job);
-		if(max_wall_duration_per_job != NO_VAL) {
+		
+		if(max_wall_duration_per_job == INFINITE) 
+			printf("  MaxWall         = NONE\n");		
+		else if(max_wall_duration_per_job != NO_VAL) {
 			char time_buf[32];
 			mins2time_str((time_t) max_wall_duration_per_job, 
 				      time_buf, sizeof(time_buf));
diff --git a/src/sacctmgr/cluster_functions.c b/src/sacctmgr/cluster_functions.c
index c53ed62de49..8a099cbce44 100644
--- a/src/sacctmgr/cluster_functions.c
+++ b/src/sacctmgr/cluster_functions.c
@@ -41,7 +41,7 @@
 #include "src/sacctmgr/print.h"
 
 static int _set_cond(int *start, int argc, char *argv[],
-		     acct_cluster_cond_t *cluster_cond,
+		     List cluster_list,
 		     List format_list)
 {
 	int i;
@@ -54,13 +54,13 @@ static int _set_cond(int *start, int argc, char *argv[],
 			i--;
 			break;
 		} else if(!end) {
-			addto_char_list(cluster_cond->cluster_list, argv[i]);
+			addto_char_list(cluster_list, argv[i]);
 			set = 1;
 		} else if (strncasecmp (argv[i], "Format", 1) == 0) {
 			if(format_list)
 				addto_char_list(format_list, argv[i]+end);
 		} else if (strncasecmp (argv[i], "Names", 1) == 0) {
-			addto_char_list(cluster_cond->cluster_list,
+			addto_char_list(cluster_list,
 					argv[i]+end);
 			set = 1;
 		} else {
@@ -74,7 +74,7 @@ static int _set_cond(int *start, int argc, char *argv[],
 }
 
 static int _set_rec(int *start, int argc, char *argv[],
-		    acct_cluster_rec_t *cluster)
+		    acct_association_rec_t *assoc)
 {
 	int i, mins;
 	int set = 0;
@@ -89,34 +89,31 @@ static int _set_rec(int *start, int argc, char *argv[],
 			printf(" Bad format on %s: End your option with "
 			       "an '=' sign\n", argv[i]);			
 		} else if (strncasecmp (argv[i], "FairShare", 1) == 0) {
-			if (get_uint(argv[i]+end, &cluster->default_fairshare, 
+			if (get_uint(argv[i]+end, &assoc->fairshare, 
 			    "FairShare") == SLURM_SUCCESS)
 				set = 1;
 		} else if (strncasecmp (argv[i], "MaxJobs", 4) == 0) {
-			if (get_uint(argv[i]+end, &cluster->default_max_jobs,
+			if (get_uint(argv[i]+end, &assoc->max_jobs,
 			    "MaxJobs") == SLURM_SUCCESS)
 				set = 1;
 		} else if (strncasecmp (argv[i], "MaxNodes", 4) == 0) {
 			if (get_uint(argv[i]+end, 
-			    &cluster->default_max_nodes_per_job,
+			    &assoc->max_nodes_per_job,
 			    "MaxNodes") == SLURM_SUCCESS)
 				set = 1;
 		} else if (strncasecmp (argv[i], "MaxWall", 4) == 0) {
 			mins = time_str2mins(argv[i]+end);
-			if (mins >= 0) {
-				cluster->default_max_wall_duration_per_job
+			if (mins != NO_VAL) {
+				assoc->max_wall_duration_per_job
 						= (uint32_t) mins;
 				set = 1;
-			} else if (strcmp(argv[i]+end, "-1") == 0) {
-				cluster->default_max_wall_duration_per_job = -1;
-				set = 1;
 			} else {
 				printf(" Bad MaxWall time format: %s\n", 
 					argv[i]);
 			}
 		} else if (strncasecmp (argv[i], "MaxCPUSecs", 4) == 0) {
 			if (get_uint(argv[i]+end, 
-			     &cluster->default_max_cpu_secs_per_job, 
+			     &assoc->max_cpu_secs_per_job, 
 			    "MaxCPUSecs") == SLURM_SUCCESS)
 				set = 1;
 		} else {
@@ -154,7 +151,7 @@ extern int sacctmgr_add_cluster(int argc, char *argv[])
 		} else if (strncasecmp (argv[i], "FairShare", 1) == 0) {
 			fairshare = atoi(argv[i]+end);
 			limit_set = 1;
-		} else if (strncasecmp (argv[i], "MaxCPUSecs4", 4) == 0) {
+		} else if (strncasecmp (argv[i], "MaxCPUSecs", 4) == 0) {
 			max_cpu_secs_per_job = atoi(argv[i]+end);
 			limit_set = 1;
 		} else if (strncasecmp (argv[i], "MaxJobs=", 4) == 0) {
@@ -165,12 +162,9 @@ extern int sacctmgr_add_cluster(int argc, char *argv[])
 			limit_set = 1;
 		} else if (strncasecmp (argv[i], "MaxWall", 4) == 0) {
 			mins = time_str2mins(argv[i]+end);
-			if (mins >= 0) {
+			if (mins != NO_VAL) {
 				max_wall_duration_per_job = (uint32_t) mins;
 				limit_set = 1;
-			} else if (strcmp(argv[i]+end, "-1") == 0) {
-				max_wall_duration_per_job = -1;
-				limit_set = 1;
 			} else {
 				printf(" Bad MaxWall time format: %s\n", 
 					argv[i]);
@@ -248,16 +242,30 @@ extern int sacctmgr_add_cluster(int argc, char *argv[])
 
 	if(limit_set) {
 		printf(" User Defaults\n");
-		if(fairshare != NO_VAL)
+		if(fairshare == INFINITE)
+			printf("  Fairshare       = NONE\n");
+		else if(fairshare != NO_VAL) 
 			printf("  Fairshare       = %u\n", fairshare);
-		if(max_cpu_secs_per_job != NO_VAL)
+		
+		if(max_cpu_secs_per_job == INFINITE)
+			printf("  MaxCPUSecs      = NONE\n");
+		else if(max_cpu_secs_per_job != NO_VAL) 
 			printf("  MaxCPUSecs      = %u\n",
 			       max_cpu_secs_per_job);
-		if(max_jobs != NO_VAL)
+		
+		if(max_jobs == INFINITE) 
+			printf("  MaxJobs         = NONE\n");
+		else if(max_jobs != NO_VAL) 
 			printf("  MaxJobs         = %u\n", max_jobs);
-		if(max_nodes_per_job != NO_VAL)
+		
+		if(max_nodes_per_job == INFINITE)
+			printf("  MaxNodes        = NONE\n");
+		else if(max_nodes_per_job != NO_VAL)
 			printf("  MaxNodes        = %u\n", max_nodes_per_job);
-		if(max_wall_duration_per_job != NO_VAL) {
+		
+		if(max_wall_duration_per_job == INFINITE) 
+			printf("  MaxWall         = NONE\n");		
+		else if(max_wall_duration_per_job != NO_VAL) {
 			char time_buf[32];
 			mins2time_str((time_t) max_wall_duration_per_job, 
 				      time_buf, sizeof(time_buf));
@@ -319,7 +327,7 @@ extern int sacctmgr_list_cluster(int argc, char *argv[])
 
 
 	cluster_cond->cluster_list = list_create(slurm_destroy_char);
-	_set_cond(&i, argc, argv, cluster_cond, format_list);
+	_set_cond(&i, argc, argv, cluster_cond->cluster_list, format_list);
 	
 	cluster_list = acct_storage_g_get_clusters(db_conn, cluster_cond);
 	destroy_acct_cluster_cond(cluster_cond);
@@ -455,47 +463,55 @@ extern int sacctmgr_modify_cluster(int argc, char *argv[])
 {
 	int rc = SLURM_SUCCESS;
 	int i=0;
-	acct_cluster_rec_t *cluster = xmalloc(sizeof(acct_cluster_rec_t));
-	acct_cluster_cond_t *cluster_cond =
-		xmalloc(sizeof(acct_cluster_cond_t));
-	List cluster_list = NULL;
-	int cond_set = 0, rec_set = 0;
+	acct_association_rec_t *assoc = xmalloc(sizeof(acct_association_rec_t));
+	acct_association_cond_t *assoc_cond =
+		xmalloc(sizeof(acct_association_cond_t));
+	int cond_set = 0, rec_set = 0, set = 0;
 	List ret_list = NULL;
 
-	cluster_cond->cluster_list = list_create(slurm_destroy_char);
-
-	cluster->default_fairshare = -2; 
-	cluster->default_max_cpu_secs_per_job = -2;
-	cluster->default_max_jobs = -2; 
-	cluster->default_max_nodes_per_job = -2;
-	cluster->default_max_wall_duration_per_job = -2;
+	/* assoc_cond is already allocated by its initializer above */
+	assoc_cond->cluster_list = list_create(slurm_destroy_char);
+	assoc_cond->acct_list = list_create(NULL);
+	assoc_cond->fairshare = NO_VAL;
+	assoc_cond->max_cpu_secs_per_job = NO_VAL;
+	assoc_cond->max_jobs = NO_VAL;
+	assoc_cond->max_nodes_per_job = NO_VAL;
+	assoc_cond->max_wall_duration_per_job = NO_VAL;
+
+	assoc->fairshare = NO_VAL;
+	assoc->max_cpu_secs_per_job = NO_VAL;
+	assoc->max_jobs = NO_VAL;
+	assoc->max_nodes_per_job = NO_VAL;
+	assoc->max_wall_duration_per_job = NO_VAL;
 
 	for (i=0; i<argc; i++) {
 		if (strncasecmp (argv[i], "Where", 5) == 0) {
 			i++;
-			if(_set_cond(&i, argc, argv, cluster_cond, NULL))
+			if(_set_cond(&i, argc, argv,
+				     assoc_cond->cluster_list, NULL))
 				cond_set = 1;
 		} else if (strncasecmp (argv[i], "Set", 3) == 0) {
 			i++;
-			if(_set_rec(&i, argc, argv, cluster))
+			if(_set_rec(&i, argc, argv, assoc))
 				rec_set = 1;
 		} else {
-			if(_set_cond(&i, argc, argv, cluster_cond, NULL))
+			if(_set_cond(&i, argc, argv,
+				     assoc_cond->cluster_list, NULL))
 				cond_set = 1;
 		}
 	}
 
 	if(!rec_set) {
 		printf(" You didn't give me anything to set\n");
-		destroy_acct_cluster_rec(cluster);
-		destroy_acct_cluster_cond(cluster_cond);
+		destroy_acct_association_rec(assoc);
+		destroy_acct_association_cond(assoc_cond);
 		return SLURM_ERROR;
 	} else if(!cond_set) {
 		if(!commit_check("You didn't set any conditions with 'WHERE'.\n"
 				 "Are you sure you want to continue?")) {
 			printf("Aborted\n");
-			destroy_acct_cluster_rec(cluster);
-			destroy_acct_cluster_cond(cluster_cond);
+			destroy_acct_association_rec(assoc);
+			destroy_acct_association_cond(assoc_cond); /* filter */
 			return SLURM_SUCCESS;
 		}		
 	}
@@ -503,45 +519,53 @@ extern int sacctmgr_modify_cluster(int argc, char *argv[])
 	printf(" Setting\n");
 	if(rec_set) 
 		printf(" User Defaults  =\n");
-	if(cluster->default_fairshare != NO_VAL)
-		printf("  Fairshare     = %u\n", cluster->default_fairshare);
 
-	if(cluster->default_max_cpu_secs_per_job != NO_VAL)
+	if(assoc->fairshare == INFINITE)
+		printf("  Fairshare     = NONE\n");
+	else if(assoc->fairshare != NO_VAL) 
+		printf("  Fairshare     = %u\n", assoc->fairshare);
+		
+	if(assoc->max_cpu_secs_per_job == INFINITE)
+		printf("  MaxCPUSecs    = NONE\n");
+	else if(assoc->max_cpu_secs_per_job != NO_VAL) 
 		printf("  MaxCPUSecs    = %u\n",
-		       cluster->default_max_cpu_secs_per_job);
-	if(cluster->default_max_jobs != NO_VAL)
-		printf("  MaxJobs       = %u\n", cluster->default_max_jobs);
-	if(cluster->default_max_nodes_per_job != NO_VAL)
+		       assoc->max_cpu_secs_per_job);
+		
+	if(assoc->max_jobs == INFINITE) 
+		printf("  MaxJobs       = NONE\n");
+	else if(assoc->max_jobs != NO_VAL) 
+		printf("  MaxJobs       = %u\n", assoc->max_jobs);
+		
+	if(assoc->max_nodes_per_job == INFINITE)
+		printf("  MaxNodes      = NONE\n");
+	else if(assoc->max_nodes_per_job != NO_VAL)
 		printf("  MaxNodes      = %u\n",
-		       cluster->default_max_nodes_per_job);
-	if(cluster->default_max_wall_duration_per_job != NO_VAL) {
+		       assoc->max_nodes_per_job);
+		
+	if(assoc->max_wall_duration_per_job == INFINITE) 
+		printf("  MaxWall       = NONE\n");		
+	else if(assoc->max_wall_duration_per_job != NO_VAL) {
 		char time_buf[32];
 		mins2time_str((time_t) 
-			      cluster->default_max_wall_duration_per_job, 
+			      assoc->max_wall_duration_per_job, 
 			      time_buf, sizeof(time_buf));
 		printf("  MaxWall       = %s\n", time_buf);
 	}
 
-	cluster_list = list_create(destroy_acct_cluster_rec);
-	list_append(cluster_list, cluster);
+	list_append(assoc_cond->acct_list, "root");
 	notice_thread_init();
-	ret_list = acct_storage_g_modify_clusters(
-		db_conn, my_uid, cluster_cond, cluster);
-	notice_thread_fini();
+	ret_list = acct_storage_g_modify_associations(
+		db_conn, my_uid, assoc_cond, assoc);
+	
 	if(ret_list && list_count(ret_list)) {
 		char *object = NULL;
 		ListIterator itr = list_iterator_create(ret_list);
-		printf(" Modifying clusters...\n");
+		printf(" Modified cluster defaults for associations...\n");
 		while((object = list_next(itr))) {
 			printf("  %s\n", object);
 		}
 		list_iterator_destroy(itr);
-		if(commit_check("Would you like to commit changes?")) {
-			acct_storage_g_commit(db_conn, 1);
-		} else {
-			printf(" Changes Discarded\n");
-			acct_storage_g_commit(db_conn, 0);
-		}
+		set = 1;
 	} else if(ret_list) {
 		printf(" Nothing modified\n");
 	} else {
@@ -551,9 +575,18 @@ extern int sacctmgr_modify_cluster(int argc, char *argv[])
 
 	if(ret_list)
 		list_destroy(ret_list);
+	notice_thread_fini();
 
-	destroy_acct_cluster_cond(cluster_cond);
-	destroy_acct_cluster_rec(cluster);
+	if(set) {
+		if(commit_check("Would you like to commit changes?")) 
+			acct_storage_g_commit(db_conn, 1);
+		else {
+			printf(" Changes Discarded\n");
+			acct_storage_g_commit(db_conn, 0);
+		}
+	}
+	destroy_acct_association_cond(assoc_cond);
+	destroy_acct_association_rec(assoc);
 
 	return rc;
 }
@@ -568,7 +601,7 @@ extern int sacctmgr_delete_cluster(int argc, char *argv[])
 
 	cluster_cond->cluster_list = list_create(slurm_destroy_char);
 	
-	if(!_set_cond(&i, argc, argv, cluster_cond, NULL)) {
+	if(!_set_cond(&i, argc, argv, cluster_cond->cluster_list, NULL)) {
 		printf(" No conditions given to remove, not executing.\n");
 		destroy_acct_cluster_cond(cluster_cond);
 		return SLURM_ERROR;
diff --git a/src/sacctmgr/sacctmgr.c b/src/sacctmgr/sacctmgr.c
index 73f6b23dc31..536081bbec2 100644
--- a/src/sacctmgr/sacctmgr.c
+++ b/src/sacctmgr/sacctmgr.c
@@ -426,7 +426,7 @@ _process_command (int argc, char *argv[])
 				 argv[0]);
 		}		
 		quiet_flag = -1;
-	} else if (strncasecmp (argv[0], "rollup", 1) == 0) {
+	} else if (strncasecmp (argv[0], "rollup", 2) == 0) {
 		if (argc > 1) {
 			exit_code = 1;
 			fprintf (stderr,
diff --git a/src/sacctmgr/user_functions.c b/src/sacctmgr/user_functions.c
index 358e53809aa..5891753a8be 100644
--- a/src/sacctmgr/user_functions.c
+++ b/src/sacctmgr/user_functions.c
@@ -151,13 +151,10 @@ static int _set_rec(int *start, int argc, char *argv[],
 				a_set = 1;
 		} else if (strncasecmp (argv[i], "MaxWall", 4) == 0) {
 			mins = time_str2mins(argv[i]+end);
-			if (mins >= 0) {
+			if (mins != NO_VAL) {
 				association->max_wall_duration_per_job 
 					= (uint32_t) mins;
 				a_set = 1;
-			} else if (strcmp(argv[i]+end, "-1") == 0) {
-				association->max_wall_duration_per_job = -1;
-				a_set = 1;
 			} else {
 				printf(" Bad MaxWall time format: %s\n", 
 					argv[i]);
@@ -319,12 +316,9 @@ extern int sacctmgr_add_user(int argc, char *argv[])
 				limit_set = 1;
 		} else if (strncasecmp (argv[i], "MaxWall", 4) == 0) {
 			mins = time_str2mins(argv[i]+end);
-			if (mins >= 0) {
+			if (mins != NO_VAL) {
 				max_wall_duration_per_job = (uint32_t) mins;
 				limit_set = 1;
-			} else if (strcmp(argv[i]+end, "-1") == 0) {
-				max_wall_duration_per_job = -1;
-				limit_set = 1;
 			} else {
 				printf(" Bad MaxWall time format: %s\n", 
 					argv[i]);
@@ -606,16 +600,30 @@ no_default:
 
 	if(limit_set) {
 		        printf(" Non Default Settings\n");
-		if(fairshare != NO_VAL)
+		if(fairshare == INFINITE)
+			printf("  Fairshare       = NONE\n");
+		else if(fairshare != NO_VAL) 
 			printf("  Fairshare       = %u\n", fairshare);
-		if(max_cpu_secs_per_job != NO_VAL)
+		
+		if(max_cpu_secs_per_job == INFINITE)
+			printf("  MaxCPUSecs      = NONE\n");
+		else if(max_cpu_secs_per_job != NO_VAL) 
 			printf("  MaxCPUSecs      = %u\n",
 			       max_cpu_secs_per_job);
-		if(max_jobs != NO_VAL)
+		
+		if(max_jobs == INFINITE) 
+			printf("  MaxJobs         = NONE\n");
+		else if(max_jobs != NO_VAL) 
 			printf("  MaxJobs         = %u\n", max_jobs);
-		if(max_nodes_per_job != NO_VAL)
+		
+		if(max_nodes_per_job == INFINITE)
+			printf("  MaxNodes        = NONE\n");
+		else if(max_nodes_per_job != NO_VAL)
 			printf("  MaxNodes        = %u\n", max_nodes_per_job);
-		if(max_wall_duration_per_job != NO_VAL) {
+		
+		if(max_wall_duration_per_job == INFINITE) 
+			printf("  MaxWall         = NONE\n");		
+		else if(max_wall_duration_per_job != NO_VAL) {
 			char time_buf[32];
 			mins2time_str((time_t) max_wall_duration_per_job, 
 				      time_buf, sizeof(time_buf));
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index cb762c9d836..0e8f00bad41 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -377,6 +377,7 @@ int main(int argc, char *argv[])
 				 * information to Gold or SlurmDBD, create 
 				 * a file called "/tmp/slurm_accounting_first" to 
 				 * capture node initialization information */
+		   
 				_accounting_mark_all_nodes_down("cold-start");
 				 unlink("/tmp/slurm_accounting_first");
 			}
@@ -928,6 +929,12 @@ static int _accounting_mark_all_nodes_down(char *reason)
 	}
 	xfree(state_file);
 
+	if((rc = acct_storage_g_flush_jobs_on_cluster(acct_db_conn,
+						      slurmctld_cluster_name,
+						      event_time))
+	   == SLURM_ERROR)
+		return rc;
+
 	node_ptr = node_record_table_ptr;
 	for (i = 0; i < node_record_count; i++, node_ptr++) {
 		if (node_ptr->name == '\0')
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 85cbb7be008..4174b656787 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -5296,7 +5296,7 @@ static bool _validate_acct_policy(job_desc_msg_t *job_desc,
 {
 	uint32_t time_limit;
 
-	log_assoc_rec(assoc_ptr);
+	//log_assoc_rec(assoc_ptr);
 	if ((assoc_ptr->max_wall_duration_per_job != NO_VAL) &&
 	    (assoc_ptr->max_wall_duration_per_job != INFINITE)) {
 		time_limit = assoc_ptr->max_wall_duration_per_job;
diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c
index a99a99030a1..834ecc65ad6 100644
--- a/src/slurmctld/node_mgr.c
+++ b/src/slurmctld/node_mgr.c
@@ -1638,7 +1638,10 @@ extern int validate_nodes_via_front_end(
 		slurm_node_registration_status_msg_t *reg_msg)
 {
 	int error_code = 0, i, jobs_on_node;
-	bool updated_job = false, failure_logged = false;
+	bool updated_job = false;
+#ifdef HAVE_BG
+	bool failure_logged = false;
+#endif
 	struct job_record *job_ptr;
 	struct config_record *config_ptr;
 	struct node_record *node_ptr;
diff --git a/src/slurmdbd/proc_req.c b/src/slurmdbd/proc_req.c
index f2b6627b012..945732abb7e 100644
--- a/src/slurmdbd/proc_req.c
+++ b/src/slurmdbd/proc_req.c
@@ -66,6 +66,8 @@ static int   _get_jobs(void *db_conn, Buf in_buffer, Buf *out_buffer);
 static int   _get_usage(uint16_t type, void *db_conn,
 			Buf in_buffer, Buf *out_buffer);
 static int   _get_users(void *db_conn, Buf in_buffer, Buf *out_buffer);
+static int   _flush_jobs(void *db_conn,
+			 Buf in_buffer, Buf *out_buffer, uint32_t *uid);
 static void *_init_conn(Buf in_buffer, Buf *out_buffer, uint32_t *uid);
 static int   _fini_conn(void **db_conn, Buf in_buffer, Buf *out_buffer);
 static int   _job_complete(void *db_conn,
@@ -177,6 +179,9 @@ proc_req(void **db_conn, slurm_fd orig_fd,
 		case DBD_GET_USERS:
 			rc = _get_users(*db_conn, in_buffer, out_buffer);
 			break;
+		case DBD_FLUSH_JOBS:
+			rc = _flush_jobs(*db_conn, in_buffer, out_buffer, uid);
+			break;
 		case DBD_INIT:
 			if (first)
 				(*db_conn) = _init_conn(
@@ -786,6 +791,39 @@ static int _get_users(void *db_conn, Buf in_buffer, Buf *out_buffer)
 	return SLURM_SUCCESS;
 }
 
+static int _flush_jobs(void *db_conn,
+			  Buf in_buffer, Buf *out_buffer, uint32_t *uid)
+{	/* Handle DBD_FLUSH_JOBS: check uid, unpack, flush, reply in *out_buffer */
+	dbd_cluster_procs_msg_t *cluster_procs_msg = NULL;
+	int rc = SLURM_SUCCESS;
+	char *comment = NULL;	/* set only when the uid check or unpack fails */
+
+	if (*uid != slurmdbd_conf->slurm_user_id) {	/* only slurm_user_id may flush */
+		comment = "DBD_FLUSH_JOBS message from invalid uid";
+		error("DBD_FLUSH_JOBS message from invalid uid %u", *uid);
+		rc = ESLURM_ACCESS_DENIED;
+		goto end_it;	/* a reply is still built at end_it */
+	}
+	if (slurmdbd_unpack_cluster_procs_msg(&cluster_procs_msg, in_buffer) !=
+	    SLURM_SUCCESS) {
+		comment = "Failed to unpack DBD_FLUSH_JOBS message";
+		error("%s", comment);
+		rc = SLURM_ERROR;
+		goto end_it;
+	}
+	debug2("DBD_FLUSH_JOBS: called for %s",
+	       cluster_procs_msg->cluster_name);
+
+	rc = acct_storage_g_flush_jobs_on_cluster(	/* its result becomes the reply code */
+		db_conn,
+		cluster_procs_msg->cluster_name,
+		cluster_procs_msg->event_time);
+end_it:	/* common exit for success and both error paths */
+	slurmdbd_free_cluster_procs_msg(cluster_procs_msg);	/* NOTE(review): still NULL if the uid check failed; presumably NULL-safe, confirm */
+	*out_buffer = make_dbd_rc_msg(rc, comment, DBD_FLUSH_JOBS);	/* built on every path */
+	return rc;	/* same code that was packed into the reply */
+}
+
 static void *_init_conn(Buf in_buffer, Buf *out_buffer, uint32_t *uid)
 {
 	dbd_init_msg_t *init_msg = NULL;
@@ -1085,7 +1123,7 @@ static int   _modify_assocs(void *db_conn,
 						get_msg->cond, get_msg->rec);
 
 	slurmdbd_free_modify_msg(DBD_MODIFY_ASSOCS, get_msg);
-		*out_buffer = init_buf(1024);
+	*out_buffer = init_buf(1024);
 	pack16((uint16_t) DBD_GOT_LIST, *out_buffer);
 	slurmdbd_pack_list_msg(DBD_GOT_LIST, &list_msg, *out_buffer);
 	if(list_msg.my_list)
@@ -1128,7 +1166,7 @@ static int   _modify_clusters(void *db_conn,
 					    get_msg->cond, get_msg->rec);
 
 	slurmdbd_free_modify_msg(DBD_MODIFY_CLUSTERS, get_msg);
-		*out_buffer = init_buf(1024);
+	*out_buffer = init_buf(1024);
 	pack16((uint16_t) DBD_GOT_LIST, *out_buffer);
 	slurmdbd_pack_list_msg(DBD_GOT_LIST, &list_msg, *out_buffer);
 	if(list_msg.my_list)
-- 
GitLab