From 2551a44527b30baf0cd171b98acaf5a59cb3f5f9 Mon Sep 17 00:00:00 2001
From: Danny Auble <da@llnl.gov>
Date: Wed, 10 Jun 2009 22:22:43 +0000
Subject: [PATCH] svn merge -r17789:17805
 https://eris.llnl.gov/svn/slurm/branches/slurm-2.0

---
 NEWS                                          |   1 +
 src/common/read_config.c                      | 142 +++++++++---------
 src/common/read_config.h                      |   2 +
 .../mysql/accounting_storage_mysql.c          |   2 +-
 .../pgsql/accounting_storage_pgsql.c          |   2 +-
 src/plugins/jobcomp/mysql/jobcomp_mysql.c     |   2 +-
 src/plugins/jobcomp/pgsql/jobcomp_pgsql.c     |   2 +-
 src/plugins/select/cons_res/select_cons_res.c | 134 +++++++++++++----
 src/sacctmgr/config_functions.c               |   4 +
 src/slurmctld/node_mgr.c                      |   2 +-
 src/slurmd/slurmstepd/io.c                    |  50 ++++--
 src/slurmd/slurmstepd/io.h                    |   1 +
 src/slurmd/slurmstepd/mgr.c                   |   9 +-
 src/slurmdbd/read_config.c                    |  16 +-
 testsuite/expect/test1.84                     |   2 +
 15 files changed, 242 insertions(+), 129 deletions(-)

diff --git a/NEWS b/NEWS
index dfff9749c85..e15bac8dbc9 100644
--- a/NEWS
+++ b/NEWS
@@ -72,6 +72,7 @@ documents those changes that are of interest to users and admins.
     memory affinity that was applied to the batch script.
  -- Ignore the extra processors on a node above configured size if either 
     sched/gang or select/cons_res is configured.
+ -- Fix bug in tracking memory allocated on a node for select/cons_res plugin.
  -- Fixed a race condition when writing labelled output with a file per task
     or per node, which potentially closed a file before all data was written.
 
diff --git a/src/common/read_config.c b/src/common/read_config.c
index e6ba2eb5aea..6306a1fb6d1 100644
--- a/src/common/read_config.c
+++ b/src/common/read_config.c
@@ -114,23 +114,23 @@ bool nodehash_initialized = false;
 static names_ll_t *host_to_node_hashtbl[NAME_HASH_LEN] = {NULL};
 static names_ll_t *node_to_host_hashtbl[NAME_HASH_LEN] = {NULL};
 
-static int parse_nodename(void **dest, slurm_parser_enum_t type,
-			  const char *key, const char *value,
-			  const char *line, char **leftover);
-static void destroy_nodename(void *ptr);
-static int parse_partitionname(void **dest, slurm_parser_enum_t type,
-			       const char *key, const char *value,
-			       const char *line, char **leftover);
-static void destroy_partitionname(void *ptr);
-static int parse_downnodes(void **dest, slurm_parser_enum_t type,
+static int _parse_nodename(void **dest, slurm_parser_enum_t type,
 			   const char *key, const char *value,
 			   const char *line, char **leftover);
-static void destroy_downnodes(void *ptr);
-static int defunct_option(void **dest, slurm_parser_enum_t type,
-			  const char *key, const char *value,
-			  const char *line, char **leftover);
-static void validate_and_set_defaults(slurm_ctl_conf_t *conf,
-				      s_p_hashtbl_t *hashtbl);
+static void _destroy_nodename(void *ptr);
+static int _parse_partitionname(void **dest, slurm_parser_enum_t type,
+				const char *key, const char *value,
+				const char *line, char **leftover);
+static void _destroy_partitionname(void *ptr);
+static int _parse_downnodes(void **dest, slurm_parser_enum_t type,
+			    const char *key, const char *value,
+			    const char *line, char **leftover);
+static void _destroy_downnodes(void *ptr);
+static int _defunct_option(void **dest, slurm_parser_enum_t type,
+			   const char *key, const char *value,
+			   const char *line, char **leftover);
+static void _validate_and_set_defaults(slurm_ctl_conf_t *conf,
+				       s_p_hashtbl_t *hashtbl);
 
 s_p_options_t slurm_conf_options[] = {
 	{"AccountingStorageEnforce", S_P_STRING},
@@ -169,13 +169,13 @@ s_p_options_t slurm_conf_options[] = {
 	{"FastSchedule", S_P_UINT16},
 	{"FirstJobId", S_P_UINT32},
 	{"GetEnvTimeout", S_P_UINT16},
-	{"HashBase", S_P_LONG, defunct_option},
-	{"HeartbeatInterval", S_P_LONG, defunct_option},
+	{"HashBase", S_P_LONG, _defunct_option},
+	{"HeartbeatInterval", S_P_LONG, _defunct_option},
 	{"HealthCheckInterval", S_P_UINT16},
 	{"HealthCheckProgram", S_P_STRING},
 	{"InactiveLimit", S_P_UINT16},
 	{"JobAcctGatherType", S_P_STRING},
-	{"JobAcctFrequency", S_P_UINT16, defunct_option},
+	{"JobAcctFrequency", S_P_UINT16, _defunct_option},
 	{"JobAcctGatherFrequency", S_P_UINT16},
 	{"JobAcctLogFile", S_P_STRING},
 	{"JobAcctType", S_P_STRING},
@@ -190,7 +190,7 @@ s_p_options_t slurm_conf_options[] = {
 	{"JobCredentialPublicCertificate", S_P_STRING},
 	{"JobFileAppend", S_P_UINT16},
 	{"JobRequeue", S_P_UINT16},
-	{"KillTree", S_P_UINT16, defunct_option},
+	{"KillTree", S_P_UINT16, _defunct_option},
 	{"KillOnBadExit", S_P_UINT16},
 	{"KillWait", S_P_UINT16},
 	{"Licenses", S_P_STRING},
@@ -201,7 +201,7 @@ s_p_options_t slurm_conf_options[] = {
 	{"MaxMemPerTask", S_P_UINT32},	/* defunct */
 	{"MessageTimeout", S_P_UINT16},
 	{"MinJobAge", S_P_UINT16},
-	{"MpichGmDirectSupport", S_P_LONG, defunct_option},
+	{"MpichGmDirectSupport", S_P_LONG, _defunct_option},
 	{"MpiDefault", S_P_STRING},
 	{"MpiParams", S_P_STRING},
 	{"OverTimeLimit", S_P_UINT16},
@@ -230,7 +230,7 @@ s_p_options_t slurm_conf_options[] = {
 	{"ResvOverRun", S_P_UINT16},
 	{"ReturnToService", S_P_UINT16},
 	{"SallocDefaultCommand", S_P_STRING},
-	{"SchedulerAuth", S_P_STRING, defunct_option},
+	{"SchedulerAuth", S_P_STRING, _defunct_option},
 	{"SchedulerParameters", S_P_STRING},
 	{"SchedulerPort", S_P_UINT16},
 	{"SchedulerRootFilter", S_P_UINT16},
@@ -275,16 +275,16 @@ s_p_options_t slurm_conf_options[] = {
 	{"UsePAM", S_P_BOOLEAN},
 	{"WaitTime", S_P_UINT16},
 
-	{"NodeName", S_P_ARRAY, parse_nodename, destroy_nodename},
-	{"PartitionName", S_P_ARRAY, parse_partitionname,
-	 destroy_partitionname},
-	{"DownNodes", S_P_ARRAY, parse_downnodes, destroy_downnodes},
+	{"NodeName", S_P_ARRAY, _parse_nodename, _destroy_nodename},
+	{"PartitionName", S_P_ARRAY, _parse_partitionname,
+	 _destroy_partitionname},
+	{"DownNodes", S_P_ARRAY, _parse_downnodes, _destroy_downnodes},
 
 	{NULL}
 };
 
 
-static int defunct_option(void **dest, slurm_parser_enum_t type,
+static int _defunct_option(void **dest, slurm_parser_enum_t type,
 			  const char *key, const char *value,
 			  const char *line, char **leftover)
 {
@@ -325,9 +325,9 @@ static void _set_node_prefix(const char *nodenames)
 #endif /* HAVE_BG */
 
 
-static int parse_nodename(void **dest, slurm_parser_enum_t type,
-			  const char *key, const char *value,
-			  const char *line, char **leftover)
+static int _parse_nodename(void **dest, slurm_parser_enum_t type,
+			   const char *key, const char *value,
+			   const char *line, char **leftover)
 {
 	s_p_hashtbl_t *tbl, *dflt;
 	slurm_conf_node_t *n;
@@ -499,7 +499,7 @@ static int parse_nodename(void **dest, slurm_parser_enum_t type,
 	/* should not get here */
 }
 
-static void destroy_nodename(void *ptr)
+static void _destroy_nodename(void *ptr)
 {
 	slurm_conf_node_t *n = (slurm_conf_node_t *)ptr;
 	xfree(n->nodenames);
@@ -526,7 +526,7 @@ int slurm_conf_nodename_array(slurm_conf_node_t **ptr_array[])
 }
 
 
-static int parse_partitionname(void **dest, slurm_parser_enum_t type,
+static int _parse_partitionname(void **dest, slurm_parser_enum_t type,
 			       const char *key, const char *value,
 			       const char *line, char **leftover)
 {
@@ -604,7 +604,7 @@ static int parse_partitionname(void **dest, slurm_parser_enum_t type,
 			int max_time = time_str2mins(tmp);
 			if ((max_time < 0) && (max_time != INFINITE)) {
 				error("Bad value \"%s\" for MaxTime", tmp);
-				destroy_partitionname(p);
+				_destroy_partitionname(p);
 				s_p_hashtbl_destroy(tbl);
 				xfree(tmp);
 				return -1;
@@ -620,7 +620,7 @@ static int parse_partitionname(void **dest, slurm_parser_enum_t type,
 			int default_time = time_str2mins(tmp);
 			if ((default_time < 0) && (default_time != INFINITE)) {
 				error("Bad value \"%s\" for DefaultTime", tmp);
-				destroy_partitionname(p);
+				_destroy_partitionname(p);
 				s_p_hashtbl_destroy(tbl);
 				xfree(tmp);
 				return -1;
@@ -687,7 +687,7 @@ static int parse_partitionname(void **dest, slurm_parser_enum_t type,
 #endif
 			else {
 				error("Bad value \"%s\" for Shared", tmp);
-				destroy_partitionname(p);
+				_destroy_partitionname(p);
 				s_p_hashtbl_destroy(tbl);
 				xfree(tmp);
 				return -1;
@@ -711,7 +711,7 @@ static int parse_partitionname(void **dest, slurm_parser_enum_t type,
 	/* should not get here */
 }
 
-static void destroy_partitionname(void *ptr)
+static void _destroy_partitionname(void *ptr)
 {
 	slurm_conf_partition_t *p = (slurm_conf_partition_t *)ptr;
 
@@ -737,7 +737,7 @@ int slurm_conf_partition_array(slurm_conf_partition_t **ptr_array[])
 	}
 }
 
-static int parse_downnodes(void **dest, slurm_parser_enum_t type,
+static int _parse_downnodes(void **dest, slurm_parser_enum_t type,
 			   const char *key, const char *value,
 			   const char *line, char **leftover)
 {
@@ -771,7 +771,7 @@ static int parse_downnodes(void **dest, slurm_parser_enum_t type,
 	return 1;
 }
 
-static void destroy_downnodes(void *ptr)
+static void _destroy_downnodes(void *ptr)
 {
 	slurm_conf_downnodes_t *n = (slurm_conf_downnodes_t *)ptr;
 	xfree(n->nodenames);
@@ -1494,8 +1494,7 @@ init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr)
 }
 
 /* caller must lock conf_lock */
-static void
-_init_slurm_conf(const char *file_name)
+static void _init_slurm_conf(const char *file_name)
 {
 	char *name = (char *)file_name;
 	/* conf_ptr = (slurm_ctl_conf_t *)xmalloc(sizeof(slurm_ctl_conf_t)); */
@@ -1513,7 +1512,7 @@ _init_slurm_conf(const char *file_name)
 	if(s_p_parse_file(conf_hashtbl, name) == SLURM_ERROR)
 		fatal("something wrong with opening/reading conf file");
 	/* s_p_dump_values(conf_hashtbl, slurm_conf_options); */
-	validate_and_set_defaults(conf_ptr, conf_hashtbl);
+	_validate_and_set_defaults(conf_ptr, conf_hashtbl);
 	conf_ptr->slurm_conf = xstrdup(name);
 }
 
@@ -1681,7 +1680,7 @@ static void _normalize_debug_level(uint16_t *level)
  * NOTE: if control_addr is NULL, it is over-written by control_machine
  */
 static void
-validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl)
+_validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl)
 {
 	char *temp_str = NULL;
 	long long_suspend_time;
@@ -1719,7 +1718,7 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl)
 		conf->complete_wait = DEFAULT_COMPLETE_WAIT;
 
 	if (!s_p_get_string(&conf->control_machine, "ControlMachine", hashtbl))
-		fatal ("validate_and_set_defaults: "
+		fatal ("_validate_and_set_defaults: "
 		       "ControlMachine not specified.");
 	else if (strcasecmp("localhost", conf->control_machine) == 0) {
 		xfree (conf->control_machine);
@@ -1882,33 +1881,30 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl)
 	if (!s_p_get_string(&conf->job_comp_host, "JobCompHost",
 			    hashtbl)) {
 		if(default_storage_host)
-			conf->job_comp_host =
-				xstrdup(default_storage_host);
+			conf->job_comp_host = xstrdup(default_storage_host);
 		else
 			conf->job_comp_host = xstrdup(DEFAULT_STORAGE_HOST);
 	}
 	if (!s_p_get_string(&conf->job_comp_user, "JobCompUser",
 			    hashtbl)) {
 		if(default_storage_user)
-			conf->job_comp_user =
-				xstrdup(default_storage_user);
+			conf->job_comp_user = xstrdup(default_storage_user);
 		else
 			conf->job_comp_user = xstrdup(DEFAULT_STORAGE_USER);
 	}
 	if (!s_p_get_string(&conf->job_comp_pass, "JobCompPass",
 			    hashtbl)) {
 		if(default_storage_pass)
-			conf->job_comp_pass =
-				xstrdup(default_storage_pass);
+			conf->job_comp_pass = xstrdup(default_storage_pass);
 	}
 	if (!s_p_get_uint32(&conf->job_comp_port, "JobCompPort",
 			    hashtbl)) {
 		if(default_storage_port)
 			conf->job_comp_port = default_storage_port;
 		else if(!strcmp(conf->job_comp_type, "job_comp/mysql")) 
-			conf->job_comp_port = 3306;
+			conf->job_comp_port = DEFAULT_MYSQL_PORT;
 		else if(!strcmp(conf->job_comp_type, "job_comp/pgsql")) 
-			conf->job_comp_port = 5432;
+			conf->job_comp_port = DEFAULT_PGSQL_PORT;
 		else 
 			conf->job_comp_port = DEFAULT_STORAGE_PORT;
 	}
@@ -2033,6 +2029,15 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl)
 		if(default_storage_loc)
 			conf->accounting_storage_loc =
 				xstrdup(default_storage_loc);
+		else if(!strcmp(conf->accounting_storage_type, 
+				"accounting_storage/mysql")
+			|| !strcmp(conf->accounting_storage_type, 
+				"accounting_storage/pgsql")) 
+			conf->accounting_storage_loc =
+				xstrdup(DEFAULT_ACCOUNTING_DB);
+		else
+			conf->accounting_storage_loc =
+				xstrdup(DEFAULT_STORAGE_LOC);
 	}
 	if (!s_p_get_string(&conf->accounting_storage_user,
 			    "AccountingStorageUser", hashtbl)) {
@@ -2053,39 +2058,26 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl)
 			    "AccountingStoragePort", hashtbl)) {
 		if(default_storage_port)
 			conf->accounting_storage_port = default_storage_port;
+		else if(!strcmp(conf->accounting_storage_type,
+				"accounting_storage/slurmdbd")) 
+			conf->accounting_storage_port = SLURMDBD_PORT;
+		else if(!strcmp(conf->accounting_storage_type, 
+			  "accounting_storage/mysql")) 
+			conf->accounting_storage_port = DEFAULT_MYSQL_PORT;
+		else if(!strcmp(conf->accounting_storage_type,
+			  "accounting_storage/pgsql")) 
+			conf->accounting_storage_port = DEFAULT_PGSQL_PORT;
 		else
-			conf->accounting_storage_port = NO_VAL;
+			conf->accounting_storage_port = DEFAULT_STORAGE_PORT;
 	}
-	/* set correct defaults so scontrol show config works
-	   correctly */
+	
+	/* remove the user and loc if using slurmdbd */
 	if(!strcmp(conf->accounting_storage_type,
 		   "accounting_storage/slurmdbd")) {
 		xfree(conf->accounting_storage_loc);
 		conf->accounting_storage_loc = xstrdup("N/A");
 		xfree(conf->accounting_storage_user);
 		conf->accounting_storage_user = xstrdup("N/A");
-		if(conf->accounting_storage_port == NO_VAL)
-			conf->accounting_storage_port = SLURMDBD_PORT;
-	} else if(!strcmp(conf->accounting_storage_type, 
-			  "accounting_storage/mysql")) {
-		if(conf->accounting_storage_port == NO_VAL)
-			conf->accounting_storage_port = 3306;
-		if(!conf->accounting_storage_loc)
-			conf->accounting_storage_loc =
-				xstrdup(DEFAULT_ACCOUNTING_DB);
-	} else if(!strcmp(conf->accounting_storage_type,
-			  "accounting_storage/pgsql")) {
-		if(conf->accounting_storage_port == NO_VAL)
-			conf->accounting_storage_port = 5432;
-		if(!conf->accounting_storage_loc)
-			conf->accounting_storage_loc =
-				xstrdup(DEFAULT_ACCOUNTING_DB);
-	} else {
-		if(conf->accounting_storage_port == NO_VAL)
-			conf->accounting_storage_port = DEFAULT_STORAGE_PORT;
-		if(!conf->accounting_storage_loc)
-			conf->accounting_storage_loc =
-				xstrdup(DEFAULT_STORAGE_LOC);
 	}
 
 	s_p_get_uint16(&conf->over_time_limit, "OverTimeLimit", hashtbl);
diff --git a/src/common/read_config.h b/src/common/read_config.h
index ce0973684c5..924d8517592 100644
--- a/src/common/read_config.h
+++ b/src/common/read_config.h
@@ -121,6 +121,8 @@ extern char *default_plugstack;
 #define DEFAULT_STORAGE_LOC         "/var/log/slurm_jobacct.log"
 #define DEFAULT_STORAGE_USER        "root"
 #define DEFAULT_STORAGE_PORT        0
+#define DEFAULT_PGSQL_PORT          5432
+#define DEFAULT_MYSQL_PORT          3306
 #define DEFAULT_SUSPEND_RATE        60
 #define DEFAULT_SUSPEND_TIME        0
 #define DEFAULT_SUSPEND_TIMEOUT     30
diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
index 5f2be55ddf1..431850f4599 100644
--- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
+++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
@@ -2805,7 +2805,7 @@ static mysql_db_info_t *_mysql_acct_create_db_info()
 	mysql_db_info_t *db_info = xmalloc(sizeof(mysql_db_info_t));
 	db_info->port = slurm_get_accounting_storage_port();
 	if(!db_info->port) {
-		db_info->port = 3306;
+		db_info->port = DEFAULT_MYSQL_PORT;
 		slurm_set_accounting_storage_port(db_info->port);
 	}
 	db_info->host = slurm_get_accounting_storage_host();	
diff --git a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c
index 3b6b3659c61..391d3be0576 100644
--- a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c
+++ b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c
@@ -137,7 +137,7 @@ static pgsql_db_info_t *_pgsql_acct_create_db_info()
 	/* it turns out it is better if using defaults to let postgres
 	   handle them on it's own terms */
 	if(!db_info->port) {
-		db_info->port = 5432;
+		db_info->port = DEFAULT_PGSQL_PORT;
 		slurm_set_accounting_storage_port(db_info->port);
 	}
 	db_info->host = slurm_get_accounting_storage_host();
diff --git a/src/plugins/jobcomp/mysql/jobcomp_mysql.c b/src/plugins/jobcomp/mysql/jobcomp_mysql.c
index 8e3182c83c7..59b88b1ba69 100644
--- a/src/plugins/jobcomp/mysql/jobcomp_mysql.c
+++ b/src/plugins/jobcomp/mysql/jobcomp_mysql.c
@@ -132,7 +132,7 @@ static mysql_db_info_t *_mysql_jobcomp_create_db_info()
 	mysql_db_info_t *db_info = xmalloc(sizeof(mysql_db_info_t));
 	db_info->port = slurm_get_jobcomp_port();
 	if(!db_info->port) {
-		db_info->port = 3306;
+		db_info->port = DEFAULT_MYSQL_PORT;
 		slurm_set_jobcomp_port(db_info->port);
 	}
 	db_info->host = slurm_get_jobcomp_host();	
diff --git a/src/plugins/jobcomp/pgsql/jobcomp_pgsql.c b/src/plugins/jobcomp/pgsql/jobcomp_pgsql.c
index 56df166c3b2..569fbf27b0e 100644
--- a/src/plugins/jobcomp/pgsql/jobcomp_pgsql.c
+++ b/src/plugins/jobcomp/pgsql/jobcomp_pgsql.c
@@ -131,7 +131,7 @@ static pgsql_db_info_t *_pgsql_jobcomp_create_db_info()
 	/* it turns out it is better if using defaults to let postgres
 	   handle them on it's own terms */
 	if(!db_info->port) {
-		db_info->port = 5432;
+		db_info->port = DEFAULT_PGSQL_PORT;
 		slurm_set_jobcomp_port(db_info->port);
 	}
 	db_info->host = slurm_get_jobcomp_host();
diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c
index 14a4754daad..19b00a0d6f3 100644
--- a/src/plugins/select/cons_res/select_cons_res.c
+++ b/src/plugins/select/cons_res/select_cons_res.c
@@ -413,20 +413,6 @@ static struct node_use_record *_dup_node_usage(struct node_use_record *orig_ptr)
 	return new_use_ptr;
 }
 
-/* Restore a node_state information */
-static void _restore_node_usage(struct node_use_record *orig_ptr)
-{
-	uint32_t i;
-
-	if (orig_ptr == NULL)
-		return;
-
-	for (i = 0; i < select_node_cnt; i++) {
-		select_node_usage[i].node_state   = orig_ptr[i].node_state;
-		select_node_usage[i].alloc_memory = orig_ptr[i].alloc_memory;
-	}
-}
-
 /* delete the given row data */
 static void _destroy_row_data(struct part_row_data *row, uint16_t num_rows) {
 	uint16_t i;
@@ -859,10 +845,12 @@ static int _add_job_to_res(struct job_record *job_ptr, int action)
 			select_node_usage[i].alloc_memory +=
 						job->memory_allocated[n];
 			if (select_node_usage[i].alloc_memory >
-				select_node_record[i].real_memory) {
-				error("error: node %s mem is overallocated(%u)",
-					select_node_record[i].node_ptr->name,
-					select_node_usage[i].alloc_memory);
+			    select_node_record[i].real_memory) {
+				error("error: node %s mem is overallocated "
+				      "(%u) for job %u",
+				      select_node_record[i].node_ptr->name,
+				      select_node_usage[i].alloc_memory,
+				      job_ptr->job_id);
 				
 			}
 			n++;
@@ -949,11 +937,13 @@ static int _rm_job_from_res(struct part_res_record *part_record_ptr,
 			if (!bit_test(job->node_bitmap, i))
 				continue;
 			if (node_usage[i].alloc_memory <
-						job->memory_allocated[n]) {
-				error("error: %s mem is underalloc'd(%u-%u)",
-					select_node_record[i].node_ptr->name,
-					node_usage[i].alloc_memory,
-					job->memory_allocated[n]);
+			    job->memory_allocated[n]) {
+				error("error: node %s mem is underallocated "
+				      "(%u-%u) for job %u",
+				      select_node_record[i].node_ptr->name,
+				      node_usage[i].alloc_memory,
+				      job->memory_allocated[n], 
+				      job_ptr->job_id);
 				node_usage[i].alloc_memory = 0;
 			} else {
 				node_usage[i].alloc_memory -=
@@ -1017,14 +1007,14 @@ static int _rm_job_from_res(struct part_res_record *part_record_ptr,
 			for (n = 0; n < select_node_cnt; n++) {
 				if (bit_test(job->node_bitmap, n) == 0)
 					continue;
-				if (select_node_usage[n].node_state >=
+				if (node_usage[n].node_state >=
 				    job->node_req) {
-					select_node_usage[n].node_state -=
+					node_usage[n].node_state -=
 								job->node_req;
 				} else {
 					error("cons_res:_rm_job_from_res: "
 						"node_state mis-count");
-					select_node_usage[n].node_state =
+					node_usage[n].node_state =
 							NODE_CR_AVAILABLE;
 				}
 			}
@@ -1511,6 +1501,95 @@ extern int select_p_job_list_test(List req_list)
 	return EINVAL;
 }
 
+
+/* _will_run_test - determine when and where a pending job can start by
+ *	removing running jobs from the node table at their termination times
+ *	and running _test_job() after each removal. */
+static int _will_run_test(struct job_record *job_ptr, bitstr_t *bitmap,
+			uint32_t min_nodes, uint32_t max_nodes, 
+			uint32_t req_nodes, uint16_t job_node_req)
+{
+	struct part_res_record *future_part;
+	struct node_use_record *future_usage;
+	struct job_record *tmp_job_ptr, **tmp_job_pptr;
+	List cr_job_list;
+	ListIterator job_iterator;
+	bitstr_t *orig_map;
+	int rc = SLURM_ERROR;
+	time_t now = time(NULL);
+
+	orig_map = bit_copy(bitmap);
+
+	/* Try to run with currently available nodes */
+	rc = cr_job_test(job_ptr, bitmap, min_nodes, max_nodes, req_nodes, 
+			 SELECT_MODE_WILL_RUN, cr_type, job_node_req,
+			 select_node_cnt, select_part_record, 
+			 select_node_usage);
+	if (rc == SLURM_SUCCESS) {
+		bit_free(orig_map);
+		job_ptr->start_time = time(NULL);
+		return SLURM_SUCCESS;
+	}
+
+	/* Job is still pending. Simulate termination of jobs one at a time 
+	 * to determine when and where the job can start. */
+
+	future_part = _dup_part_data(select_part_record);
+	if (future_part == NULL) {
+		bit_free(orig_map);
+		return SLURM_ERROR;
+	}
+	future_usage = _dup_node_usage(select_node_usage);
+	if (future_usage == NULL) {
+		_destroy_part_data(future_part);
+		bit_free(orig_map);
+		return SLURM_ERROR;
+	}
+
+	/* Build list of running jobs */
+	cr_job_list = list_create(NULL);
+	job_iterator = list_iterator_create(job_list);
+	while ((tmp_job_ptr = (struct job_record *) list_next(job_iterator))) {
+		if (tmp_job_ptr->job_state != JOB_RUNNING)
+			continue;
+		if (tmp_job_ptr->end_time == 0) {
+			error("Job %u has zero end_time", tmp_job_ptr->job_id);
+			continue;
+		}
+		tmp_job_pptr = xmalloc(sizeof(struct job_record *));
+		*tmp_job_pptr = tmp_job_ptr;
+		list_append(cr_job_list, tmp_job_pptr);
+	}
+	list_iterator_destroy(job_iterator);
+	list_sort(cr_job_list, _cr_job_list_sort);
+
+	/* Remove the running jobs one at a time from exp_node_cr and try
+	 * scheduling the pending job after each one */
+	job_iterator = list_iterator_create(cr_job_list);
+	while ((tmp_job_pptr = (struct job_record **) list_next(job_iterator))) {
+		tmp_job_ptr = *tmp_job_pptr;
+		_rm_job_from_res(future_part, future_usage, tmp_job_ptr, 0);
+		bit_or(bitmap, orig_map);
+		rc = cr_job_test(job_ptr, bitmap, min_nodes, max_nodes, 
+				 req_nodes, SELECT_MODE_WILL_RUN, cr_type,
+				 job_node_req, select_node_cnt, future_part,
+				 future_usage);
+		if (rc == SLURM_SUCCESS) {
+			if (tmp_job_ptr->end_time <= now)
+				job_ptr->start_time = now + 1;
+			else
+				job_ptr->start_time = tmp_job_ptr->end_time;
+			break;
+		}
+	}
+	list_iterator_destroy(job_iterator);
+	list_destroy(cr_job_list);
+	_destroy_part_data(future_part);
+	_destroy_node_data(future_usage, NULL);
+	bit_free(orig_map);
+	return rc;
+}
+
 extern int select_p_job_begin(struct job_record *job_ptr)
 {
 	return SLURM_SUCCESS;
diff --git a/src/sacctmgr/config_functions.c b/src/sacctmgr/config_functions.c
index 76cda0381db..642d993a0d0 100644
--- a/src/sacctmgr/config_functions.c
+++ b/src/sacctmgr/config_functions.c
@@ -43,6 +43,7 @@
 #include "src/common/xstring.h"
 #include "src/sacctmgr/sacctmgr.h"
 
+static char    *acct_storage_backup_host = NULL;
 static char    *acct_storage_host = NULL;
 static char    *acct_storage_loc  = NULL;
 static char    *acct_storage_pass = NULL;
@@ -91,6 +92,7 @@ static void _free_dbd_config(void)
 
 static void _load_slurm_config(void)
 {
+	acct_storage_backup_host = slurm_get_accounting_storage_backup_host();
 	acct_storage_host = slurm_get_accounting_storage_host();
 	acct_storage_loc  = slurm_get_accounting_storage_loc();
 	acct_storage_pass = slurm_get_accounting_storage_pass();
@@ -107,6 +109,7 @@ static void _load_slurm_config(void)
 
 static void _free_slurm_config(void)
 {
+	xfree(acct_storage_backup_host);
 	xfree(acct_storage_host);
 	xfree(acct_storage_loc);
 	xfree(acct_storage_pass);
@@ -123,6 +126,7 @@ static void _print_slurm_config(void)
 
 	slurm_make_time_str(&now, tmp_str, sizeof(tmp_str));
 	printf("Configuration data as of %s\n", tmp_str);
+	printf("AccountingStorageBackupHost  = %s\n", acct_storage_backup_host);
 	printf("AccountingStorageHost  = %s\n", acct_storage_host);
 	printf("AccountingStorageLoc   = %s\n", acct_storage_loc);
 	printf("AccountingStoragePass  = %s\n", acct_storage_pass);
diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c
index 386d5948cfc..ad04a6cea23 100644
--- a/src/slurmctld/node_mgr.c
+++ b/src/slurmctld/node_mgr.c
@@ -1648,7 +1648,7 @@ extern int validate_node_specs(slurm_node_registration_status_msg_t *reg_msg)
 		threads1 = cores1 * reg_msg->threads;
 		cores2 = config_ptr->sockets * config_ptr->cores;
 		threads2 = cores2 * config_ptr->threads;
-		if ((cores1 < cores2) || (threads1 < threads2)) {
+		if (threads1 < threads2) {
 			error("Node %s has low socket*core*thread count %u",
 				reg_msg->node_name, threads1);
 			error_code = EINVAL;
diff --git a/src/slurmd/slurmstepd/io.c b/src/slurmd/slurmstepd/io.c
index 3fecbd60c2d..061c14b8cbe 100644
--- a/src/slurmd/slurmstepd/io.c
+++ b/src/slurmd/slurmstepd/io.c
@@ -126,6 +126,9 @@ struct client_io_info {
 	int  ltaskid_stdout, ltaskid_stderr;
 	bool labelio;
 	int  label_width;
+
+	/* true if writing to a file, false if writing to a socket */
+	bool is_local_file;
 };
 
 
@@ -475,20 +478,9 @@ static bool
 _local_file_writable(eio_obj_t *obj)
 {
 	struct client_io_info *client = (struct client_io_info *) obj->arg;
-	int rc;
 
 	xassert(client->magic == CLIENT_IO_MAGIC);
 
-	if (obj->shutdown) {
-		if (obj->fd >= 0) {
-			do {
-				rc = close(obj->fd);
-			} while (rc == -1 && errno == EINTR);
-			obj->fd = -1;
-		}
-		return false;
-	}
-
 	if (client->out_eof == true)
 		return false;
 
@@ -1297,6 +1289,33 @@ io_close_all(slurmd_job_t *job)
 	eio_signal_shutdown(job->eio);
 }
 
+void 
+io_close_local_fds(slurmd_job_t *job)
+{
+	ListIterator clients;
+	eio_obj_t *eio;
+	int rc;
+	struct client_io_info *client;
+
+	if (job == NULL || job->clients == NULL)
+		return;
+
+	clients = list_iterator_create(job->clients);
+	while((eio = list_next(clients))) {
+		client = (struct client_io_info *)eio->arg;
+		if (client->is_local_file) {
+			if (eio->fd >= 0) {
+				do {
+					rc = close(eio->fd);
+				} while (rc == -1 && errno == EINTR);
+				eio->fd = -1;
+			}
+		}
+	}
+}
+
+
+
 static void *
 _io_thr(void *arg)
 {
@@ -1337,11 +1356,6 @@ io_create_local_client(const char *filename, int file_flags,
 
 	fd = open(filename, file_flags, 0666);
 	if (fd == -1) {
-		/* error("Could not open stdout file: %m");
-		task->ofname = fname_create(job, "slurm-%J.out", 0);
-		fd = open(task->ofname, file_flags, 0666);
-		if (fd == -1)
-			return SLURM_ERROR; */
 		return ESLURMD_IO_ERROR;
 	}
 	fd_set_close_on_exec(fd);
@@ -1357,12 +1371,14 @@ io_create_local_client(const char *filename, int file_flags,
 	client->ltaskid_stdout = stdout_tasks;
 	client->ltaskid_stderr = stderr_tasks;
 	client->labelio = labelio;
+	client->is_local_file = true;
 
 	client->label_width = 1;
 	tmp = job->ntasks-1;
 	while ((tmp /= 10) > 0)
 		client->label_width++;
 
+
 	obj = eio_obj_create(fd, &local_file_ops, (void *)client);
 	list_append(job->clients, (void *)obj);
 	eio_new_initial_obj(job->eio, (void *)obj);
@@ -1430,6 +1446,7 @@ io_initial_client_connect(srun_info_t *srun, slurmd_job_t *job,
 	client->ltaskid_stderr = stderr_tasks;
 	client->labelio = false;
 	client->label_width = 0;
+	client->is_local_file = false;
 
 	obj = eio_obj_create(sock, &client_ops, (void *)client);
 	list_append(job->clients, (void *)obj);
@@ -1489,6 +1506,7 @@ io_client_connect(srun_info_t *srun, slurmd_job_t *job)
 	client->ltaskid_stderr = -1;     /* accept from all tasks */
 	client->labelio = false;
 	client->label_width = 0;
+	client->is_local_file = false;
 
 	/* client object adds itself to job->clients in _client_writable */
 
diff --git a/src/slurmd/slurmstepd/io.h b/src/slurmd/slurmstepd/io.h
index 7e9894e4aaa..53026256ccc 100644
--- a/src/slurmd/slurmstepd/io.h
+++ b/src/slurmd/slurmstepd/io.h
@@ -130,6 +130,7 @@ void io_close_task_fds(slurmd_job_t *job);
 
 void io_close_all(slurmd_job_t *job);
 
+void io_close_local_fds(slurmd_job_t *job);
 
 
 /* 
diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c
index 8a4c259d81c..c6267cf5868 100644
--- a/src/slurmd/slurmstepd/mgr.c
+++ b/src/slurmd/slurmstepd/mgr.c
@@ -919,11 +919,9 @@ job_manager(slurmd_job_t *job)
 	/*
 	 * Wait for io thread to complete (if there is one)
 	 */
-	if (!job->batch && !job->user_managed_io && io_initialized) {
-		eio_signal_shutdown(job->eio);
+	if (!job->batch && !job->user_managed_io && io_initialized) 
 		_wait_for_io(job);
-	}
-
+	
 	debug2("Before call to spank_fini()");
 	if (spank_fini (job)  < 0) {
 		error ("spank_fini failed\n");
@@ -1472,6 +1470,9 @@ _wait_for_io(slurmd_job_t *job)
 	} else
 		info("_wait_for_io: ioid==0");
 
+	/* Close any files for stdout/stderr opened by the stepd */
+	io_close_local_fds(job);
+
 	return;
 }
 
diff --git a/src/slurmdbd/read_config.c b/src/slurmdbd/read_config.c
index 4e6c88d1350..e5819561f68 100644
--- a/src/slurmdbd/read_config.c
+++ b/src/slurmdbd/read_config.c
@@ -314,23 +314,33 @@ extern int read_slurmdbd_conf(void)
 	if (slurmdbd_conf->storage_type == NULL)
 		fatal("StorageType must be specified");
 
+	if (!slurmdbd_conf->storage_host)
+		slurmdbd_conf->storage_host = xstrdup(DEFAULT_STORAGE_HOST);
+
+	if (!slurmdbd_conf->storage_user) 		
+		slurmdbd_conf->storage_user = xstrdup(getlogin());
+	
 	if(!strcmp(slurmdbd_conf->storage_type, 
 			  "accounting_storage/mysql")) {
 		if(!slurmdbd_conf->storage_port)
-			slurmdbd_conf->storage_port = 3306;
+			slurmdbd_conf->storage_port = DEFAULT_MYSQL_PORT;
 		if(!slurmdbd_conf->storage_loc)
 			slurmdbd_conf->storage_loc =
 				xstrdup(DEFAULT_ACCOUNTING_DB);
 	} else if(!strcmp(slurmdbd_conf->storage_type,
 			  "accounting_storage/pgsql")) {
 		if(!slurmdbd_conf->storage_port)
-			slurmdbd_conf->storage_port = 5432;
+			slurmdbd_conf->storage_port = DEFAULT_PGSQL_PORT;
 		if(!slurmdbd_conf->storage_loc)
 			slurmdbd_conf->storage_loc =
 				xstrdup(DEFAULT_ACCOUNTING_DB);
-	} else 
+	} else {
 		if(!slurmdbd_conf->storage_port)
 			slurmdbd_conf->storage_port = DEFAULT_STORAGE_PORT;
+		if(!slurmdbd_conf->storage_loc)
+			slurmdbd_conf->storage_loc =
+				xstrdup(DEFAULT_STORAGE_LOC);
+	}
 
 	if (slurmdbd_conf->archive_dir) {
 		if(stat(slurmdbd_conf->archive_dir, &buf) < 0) 
diff --git a/testsuite/expect/test1.84 b/testsuite/expect/test1.84
index aadf24cbf77..b301a657066 100755
--- a/testsuite/expect/test1.84
+++ b/testsuite/expect/test1.84
@@ -113,6 +113,7 @@ if {[string compare $host ""] == 0} {
 if {$cpu_cnt != $task_cnt} {
 	send_user "FAILURE: should have run $cpu_cnt tasks (one per CPU) "
 	send_user "instead of $task_cnt tasks\n"
+	send_user "NOTE: This could be due to a memory limit per allocated CPU\n\n"
 	set exit_code 1
 }
 if {$cpu_cnt < 2} {
@@ -151,6 +152,7 @@ expect {
 #
 if {$task_cnt != [expr $cpu_cnt / 2]} {
 	send_user "\nFAILURE: Improper task count for given cpus-per-task\n"
+	send_user "NOTE: This could be due to a memory limit per allocated CPU\n\n"
 	set exit_code   1	
 }
 
-- 
GitLab