Skip to content
Snippets Groups Projects
Commit a273e27a authored by Moe Jette
Browse files
parent 1d4e932b
No related branches found
No related tags found
No related merge requests found
......@@ -87,6 +87,8 @@ documents those changes that are of interest to users and admins.
- In sched/wiki2, add support for EHost and EHostBackup configuration
parameters in wiki.conf file
- In sched/wiki2, fix memory management bug for JOBWILLRUN command.
- In sched/wiki2, consider job Busy while in Completing state for
KillWait+10 seconds (used to be 60 seconds).
* Changes in SLURM 1.1.17
=========================
......
......@@ -68,12 +68,6 @@ extern int event_notify(char *msg)
pthread_mutex_lock(&event_mutex);
if (event_addr_set == 0) {
/* Identify address for socket connection */
if (e_host[0] == '\0') {
slurm_ctl_conf_t *conf = slurm_conf_lock();
strncpy(e_host, conf->control_addr,
sizeof(e_host));
slurm_conf_unlock();
}
slurm_set_addr(&moab_event_addr, e_port, e_host);
event_addr_set = 1;
if (e_host_bu[0] != '\0') {
......
......@@ -319,15 +319,13 @@ static char * _get_job_state(struct job_record *job_ptr)
return "Running";
if (state & JOB_COMPLETING) {
/* Give 60 seconds to clear out, then
* consider job done. Let Moab
* deal with inconsistency between
* job state (DONE) and node state
* (some IDLE and others still
* BUSY). */
/* Give configured KillWait+10 for job
* to clear out, then consider job
* done. Moab will allocate jobs to
* nodes that are already Idle. */
int age = (int) difftime(time(NULL),
job_ptr->end_time);
if (age < 60)
if (age < (kill_wait+10))
return "Running";
}
......
......@@ -53,6 +53,7 @@ char e_host_bu[E_HOST_SIZE] = "";
uint16_t e_port = 0;
uint16_t job_aggregation_time = 10; /* Default value is 10 seconds */
int init_prio_mode = PRIO_HOLD;
uint16_t kill_wait;
uint16_t use_host_exp = 0;
static char * _get_wiki_conf_path(void);
......@@ -205,6 +206,13 @@ static void _parse_wiki_config(void)
s_p_hashtbl_t *tbl;
char *key = NULL, *priority_mode = NULL, *wiki_conf;
struct stat buf;
slurm_ctl_conf_t *conf;
/* Set default values */
conf = slurm_conf_lock();
strncpy(e_host, conf->control_addr, sizeof(e_host));
kill_wait = conf->kill_wait;
slurm_conf_unlock();
wiki_conf = _get_wiki_conf_path();
if ((wiki_conf == NULL) || (stat(wiki_conf, &buf) == -1)) {
......@@ -227,7 +235,8 @@ static void _parse_wiki_config(void)
if ( s_p_get_string(&key, "EHost", tbl)) {
strncpy(e_host, key, sizeof(e_host));
xfree(key);
}
} else
debug("wiki: Using ControlAddr for EHost value");
if ( s_p_get_string(&key, "EHostBackup", tbl)) {
strncpy(e_host_bu, key, sizeof(e_host_bu));
xfree(key);
......@@ -247,6 +256,15 @@ static void _parse_wiki_config(void)
s_p_hashtbl_destroy(tbl);
xfree(wiki_conf);
#if 0
info("AuthKey = %s", auth_key);
info("EHost = %s", e_host);
info("EHostBackup = %s", e_host_bu);
info("EPort = %u", e_port);
info("JobAggregationTime = %u sec", job_aggregation_time);
info("JobPriority = %s", init_prio_mode ? "run" : "hold");
info("KillWait = %u sec", kill_wait);
#endif
return;
}
......
......@@ -91,6 +91,7 @@ extern char e_host[E_HOST_SIZE];
extern char e_host_bu[E_HOST_SIZE];
extern uint16_t e_port;
extern uint16_t job_aggregation_time;
extern uint16_t kill_wait;
extern uint16_t use_host_exp;
extern int event_notify(char *msg);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment