Skip to content
Snippets Groups Projects
Commit 0154fb34 authored by Moe Jette
Browse files

Don't try to recover cons_res state data on slurmctld -c (cold start).

parent a05519c2
No related branches found
No related tags found
No related merge requests found
...@@ -16,6 +16,8 @@ documents those changes that are of interest to users and admins. ...@@ -16,6 +16,8 @@ documents those changes that are of interest to users and admins.
info for jobs changed. info for jobs changed.
-- BLUEGENE - Changed logic for wiring bgl blocks to be more maintainable. -- BLUEGENE - Changed logic for wiring bgl blocks to be more maintainable.
(Haven't tested on large system yet, works on 2 base partition system) (Haven't tested on large system yet, works on 2 base partition system)
-- Do not read the select/cons_res state save file if slurmctld is
cold-started (with the "-c" option).
* Changes in SLURM 1.2.0-pre6 * Changes in SLURM 1.2.0-pre6
============================= =============================
......
...@@ -1139,6 +1139,10 @@ extern int select_p_state_restore(char *dir_name) ...@@ -1139,6 +1139,10 @@ extern int select_p_state_restore(char *dir_name)
info("cons_res: select_p_state_restore"); info("cons_res: select_p_state_restore");
if (!dir_name) {
error ("Starting cons_res with clean slate");
return SLURM_SUCCESS;
}
file_name = xstrdup(dir_name); file_name = xstrdup(dir_name);
xstrcat(file_name, "/cons_res_state"); xstrcat(file_name, "/cons_res_state");
state_fd = open (file_name, O_RDONLY); state_fd = open (file_name, O_RDONLY);
......
...@@ -255,8 +255,8 @@ int main(int argc, char *argv[]) ...@@ -255,8 +255,8 @@ int main(int argc, char *argv[])
if ( checkpoint_init(slurmctld_conf.checkpoint_type) != if ( checkpoint_init(slurmctld_conf.checkpoint_type) !=
SLURM_SUCCESS ) SLURM_SUCCESS )
fatal( "failed to initialize checkpoint plugin" ); fatal( "failed to initialize checkpoint plugin" );
if (select_g_state_restore(slurmctld_conf.state_save_location)) if (slurm_select_init() != SLURM_SUCCESS )
fatal( "failed to restore node selection plugin state"); fatal( "failed to initialize node selection plugin state");
while (1) { while (1) {
/* initialization for each primary<->backup switch */ /* initialization for each primary<->backup switch */
...@@ -290,12 +290,18 @@ int main(int argc, char *argv[]) ...@@ -290,12 +290,18 @@ int main(int argc, char *argv[])
info("Running as primary controller"); info("Running as primary controller");
/* Recover node scheduler state info */ /* Recover node scheduler state info */
if (select_g_state_restore(slurmctld_conf.state_save_location) if (recover) {
!= SLURM_SUCCESS ) { error_code = select_g_state_restore(
slurmctld_conf.state_save_location);
} else {
error_code = select_g_state_restore(NULL);
}
if (error_code != SLURM_SUCCESS ) {
error("failed to restore node selection state"); error("failed to restore node selection state");
abort(); abort();
} }
/* /*
* create attached thread to process RPCs * create attached thread to process RPCs
*/ */
...@@ -342,6 +348,7 @@ int main(int argc, char *argv[]) ...@@ -342,6 +348,7 @@ int main(int argc, char *argv[])
switch_save(slurmctld_conf.state_save_location); switch_save(slurmctld_conf.state_save_location);
if (slurmctld_config.resume_backup == false) if (slurmctld_config.resume_backup == false)
break; break;
recover = 2;
} }
/* Since pidfile is created as user root (its owner is /* Since pidfile is created as user root (its owner is
......
0% Loading
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment