Skip to content
Snippets Groups Projects
Commit 48776d34 authored by Moe Jette
Browse files

Backup slurmctld to generate core file on SIGABRT.

parent d35b8729
No related branches found
No related tags found
No related merge requests found
...@@ -24,6 +24,7 @@ documents those changes that are of interest to users and admins. ...@@ -24,6 +24,7 @@ documents those changes that are of interest to users and admins.
-- Documentation for writing scheduler, switch, and job completion -- Documentation for writing scheduler, switch, and job completion
logging plugins added logging plugins added
-- Added job and node state descriptions to the squeue and sinfo man pages -- Added job and node state descriptions to the squeue and sinfo man pages
-- Backup slurmctld to generate core file on SIGABRT
* Changes in SLURM 0.3.0.0-pre6 * Changes in SLURM 0.3.0.0-pre6
=============================== ===============================
......
...@@ -57,6 +57,9 @@ static void * _background_rpc_mgr(void *no_data); ...@@ -57,6 +57,9 @@ static void * _background_rpc_mgr(void *no_data);
static void * _background_signal_hand(void *no_data); static void * _background_signal_hand(void *no_data);
static int _ping_controller(void); static int _ping_controller(void);
/* Local variables */
/*
 * dump_core - set to true when SIGABRT is handled (see the signal switch
 * further down). The shutdown path then calls abort() instead of exit(0)
 * so that a core file is generated.
 * NOTE(review): the flag is written and read without any visible
 * synchronization; confirm whether the signal handling code runs in a
 * separate thread from the code that reads it.
 */
static bool dump_core = false;
/* /*
* Static list of signals to block in this process * Static list of signals to block in this process
* *Must be zero-terminated* * *Must be zero-terminated*
...@@ -133,7 +136,10 @@ void run_backup(void) ...@@ -133,7 +136,10 @@ void run_backup(void)
verbose("Unable to remove pidfile '%s': %m", verbose("Unable to remove pidfile '%s': %m",
slurmctld_conf.slurmctld_pidfile); slurmctld_conf.slurmctld_pidfile);
log_fini(); log_fini();
exit(0); if (dump_core)
abort();
else
exit(0);
} }
error("ControlMachine %s not responding, " error("ControlMachine %s not responding, "
...@@ -172,13 +178,20 @@ static void *_background_signal_hand(void *no_data) ...@@ -172,13 +178,20 @@ static void *_background_signal_hand(void *no_data)
switch (sig) { switch (sig) {
case SIGINT: /* kill -2 or <CTRL-C> */ case SIGINT: /* kill -2 or <CTRL-C> */
case SIGTERM: /* kill -15 */ case SIGTERM: /* kill -15 */
case SIGABRT: /* abort */ info("Terminate signal (SIGINT or SIGTERM) received");
info("Terminate signal (SIGABRT, SIGINT or SIGTERM) received");
slurmctld_config.shutdown_time = time(NULL); slurmctld_config.shutdown_time = time(NULL);
/* send REQUEST_SHUTDOWN_IMMEDIATE RPC */ /* send REQUEST_SHUTDOWN_IMMEDIATE RPC */
slurmctld_shutdown(); slurmctld_shutdown();
return NULL; /* Normal termination */ return NULL; /* Normal termination */
break; break;
case SIGABRT: /* abort */
info("SIGABRT received");
slurmctld_config.shutdown_time = time(NULL);
/* send REQUEST_SHUTDOWN_IMMEDIATE RPC */
slurmctld_shutdown();
dump_core = true;
return NULL; /* Normal termination */
break;
default: default:
error("Invalid signal (%d) received", sig); error("Invalid signal (%d) received", sig);
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment