diff --git a/NEWS b/NEWS index e8627fd0a100a9e5745b7df357c2fab6ccaa1867..3058d15c0632dd8285af1787e2d74a6d60a2ded9 100644 --- a/NEWS +++ b/NEWS @@ -24,6 +24,7 @@ documents those changes that are of interest to users and admins. -- Documentation for writing scheduler, switch, and job completion logging plugins added -- Added job and node state descriptions to the squeue and sinfo man pages + -- Backup slurmctld to generate core file on SIGABRT * Changes in SLURM 0.3.0.0-pre6 =============================== diff --git a/src/slurmctld/backup.c b/src/slurmctld/backup.c index 1d0a84c7187870e5fb9e285190e38b1a6f9d9a9c..48dd0058aaa852428eb3789458ba8768664a2d23 100644 --- a/src/slurmctld/backup.c +++ b/src/slurmctld/backup.c @@ -57,6 +57,9 @@ static void * _background_rpc_mgr(void *no_data); static void * _background_signal_hand(void *no_data); static int _ping_controller(void); +/* Local variables */ +static bool dump_core = false; + /* * Static list of signals to block in this process * *Must be zero-terminated* @@ -133,7 +136,10 @@ void run_backup(void) verbose("Unable to remove pidfile '%s': %m", slurmctld_conf.slurmctld_pidfile); log_fini(); - exit(0); + if (dump_core) + abort(); + else + exit(0); } error("ControlMachine %s not responding, " @@ -172,13 +178,20 @@ static void *_background_signal_hand(void *no_data) switch (sig) { case SIGINT: /* kill -2 or <CTRL-C> */ case SIGTERM: /* kill -15 */ - case SIGABRT: /* abort */ - info("Terminate signal (SIGABRT, SIGINT or SIGTERM) received"); + info("Terminate signal (SIGINT or SIGTERM) received"); slurmctld_config.shutdown_time = time(NULL); /* send REQUEST_SHUTDOWN_IMMEDIATE RPC */ slurmctld_shutdown(); return NULL; /* Normal termination */ break; + case SIGABRT: /* abort */ + info("SIGABRT received"); + slurmctld_config.shutdown_time = time(NULL); + /* send REQUEST_SHUTDOWN_IMMEDIATE RPC */ + slurmctld_shutdown(); + dump_core = true; + return NULL; /* Normal termination */ + break; default: error("Invalid signal (%d) received", sig); }