From 48776d349e500d3da0dc4261b5f6f0e116145a3d Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Wed, 3 Mar 2004 17:56:23 +0000 Subject: [PATCH] Backup slurmctld to generate core file on SIGABRT. --- NEWS | 1 + src/slurmctld/backup.c | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index e8627fd0a10..3058d15c063 100644 --- a/NEWS +++ b/NEWS @@ -24,6 +24,7 @@ documents those changes that are of interest to users and admins. -- Documentation for writing scheduler, switch, and job completion logging plugins added -- Added job and node state descriptions to the squeue and sinfo man pages + -- Backup slurmctld to generate core file on SIGABRT * Changes in SLURM 0.3.0.0-pre6 =============================== diff --git a/src/slurmctld/backup.c b/src/slurmctld/backup.c index 1d0a84c7187..48dd0058aaa 100644 --- a/src/slurmctld/backup.c +++ b/src/slurmctld/backup.c @@ -57,6 +57,9 @@ static void * _background_rpc_mgr(void *no_data); static void * _background_signal_hand(void *no_data); static int _ping_controller(void); +/* Local variables */ +static bool dump_core = false; + /* * Static list of signals to block in this process * *Must be zero-terminated* @@ -133,7 +136,10 @@ void run_backup(void) verbose("Unable to remove pidfile '%s': %m", slurmctld_conf.slurmctld_pidfile); log_fini(); - exit(0); + if (dump_core) + abort(); + else + exit(0); } error("ControlMachine %s not responding, " @@ -172,13 +178,20 @@ static void *_background_signal_hand(void *no_data) switch (sig) { case SIGINT: /* kill -2 or <CTRL-C> */ case SIGTERM: /* kill -15 */ - case SIGABRT: /* abort */ - info("Terminate signal (SIGABRT, SIGINT or SIGTERM) received"); + info("Terminate signal (SIGINT or SIGTERM) received"); slurmctld_config.shutdown_time = time(NULL); /* send REQUEST_SHUTDOWN_IMMEDIATE RPC */ slurmctld_shutdown(); return NULL; /* Normal termination */ break; + case SIGABRT: /* abort */ + info("SIGABRT received"); + 
slurmctld_config.shutdown_time = time(NULL); + /* send REQUEST_SHUTDOWN_IMMEDIATE RPC */ + slurmctld_shutdown(); + dump_core = true; + return NULL; /* Exit thread; run_backup() calls abort() to dump core */ + break; default: error("Invalid signal (%d) received", sig); } -- GitLab