From 8079e28d85b4a1c1dcea3941bf24c1f3b3036ea7 Mon Sep 17 00:00:00 2001 From: Morris Jette <jette@schedmd.com> Date: Fri, 22 Aug 2014 13:15:54 -0700 Subject: [PATCH] Fix for slurmctld agent watchdog logic The agent was failing to run the watchdog thread, which makes note of hung threads and notes completion of the RPC, which is needed for node ping. This bug was introduced in commit d87d1b50ceedcca5f0c584fba6b8ff884a621e5e --- src/slurmctld/agent.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c index d5c2ac294a9..9f8db7d03a1 100644 --- a/src/slurmctld/agent.c +++ b/src/slurmctld/agent.c @@ -270,9 +270,10 @@ void *agent(void *args) /* start the watchdog thread */ if (getenv("SLURM_NO_WDOG")) { - /* Development don't want threads be interrupted - * by sigusr1. Remove later. - */ + /* Test purposes only. Do not want threads to be interrupted + * by SIGUSR1. Breaks normal operation */ + error("%s: Watchdog thread disabled", __func__); + } else { slurm_attr_init(&attr_wdog); if (pthread_attr_setdetachstate (&attr_wdog, PTHREAD_CREATE_JOINABLE)) @@ -332,12 +333,10 @@ void *agent(void *args) slurm_mutex_unlock(&agent_info_ptr->thread_mutex); } - /* wait for termination of remaining threads */ - - if (getenv("SLURM_NO_WDOG")) { - /* Development don't want threads be interrupted - * by sigusr1. Remove later. - */ + /* Wait for termination of remaining threads */ + if (!getenv("SLURM_NO_WDOG")) { + /* Test purposes only. Do not want threads to be interrupted + * by SIGUSR1 */ pthread_join(thread_wdog, NULL); } -- GitLab