From 30411bf1d67e7ae44e04f0655b8d6c9d2583441f Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Wed, 1 Aug 2012 11:11:33 -0700
Subject: [PATCH] Force slurmd exit after 2 minute wait, even if threads are hung.

---
Notes (not part of the commit message):

What the change does
  * _wait_for_all_threads() no longer blocks indefinitely in
    pthread_cond_wait().  It computes one absolute deadline,
    time(NULL) + 120 seconds, before taking active_mutex and waits with
    pthread_cond_timedwait() against that deadline.  Because the deadline
    is computed once, outside the loop, the 120 seconds bound the whole
    wait rather than each individual wakeup.
  * When pthread_cond_timedwait() returns ETIMEDOUT the function logs
    "Timeout waiting for completion of %d threads" via error(), signals
    active_cond, releases active_mutex and returns while active_threads
    is still non-zero.
  * On the normal path active_cond is now signalled once more before
    active_mutex is released.
  * The final verbose() message loses its trailing period
    ("all threads complete").
  * _decrement_thd_count(): whitespace-only change
    ("active_threads>0" becomes "active_threads > 0").
  * NEWS gains the matching one-line entry.

NOTE(review): only ETIMEDOUT ends the wait early.  Any other non-zero
return from pthread_cond_timedwait() falls through to the loop condition
and the wait is retried while active_threads > 0.

NOTE(review): the line structure of this patch was restored from a copy
in which the whole file had been collapsed onto a single line.  Line
breaks follow the hunk headers and the diffstat; leading whitespace on
context and added lines (tabs in slurmd.c, spaces in NEWS) was inferred.
Run "git apply --check" against the target tree before relying on it.

 NEWS                       |  1 +
 src/slurmd/slurmd/slurmd.c | 21 ++++++++++++++++++---
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/NEWS b/NEWS
index e1858e8eb45..31988c1fa37 100644
--- a/NEWS
+++ b/NEWS
@@ -23,6 +23,7 @@ documents those changes that are of interest to users and admins.
  -- Add Google search to all web pages.
  -- Add sinfo -T option to print reservation information. Work by Bill Brophy,
     Bull.
+ -- Force slurmd exit after 2 minute wait, even if threads are hung.
 
 * Changes in SLURM 2.5.0.pre1
 =============================
diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c
index df339cdaab1..7f66f6f2d97 100644
--- a/src/slurmd/slurmd/slurmd.c
+++ b/src/slurmd/slurmd/slurmd.c
@@ -426,7 +426,7 @@ static void
 _decrement_thd_count(void)
 {
 	slurm_mutex_lock(&active_mutex);
-	if (active_threads>0)
+	if (active_threads > 0)
 		active_threads--;
 	pthread_cond_signal(&active_cond);
 	slurm_mutex_unlock(&active_mutex);
@@ -453,13 +453,28 @@ _increment_thd_count(void)
 static void
 _wait_for_all_threads(void)
 {
+	struct timespec ts;
+	int rc;
+
+	ts.tv_sec  = time(NULL);
+	ts.tv_nsec = 0;
+	ts.tv_sec += 120;	/* 2 minutes allowed for shutdown */
+
 	slurm_mutex_lock(&active_mutex);
 	while (active_threads > 0) {
 		verbose("waiting on %d active threads", active_threads);
-		pthread_cond_wait(&active_cond, &active_mutex);
+		rc = pthread_cond_timedwait(&active_cond, &active_mutex, &ts);
+		if (rc == ETIMEDOUT) {
+			error("Timeout waiting for completion of %d threads",
+			      active_threads);
+			pthread_cond_signal(&active_cond);
+			slurm_mutex_unlock(&active_mutex);
+			return;
+		}
 	}
+	pthread_cond_signal(&active_cond);
 	slurm_mutex_unlock(&active_mutex);
-	verbose("all threads complete.");
+	verbose("all threads complete");
 }
 
 static void
-- 
GitLab