diff --git a/NEWS b/NEWS
index e1858e8eb45b461680e970689c24ea9a492c705b..31988c1fa370b446839fce24255d6634de6b6a4d 100644
--- a/NEWS
+++ b/NEWS
@@ -23,6 +23,7 @@ documents those changes that are of interest to users and admins.
  -- Add Google search to all web pages.
  -- Add sinfo -T option to print reservation information. Work by Bill Brophy,
     Bull.
+ -- Force slurmd exit after 2 minute wait, even if threads are hung.
 
 * Changes in SLURM 2.5.0.pre1
 =============================
diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c
index df339cdaab12ce21a7f5ff8bdf64ddcc841d49ee..7f66f6f2d973ba1f1633356cb0c70376a2eb96b4 100644
--- a/src/slurmd/slurmd/slurmd.c
+++ b/src/slurmd/slurmd/slurmd.c
@@ -426,7 +426,7 @@ static void
 _decrement_thd_count(void)
 {
 	slurm_mutex_lock(&active_mutex);
-	if (active_threads>0)
+	if (active_threads > 0)
 		active_threads--;
 	pthread_cond_signal(&active_cond);
 	slurm_mutex_unlock(&active_mutex);
@@ -453,13 +453,28 @@ _increment_thd_count(void)
 static void
 _wait_for_all_threads(void)
 {
+	struct timespec ts;	/* absolute deadline passed to pthread_cond_timedwait() */
+	int rc;			/* result of the most recent timed wait */
+
+	ts.tv_sec = time(NULL);	/* set once, before the loop: later wakeups do not extend the deadline */
+	ts.tv_nsec = 0;
+	ts.tv_sec += 120;	/* 2 minutes allowed for shutdown */
+
 	slurm_mutex_lock(&active_mutex);
 	while (active_threads > 0) {
 		verbose("waiting on %d active threads", active_threads);
-		pthread_cond_wait(&active_cond, &active_mutex);
+		rc = pthread_cond_timedwait(&active_cond, &active_mutex, &ts);
+		if (rc == ETIMEDOUT) {	/* deadline reached: stop waiting for the remaining threads */
+			error("Timeout waiting for completion of %d threads",
+			      active_threads);
+			pthread_cond_signal(&active_cond);	/* NOTE(review): presumably wakes another waiter on active_cond -- confirm */
+			slurm_mutex_unlock(&active_mutex);
+			return;	/* returns without logging "all threads complete" */
+		}
 	}
+	pthread_cond_signal(&active_cond);	/* NOTE(review): presumably wakes another waiter on active_cond -- confirm */
 	slurm_mutex_unlock(&active_mutex);
-	verbose("all threads complete.");
+	verbose("all threads complete");
 }
 
 static void