From 162d164bcbddd4c443f8ac8bfc018695e146e1af Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Wed, 15 Nov 2006 18:47:54 +0000
Subject: [PATCH] Make sure the backfill plugin pthread is terminated at
 slurmctld shutdown in order to free all memory (including that associated
 with pthread_create).

---
 src/plugins/sched/backfill/backfill.c         | 21 +++++++++++++------
 src/plugins/sched/backfill/backfill.h         |  3 +++
 src/plugins/sched/backfill/backfill_wrapper.c |  1 +
 src/slurmctld/controller.c                    |  2 +-
 4 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c
index fdcabfd0a5d..404564b1d28 100644
--- a/src/plugins/sched/backfill/backfill.c
+++ b/src/plugins/sched/backfill/backfill.c
@@ -14,7 +14,7 @@
  *  "lx[06-08]", we can't start it without possibly delaying the higher 
  *  priority job.
  *****************************************************************************
- *  Copyright (C) 2003 The Regents of the University of California.
+ *  Copyright (C) 2003-2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  *  Written by Morris Jette <jette1@llnl.gov>
  *  UCRL-CODE-217948.
@@ -79,8 +79,9 @@ typedef struct node_space_map {
 } node_space_map_t;
 
 /*********************** local variables *********************/
-static bool altered_job = false;
-static bool new_work = false;
+static bool altered_job   = false;
+static bool new_work      = false;
+static bool stop_backfill = false;
 static pthread_mutex_t thread_flag_mutex = PTHREAD_MUTEX_INITIALIZER;
 
 static List pend_job_list = NULL;
@@ -93,7 +94,7 @@ static node_space_map_t node_space[MAX_JOB_CNT + 1];
 /* Set __DEBUG to get detailed logging for this thread without 
  * detailed logging for the entire slurmctld daemon */
 #define __DEBUG        0
-#define SLEEP_TIME     2
+#define SLEEP_TIME     1
 
 /*********************** local functions *********************/
 static int  _add_pending_job(struct job_record *job_ptr, 
@@ -162,6 +163,13 @@ static void _diff_tv_str(struct timeval *tv1,struct timeval *tv2,
 	snprintf(tv_str, len_tv_str, "usec=%ld", delta_t);
 }
 
+/* Terminate backfill_agent: sets the stop_backfill flag polled by the agent's loop */
+extern void stop_backfill_agent(void)
+{
+	stop_backfill = true;
+}
+
+
 /* backfill_agent - detached thread periodically attempts to backfill jobs */
 extern void *
 backfill_agent(void *args)
@@ -175,9 +183,9 @@ backfill_agent(void *args)
 
 	if (slurm_get_root_filter())
 		filter_root = true;
-	while (1) {
+	while (!stop_backfill) {
 		sleep(SLEEP_TIME);      /* don't run continuously */
-		if (!_more_work())
+		if ((!_more_work()) || stop_backfill)
 			continue;
 
 		gettimeofday(&tv1, NULL);
@@ -210,6 +218,7 @@ backfill_agent(void *args)
 			schedule();	/* has own locks */
 		}
 	}
+	return NULL;
 }
 
 /* trigger the attempt of a backfill */
diff --git a/src/plugins/sched/backfill/backfill.h b/src/plugins/sched/backfill/backfill.h
index 2db7e766bbd..e960a3df037 100644
--- a/src/plugins/sched/backfill/backfill.h
+++ b/src/plugins/sched/backfill/backfill.h
@@ -38,6 +38,9 @@
 /* backfill_agent - detached thread periodically attempts to backfill jobs */
 extern void *backfill_agent(void *args);
 
+/* Terminate backfill_agent (requests that the agent's loop exit) */
+extern void stop_backfill_agent(void);
+
 /* trigger the attempt of a backfill */
 extern void run_backfill (void);
 
diff --git a/src/plugins/sched/backfill/backfill_wrapper.c b/src/plugins/sched/backfill/backfill_wrapper.c
index dbf33c67cc7..3663aaf9a1e 100644
--- a/src/plugins/sched/backfill/backfill_wrapper.c
+++ b/src/plugins/sched/backfill/backfill_wrapper.c
@@ -112,6 +112,7 @@ void fini( void )
 	pthread_mutex_lock( &thread_flag_mutex );
 	if ( backfill_thread ) {
 		verbose( "Backfill scheduler plugin shutting down" );
+		stop_backfill_agent();
 		_cancel_thread( backfill_thread );
 		backfill_thread = false;
 	}
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index e453404b45e..8c4a2a9c5d4 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -377,7 +377,7 @@ int main(int argc, char *argv[])
 	slurm_cred_ctx_destroy(slurmctld_config.cred_ctx);
 	slurm_conf_destroy();
 	slurm_api_clear_config();
-	sleep(1);
+	sleep(2);
 #endif
 
 	info("Slurmctld shutdown completing");
-- 
GitLab