From a8f56cb2005a5db03601e11f44d73bf881f7a7cd Mon Sep 17 00:00:00 2001
From: Danny Auble <da@schedmd.com>
Date: Mon, 17 Jun 2013 15:52:21 -0700
Subject: [PATCH] PROFILING - Make sure polling threads end correctly

---
 src/common/slurm_acct_gather_profile.c | 71 +++++++++++++++++++++++---
 src/common/slurm_acct_gather_profile.h |  1 +
 src/slurmd/slurmstepd/mgr.c            |  4 +-
 3 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/src/common/slurm_acct_gather_profile.c b/src/common/slurm_acct_gather_profile.c
index 2d3e5a1b1d3..3e3024640af 100644
--- a/src/common/slurm_acct_gather_profile.c
+++ b/src/common/slurm_acct_gather_profile.c
@@ -176,25 +176,44 @@ done:
 
 extern int acct_gather_profile_fini(void)
 {
-	int rc, i;
+	int rc = SLURM_SUCCESS, i;
 
 	if (!g_context)
 		return SLURM_SUCCESS;
 
+	slurm_mutex_lock(&g_context_lock);
+	/* Re-check under the lock: the unlocked test above may be stale. */
+	if (!g_context)
+		goto done;
+
 	init_run = false;
-	acct_gather_profile_running = false;
 
 	for (i=0; i < PROFILE_CNT; i++) {
-		/* end remote threads */
-		slurm_mutex_lock(&acct_gather_profile_timer[i].notify_mutex);
-		pthread_cond_signal(&acct_gather_profile_timer[i].notify);
-		slurm_mutex_unlock(&acct_gather_profile_timer[i].notify_mutex);
-		pthread_cond_destroy(&acct_gather_profile_timer[i].notify);
-		acct_gather_profile_timer[i].freq = 0;
+		switch (i) {	/* finalize the gatherer for each profile type */
+		case PROFILE_ENERGY:
+			acct_gather_energy_fini();
+			break;
+		case PROFILE_TASK:
+			jobacct_gather_fini();
+			break;
+		case PROFILE_FILESYSTEM:
+			acct_gather_filesystem_fini();
+			break;
+		case PROFILE_NETWORK:
+			acct_gather_infiniband_fini();
+			break;
+		default:	/* a profile type was added without a case here */
+			fatal("Unhandled profile option %d please update "
+			      "slurm_acct_gather_profile.c "
+			      "(acct_gather_profile_fini)", i);
+		}
 	}
 
 	rc = plugin_context_destroy(g_context);
 	g_context = NULL;
+done:	/* single exit so g_context_lock is always released */
+	slurm_mutex_unlock(&g_context_lock);
+
 	return rc;
 }
 
@@ -394,6 +413,42 @@ extern int acct_gather_profile_startpoll(char *freq, char *freq_def)
 	return retval;
 }
 
+extern void acct_gather_profile_endpoll(void)
+{
+	int i;
+
+	if (!acct_gather_profile_running) {
+		debug2("acct_gather_profile_endpoll: poll already ended!");
+		return;
+	}
+	/* clear the flag before waking the timers (presumably they test it) */
+	acct_gather_profile_running = false;
+
+	for (i=0; i < PROFILE_CNT; i++) {
+		/* end remote threads: wake the waiter on this timer */
+		slurm_mutex_lock(&acct_gather_profile_timer[i].notify_mutex);
+		pthread_cond_signal(&acct_gather_profile_timer[i].notify);
+		slurm_mutex_unlock(&acct_gather_profile_timer[i].notify_mutex);
+		/* NOTE(review): assumes one waiter per timer - confirm */
+		pthread_cond_destroy(&acct_gather_profile_timer[i].notify);
+		acct_gather_profile_timer[i].freq = 0;
+		switch (i) {
+		case PROFILE_ENERGY:	/* no extra endpoll call made here */
+			break;
+		case PROFILE_TASK:
+			jobacct_gather_endpoll();
+			break;
+		case PROFILE_FILESYSTEM:	/* no extra endpoll call made here */
+			break;
+		case PROFILE_NETWORK:	/* no extra endpoll call made here */
+			break;
+		default:	/* a profile type was added without a case here */
+			fatal("Unhandled profile option %d please update "
+			      "slurm_acct_gather_profile.c "
+			      "(acct_gather_profile_endpoll)", i);
+		}
+	}
+}
+
 extern void acct_gather_profile_g_conf_options(s_p_options_t **full_options,
 					       int *full_options_cnt)
 {
diff --git a/src/common/slurm_acct_gather_profile.h b/src/common/slurm_acct_gather_profile.h
index 3864894bb45..d49d9e1470f 100644
--- a/src/common/slurm_acct_gather_profile.h
+++ b/src/common/slurm_acct_gather_profile.h
@@ -106,6 +106,7 @@ extern uint32_t acct_gather_profile_type_from_string(char *series_str);
 extern char *acct_gather_profile_type_t_name(acct_gather_profile_type_t type);
 
 extern int acct_gather_profile_startpoll(char *freq, char *freq_def);
+extern void acct_gather_profile_endpoll(void); /* no-op if not polling */
 
 /*
  * Define plugin local conf for acct_gather.conf
diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c
index 01b8ceb8996..68ebae98626 100644
--- a/src/slurmd/slurmstepd/mgr.c
+++ b/src/slurmd/slurmstepd/mgr.c
@@ -1028,9 +1028,9 @@ job_manager(slurmd_job_t *job)
 	_send_launch_resp(job, 0);
 
 	_wait_for_all_tasks(job);
-	jobacct_gather_endpoll();
-	acct_gather_infiniband_fini();
+	acct_gather_profile_endpoll();
 	acct_gather_profile_g_node_step_end();
+	acct_gather_profile_fini();
 
 	job->state = SLURMSTEPD_STEP_ENDING;
 
-- 
GitLab