From a8f56cb2005a5db03601e11f44d73bf881f7a7cd Mon Sep 17 00:00:00 2001
From: Danny Auble <da@schedmd.com>
Date: Mon, 17 Jun 2013 15:52:21 -0700
Subject: [PATCH] PROFILING - Make sure polling threads end correctly

---
 src/common/slurm_acct_gather_profile.c | 82 +++++++++++++++++++++++---
 src/common/slurm_acct_gather_profile.h |  1 +
 src/slurmd/slurmstepd/mgr.c            |  4 +-
 3 files changed, 77 insertions(+), 10 deletions(-)

diff --git a/src/common/slurm_acct_gather_profile.c b/src/common/slurm_acct_gather_profile.c
index 2d3e5a1b1d3..3e3024640af 100644
--- a/src/common/slurm_acct_gather_profile.c
+++ b/src/common/slurm_acct_gather_profile.c
@@ -176,25 +176,46 @@ done:
 
 extern int acct_gather_profile_fini(void)
 {
-	int rc, i;
+	int rc = SLURM_SUCCESS, i;
 
 	if (!g_context)
 		return SLURM_SUCCESS;
 
+	slurm_mutex_lock(&g_context_lock);
+
+	/* Re-check under the lock in case the context was destroyed
+	 * between the unlocked test above and acquiring the lock. */
+	if (!g_context)
+		goto done;
+
 	init_run = false;
-	acct_gather_profile_running = false;
 
 	for (i=0; i < PROFILE_CNT; i++) {
-		/* end remote threads */
-		slurm_mutex_lock(&acct_gather_profile_timer[i].notify_mutex);
-		pthread_cond_signal(&acct_gather_profile_timer[i].notify);
-		slurm_mutex_unlock(&acct_gather_profile_timer[i].notify_mutex);
-		pthread_cond_destroy(&acct_gather_profile_timer[i].notify);
-		acct_gather_profile_timer[i].freq = 0;
+		switch (i) {
+		case PROFILE_ENERGY:
+			acct_gather_energy_fini();
+			break;
+		case PROFILE_TASK:
+			jobacct_gather_fini();
+			break;
+		case PROFILE_FILESYSTEM:
+			acct_gather_filesystem_fini();
+			break;
+		case PROFILE_NETWORK:
+			acct_gather_infiniband_fini();
+			break;
+		default:
+			fatal("Unhandled profile option %d please update "
+			      "slurm_acct_gather_profile.c "
+			      "(acct_gather_profile_fini)", i);
+		}
 	}
 
 	rc = plugin_context_destroy(g_context);
 	g_context = NULL;
+done:
+	slurm_mutex_unlock(&g_context_lock);
+
 	return rc;
 }
 
@@ -394,6 +415,51 @@ extern int acct_gather_profile_startpoll(char *freq, char *freq_def)
 	return retval;
 }
 
+/*
+ * Stop profile polling.  Signals each acct_gather_profile_timer's notify
+ * condition to end the remote polling threads, destroys the condition,
+ * zeroes the timer frequency and runs the per-type endpoll hook.
+ * Does nothing when polling is not running.
+ */
+extern void acct_gather_profile_endpoll(void)
+{
+	int i;
+
+	if (!acct_gather_profile_running) {
+		debug2("acct_gather_profile_endpoll: poll already ended!");
+		return;
+	}
+
+	acct_gather_profile_running = false;
+
+	for (i=0; i < PROFILE_CNT; i++) {
+		/* end remote threads */
+		slurm_mutex_lock(&acct_gather_profile_timer[i].notify_mutex);
+		pthread_cond_signal(&acct_gather_profile_timer[i].notify);
+		slurm_mutex_unlock(&acct_gather_profile_timer[i].notify_mutex);
+		/* NOTE(review): only one waiter is signaled before the
+		 * condition is destroyed; presumably each timer has a
+		 * single polling thread -- confirm. */
+		pthread_cond_destroy(&acct_gather_profile_timer[i].notify);
+		acct_gather_profile_timer[i].freq = 0;
+		switch (i) {
+		case PROFILE_ENERGY:
+			break;
+		case PROFILE_TASK:
+			jobacct_gather_endpoll();
+			break;
+		case PROFILE_FILESYSTEM:
+			break;
+		case PROFILE_NETWORK:
+			break;
+		default:
+			fatal("Unhandled profile option %d please update "
+			      "slurm_acct_gather_profile.c "
+			      "(acct_gather_profile_endpoll)", i);
+		}
+	}
+}
+
 extern void acct_gather_profile_g_conf_options(s_p_options_t **full_options,
 					       int *full_options_cnt)
 {
diff --git a/src/common/slurm_acct_gather_profile.h b/src/common/slurm_acct_gather_profile.h
index 3864894bb45..d49d9e1470f 100644
--- a/src/common/slurm_acct_gather_profile.h
+++ b/src/common/slurm_acct_gather_profile.h
@@ -106,6 +106,7 @@ extern uint32_t acct_gather_profile_type_from_string(char *series_str);
 extern char *acct_gather_profile_type_t_name(acct_gather_profile_type_t type);
 
 extern int acct_gather_profile_startpoll(char *freq, char *freq_def);
+extern void acct_gather_profile_endpoll(void);
 
 /*
  * Define plugin local conf for acct_gather.conf
diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c
index 01b8ceb8996..68ebae98626 100644
--- a/src/slurmd/slurmstepd/mgr.c
+++ b/src/slurmd/slurmstepd/mgr.c
@@ -1028,9 +1028,9 @@ job_manager(slurmd_job_t *job)
 	_send_launch_resp(job, 0);
 
 	_wait_for_all_tasks(job);
-	jobacct_gather_endpoll();
-	acct_gather_infiniband_fini();
+	acct_gather_profile_endpoll();
 	acct_gather_profile_g_node_step_end();
+	acct_gather_profile_fini();
 
 	job->state = SLURMSTEPD_STEP_ENDING;
 
-- 
GitLab