From 3e6a353a2657cc98b6016daff8bafa79f5954e31 Mon Sep 17 00:00:00 2001 From: Morris Jette <jette@schedmd.com> Date: Tue, 26 Jun 2012 14:26:53 -0700 Subject: [PATCH] Correction for possible race condition in plugin initialization --- src/common/checkpoint.c | 5 ++++- src/common/mpi.c | 12 +++++++----- src/common/node_select.c | 10 ++++++---- src/common/slurm_accounting_storage.c | 6 +++++- src/common/slurm_auth.c | 5 ++++- src/common/slurm_cred.c | 6 ++++-- src/common/slurm_jobacct_gather.c | 9 ++++++--- src/common/slurm_jobcomp.c | 6 +++++- src/common/slurm_priority.c | 6 +++++- src/common/slurm_topology.c | 6 +++++- src/common/switch.c | 5 ++++- src/slurmctld/job_submit.c | 5 ++++- src/slurmctld/preempt.c | 5 ++++- src/slurmctld/sched_plugin.c | 5 ++++- src/slurmd/common/proctrack.c | 5 ++++- src/slurmd/common/task_plugin.c | 5 ++++- src/srun/libsrun/launch.c | 5 ++++- 17 files changed, 79 insertions(+), 27 deletions(-) diff --git a/src/common/checkpoint.c b/src/common/checkpoint.c index 11ae26a6d84..d9b99877c68 100644 --- a/src/common/checkpoint.c +++ b/src/common/checkpoint.c @@ -102,6 +102,7 @@ static const char *syms[] = { static slurm_checkpoint_ops_t ops; static plugin_context_t *g_context = NULL; static pthread_mutex_t context_lock = PTHREAD_MUTEX_INITIALIZER; +static bool init_run = false; /* initialize checkpoint plugin */ extern int @@ -110,7 +111,7 @@ checkpoint_init(char *type) int retval = SLURM_SUCCESS; char *plugin_type = "checkpoint"; - if (g_context) + if (init_run && g_context) return retval; slurm_mutex_lock(&context_lock); @@ -126,6 +127,7 @@ checkpoint_init(char *type) retval = SLURM_ERROR; goto done; } + init_run = true; verbose("Checkpoint plugin loaded: %s", type); done: @@ -143,6 +145,7 @@ checkpoint_fini(void) return SLURM_SUCCESS; slurm_mutex_lock(&context_lock); + init_run = false; rc = plugin_context_destroy(g_context); slurm_mutex_unlock(&context_lock); return rc; diff --git a/src/common/mpi.c b/src/common/mpi.c index 076c7fd4a36..9dc2cd187bd 100644 --- a/src/common/mpi.c +++ b/src/common/mpi.c @@ -84,7 +84,7 @@ static const char *syms[] = { static slurm_mpi_ops_t ops; static plugin_context_t *g_context = NULL; static pthread_mutex_t context_lock = PTHREAD_MUTEX_INITIALIZER; - +static bool init_run = false; int _mpi_init (char *mpi_type) { @@ -93,7 +93,7 @@ int _mpi_init (char *mpi_type) char *type = NULL; int got_default = 0; - if ( g_context ) + if (init_run && g_context) return retval; slurm_mutex_lock( &context_lock ); @@ -122,10 +122,11 @@ int _mpi_init (char *mpi_type) retval = SLURM_ERROR; goto done; } + init_run = true; done: xfree(type); - if(got_default) + if (got_default) xfree(mpi_type); slurm_mutex_unlock( &context_lock ); return retval; @@ -137,7 +138,7 @@ int mpi_hook_slurmstepd_init (char ***env) debug("mpi type = %s", mpi_type); - if(_mpi_init(mpi_type) == SLURM_ERROR) + if (_mpi_init(mpi_type) == SLURM_ERROR) return SLURM_ERROR; unsetenvp (*env, "SLURM_MPI_TYPE"); @@ -165,7 +166,7 @@ int mpi_hook_client_init (char *mpi_type) { debug("mpi type = %s", mpi_type); - if(_mpi_init(mpi_type) == SLURM_ERROR) + if (_mpi_init(mpi_type) == SLURM_ERROR) return SLURM_ERROR; return SLURM_SUCCESS; @@ -210,6 +211,7 @@ int mpi_fini (void) if (!g_context) return SLURM_SUCCESS; + init_run = false; rc = plugin_context_destroy(g_context); return rc; } diff --git a/src/common/node_select.c b/src/common/node_select.c index abb2a725ff2..ce4e7cadf59 100644 --- a/src/common/node_select.c +++ b/src/common/node_select.c @@ -123,7 +123,7 @@ static int select_context_default = -1; static slurm_select_ops_t *ops = NULL; static plugin_context_t **select_context = NULL; static pthread_mutex_t select_context_lock = PTHREAD_MUTEX_INITIALIZER; - +static bool init_run = false; /** * delete a block request */ @@ -196,7 +196,7 @@ extern int slurm_select_init(bool only_default) char *dir_array = NULL, *head = NULL; char *plugin_type = "select"; - if ( select_context ) + if ( init_run && select_context ) return retval; slurm_mutex_lock( &select_context_lock ); @@ -352,7 +352,7 @@ extern int slurm_select_init(bool only_default) } skip_load_all: - if(select_context_default == -1) + if (select_context_default == -1) fatal("Can't find plugin for %s", type); /* Insure that plugin_id is valid and unique */ @@ -374,6 +374,7 @@ skip_load_all: } } + init_run = true; done: slurm_mutex_unlock( &select_context_lock ); @@ -389,7 +390,8 @@ extern int slurm_select_fini(void) slurm_mutex_lock(&select_context_lock); if (!select_context) goto fini; - + + init_run = false; for (i=0; i<select_context_cnt; i++) { j = plugin_context_destroy(select_context[i]); if (j != SLURM_SUCCESS) diff --git a/src/common/slurm_accounting_storage.c b/src/common/slurm_accounting_storage.c index c3ded92528d..be905428e83 100644 --- a/src/common/slurm_accounting_storage.c +++ b/src/common/slurm_accounting_storage.c @@ -1,3 +1,4 @@ + /*****************************************************************************\ * slurm_accounting_storage.c - account storage plugin wrapper. * @@ -255,6 +256,7 @@ static const char *syms[] = { static slurm_acct_storage_ops_t ops; static plugin_context_t *plugin_context = NULL; static pthread_mutex_t plugin_context_lock = PTHREAD_MUTEX_INITIALIZER; +static bool init_run = false; /* * Initialize context for acct_storage plugin @@ -265,7 +267,7 @@ extern int slurm_acct_storage_init(char *loc) char *plugin_type = "accounting_storage"; char *type = NULL; - if (plugin_context) + if (init_run && plugin_context) return retval; slurm_mutex_lock(&plugin_context_lock); @@ -286,6 +288,7 @@ extern int slurm_acct_storage_init(char *loc) retval = SLURM_ERROR; goto done; } + init_run = true; done: slurm_mutex_unlock(&plugin_context_lock); @@ -300,6 +303,7 @@ extern int slurm_acct_storage_fini(void) if (!plugin_context) return SLURM_SUCCESS; + init_run = false; // (*(ops.acct_storage_fini))(); rc = plugin_context_destroy(plugin_context); plugin_context = NULL; diff --git a/src/common/slurm_auth.c b/src/common/slurm_auth.c index 72dbcacc04d..3867b59ad53 100644 --- a/src/common/slurm_auth.c +++ b/src/common/slurm_auth.c @@ -53,6 +53,7 @@ #include "src/common/arg_desc.h" static bool auth_dummy = false; /* for security testing */ +static bool init_run = false; /* * WARNING: Do not change the order of these fields or add additional @@ -175,7 +176,7 @@ extern int slurm_auth_init( char *auth_type ) char *type = NULL; char *plugin_type = "auth"; - if (g_context) + if (init_run && g_context) return retval; slurm_mutex_lock(&context_lock); @@ -202,6 +203,7 @@ extern int slurm_auth_init( char *auth_type ) retval = SLURM_ERROR; goto done; } + init_run = true; done: xfree(type); @@ -218,6 +220,7 @@ slurm_auth_fini( void ) if (!g_context) return SLURM_SUCCESS; + init_run = false; rc = plugin_context_destroy(g_context); g_context = NULL; return rc; diff --git a/src/common/slurm_cred.c b/src/common/slurm_cred.c index 92e12679676..fc0a3288003 100644 --- a/src/common/slurm_cred.c +++ b/src/common/slurm_cred.c @@ -225,7 +225,7 @@ static const char *syms[] = { static slurm_crypto_ops_t ops; static plugin_context_t *g_context = NULL; static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER; - +static bool init_run = false; /* * Static prototypes: @@ -282,7 +282,7 @@ static int _slurm_crypto_init(void) char *type = NULL; int retval = SLURM_SUCCESS; - if ( g_context ) /* mostly avoid locks for better speed */ + if ( init_run && g_context ) /* mostly avoid locks for better speed */ return retval; slurm_mutex_lock( &g_context_lock ); @@ -298,6 +298,7 @@ static int _slurm_crypto_init(void) retval = SLURM_ERROR; goto done; } + init_run = true; done: slurm_mutex_unlock( &g_context_lock ); @@ -313,6 +314,7 @@ static int _slurm_crypto_fini(void) if (!g_context) return SLURM_SUCCESS; + init_run = false; rc = plugin_context_destroy(g_context); g_context = NULL; return rc; diff --git a/src/common/slurm_jobacct_gather.c b/src/common/slurm_jobacct_gather.c index d607b769743..944619e3149 100644 --- a/src/common/slurm_jobacct_gather.c +++ b/src/common/slurm_jobacct_gather.c @@ -95,6 +95,7 @@ static const char *syms[] = { static slurm_jobacct_gather_ops_t ops; static plugin_context_t *g_context = NULL; static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER; +static bool init_run = false; static int freq = 0; static bool pgid_plugin = false; @@ -206,7 +207,7 @@ extern int jobacct_gather_init(void) char *type = NULL; int retval=SLURM_SUCCESS; - if (g_context) + if (init_run && g_context) return retval; slurm_mutex_lock(&g_context_lock); @@ -229,7 +230,7 @@ extern int jobacct_gather_init(void) plugin_type = type; type = slurm_get_proctrack_type(); - if(!strcasecmp(type, "proctrack/pgid")) { + if (!strcasecmp(type, "proctrack/pgid")) { info("WARNING: We will use a much slower algorithm with " "proctrack/pgid, use Proctracktype=proctrack/linuxproc " "or some other proctrack when using %s", @@ -240,12 +241,13 @@ extern int jobacct_gather_init(void) xfree(plugin_type); type = slurm_get_accounting_storage_type(); - if(!strcasecmp(type, ACCOUNTING_STORAGE_TYPE_NONE)) { + if (!strcasecmp(type, ACCOUNTING_STORAGE_TYPE_NONE)) { error("WARNING: Even though we are collecting accounting " "information you have asked for it not to be stored " "(%s) if this is not what you have in mind you will " "need to change it.", ACCOUNTING_STORAGE_TYPE_NONE); } + init_run = true; done: slurm_mutex_unlock(&g_context_lock); @@ -260,6 +262,7 @@ extern int jobacct_gather_fini(void) slurm_mutex_lock(&g_context_lock); if (g_context) { + init_run = false; rc = plugin_context_destroy(g_context); g_context = NULL; } diff --git a/src/common/slurm_jobcomp.c b/src/common/slurm_jobcomp.c index d8c13326efb..bacac57f701 100644 --- a/src/common/slurm_jobcomp.c +++ b/src/common/slurm_jobcomp.c @@ -1,3 +1,4 @@ + /*****************************************************************************\ * slurm_jobcomp.c - implementation-independent job completion logging * functions @@ -86,6 +87,7 @@ static const char *syms[] = { static slurm_jobcomp_ops_t ops; static plugin_context_t *g_context = NULL; static pthread_mutex_t context_lock = PTHREAD_MUTEX_INITIALIZER; +static bool init_run = false; extern void jobcomp_destroy_job(void *object) @@ -119,7 +121,7 @@ g_slurm_jobcomp_init( char *jobcomp_loc ) char *plugin_type = "jobcomp"; char *type; - if (g_context) + if (init_run && g_context) return retval; slurm_mutex_lock( &context_lock ); @@ -136,6 +138,7 @@ g_slurm_jobcomp_init( char *jobcomp_loc ) retval = SLURM_ERROR; goto done; } + init_run = true; done: xfree(type); @@ -153,6 +156,7 @@ g_slurm_jobcomp_fini(void) if ( !g_context) goto done; + init_run = false; plugin_context_destroy ( g_context ); g_context = NULL; diff --git a/src/common/slurm_priority.c b/src/common/slurm_priority.c index 312c2376981..9f8b52c522e 100644 --- a/src/common/slurm_priority.c +++ b/src/common/slurm_priority.c @@ -66,6 +66,7 @@ static const char *syms[] = { static slurm_priority_ops_t ops; static plugin_context_t *g_priority_context = NULL; static pthread_mutex_t g_priority_context_lock = PTHREAD_MUTEX_INITIALIZER; +static bool init_run = false; /* * Initialize context for priority plugin @@ -76,7 +77,7 @@ extern int slurm_priority_init(void) char *plugin_type = "priority"; char *type = NULL; - if (g_priority_context) + if (init_run && g_priority_context) return retval; slurm_mutex_lock(&g_priority_context_lock); @@ -94,6 +95,7 @@ extern int slurm_priority_init(void) retval = SLURM_ERROR; goto done; } + init_run = true; done: slurm_mutex_unlock(&g_priority_context_lock); @@ -108,6 +110,7 @@ extern int slurm_priority_fini(void) if (!g_priority_context) return SLURM_SUCCESS; + init_run = false; rc = plugin_context_destroy(g_priority_context); g_priority_context = NULL; return rc; @@ -158,3 +161,4 @@ extern List priority_g_get_priority_factors_list( return (*(ops.get_priority_factors))(req_msg, uid); } + diff --git a/src/common/slurm_topology.c b/src/common/slurm_topology.c index e3090580b04..264ee2789e1 100644 --- a/src/common/slurm_topology.c +++ b/src/common/slurm_topology.c @@ -1,3 +1,4 @@ + /*****************************************************************************\ * slurm_topology.c - Topology plugin function setup. ***************************************************************************** @@ -73,6 +74,7 @@ static const char *syms[] = { static slurm_topo_ops_t ops; static plugin_context_t *g_context = NULL; static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER; +static bool init_run = false; /* *********************************************************************** */ /* TAG( slurm_topo_init ) */ @@ -89,7 +91,7 @@ slurm_topo_init( void ) char *plugin_type = "topo"; char *type = NULL; - if (g_context) + if (init_run && g_context) return retval; slurm_mutex_lock(&g_context_lock); @@ -107,6 +109,7 @@ slurm_topo_init( void ) retval = SLURM_ERROR; goto done; } + init_run = true; done: slurm_mutex_unlock(&g_context_lock); @@ -125,6 +128,7 @@ slurm_topo_fini( void ) if (!g_context) return SLURM_SUCCESS; + init_run = false; rc = plugin_context_destroy(g_context); g_context = NULL; return rc; diff --git a/src/common/switch.c b/src/common/switch.c index cf497ae67e8..488ce1b6798 100644 --- a/src/common/switch.c +++ b/src/common/switch.c @@ -162,6 +162,7 @@ static const char *syms[] = { static slurm_switch_ops_t ops; static plugin_context_t *g_context = NULL; static pthread_mutex_t context_lock = PTHREAD_MUTEX_INITIALIZER; +static bool init_run = false; extern int switch_init( void ) { @@ -169,7 +170,7 @@ extern int switch_init( void ) char *plugin_type = "switch"; char *type = NULL; - if ( g_context ) + if ( init_run && g_context ) return retval; slurm_mutex_lock( &context_lock ); @@ -186,6 +187,7 @@ extern int switch_init( void ) retval = SLURM_ERROR; goto done; } + init_run = true; done: slurm_mutex_unlock( &context_lock ); @@ -200,6 +202,7 @@ extern int switch_fini(void) if (!g_context) return SLURM_SUCCESS; + init_run = false; rc = plugin_context_destroy(g_context); return rc; } diff --git a/src/slurmctld/job_submit.c b/src/slurmctld/job_submit.c index 957fcc02fa9..3d72a4fbdbc 100644 --- a/src/slurmctld/job_submit.c +++ b/src/slurmctld/job_submit.c @@ -95,6 +95,7 @@ static slurm_submit_ops_t *ops = NULL; static plugin_context_t **g_context = NULL; static char *submit_plugin_list = NULL; static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER; +static bool init_run = false; /* * Initialize the job submit plugin. @@ -108,7 +109,7 @@ extern int job_submit_plugin_init(void) char *plugin_type = "job_submit"; char *type; - if (g_context_cnt >= 0) + if (init_run && (g_context_cnt >= 0)) return rc; slurm_mutex_lock(&g_context_lock); @@ -144,6 +145,7 @@ extern int job_submit_plugin_init(void) g_context_cnt++; names = NULL; /* for next iteration */ } + init_run = true; fini: slurm_mutex_unlock(&g_context_lock); @@ -167,6 +169,7 @@ extern int job_submit_plugin_fini(void) if (g_context_cnt < 0) goto fini; + init_run = false; for (i=0; i<g_context_cnt; i++) { if (g_context[i]) { j = plugin_context_destroy(g_context[i]); diff --git a/src/slurmctld/preempt.c b/src/slurmctld/preempt.c index b93009647a3..50923f08b2b 100644 --- a/src/slurmctld/preempt.c +++ b/src/slurmctld/preempt.c @@ -72,6 +72,7 @@ static const char *syms[] = { static slurm_preempt_ops_t ops; static plugin_context_t *g_context = NULL; static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER; +static bool init_run = false; /* *********************************************************************** */ /* TAG( _preempt_signal ) */ @@ -142,7 +143,7 @@ extern int slurm_preempt_init(void) /* This function is called frequently, so it should be as fast as * possible. The test below will be TRUE almost all of the time and * is as fast as possible. */ - if (g_context) + if (init_run && g_context) return retval; slurm_mutex_lock(&g_context_lock); @@ -159,6 +160,7 @@ extern int slurm_preempt_init(void) retval = SLURM_ERROR; goto done; } + init_run = true; done: slurm_mutex_unlock(&g_context_lock); @@ -176,6 +178,7 @@ extern int slurm_preempt_fini(void) if (!g_context) return SLURM_SUCCESS; + init_run = false; rc = plugin_context_destroy(g_context); g_context = NULL; return rc; diff --git a/src/slurmctld/sched_plugin.c b/src/slurmctld/sched_plugin.c index d4a970ef0a5..2f078e9a010 100644 --- a/src/slurmctld/sched_plugin.c +++ b/src/slurmctld/sched_plugin.c @@ -89,6 +89,7 @@ static const char *syms[] = { static slurm_sched_ops_t ops; static plugin_context_t *g_context = NULL; static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER; +static bool init_run = false; /* *********************************************************************** */ /* TAG( slurm_sched_init ) */ @@ -105,7 +106,7 @@ slurm_sched_init( void ) char *plugin_type = "sched"; char *type = NULL; - if ( g_context ) + if ( init_run && g_context ) return retval; slurm_mutex_lock( &g_context_lock ); @@ -122,6 +123,7 @@ slurm_sched_init( void ) retval = SLURM_ERROR; goto done; } + init_run = true; done: slurm_mutex_unlock( &g_context_lock ); @@ -140,6 +142,7 @@ slurm_sched_fini( void ) if (!g_context) return SLURM_SUCCESS; + init_run = false; rc = plugin_context_destroy(g_context); g_context = NULL; diff --git a/src/slurmd/common/proctrack.c b/src/slurmd/common/proctrack.c index 6c46cd91d48..7621410ff5b 100644 --- a/src/slurmd/common/proctrack.c +++ b/src/slurmd/common/proctrack.c @@ -76,6 +76,7 @@ static const char *syms[] = { static slurm_proctrack_ops_t ops; static plugin_context_t *g_context = NULL; static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER; +static bool init_run = false; /* *********************************************************************** */ /* TAG( slurm_proctrack_init ) */ @@ -89,7 +90,7 @@ extern int slurm_proctrack_init(void) char *plugin_type = "proctrack"; char *type = NULL; - if (g_context) + if (init_run && g_context) return retval; slurm_mutex_lock(&g_context_lock); @@ -106,6 +107,7 @@ extern int slurm_proctrack_init(void) retval = SLURM_ERROR; goto done; } + init_run = true; done: slurm_mutex_unlock(&g_context_lock); @@ -123,6 +125,7 @@ extern int slurm_proctrack_fini(void) if (!g_context) return SLURM_SUCCESS; + init_run = false; rc = plugin_context_destroy(g_context); g_context = NULL; return rc; diff --git a/src/slurmd/common/task_plugin.c b/src/slurmd/common/task_plugin.c index 7d211a48b9d..df648d96d3a 100644 --- a/src/slurmd/common/task_plugin.c +++ b/src/slurmd/common/task_plugin.c @@ -88,6 +88,7 @@ static slurmd_task_ops_t *ops = NULL; static plugin_context_t **g_task_context = NULL; static int g_task_context_num = -1; static pthread_mutex_t g_task_context_lock = PTHREAD_MUTEX_INITIALIZER; +static bool init_run = false; /* * Initialize the task plugin. @@ -101,7 +102,7 @@ extern int slurmd_task_init(void) char *task_plugin_type = NULL; char *last = NULL, *task_plugin_list, *type = NULL; - if ( g_task_context_num >= 0 ) + if ( init_run && (g_task_context_num >= 0) ) return retval; slurm_mutex_lock( &g_task_context_lock ); @@ -138,6 +139,7 @@ extern int slurmd_task_init(void) g_task_context_num++; task_plugin_list = NULL; /* for next iteration */ } + init_run = true; done: slurm_mutex_unlock( &g_task_context_lock ); @@ -162,6 +164,7 @@ extern int slurmd_task_fini(void) if (!g_task_context) goto done; + init_run = false; for (i = 0; i < g_task_context_num; i++) { if (g_task_context[i]) { if (plugin_context_destroy(g_task_context[i]) != diff --git a/src/srun/libsrun/launch.c b/src/srun/libsrun/launch.c index e674b9a68bf..d79cba9fde7 100644 --- a/src/srun/libsrun/launch.c +++ b/src/srun/libsrun/launch.c @@ -74,6 +74,7 @@ static const char *syms[] = { static plugin_ops_t ops; static plugin_context_t *plugin_context = NULL; static pthread_mutex_t plugin_context_lock = PTHREAD_MUTEX_INITIALIZER; +static bool init_run = false; static int _is_local_file (fname_t *fname) @@ -96,7 +97,7 @@ extern int launch_init(void) char *plugin_type = "launch"; char *type = NULL; - if (plugin_context) + if (init_run && plugin_context) return retval; slurm_mutex_lock(&plugin_context_lock); @@ -113,6 +114,7 @@ extern int launch_init(void) retval = SLURM_ERROR; goto done; } + init_run = true; done: slurm_mutex_unlock(&plugin_context_lock); @@ -128,6 +130,7 @@ extern int location_fini(void) if (!plugin_context) return SLURM_SUCCESS; + init_run = false; rc = plugin_context_destroy(plugin_context); plugin_context = NULL; -- GitLab