Skip to content
Snippets Groups Projects
Commit 6f05bf0a authored by Moe Jette's avatar Moe Jette
Browse files

Report state of locks on shutdown (in case any were left set). Remove semaphores

on normal completion.
parent 06a35b97
No related branches found
No related tags found
No related merge requests found
...@@ -66,6 +66,7 @@ void fill_ctld_conf ( slurm_ctl_conf_t * build_ptr ); ...@@ -66,6 +66,7 @@ void fill_ctld_conf ( slurm_ctl_conf_t * build_ptr );
void init_ctld_conf ( slurm_ctl_conf_t * build_ptr ); void init_ctld_conf ( slurm_ctl_conf_t * build_ptr );
void parse_commandline( int argc, char* argv[], slurm_ctl_conf_t * ); void parse_commandline( int argc, char* argv[], slurm_ctl_conf_t * );
void *process_rpc ( void * req ); void *process_rpc ( void * req );
void report_locks_set ( void );
void *slurmctld_background ( void * no_data ); void *slurmctld_background ( void * no_data );
void *slurmctld_rpc_mgr( void * no_data ); void *slurmctld_rpc_mgr( void * no_data );
int slurm_shutdown ( void ); int slurm_shutdown ( void );
...@@ -335,19 +336,55 @@ slurmctld_background ( void * no_data ) ...@@ -335,19 +336,55 @@ slurmctld_background ( void * no_data )
sleep (1); sleep (1);
if (server_thread_count) if (server_thread_count)
info ("warning: shutting down with server_thread_count of %d", server_thread_count); info ("warning: shutting down with server_thread_count of %d", server_thread_count);
report_locks_set ( );
last_checkpoint_time = now;
/* don't lock to insure checkpoint never blocks */
/* issue call to save state */
}
else {
last_checkpoint_time = now;
lock_slurmctld (state_write_lock);
/* issue call to save state */
unlock_slurmctld (state_write_lock);
} }
last_checkpoint_time = now;
lock_slurmctld (state_write_lock);
/* issue call to save state */
unlock_slurmctld (state_write_lock);
} }
} }
debug3 ("slurmctld_background shutting down"); debug3 ("slurmctld_background shutting down");
remove_locks ( );
pthread_exit ((void *)0); pthread_exit ((void *)0);
} }
/* lock_flags_to_str - build the short code string describing one lock
 * read_val, write_val, wait_val IN - values of the lock's read, write and
 *	write_wait semaphores
 * buf OUT - receives 'R', 'W' and/or 'P' (in that order) for each non-zero
 *	value, NUL terminated; must have room for at least 4 bytes
 */
static void
lock_flags_to_str ( unsigned read_val, unsigned write_val, unsigned wait_val,
		    char *buf )
{
	int len = 0;

	if (read_val)
		buf[len++] = 'R';
	if (write_val)
		buf[len++] = 'W';
	if (wait_val)
		buf[len++] = 'P';
	buf[len] = '\0';
}

/* report_locks_set - report any slurmctld locks left set
 * Logs a single error message naming, for each of the config, job, node and
 * partition locks, which of its semaphores are non-zero. Logs nothing if no
 * lock is set.
 */
void
report_locks_set ( void )
{
	/* Start from "nothing set": get_lock_values() can return early on
	 * failure without filling in the structure, and its contents must
	 * not be indeterminate when tested below */
	slurmctld_lock_flags_t lock_flags = { { 0 } };
	char config[4], job[4], node[4], partition[4];

	get_lock_values (&lock_flags);

	lock_flags_to_str (lock_flags.config.read, lock_flags.config.write,
			   lock_flags.config.write_wait, config);
	lock_flags_to_str (lock_flags.job.read, lock_flags.job.write,
			   lock_flags.job.write_wait, job);
	lock_flags_to_str (lock_flags.node.read, lock_flags.node.write,
			   lock_flags.node.write_wait, node);
	lock_flags_to_str (lock_flags.partition.read, lock_flags.partition.write,
			   lock_flags.partition.write_wait, partition);

	/* Only complain if at least one of the strings is non-empty */
	if (config[0] || job[0] || node[0] || partition[0])
		error ("The following locks were left set config:%s, job:%s, node:%s, part:%s",
			config, job, node, partition);
}
/* process_rpc - process an RPC request and close the connection */ /* process_rpc - process an RPC request and close the connection */
void * void *
process_rpc ( void * req ) process_rpc ( void * req )
...@@ -1300,7 +1337,7 @@ init_ctld_conf ( slurm_ctl_conf_t * conf_ptr ) ...@@ -1300,7 +1337,7 @@ init_ctld_conf ( slurm_ctl_conf_t * conf_ptr )
conf_ptr->slurmd_timeout = 300 ; conf_ptr->slurmd_timeout = 300 ;
conf_ptr->slurm_conf = SLURM_CONFIG_FILE ; conf_ptr->slurm_conf = SLURM_CONFIG_FILE ;
conf_ptr->state_save_location = xstrdup ("/tmp") ; conf_ptr->state_save_location = xstrdup ("/tmp") ;
conf_ptr->tmp_fs = NULL ; conf_ptr->tmp_fs = xstrdup ("/tmp") ;
servent = getservbyname (SLURMCTLD_PORT, NULL); servent = getservbyname (SLURMCTLD_PORT, NULL);
if (servent) if (servent)
......
...@@ -23,6 +23,9 @@ ...@@ -23,6 +23,9 @@
* with SLURM; if not, write to the Free Software Foundation, Inc., * with SLURM; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
\*****************************************************************************/ \*****************************************************************************/
/* NOTE: These functions closely resemble the semget/semop/semctl functions,
* but are written using the pthread_mutex_ functions
\*****************************************************************************/
#ifdef HAVE_CONFIG_H #ifdef HAVE_CONFIG_H
# include <config.h> # include <config.h>
...@@ -36,6 +39,18 @@ ...@@ -36,6 +39,18 @@
#include <src/slurmctld/locks.h> #include <src/slurmctld/locks.h>
#include <src/slurmctld/slurmctld.h> #include <src/slurmctld/slurmctld.h>
/* With the GNU C library, <sys/sem.h> supplies union semun unless it defines
 * _SEM_SEMUN_UNDEFINED. In every other case, according to X/OPEN, we have to
 * define it ourselves. */
#if !defined(__GNU_LIBRARY__) || defined(_SEM_SEMUN_UNDEFINED)
union semun {
	int val;			/* value for SETVAL */
	struct semid_ds *buf;		/* buffer for IPC_STAT, IPC_SET */
	unsigned short int *array;	/* array for GETALL, SETALL */
	struct seminfo *__buf;		/* buffer for IPC_INFO */
};
#endif
/* available data structure locks /* available data structure locks
* we actually use three semaphores for each, see macros below * we actually use three semaphores for each, see macros below
* (lock_datatype_t * 3 + 0) = read_lock * (lock_datatype_t * 3 + 0) = read_lock
...@@ -65,8 +80,10 @@ void wr_wrunlock (lock_datatype_t datatype); ...@@ -65,8 +80,10 @@ void wr_wrunlock (lock_datatype_t datatype);
void void
init_locks ( ) init_locks ( )
{ {
if (sem_id == -1) if (sem_id >= 0)
sem_id = semget ( IPC_PRIVATE, (COUNT_OF_LOCKS * 3), IPC_CREAT | 0600 ); return;
sem_id = semget ( IPC_PRIVATE, (COUNT_OF_LOCKS * 3), IPC_CREAT | 0600 );
if (sem_id < 0) if (sem_id < 0)
fatal ("semget errno %d", errno); fatal ("semget errno %d", errno);
...@@ -202,3 +219,43 @@ wr_wrunlock (lock_datatype_t datatype) ...@@ -202,3 +219,43 @@ wr_wrunlock (lock_datatype_t datatype)
if (semop (sem_id, wrunlock_op, 1) == -1) if (semop (sem_id, wrunlock_op, 1) == -1)
fatal ("semop errno %d", errno); fatal ("semop errno %d", errno);
} }
/* get_lock_values - Get the current value of all locks
 * lock_flags OUT - receives the value of every lock semaphore; all fields
 *	are set to zero if the semaphore values can not be read
 */
void
get_lock_values (slurmctld_lock_flags_t *lock_flags)
{
	union semun arg;
	/* 12 = four lock types (config, job, node, partition) with three
	 * semaphores each (read, write, write_wait) */
	unsigned short int array[12] = { 0 };
	unsigned int i;

	arg.array = array;
	if (semctl (sem_id, 0, GETALL, arg) != 0) {
		error ("semctl GETALL errno %d", errno);
		/* Report "no locks set" rather than leaving *lock_flags
		 * untouched, which would hand the caller indeterminate
		 * values */
		for (i = 0; i < (sizeof (array) / sizeof (array[0])); i++)
			array[i] = 0;
	}

	lock_flags -> config.read		= array[0];
	lock_flags -> config.write		= array[1];
	lock_flags -> config.write_wait		= array[2];
	lock_flags -> job.read			= array[3];
	lock_flags -> job.write			= array[4];
	lock_flags -> job.write_wait		= array[5];
	lock_flags -> node.read			= array[6];
	lock_flags -> node.write		= array[7];
	lock_flags -> node.write_wait		= array[8];
	lock_flags -> partition.read		= array[9];
	lock_flags -> partition.write		= array[10];
	lock_flags -> partition.write_wait	= array[11];
}
/* remove_locks - remove semaphores associated with our locks
 * On success the cached semaphore id is reset to -1 so that a later call to
 * init_locks() creates a new semaphore set instead of returning early with
 * the id of the removed one. On failure an error is logged and the id is
 * left unchanged.
 */
void
remove_locks ( void )
{
	union semun arg;

	/* Give the union a defined value before passing it by value */
	arg.val = 0;
	if (semctl (sem_id, 0, IPC_RMID, arg)) {
		error ("semctl IPC_RMID errno %d", errno);
		return;
	}
	sem_id = -1;
}
...@@ -81,7 +81,23 @@ typedef struct { ...@@ -81,7 +81,23 @@ typedef struct {
lock_level_t partition; lock_level_t partition;
} slurmctld_lock_t; } slurmctld_lock_t;
/* Values of the three semaphores that back a single lock */
typedef struct {
	unsigned int read;		/* read semaphore value */
	unsigned int write;		/* write semaphore value */
	unsigned int write_wait;	/* write_wait semaphore value */
} lock_flags_t;

/* Semaphore values for every slurmctld lock, as filled in by
 * get_lock_values() */
typedef struct {
	lock_flags_t config;		/* configuration lock */
	lock_flags_t job;		/* job lock */
	lock_flags_t node;		/* node lock */
	lock_flags_t partition;		/* partition lock */
} slurmctld_lock_flags_t;
extern void get_lock_values (slurmctld_lock_flags_t *lock_flags);
extern void init_locks ( ); extern void init_locks ( );
extern void lock_slurmctld (slurmctld_lock_t lock_levels); extern void lock_slurmctld (slurmctld_lock_t lock_levels);
extern void remove_locks ( void );
extern void unlock_slurmctld (slurmctld_lock_t lock_levels); extern void unlock_slurmctld (slurmctld_lock_t lock_levels);
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment