diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index da582fdd702c91f09086fa2de6a67c483d125520..6285d29a17e12652f5dc8a8a140c10be96f77f3e 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -66,6 +66,7 @@ void fill_ctld_conf ( slurm_ctl_conf_t * build_ptr );
 void init_ctld_conf ( slurm_ctl_conf_t * build_ptr );
 void parse_commandline( int argc, char* argv[], slurm_ctl_conf_t * );
 void *process_rpc ( void * req );
+void report_locks_set ( void );
 void *slurmctld_background ( void * no_data );
 void *slurmctld_rpc_mgr( void * no_data );
 int slurm_shutdown ( void );
@@ -335,19 +336,57 @@ slurmctld_background ( void * no_data )
 					sleep (1);
 				if (server_thread_count)
 					info ("warning: shutting down with server_thread_count of %d",
 						server_thread_count);
+				report_locks_set ( );
+				last_checkpoint_time = now;
+				/* don't lock to ensure checkpoint never blocks */
+				/* issue call to save state */
+			}
+			else {
+				last_checkpoint_time = now;
+				lock_slurmctld (state_write_lock);
+				/* issue call to save state */
+				unlock_slurmctld (state_write_lock);
 			}
-
-			last_checkpoint_time = now;
-			lock_slurmctld (state_write_lock);
-			/* issue call to save state */
-			unlock_slurmctld (state_write_lock);
 		}
 	}
 	debug3 ("slurmctld_background shutting down");
+	remove_locks ( );
 	pthread_exit ((void *)0);
 }
 
+/* report_locks_set - report any slurmctld locks left set
+ * each lock is logged as R (read), W (write) and/or P (write_wait) */
+void
+report_locks_set ( void )
+{
+	slurmctld_lock_flags_t lock_flags;
+	/* each buffer holds at most "RWP" plus the terminating NUL */
+	char config[4]="", job[4]="", node[4]="", partition[4]="";
+
+	get_lock_values (&lock_flags);
+
+	if (lock_flags.config.read) strcat (config, "R");
+	if (lock_flags.config.write) strcat (config, "W");
+	if (lock_flags.config.write_wait) strcat (config, "P");
+
+	if (lock_flags.job.read) strcat (job, "R");
+	if (lock_flags.job.write) strcat (job, "W");
+	if (lock_flags.job.write_wait) strcat (job, "P");
+
+	if (lock_flags.node.read) strcat (node, "R");
+	if (lock_flags.node.write) strcat (node, "W");
+	if (lock_flags.node.write_wait) strcat (node, "P");
+
+	if (lock_flags.partition.read) strcat (partition, "R");
+	if (lock_flags.partition.write) strcat (partition, "W");
+	if (lock_flags.partition.write_wait) strcat (partition, "P");
+
+	if ((strlen (config) + strlen (job) + strlen (node) + strlen (partition)) > 0)
+		error ("The following locks were left set config:%s, job:%s, node:%s, part:%s",
+			config, job, node, partition);
+}
+
 /* process_rpc - process an RPC request and close the connection */
 void *
 process_rpc ( void * req )
@@ -1300,7 +1339,7 @@ init_ctld_conf ( slurm_ctl_conf_t * conf_ptr )
 	conf_ptr->slurmd_timeout	= 300 ;
 	conf_ptr->slurm_conf		= SLURM_CONFIG_FILE ;
 	conf_ptr->state_save_location	= xstrdup ("/tmp") ;
-	conf_ptr->tmp_fs		= NULL ;
+	conf_ptr->tmp_fs		= xstrdup ("/tmp") ;
 
 	servent = getservbyname (SLURMCTLD_PORT, NULL);
 	if (servent)
diff --git a/src/slurmctld/locks.c b/src/slurmctld/locks.c
index 6d9b82cf9ba0aebd71f1f1fa8fbd6513fe2665eb..5bf0dfdaa32af5c01c05f12b4c444ea486ef84ab 100644
--- a/src/slurmctld/locks.c
+++ b/src/slurmctld/locks.c
@@ -23,6 +23,9 @@
  * with SLURM; if not, write to the Free Software Foundation, Inc.,
  * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 \*****************************************************************************/
+/* NOTE: The locks below are implemented with the System V semaphore
+ * functions semget/semop/semctl
+\*****************************************************************************/
 
 #ifdef HAVE_CONFIG_H
 #  include <config.h>
@@ -36,6 +39,18 @@
 #include <src/slurmctld/locks.h>
 #include <src/slurmctld/slurmctld.h>
 
+#if defined(__GNU_LIBRARY__) && !defined(_SEM_SEMUN_UNDEFINED)
+/* union semun is defined by including <sys/sem.h> */
+#else
+/* according to X/OPEN we have to define it ourselves */
+union semun {
+	int val;			/* value for SETVAL */
+	struct semid_ds *buf;		/* buffer for IPC_STAT, IPC_SET */
+	unsigned short int *array;	/* array for GETALL, SETALL */
+	struct seminfo *__buf;		/* buffer for IPC_INFO */
+};
+#endif
+
 /* available data structure locks
  * we actually use three semaphores for each, see macros below
  * (lock_datatype_t * 3 + 0) = read_lock
@@ -65,8 +80,10 @@ void wr_wrunlock (lock_datatype_t datatype);
 void
 init_locks ( )
 {
-	if (sem_id == -1)
-		sem_id = semget ( IPC_PRIVATE, (COUNT_OF_LOCKS * 3), IPC_CREAT | 0600 );
+	if (sem_id >= 0)
+		return;
+
+	sem_id = semget ( IPC_PRIVATE, (COUNT_OF_LOCKS * 3), IPC_CREAT | 0600 );
 
 	if (sem_id < 0)
 		fatal ("semget errno %d", errno);
@@ -202,3 +219,46 @@ wr_wrunlock (lock_datatype_t datatype)
 	if (semop (sem_id, wrunlock_op, 1) == -1)
 		fatal ("semop errno %d", errno);
 }
+
+/* get_lock_values - Get the current value of all locks
+ * lock_flags (OUT) - one counter per semaphore; all zero if semctl fails */
+void
+get_lock_values (slurmctld_lock_flags_t *lock_flags)
+{
+	union semun arg;
+	/* GETALL stores one value per semaphore in the set, so size the
+	 * buffer the same way init_locks sizes the set */
+	unsigned short int array[COUNT_OF_LOCKS * 3] = { 0 };
+
+	arg.array = array;
+	/* on failure "array" keeps its zeros, so lock_flags is always defined */
+	if (semctl (sem_id, 0, GETALL, arg) == -1)
+		error ("semctl GETALL errno %d", errno);
+
+	lock_flags -> config.read = arg.array[0];
+	lock_flags -> config.write = arg.array[1];
+	lock_flags -> config.write_wait = arg.array[2];
+
+	lock_flags -> job.read = arg.array[3];
+	lock_flags -> job.write = arg.array[4];
+	lock_flags -> job.write_wait = arg.array[5];
+
+	lock_flags -> node.read = arg.array[6];
+	lock_flags -> node.write = arg.array[7];
+	lock_flags -> node.write_wait = arg.array[8];
+
+	lock_flags -> partition.read = arg.array[9];
+	lock_flags -> partition.write = arg.array[10];
+	lock_flags -> partition.write_wait = arg.array[11];
+}
+
+/* remove_locks - remove semaphores associated with our locks */
+void
+remove_locks ( void )
+{
+	/* IPC_RMID ignores the optional fourth semctl argument, so none is passed */
+	if (semctl (sem_id, 0, IPC_RMID) == -1)
+		error ("semctl IPC_RMID errno %d", errno);
+	else
+		sem_id = -1;	/* init_locks may now create a fresh set */
+}
diff --git a/src/slurmctld/locks.h b/src/slurmctld/locks.h
index ae3c0d719c4eb58ba34c8fc74db7c980a540103c..06371956dfe3b16d23397032a3134f785e25cb80 100644
--- a/src/slurmctld/locks.h
+++ b/src/slurmctld/locks.h
@@ -81,7 +81,25 @@ typedef struct {
 	lock_level_t	partition;
 } slurmctld_lock_t;
 
+/* semaphore values behind one data type's lock, as filled by get_lock_values */
+typedef struct {
+	unsigned	read;
+	unsigned	write;
+	unsigned	write_wait;
+} lock_flags_t;
+
+/* lock_flags_t for every data type guarded by the slurmctld locks */
+typedef struct {
+	lock_flags_t	config;
+	lock_flags_t	job;
+	lock_flags_t	node;
+	lock_flags_t	partition;
+} slurmctld_lock_flags_t;
+
+
+extern void get_lock_values (slurmctld_lock_flags_t *lock_flags);
 extern void init_locks ( );
 extern void lock_slurmctld (slurmctld_lock_t lock_levels);
+extern void remove_locks ( void );
 extern void unlock_slurmctld (slurmctld_lock_t lock_levels);
 