From ca81faf2a2d8ac48943d5d6bcc56a4464898f6c0 Mon Sep 17 00:00:00 2001 From: tewk <tewk@unknown> Date: Tue, 25 Jun 2002 17:37:02 +0000 Subject: [PATCH] Shared memory code to allow for IPC across forks --- src/slurmd/Makefile.am | 3 +- src/slurmd/shmem_struct.c | 137 ++++++++++++++++++++++++++++++++++++++ src/slurmd/shmem_struct.h | 41 +++++++++++- src/slurmd/slurmd.c | 5 ++ src/slurmd/task_mgr.c | 71 ++++++------------- src/slurmd/task_mgr.h | 11 --- 6 files changed, 203 insertions(+), 65 deletions(-) create mode 100644 src/slurmd/shmem_struct.c diff --git a/src/slurmd/Makefile.am b/src/slurmd/Makefile.am index 7f051edad3b..d0eac608c9f 100644 --- a/src/slurmd/Makefile.am +++ b/src/slurmd/Makefile.am @@ -11,5 +11,6 @@ LDADD = $(top_srcdir)/src/common/libcommon.la slurmd_SOURCES = slurmd.c \ get_mach_stat.c \ read_proc.c \ - task_mgr.c + task_mgr.c \ + shmem_struct.c diff --git a/src/slurmd/shmem_struct.c b/src/slurmd/shmem_struct.c new file mode 100644 index 00000000000..145978efe22 --- /dev/null +++ b/src/slurmd/shmem_struct.c @@ -0,0 +1,137 @@ +#include <stdlib.h> +#include <assert.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/shm.h> +#include <string.h> + +#include <src/common/log.h> +#include <src/common/slurm_protocol_api.h> +#include <src/slurmd/shmem_struct.h> + +#define SHMEM_KEY "slurm_shmem_key" +void * get_shmem ( ) /* Create a private SysV shared memory segment sized for one slurmd_shmem_t, attach it, and return the attach address. Failures of shmget or shmat are reported through fatal(). */ +{ + int shmem_id ; + void * shmem_addr ; + shmem_id = shmget ( IPC_PRIVATE , sizeof ( slurmd_shmem_t ) , IPC_CREAT | 0600 ); /* the low nine bits of the flag word are the access mode; with none set an unprivileged owner cannot attach */ + if ( shmem_id < 0 ) fatal ( "get_shmem: shmget failed, errno %d" , errno ) ; /* run-time failure, so checked explicitly rather than with an assert that NDEBUG compiles out */ + shmem_addr = shmat ( shmem_id , NULL , 0 ) ; + if ( shmem_addr == (void * ) -1 ) fatal ( "get_shmem: shmat failed, errno %d" , errno ) ; /* NOTE(review): assumes fatal() does not return; confirm in src/common/log.h */ + return shmem_addr ; +} + +void init_shmem ( slurmd_shmem_t * shmem ) /* Zero the whole segment, then mark every task slot and every job_step slot unused. NOTE(review): shmem->mutex is only zero-filled here; no pthread_mutex_init call is visible in this file. */ +{ + int i ; + memset ( shmem , 0 , sizeof ( slurmd_shmem_t ) ); + for ( i=0 ; i < MAX_TASKS ; i ++ ) + { + clear_task ( & shmem->tasks[i] ) ; +/* + shmem->tasks[i] . used = false ; + shmem->tasks[i] . job_step = NULL ; + shmem->tasks[i] .
next = NULL ; +*/ + } + for ( i=0 ; i < MAX_JOB_STEPS ; i ++ ) + { + clear_job_step ( & shmem->job_steps[i] ) ; +/* + shmem->job_steps[i] . used = false ; + shmem->job_steps[i] . head_task = NULL ; +*/ + } +} + +void * add_job_step ( slurmd_shmem_t * shmem , job_step_t * new_job_step ) /* Claim the first unused job_steps slot, copy new_job_step into it with copy_job_step, and return a pointer to the slot. When every slot is in use, calls fatal() and returns (void *) SLURM_ERROR. */ +{ + int i ; + for ( i=0 ; i < MAX_JOB_STEPS ; i ++ ) + { + if (shmem -> job_steps[i].used == false ) + { + shmem -> job_steps[i].used = true ; + copy_job_step ( & shmem -> job_steps[i] , new_job_step ); + return & shmem -> job_steps[i] ; + } + } + fatal ( "No available job_step slots in shmem segment"); + return (void * ) SLURM_ERROR ; +} + +void * add_task ( slurmd_shmem_t * shmem , task_t * new_task ) /* Claim the first unused tasks slot, copy new_task into it with copy_task, and return a pointer to the slot. When every slot is in use, calls fatal() and returns (void *) SLURM_ERROR. */ +{ + int i ; + for ( i=0 ; i < MAX_TASKS ; i ++ ) + { + if (shmem -> tasks[i].used == false ) + { + shmem -> tasks[i].used = true ; + copy_task ( & shmem -> tasks[i] , new_task ) ; + return & shmem -> tasks[i] ; + } + } + fatal ( "No available task slots in shmem segment"); + return (void * ) SLURM_ERROR ; +} + +void copy_task ( task_t * dest , task_t * const src ) /* Copy threadid, pid, task_id, uid and gid from src to dest; the used, job_step and next members of dest are left untouched. */ +{ + dest -> threadid = src -> threadid; + dest -> pid = src -> pid; + dest -> task_id = src -> task_id; + dest -> uid = src -> uid; + dest -> gid = src -> gid; +} + +void copy_job_step ( job_step_t * dest , job_step_t * src ) /* Copy job_id and job_step_id from src to dest; the used and head_task members of dest are left untouched. */ +{ + dest -> job_id = src -> job_id ; + dest -> job_step_id = src -> job_step_id ; +} + +int prepend_task ( slurmd_shmem_t * shmem , job_step_t * job_step , task_t * task ) /* Store a copy of task in a free slot via add_task and push that slot onto the front of the task list of job_step. Returns SLURM_SUCCESS, or SLURM_ERROR when add_task reports no free slot. */ +{ + task_t * new_task ; + if ( ( new_task = add_task ( shmem , task ) ) == ( void * ) SLURM_ERROR ) + { + fatal ( "No available task slots in shmem segment during prepend_task call "); + return SLURM_ERROR ; + } + /* prepend operation*/ + /* newtask next pointer gets head of the jobstep task list */ + new_task -> next = job_step -> head_task ; + /* newtask pointer becomes the new head of the jobstep task list */ + job_step -> head_task = new_task ; + /* set back pointer from task to job_step */ + new_task -> job_step = job_step ;
+ return SLURM_SUCCESS ; +} + +int deallocate_job_step ( job_step_t * jobstep ) /* Walk the task list of jobstep and clear every task slot on it, then clear the job_step slot itself. Always returns SLURM_SUCCESS. */ +{ + task_t * task_ptr = jobstep -> head_task ; + task_t * task_temp_ptr ; + while ( task_ptr != NULL ) + { + task_temp_ptr = task_ptr -> next ; /* saved first because clear_task resets the next pointer */ + clear_task ( task_ptr ) ; + task_ptr = task_temp_ptr ; + } + clear_job_step ( jobstep ) ; + return SLURM_SUCCESS ; +} + +void clear_task ( task_t * task ) /* Mark a task slot unused and reset its job_step and next pointers to NULL. */ +{ + task -> used = false ; + task -> job_step = NULL ; + task -> next = NULL ; +} + +void clear_job_step( job_step_t * job_step ) /* Mark a job_step slot unused and reset its task list head to NULL. */ +{ + job_step -> used = false ; + job_step -> head_task = NULL ; +} diff --git a/src/slurmd/shmem_struct.h b/src/slurmd/shmem_struct.h index 16877e2174d..641591b194d 100644 --- a/src/slurmd/shmem_struct.h +++ b/src/slurmd/shmem_struct.h @@ -1,9 +1,46 @@ #ifndef _SHMEM_STRUCT_H #define _SHMEM_STRUCT_H +#define MAX_TASKS 128 +#define MAX_JOB_STEPS 128 + +typedef struct job_step job_step_t ; +typedef struct task task_t ; +struct task +{ + pthread_t threadid; + uint32_t pid; + uint32_t task_id; + uint32_t uid; + uint32_t gid; + char used; /* set true by add_task, false by clear_task */ + job_step_t * job_step; /* back pointer set by prepend_task */ + task_t * next; /* next task on the same job_step list, or NULL */ +} ; + +struct job_step +{ + uint32_t job_id; + uint32_t job_step_id; + char used; /* set true by add_job_step, false by clear_job_step */ + task_t * head_task; /* first task on this step's list, or NULL */ +} ; + typedef struct slurmd_shmem { - pthread_mutex mutex; - task_desc tasks[128]; + pthread_mutex_t mutex; + task_t tasks[MAX_TASKS]; + job_step_t job_steps[MAX_JOB_STEPS]; } slurmd_shmem_t ; + +void * get_shmem ( ); +void init_shmem ( slurmd_shmem_t * shmem ); +void * add_job_step ( slurmd_shmem_t * shmem , job_step_t * new_job_step ) ; +void * add_task ( slurmd_shmem_t * shmem , task_t * new_task ); +void copy_task ( task_t * dest , task_t * const src ); +void copy_job_step ( job_step_t * dest , job_step_t * src ); +int prepend_task ( slurmd_shmem_t * shmem , job_step_t * job_step , task_t * task ); +int deallocate_job_step ( job_step_t * jobstep ); +void clear_task ( task_t * task ); +void clear_job_step( job_step_t * job_step ); #endif diff --git a/src/slurmd/slurmd.c
b/src/slurmd/slurmd.c index 2f5957f10e1..18e6d5f94fc 100644 --- a/src/slurmd/slurmd.c +++ b/src/slurmd/slurmd.c @@ -42,6 +42,7 @@ #include <src/slurmd/get_mach_stat.h> #include <src/slurmd/slurmd.h> #include <src/slurmd/task_mgr.h> +#include <src/slurmd/shmem_struct.h> #define BUF_SIZE 1024 #define MAX_NAME_LEN 1024 @@ -50,6 +51,7 @@ /* global variables */ time_t init_time; +slurmd_shmem_t * shmem_seg ; /* function prototypes */ void slurmd_req ( slurm_msg_t * msg ); @@ -78,6 +80,9 @@ int main (int argc, char *argv[]) if ( ( error_code = read_slurm_conf (SLURM_CONF) ) ) fatal ("slurmd: error %d from read_slurm_conf reading %s", error_code, SLURM_CONF); */ + + shmem_seg = get_shmem ( ) ; + init_shmem ( shmem_seg ) ; if ( ( error_code = gethostname (node_name, MAX_NAME_LEN) ) ) fatal ("slurmd: errno %d from gethostname", errno); task_mgr_init ( ) ; diff --git a/src/slurmd/task_mgr.c b/src/slurmd/task_mgr.c index 4c72da95bbe..7b1ac5c4a3b 100644 --- a/src/slurmd/task_mgr.c +++ b/src/slurmd/task_mgr.c @@ -9,11 +9,11 @@ #include <src/common/slurm_protocol_api.h> #include <src/common/slurm_errno.h> #include <src/slurmd/task_mgr.h> +#include <src/slurmd/shmem_struct.h> /* global variables */ -static List task_list ; /* file descriptor defines */ @@ -30,16 +30,26 @@ int iowatch_launch ( launch_tasks_msg_t * launch_msg ) ; int match_job_id_job_step_id ( void * _x, void * _key ) ; int append_task_to_list ( launch_tasks_msg_t * launch_msg , int pid ) ; int kill_task ( task_t * task ) ; +int interconnect_init ( launch_tasks_msg_t * launch_msg ); +int fan_out_task_launch ( launch_tasks_msg_t * launch_msg ); void task_mgr_init ( ) { - task_list = list_create ( slurm_free_task ) ; } int launch_tasks ( launch_tasks_msg_t * launch_msg ) { -#ifdef ELAN -#else + return interconnect_init ( launch_msg ); +} + +/* Contains interconnect specific setup instructions and then calls fan_out_task_launch */ +int interconnect_init ( launch_tasks_msg_t * launch_msg ) +{ + return
fan_out_task_launch ( launch_msg ) ; +} + +int fan_out_task_launch ( launch_tasks_msg_t * launch_msg ) +{ int i ; int cpid[64] ; for ( i = 0 ; i < launch_msg->tasks_to_launch ; i ++ ) @@ -50,8 +60,9 @@ int launch_tasks ( launch_tasks_msg_t * launch_msg ) break ; } } - - if ( i == launch_msg->tasks_to_launch ) /*parent*/ + + /*parent*/ + if ( i == launch_msg->tasks_to_launch ) { int waiting = i ; int j ; @@ -68,11 +79,11 @@ int launch_tasks ( launch_tasks_msg_t * launch_msg ) } } } - else /*child*/ + /*child*/ + else { iowatch_launch ( launch_msg ) ; } -#endif return SLURM_SUCCESS ; } @@ -167,36 +178,16 @@ int append_task_to_list ( launch_tasks_msg_t * launch_msg , int pid ) return ENOMEM ; task -> pid = pid; - task -> job_id = launch_msg -> job_id; - task -> job_step_id = launch_msg -> job_step_id; task -> uid = launch_msg -> uid; task -> gid = launch_msg -> gid; - list_append ( task_list , task ) ; return SLURM_SUCCESS ; } int kill_tasks ( kill_tasks_msg_t * kill_task_msg ) { - int error_code ; + int error_code = SLURM_SUCCESS ; /* the kill loop is removed by this change, so return a defined value instead of an indeterminate one */ - task_t key ; - task_t * curr_task ; - ListIterator iterator ; - iterator = list_iterator_create ( task_list ) ; - - key . job_id = kill_task_msg -> job_id ; - key .
job_id = kill_task_msg -> job_step_id ; - - while ( ( curr_task = list_find ( iterator , match_job_id_job_step_id , & key ) ) ) - { - if ( kill_task ( curr_task ) ) - { - error_code = ESLURMD_KILL_TASK_FAILED ; - } - list_delete ( iterator ) ; - } - list_iterator_destroy ( iterator ) ; return error_code ; } @@ -205,25 +196,3 @@ int kill_task ( task_t * task ) { return kill ( task -> pid , SIGKILL ) ; } - -int match_job_id_job_step_id ( void * _x, void * _key ) -{ - task_t * x = ( task_t * ) _x ; - task_t * key = ( task_t * ) _key ; - - if ( x->job_id == key->job_id && x->job_step_id == key->job_step_id ) - { - return true ; - } - else - { - return false ; - } -} - -void slurm_free_task ( void * _task ) -{ - task_t * task = ( task_t * ) _task ; - if ( task ) - free ( task ) ; -} diff --git a/src/slurmd/task_mgr.h b/src/slurmd/task_mgr.h index db16fc5dee9..fcaf46dcad4 100644 --- a/src/slurmd/task_mgr.h +++ b/src/slurmd/task_mgr.h @@ -19,15 +19,4 @@ void task_mgr_init ( ) ; int launch_tasks ( launch_tasks_msg_t * launch_msg ) ; int kill_tasks ( kill_tasks_msg_t * kill_task_msg ) ; -typedef struct task -{ - pthread_t threadid; - uint32_t pid; - uint32_t job_id; - uint32_t job_step_id; - uint32_t task_id; - uint32_t uid; - uint32_t gid; -} task_t ; - -- GitLab