From f05e7f763a56ffe6641dba42ef0a1837b866376e Mon Sep 17 00:00:00 2001 From: Mark Grondona <mgrondona@llnl.gov> Date: Wed, 23 Oct 2002 15:30:57 +0000 Subject: [PATCH] o Check in of new slurmd codebase. Fixes for many bugs in old slurmd. --- src/common/util_signals.c | 87 -- src/common/util_signals.h | 9 - src/slurmd/Makefile.am | 50 +- src/slurmd/circular_buffer.c | 485 --------- src/slurmd/circular_buffer.h | 117 --- src/slurmd/cntl.h | 40 - src/slurmd/elan_interconnect.c | 97 +- src/slurmd/forked_ctrl.c | 107 -- src/slurmd/forked_io.c | 165 ---- src/slurmd/interconnect.h | 43 +- src/slurmd/io.c | 1202 +++++++++++++++-------- src/slurmd/io.h | 115 ++- src/slurmd/io_threads.c | 362 ------- src/slurmd/job.c | 272 +++++ src/slurmd/job.h | 123 +++ src/slurmd/locks.c | 164 ---- src/slurmd/locks.h | 115 --- src/slurmd/mgr.c | 312 ++++++ src/slurmd/{reconnect_utils.h => mgr.h} | 39 +- src/slurmd/nbio.c | 741 -------------- src/slurmd/nbio.h | 32 - src/slurmd/no_interconnect.c | 43 +- src/slurmd/pipes.c | 101 -- src/slurmd/pipes.h | 58 -- src/slurmd/reconnect_utils.c | 152 --- src/slurmd/semaphore.c | 321 ++++++ src/slurmd/semaphore.h | 93 ++ src/slurmd/setenvpf.c | 9 +- src/slurmd/setenvpf.h | 25 + src/slurmd/shm.c | 763 ++++++++++++++ src/slurmd/shm.h | 229 +++++ src/slurmd/shmem_struct.c | 263 ----- src/slurmd/shmem_struct.h | 82 -- src/slurmd/slurmd.c | 171 ++-- src/slurmd/task_mgr.c | 384 -------- src/slurmd/task_mgr.h | 102 -- src/slurmd/threaded_ctrl.c | 89 -- src/slurmd/threaded_io.c | 114 --- 38 files changed, 3255 insertions(+), 4421 deletions(-) delete mode 100644 src/common/util_signals.c delete mode 100644 src/common/util_signals.h delete mode 100644 src/slurmd/circular_buffer.c delete mode 100644 src/slurmd/circular_buffer.h delete mode 100644 src/slurmd/cntl.h delete mode 100644 src/slurmd/forked_ctrl.c delete mode 100644 src/slurmd/forked_io.c delete mode 100644 src/slurmd/io_threads.c create mode 100644 src/slurmd/job.c create mode 100644 src/slurmd/job.h 
delete mode 100644 src/slurmd/locks.c delete mode 100644 src/slurmd/locks.h create mode 100644 src/slurmd/mgr.c rename src/slurmd/{reconnect_utils.h => mgr.h} (56%) delete mode 100644 src/slurmd/nbio.c delete mode 100644 src/slurmd/nbio.h delete mode 100644 src/slurmd/pipes.c delete mode 100644 src/slurmd/pipes.h delete mode 100644 src/slurmd/reconnect_utils.c create mode 100644 src/slurmd/semaphore.c create mode 100644 src/slurmd/semaphore.h create mode 100644 src/slurmd/shm.c create mode 100644 src/slurmd/shm.h delete mode 100644 src/slurmd/shmem_struct.c delete mode 100644 src/slurmd/shmem_struct.h delete mode 100644 src/slurmd/task_mgr.c delete mode 100644 src/slurmd/task_mgr.h delete mode 100644 src/slurmd/threaded_ctrl.c delete mode 100644 src/slurmd/threaded_io.c diff --git a/src/common/util_signals.c b/src/common/util_signals.c deleted file mode 100644 index 03b773a0d85..00000000000 --- a/src/common/util_signals.c +++ /dev/null @@ -1,87 +0,0 @@ -#include <signal.h> -#include <errno.h> -#include <src/common/log.h> -#include <src/common/slurm_errno.h> -#include <src/common/util_signals.h> -int posix_signal_pipe_ignore () -{ - return posix_signal_ignore ( SIGPIPE ) ; -} - -int posix_signal_ignore ( int signal ) -{ - struct sigaction newaction ; - struct sigaction oldaction ; - newaction . 
sa_handler = SIG_IGN ; - if ( sigaction( signal , &newaction, &oldaction) )/* ignore tty input */ - { - error ("posix_signal_ignore: sigaction %m errno %d", errno); - return SLURM_ERROR ; - } - return SLURM_SUCCESS ; -} - -int unblock_all_signals_pthread ( ) -{ - sigset_t set; - if (sigfillset (&set)) - { - error ("unblock_all_signals_pthread: sigfillset %m errno %d", errno); - return SLURM_ERROR ; - } - if (pthread_sigmask (SIG_UNBLOCK, &set, NULL)) - { - error ("unblock_all_signals_pthread: pthread_sigmask %m errno %d", errno); - return SLURM_ERROR ; - } - return SLURM_SUCCESS ; -} - -int block_all_signals_pthread ( ) -{ - sigset_t set; - if (sigfillset (&set)) - { - error ("block_all_signals_pthread: sigfillset %m errno %d", errno); - return SLURM_ERROR ; - } - if (pthread_sigmask (SIG_BLOCK, &set, NULL)) - { - error ("block_all_signals_pthread: pthread_sigmask %m errno %d", errno); - return SLURM_ERROR ; - } - return SLURM_SUCCESS ; -} - -int unblock_all_signals ( ) -{ - sigset_t set; - if (sigfillset (&set)) - { - error ("unblock_all_signals: sigfillset %m errno %d", errno); - return SLURM_ERROR ; - } - if (sigprocmask (SIG_UNBLOCK, &set, NULL)) - { - error ("unblock_all_signals: sigprocmask %m errno %d", errno); - return SLURM_ERROR ; - } - return SLURM_SUCCESS ; -} - -int block_all_signals ( ) -{ - sigset_t set; - if (sigfillset (&set)) - { - error ("block_all_signals: sigfillset %m errno %d", errno); - return SLURM_ERROR ; - } - if (sigprocmask (SIG_BLOCK, &set, NULL)) - { - error ("block_all_signals: sigprocmask %m errno %d", errno); - return SLURM_ERROR ; - } - return SLURM_SUCCESS ; -} - diff --git a/src/common/util_signals.h b/src/common/util_signals.h deleted file mode 100644 index 1ac345f8a05..00000000000 --- a/src/common/util_signals.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _UTIL_SIGNALS_H -#define _UTIL_SIGNALS_H -int posix_signal_pipe_ignore () ; -int posix_signal_ignore ( int signal ) ; -int unblock_all_signals_pthread ( ) ; -int 
block_all_signals_pthread ( ) ; -int unblock_all_signals ( ) ; -int block_all_signals ( ) ; -#endif diff --git a/src/slurmd/Makefile.am b/src/slurmd/Makefile.am index 5245377e4a0..c1098d33420 100644 --- a/src/slurmd/Makefile.am +++ b/src/slurmd/Makefile.am @@ -7,20 +7,12 @@ sbin_PROGRAMS = slurmd if WITH_ELAN interconnect_lib = libelan_interconnect.la -process_lib = libforked.la -io_lib = libnbio.la -noinst_LTLIBRARIES = \ - libelan_interconnect.la \ - libforked.la \ - libnbio.la +noinst_LTLIBRARIES = \ + libelan_interconnect.la else interconnect_lib = libno_interconnect.la -process_lib = libthreaded.la -io_lib = libthreaded_io.la -noinst_LTLIBRARIES = \ - libno_interconnect.la \ - libthreaded.la \ - libthreaded_io.la +noinst_LTLIBRARIES = \ + libno_interconnect.la endif LDADD = $(top_srcdir)/src/common/libcommon.la \ @@ -28,29 +20,21 @@ LDADD = $(top_srcdir)/src/common/libcommon.la \ $(top_srcdir)/src/common/libcred.la \ $(SSL_LIBS) -slurmd_LDADD = $(process_lib) $(io_lib) $(LDADD) $(interconnect_lib) +slurmd_LDADD = $(LDADD) $(interconnect_lib) -common_sources = slurmd.c \ - batch_mgr.c \ - get_mach_stat.c \ - read_proc.c \ - task_mgr.c \ - shmem_struct.c \ - circular_buffer.c \ - pipes.c \ - locks.c \ - setenvpf.c +common_sources = \ + slurmd.c \ + mgr.c \ + get_mach_stat.c \ + read_proc.c \ + job.c job.h \ + io.c io.h \ + semaphore.c semaphore.h \ + shm.c shm.h \ + setenvpf.c setenvpf.h slurmd_SOURCES = $(common_sources) -libforked_la_SOURCES = forked_ctrl.c -libthreaded_la_SOURCES = threaded_ctrl.c -#libforked_io_la_SOURCES = forked_io.c reconnect_utils.c io_threads.c -libthreaded_io_la_SOURCES = threaded_io.c reconnect_utils.c io_threads.c - -libnbio_la_SOURCES = nbio.c reconnect_utils.c - -libelan_interconnect_la_SOURCES = elan_interconnect.c -libno_interconnect_la_SOURCES = no_interconnect.c - +libelan_interconnect_la_SOURCES = elan_interconnect.c +libno_interconnect_la_SOURCES = no_interconnect.c diff --git a/src/slurmd/circular_buffer.c 
b/src/slurmd/circular_buffer.c deleted file mode 100644 index 40b2c5afbd6..00000000000 --- a/src/slurmd/circular_buffer.c +++ /dev/null @@ -1,485 +0,0 @@ -/*****************************************************************************\ - * circular_buffers.c - - ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Kevin Tew <tew1@llnl.gov> et. al. - * UCRL-CODE-2002-040. - * - * This file is part of SLURM, a resource management program. - * For details, see <http://www.llnl.gov/linux/slurm/>. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
-\*****************************************************************************/ - -#include <assert.h> -#include <stdlib.h> -#include <string.h> -#include <src/common/log.h> -#include <src/common/xmalloc.h> -#include <src/common/slurm_errno.h> - -#include <src/slurmd/circular_buffer.h> - -#define DEF_INITIAL_BUFFER_SIZE 8192 -#define DEF_INCREMENTAL_BUFFER_SIZE 8192 -#define DEF_MAX_BUFFER_SIZE ( ( 8192 * 10 ) ) -#define BUFFER_FULL_DUMP_SIZE ( buf->min_size/2 ) - -static int assert_checks(circular_buffer_t * buf); -static int assert_checks_2(circular_buffer_t * buf); -static int expand_buffer(circular_buffer_t * buf); -static int shrink_buffer(circular_buffer_t * buf); -static void common_init(circular_buffer_t * buf); - -void free_circular_buffer(circular_buffer_t * buf_ptr) -{ - if (buf_ptr) { - if (buf_ptr->buffer) - xfree(buf_ptr->buffer); - xfree(buf_ptr); - } -} - -int init_circular_buffer(circular_buffer_t ** buf_ptr) -{ - circular_buffer_t *buf; - *buf_ptr = xmalloc(sizeof(circular_buffer_t)); - buf = *buf_ptr; - - buf->min_size = DEF_INITIAL_BUFFER_SIZE; - buf->max_size = DEF_MAX_BUFFER_SIZE; - buf->incremental_size = DEF_INCREMENTAL_BUFFER_SIZE; - - common_init(buf); - - return SLURM_SUCCESS; -} - -int init_circular_buffer2(circular_buffer_t ** buf_ptr, int min_size, - int max_size, int incremental_size) -{ - circular_buffer_t *buf; - *buf_ptr = xmalloc(sizeof(circular_buffer_t)); - buf = *buf_ptr; - - buf->min_size = min_size; - buf->max_size = max_size; - buf->incremental_size = incremental_size; - - common_init(buf); - - return SLURM_SUCCESS; -} - -static void common_init(circular_buffer_t * buf) -{ - buf->buffer = xmalloc(buf->min_size); - buf->buf_size = buf->min_size; - - buf->start = buf->buffer; - buf->end = buf->start + buf->buf_size; - - buf->head = buf->start; - buf->tail = buf->start; - - buf->read_size = 0; - buf->write_size = buf->min_size; -} - -void print_circular_buffer(circular_buffer_t * buf) -{ - info("--"); - info("buffer %X", 
buf->buffer); - info("start %X", buf->start); - info("end %X", buf->end); - info("head %X", buf->head); - info("tail %X", buf->tail); - info("rhead %i", buf->head - buf->start); - info("rtail %i", buf->tail - buf->start); - info("size %i", buf->buf_size); - info("read s %i", buf->read_size); - info("write s %i", buf->write_size); -} - -int cir_buf_read_update(circular_buffer_t * buf, unsigned int size) -{ - /*if zero read, just return */ - if (size == 0) { - debug("zero length read in cirular buffer"); - return SLURM_SUCCESS; - } - - /* before modifing the buffer lets do some sanity checks */ - assert(size <= buf->read_size); - assert_checks(buf); - assert_checks_2(buf); - - /* modify head position of the buffer */ - buf->head = buf->head + size; - - /* after modifing the buffer lets do some sanity checks */ - assert_checks(buf); - - /* take care of wrap around issues */ - if (buf->tail > buf->head) { /* CASE tail after head */ - buf->read_size = buf->tail - buf->head; - buf->write_size = buf->end - buf->tail; - } else if (buf->tail < buf->head) { /* CASE tail befpre head */ - if (buf->head == buf->end) { /* CASE tail == end */ - if (buf->tail == buf->start) { /* CASE head == start */ - /* buffer empty */ - shrink_buffer(buf); - } else { - buf->head = buf->start; - buf->read_size = buf->tail - buf->head; - buf->write_size = buf->end - buf->tail; - } - } else { - buf->read_size = buf->end - buf->head; - buf->write_size = buf->head - buf->tail; - } - } else if (buf->tail == buf->head) { /* CASE head == tail */ - /* buffer empty */ - shrink_buffer(buf); - } - - /* final sanity check */ - assert_checks(buf); - assert_checks_2(buf); - - return SLURM_SUCCESS; -} - -int cir_buf_write_update(circular_buffer_t * buf, unsigned int size) -{ - /* if zero read, just return */ - if (size == 0) { - info("zero length write in cirular buffer"); - return SLURM_SUCCESS; - } - - /* before modifing the buffer lets do some sanity checks */ - assert(size <= buf->write_size); - 
assert_checks(buf); - assert_checks_2(buf); - - /*modify headning position of the buffer */ - buf->tail = buf->tail + size; - - /* after modifing the buffer lets do some sanity checks */ - assert_checks(buf); - - /* take care of wrap around issues */ - if (buf->tail > buf->head) { /* CASE tail after head */ - if (buf->tail == buf->end) { /* CASE tail == end */ - if (buf->head == buf->start) { /* CASE head == start */ - /* buffer full */ - buf->write_size -= size; - buf->read_size += size; - expand_buffer(buf); - } else { - buf->tail = buf->start; - buf->write_size = buf->head - buf->tail; - buf->read_size = buf->end - buf->head; - } - } else { - buf->write_size = buf->end - buf->tail; - buf->read_size = buf->tail - buf->head; - } - } else if (buf->tail < buf->head) { /* CASE tail before head */ - buf->write_size = buf->head - buf->tail; - buf->read_size = buf->end - buf->head; - } else if (buf->tail == buf->head) { /* CASE head == tail */ - /* buffer full */ - buf->write_size -= size; - buf->read_size += size; - expand_buffer(buf); - } - - /* final sanity check */ - assert_checks(buf); - assert_checks_2(buf); - return SLURM_SUCCESS; -} - -static int assert_checks_2(circular_buffer_t * buf) -{ - /* sanity checks */ - - /* head pointer is between start and end */ - assert(buf->head >= buf->start); - assert(buf->head < buf->end); - - /* tail pointer is between start and end */ - assert(buf->tail >= buf->start); - assert(buf->tail < buf->end); - - if (buf->tail > buf->head) { - assert(buf->write_size == buf->end - buf->tail); - assert(buf->read_size == buf->tail - buf->head); - } else if (buf->tail < buf->head) { - assert(buf->write_size == buf->head - buf->tail); - assert(buf->read_size == buf->end - buf->head); - } else if (buf->tail == buf->head) { - assert(buf->write_size == buf->buf_size); - assert(buf->read_size == 0); - } - - return SLURM_SUCCESS; -} - -static int assert_checks(circular_buffer_t * buf) -{ - /* sanity checks */ - /* insures that dump data when 
MAX_BUFFER_SIZE is full will work correctly */ - - assert(buf != NULL); /* buf struct is not null */ - assert(buf->start == buf->buffer); /* stat hasn't moved */ - assert((buf->start) < (buf->end)); /* buf_end is after start */ - assert(buf->end - buf->start == buf->buf_size); /* buffer start and end haven't moved */ - - /* head pointer is between start and end */ - assert(buf->head >= buf->start); - assert(buf->head <= buf->end); - - /* tail pointer is between start and end */ - assert(buf->tail >= buf->start); - assert(buf->tail <= buf->end); - - return SLURM_SUCCESS; -} - -static int shrink_buffer(circular_buffer_t * buf) -{ - char *new_buffer; - - if (buf->buf_size == buf->min_size) { - /* info ( "circular buffer at minimum" ) ; */ - - buf->head = buf->start; - buf->tail = buf->start; - - buf->read_size = 0; - buf->write_size = buf->min_size; - - return SLURM_SUCCESS; - } else { - new_buffer = xmalloc(buf->min_size); - xfree(buf->buffer); - buf->buffer = new_buffer; - buf->buf_size = buf->min_size; - - buf->start = new_buffer; - buf->end = new_buffer + buf->min_size; - - buf->head = new_buffer; - buf->tail = new_buffer; - - buf->read_size = 0; - buf->write_size = buf->min_size; - - return SLURM_SUCCESS; - } -} - -static int expand_buffer(circular_buffer_t * buf) -{ - char *new_buffer; - int data_size; - int data_size_blk1; - int data_size_blk2; - - debug3("expanding circular buffer"); - /* print_circular_buffer(buf); */ - - /* buffer has reached its maximum size going to dump some data - * out the bit bucket - */ - if (buf->buf_size == buf->max_size) { - - debug3("circular buffer maxed out, dumping %d bytes of data", - BUFFER_FULL_DUMP_SIZE); - - if (buf->tail - buf->start >= BUFFER_FULL_DUMP_SIZE) { - buf->tail = buf->tail - BUFFER_FULL_DUMP_SIZE; - buf->write_size = BUFFER_FULL_DUMP_SIZE; - - if (buf->tail > buf->head) /* CASE tail after head */ - buf->read_size -= BUFFER_FULL_DUMP_SIZE; - - /* CASE tail befpre head */ - /* read_size stays the same */ - - } 
else { - int datasize_blk1 = buf->tail - buf->start; - int datasize_blk2 = - BUFFER_FULL_DUMP_SIZE - datasize_blk1; - buf->tail = buf->end - datasize_blk2; - buf->write_size = datasize_blk2; - buf->read_size = buf->tail - buf->head; - } - - return SLURM_SUCCESS; - } - - if (buf->tail > buf->head) { - new_buffer = - xmalloc(buf->buf_size + buf->incremental_size); - data_size = buf->tail - buf->head; - memcpy(new_buffer, buf->head, data_size); - xfree(buf->buffer); - - } else if (buf->tail <= buf->head) { /* CASE B */ - new_buffer = - xmalloc(buf->buf_size + buf->incremental_size); - data_size_blk1 = buf->end - buf->head; - data_size_blk2 = buf->tail - buf->start; - data_size = data_size_blk1 + data_size_blk2; - memcpy(new_buffer, buf->head, data_size_blk1); - memcpy(new_buffer + data_size_blk1, buf->start, - data_size_blk2); - xfree(buf->buffer); - } else { - fatal("Logical impossibility found in circular buffer"); - } - - /* set up new state variables - * !!!statement order below does matter - */ - buf->buffer = new_buffer; - buf->start = new_buffer; - buf->head = new_buffer; - buf->tail = new_buffer + data_size; - buf->end = new_buffer + buf->buf_size + buf->incremental_size; - buf->buf_size += buf->incremental_size; - buf->read_size = data_size; - buf->write_size = buf->end - buf->tail; - - return SLURM_SUCCESS; -} - -int cir_buf_get_line(circular_buffer_t * buf, cir_buf_line_t * line) -{ - char *tmp_head; - size_t tmp_length = 0; - size_t tmp_length2 = 0; - - /* stage one */ - line->line[0] = buf->head; - tmp_head = buf->head; - - while (true) { - /* check for max line length for transmit */ - if (tmp_length >= line->max_line_length) { - debug("cir_buf_get_line: max line length reached"); - line->line_count = 1; - line->line_length[0] = tmp_length; - return SLURM_SUCCESS; - } - - /* physical end of buffer reached need to wrap */ - if (tmp_head >= buf->end) { - debug("cir_buf_get_line: end of buffer reached"); - line->line_length[0] = tmp_length; - break; - } - - 
/* logical end of data reached, we are done, - * no more data in buffer - */ - if (tmp_head == buf->tail) { - debug("cir_buf_get_line: end of data"); - line->line_count = 1; - line->line_length[0] = tmp_length; - return SLURM_SUCCESS; - } - - /* new line found */ - if (*tmp_head == '\n') { - debug("cir_buf_get_line: newline character found"); - line->line_count = 1; - line->line_length[0] = tmp_length + 1; - return SLURM_SUCCESS; - } - - tmp_head++; - tmp_length++; - } - - /* stage one */ - line->line[1] = buf->start; - tmp_head = buf->start; - - while (true) { - /* max line length for transmit reached */ - if (tmp_length >= line->max_line_length) { - debug("cir_buf_get_line: max line length reached"); - line->line_count = 2; - line->line_length[1] = tmp_length2; - return SLURM_SUCCESS; - } - - /* physical end of buffer reached this shouldn't happen in stage two */ - if (tmp_head >= buf->end) { - error("VERY BAD - End of buffer reached in stage 2"); - line->line_length[1] = tmp_length2; - break; - } - - /* XXX: Is this if block redundant or what? 
*/ - /* - if (*tmp_head == '\n') { - debug("cir_buf_get_line: newline character found"); - line->line_count = 1; - line->line_length[0] = tmp_length + 1; - return SLURM_SUCCESS; - } - */ - if (tmp_head == buf->tail) { - debug("cir_buf_get_line: end of data reached"); - line->line_count = 2; - line->line_length[1] = tmp_length2; - return SLURM_SUCCESS; - } - /* new line found */ - if (*tmp_head == '\n') { - info("New line character found in "); - line->line_count = 1; - line->line_length[0] = tmp_length2 + 1; - return SLURM_SUCCESS; - } - - tmp_head++; - tmp_length++; - tmp_length2++; - } - - return SLURM_SUCCESS; -} - -int cir_buf_update_line(circular_buffer_t * buf, cir_buf_line_t * line) -{ - int i; - if (line->line_count > 2) { - error(" VERY BAD line -> line_count is too big %i ", - line->line_count); - return SLURM_ERROR; - } - for (i = 0; i < line->line_count; i++) { - cir_buf_write_update(buf, line->line_length[i]); - } - return SLURM_SUCCESS; -} diff --git a/src/slurmd/circular_buffer.h b/src/slurmd/circular_buffer.h deleted file mode 100644 index 995418129fa..00000000000 --- a/src/slurmd/circular_buffer.h +++ /dev/null @@ -1,117 +0,0 @@ -/*****************************************************************************\ - * circular_buffer.h - ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Kevin Tew <tew1@llnl.gov> et. al. - * UCRL-CODE-2002-040. - * - * This file is part of SLURM, a resource management program. - * For details, see <http://www.llnl.gov/linux/slurm/>. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. -\*****************************************************************************/ - -#ifndef _CIRCULAR_BUFFER_H -#define _CIRCULAR_BUFFER_H - -typedef struct circular_buffer { - char *buffer; /* buffer pointer - this never changes - except during allocate and deallocate */ - - char *start; /* buffer pointer copy - this never changes - except during allocate and deallocate , - but it is used in a lot of arithmetic - hence the paranoia copy */ - - char *end; /* one char past the last char of the buffer - - ths never changes except during allocate - and deallocate , but it is used in a lot - of arithmetic */ - - unsigned int buf_size; /* buffer size - this never changes except - during allocate and deallocate */ - - unsigned int read_size; /* buffer size that can be read */ - - unsigned int write_size;/* buffer size that can be written */ - - char *head; /* beginning of the used portion of the - buffer */ - - char *tail; /* end of the used portion of the buffer */ - - unsigned int min_size; /* min buffer size */ - unsigned int max_size; /* max buffer size */ - - unsigned int incremental_size; /* incremental buffer size */ - -} circular_buffer_t; - -typedef struct cir_buf_line { - char *line[2]; - size_t line_length[2]; - size_t line_count; - size_t max_line_length; -} cir_buf_line_t; - -/* init_circular_buffer2 - * allocated buffer structure and sets default parameter according to passed parameters - * OUT buf_ptr - the allocate buffer - * IN min_size - buffer min size default 8K - * IN max_size - buffer max size 10 * 8K - 
* IN incremental_size - buffer increment size 8K - */ -int inline init_circular_buffer2(circular_buffer_t ** buf_ptr, - int min_size, int max_size, - int incremental_size); - -/* init_circular_buffer2 - * allocated buffer structure and sets default parameter according to passed parameters - * OUT buf_ptr - the allocate buffer - */ -int inline init_circular_buffer(circular_buffer_t ** buf_ptr); - -/* free_circular_buffer - * deallocates the buffer - * IN buf_ptr - the allocated buffer - */ -void inline free_circular_buffer(circular_buffer_t * buf_ptr); - -/* print_circular_buffer - * prints the buffer - * IN buf_ptr - the buffer to print - */ -void inline print_circular_buffer(circular_buffer_t * buf_ptr); - -/* cir_buf_read_update - * updated the buffer state after a read from the buffer - * IN buf_ptr - the allocated buffer - * IN size - size of the read - */ -int cir_buf_read_update(circular_buffer_t * buf, unsigned int size); - -/* cir_buf_write_update - * updated the buffer state after a write to the buffer - * IN buf_ptr - the allocated buffer - * IN size - size of the write - */ -int cir_buf_write_update(circular_buffer_t * buf, unsigned int size); - -int cir_buf_get_line(circular_buffer_t * buf, cir_buf_line_t * line); - -int cir_buf_update_line(circular_buffer_t * buf, cir_buf_line_t * line); - -#endif /* !_CIRCULAR_BUFFER_H */ diff --git a/src/slurmd/cntl.h b/src/slurmd/cntl.h deleted file mode 100644 index 12952479570..00000000000 --- a/src/slurmd/cntl.h +++ /dev/null @@ -1,40 +0,0 @@ -/*****************************************************************************\ - * cntl.h - ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Kevin Tew <tew1@llnl.gov> et. al. - * UCRL-CODE-2002-040. - * - * This file is part of SLURM, a resource management program. 
- * For details, see <http://www.llnl.gov/linux/slurm/>. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. -\*****************************************************************************/ - -#ifndef _SLURMD_IO_H_ -#define _SLURMD_IO_H_ - -#include <src/slurmd/task_mgr.h> - -int launch_task(task_start_t * task_start); - -int wait_for_tasks(launch_tasks_request_msg_t * launch_msg, - task_start_t ** task_start); - -int kill_launched_tasks(launch_tasks_request_msg_t * launch_msg, - task_start_t ** task_start, int i); - -#endif /* !_SLURMD_IO_H_ */ diff --git a/src/slurmd/elan_interconnect.c b/src/slurmd/elan_interconnect.c index 00cfc7f50cf..2032eb604f3 100644 --- a/src/slurmd/elan_interconnect.c +++ b/src/slurmd/elan_interconnect.c @@ -1,11 +1,10 @@ /*****************************************************************************\ - * elan_interconnect.c - Demo the routines in common/qsw.c - * This can run mping on the local node (uses shared memory comms). - * ./runqsw /usr/lib/mpi-test/mping 1 1024 + * src/slurmd/elan_interconnect.c Elan interconnect implementation ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Kevin Tew <tew1@llnl.gov> et. al. 
+ * Written by Kevin Tew <tew1@llnl.gov> + * and Mark Grondona <mgrondona@llnl.gov> * UCRL-CODE-2002-040. * * This file is part of SLURM, a resource management program. @@ -22,11 +21,11 @@ * details. * * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., + * with SLURM; if not, write to the Free Software Foundation, Inc., * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. \*****************************************************************************/ -#define HAVE_LIBELAN3 +#include <src/slurmd/interconnect.h> #include <sys/types.h> #include <sys/wait.h> @@ -43,21 +42,9 @@ #include <src/common/qsw.h> #include <src/common/slurm_errno.h> #include <src/common/slurm_protocol_api.h> -#include <src/slurmd/task_mgr.h> #include <src/slurmd/interconnect.h> #include <src/slurmd/setenvpf.h> - - - -/* exported module funtion to launch tasks */ -/*launch_tasks should really be named launch_job_step*/ -int -launch_tasks(launch_tasks_request_msg_t * launch_msg) -{ - pthread_atfork(NULL, NULL, pthread_fork_child_after); - debug("launch_tasks: calling interconnect_init()"); - return interconnect_init(launch_msg); -} +#include <src/slurmd/shm.h> static int _wait_and_destroy_prg(qsw_jobinfo_t qsw_job, pid_t pid) @@ -65,10 +52,15 @@ _wait_and_destroy_prg(qsw_jobinfo_t qsw_job, pid_t pid) int i = 0; int sleeptime = 1; + shm_init(); + debug3("waiting to destory program description..."); + again: if (waitpid(pid, NULL, 0) < 0) { + if (errno == EINTR) + goto again; error("waitpid: %m"); - return SLURM_ERROR; + exit(1); } while(qsw_prgdestroy(qsw_job) < 0) { @@ -88,12 +80,16 @@ _wait_and_destroy_prg(qsw_jobinfo_t qsw_job, pid_t pid) sleep(sleeptime*=2); } + shm_fini(); + exit(0); return SLURM_SUCCESS; } -/* Contains interconnect specific setup instructions and then calls - * fan_out_task_launch */ -int interconnect_init ( launch_tasks_request_msg_t * launch_msg ) +/* + * prepare node for 
interconnect use + */ +int +interconnect_init(slurmd_job_t *job) { pid_t pid; @@ -101,43 +97,44 @@ int interconnect_init ( launch_tasks_request_msg_t * launch_msg ) switch ((pid = fork())) { case -1: - error ("elan_interconnect_init fork(): %m"); + error ("elan_interconnect_prepare fork(): %m"); return SLURM_ERROR ; case 0: /* child falls thru */ break; default: /* parent */ - return _wait_and_destroy_prg(launch_msg->qsw_job, pid); + return _wait_and_destroy_prg(job->qsw_job, pid); } /* Process 2: */ debug("calling qsw_prog_init from process %ld", getpid()); - if (qsw_prog_init(launch_msg->qsw_job, launch_msg->uid) < 0) { + if (qsw_prog_init(job->qsw_job, job->uid) < 0) { error ("elan interconnect_init: qsw_prog_init: %m"); /* we may lose the following info if not logging to stderr */ - qsw_print_jobinfo(stderr, launch_msg->qsw_job); - _exit(1) ; + qsw_print_jobinfo(stderr, job->qsw_job); + return SLURM_ERROR; } - fan_out_task_launch(launch_msg); - _exit(0); - - return SLURM_ERROR; /* XXX: why? */ + return SLURM_SUCCESS; } -int interconnect_set_capabilities(task_start_t * task_start) +int +interconnect_fini(slurmd_job_t *job) { - pid_t pid; - int nodeid, nnodes, nprocs, procid; + return SLURM_SUCCESS; +} +int +interconnect_attach(slurmd_job_t *job, int procid) +{ + int nodeid, nnodes, nprocs; - nodeid = task_start->launch_msg->srun_node_id; - nnodes = task_start->launch_msg->nnodes; - procid = task_start->local_task_id; - nprocs = task_start->launch_msg->nprocs; + nodeid = job->nodeid; + nnodes = job->nnodes; + nprocs = job->nprocs; debug3("nodeid=%d nnodes=%d procid=%d nprocs=%d", nodeid, nnodes, procid, nprocs); debug3("setting capability in process %ld", getpid()); - if (qsw_setcap(task_start->launch_msg->qsw_job, procid) < 0) { + if (qsw_setcap(job->qsw_job, procid) < 0) { error("qsw_setcap: %m"); return SLURM_ERROR; } @@ -148,25 +145,21 @@ int interconnect_set_capabilities(task_start_t * task_start) /* * Set environment variables needed by QSW MPICH / libelan. 
*/ -int interconnect_env(char ***env, uint16_t *envc, int nodeid, int nnodes, - int procid, int nprocs) +int interconnect_env(slurmd_job_t *job, int taskid) { - int cnt = *envc; + int cnt = job->envc; + int rank = job->task[taskid]->gid; - if (setenvpf(env, &cnt, "RMS_RANK=%d", procid) < 0) + if (setenvpf(&job->env, &cnt, "RMS_RANK=%d", rank ) < 0) return -1; - if (setenvpf(env, &cnt, "RMS_NODEID=%d", nodeid) < 0) + if (setenvpf(&job->env, &cnt, "RMS_NODEID=%d", job->nodeid) < 0) return -1; - if (setenvpf(env, &cnt, "RMS_PROCID=%d", procid) < 0) + if (setenvpf(&job->env, &cnt, "RMS_PROCID=%d", rank ) < 0) return -1; - if (setenvpf(env, &cnt, "RMS_NNODES=%d", nnodes) < 0) + if (setenvpf(&job->env, &cnt, "RMS_NNODES=%d", job->nnodes) < 0) return -1; - if (setenvpf(env, &cnt, "RMS_NPROCS=%d", nprocs) < 0) + if (setenvpf(&job->env, &cnt, "RMS_NPROCS=%d", job->nprocs) < 0) return -1; return 0; } - -void pthread_fork_child() -{ -} diff --git a/src/slurmd/forked_ctrl.c b/src/slurmd/forked_ctrl.c deleted file mode 100644 index 0ed37932174..00000000000 --- a/src/slurmd/forked_ctrl.c +++ /dev/null @@ -1,107 +0,0 @@ -/*****************************************************************************\ - * forked_ctrl.c - - ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Kevin Tew <tew1@llnl.gov> et. al. - * UCRL-CODE-2002-040. - * - * This file is part of SLURM, a resource management program. - * For details, see <http://www.llnl.gov/linux/slurm/>. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. -\*****************************************************************************/ - -#include <stdlib.h> -#include <sys/types.h> -#include <pwd.h> -#include <grp.h> -#include <sys/wait.h> -#include <errno.h> -#include <unistd.h> -#include <string.h> -#include <pthread.h> - -#include <src/common/log.h> -#include <src/common/list.h> -#include <src/common/xmalloc.h> -#include <src/common/slurm_protocol_api.h> -#include <src/common/slurm_errno.h> -#include <src/common/util_signals.h> - -#include <src/slurmd/task_mgr.h> -#include <src/slurmd/shmem_struct.h> -#include <src/slurmd/circular_buffer.h> -#include <src/slurmd/io.h> -#include <src/slurmd/pipes.h> -#include <src/slurmd/reconnect_utils.h> - -/* global variables */ - - -/****************************************************************** - *task launch method call hierarchy - * - *launch_tasks() - * interconnect_init() - * fan_out_task_launch() (pthread_create) - * task_exec_thread() (fork) for task exec - * task_exec_thread() (pthread_create) for io piping - ******************************************************************/ - -int launch_task(task_start_t * task_start) -{ - pid_t pid = fork(); - switch (pid) { - case -1: - slurm_perror("fork"); - return SLURM_ERROR; - break; - case 0: - task_exec_thread(task_start); - _exit(0); - default: - task_start->pthread_id = pid; - return SLURM_SUCCESS; - break; - - } - return SLURM_SUCCESS; -} - -int wait_for_tasks(launch_tasks_request_msg_t * launch_msg, - task_start_t ** task_start) -{ - int i; - int rc; - for (i = 0; i < 
launch_msg->tasks_to_launch; i++) { - rc = waitpid(task_start[i]->pthread_id, NULL, 0); - debug3 ("fan_out_task_launch: thread %i pthread_id %i joined ", - i, task_start[i]->pthread_id); - } - return SLURM_SUCCESS; -} - -int kill_launched_tasks(launch_tasks_request_msg_t * launch_msg, - task_start_t ** task_start, int i) -{ - /* - int rc ; - for ( i-- ; i >= 0 ; i -- ) { - rc = kill(task_start[i]->pthread_id, SIGKILL); - } - */ - return SLURM_SUCCESS; -} diff --git a/src/slurmd/forked_io.c b/src/slurmd/forked_io.c deleted file mode 100644 index 6307c942cd9..00000000000 --- a/src/slurmd/forked_io.c +++ /dev/null @@ -1,165 +0,0 @@ -/*****************************************************************************\ - * forked_io.c - - ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Kevin Tew <tew1@llnl.gov> et. al. - * UCRL-CODE-2002-040. - * - * This file is part of SLURM, a resource management program. - * For details, see <http://www.llnl.gov/linux/slurm/>. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
-\*****************************************************************************/ - -#include <stdlib.h> -#include <sys/types.h> -#include <pwd.h> -#include <grp.h> -#include <sys/wait.h> -#include <errno.h> -#include <unistd.h> -#include <string.h> -#include <pthread.h> - -#include <src/common/log.h> -#include <src/common/list.h> -#include <src/common/xmalloc.h> -#include <src/common/slurm_protocol_api.h> -#include <src/common/slurm_errno.h> -#include <src/common/util_signals.h> - -#include <src/slurmd/task_mgr.h> -#include <src/slurmd/shmem_struct.h> -#include <src/slurmd/circular_buffer.h> -#include <src/slurmd/io.h> -#include <src/slurmd/pipes.h> -#include <src/slurmd/reconnect_utils.h> - -/* global variables */ - - -/****************************************************************** - *task launch method call hierarchy - * - *launch_tasks() - * interconnect_init() - * fan_out_task_launch() (pthread_create) - * task_exec_thread() (fork) for task exec - * task_exec_thread() (pthread_create) for io piping - ******************************************************************/ -int forward_io ( task_start_t * task_start ) -{ - pid_t cpid ; - int * pipes = task_start -> pipes ; - -#define FORK_ERROR -1 - - //posix_signal_pipe_ignore ( ) ; - - /* open stdout*/ - connect_io_stream ( task_start , STDIN_OUT_SOCK ) ; - /* open stderr*/ - connect_io_stream ( task_start , SIG_STDERR_SOCK ) ; - - switch ( ( cpid = fork () ) ) - { - case FORK_ERROR : - goto return_label; - break; - case 0 : /*CHILD*/ - close ( pipes[CHILD_IN_RD_PIPE] ); - close ( pipes[CHILD_OUT_RD_PIPE] ); - close ( pipes[CHILD_OUT_WR_PIPE] ); - close ( pipes[CHILD_ERR_RD_PIPE] ); - close ( pipes[CHILD_ERR_WR_PIPE] ); - stdin_io_pipe_thread ( task_start ) ; - _exit( 0 ) ; - break; - default : /*PARENT*/ - task_start->io_pthread_id[STDIN_FILENO] = cpid ; - break ; - } - - switch ( ( cpid = fork () ) ) - { - case FORK_ERROR : - goto kill_stdin_thread; - break; - case 0 : /*CHILD*/ - close ( 
pipes[CHILD_IN_RD_PIPE] ); - close ( pipes[CHILD_IN_WR_PIPE] ); - close ( pipes[CHILD_OUT_WR_PIPE] ); - close ( pipes[CHILD_ERR_RD_PIPE] ); - close ( pipes[CHILD_ERR_WR_PIPE] ); - stdout_io_pipe_thread ( task_start ) ; - _exit( 0 ) ; - break; - default : /*PARENT*/ - task_start->io_pthread_id[STDOUT_FILENO] = cpid ; - break ; - } - - switch ( ( cpid = fork ( ) ) ) - { - case FORK_ERROR : - goto kill_stdout_thread; - break; - case 0 : /*CHILD*/ - close ( pipes[CHILD_IN_RD_PIPE] ); - close ( pipes[CHILD_IN_WR_PIPE] ); - close ( pipes[CHILD_OUT_RD_PIPE] ); - close ( pipes[CHILD_OUT_WR_PIPE] ); - close ( pipes[CHILD_ERR_WR_PIPE] ); - stderr_io_pipe_thread ( task_start ) ; - _exit( 0 ) ; - break; - default : /*PARENT*/ - task_start->io_pthread_id[STDERR_FILENO] = cpid ; - break ; - } - - goto return_label; - -kill_stdout_thread: - kill ( task_start->io_pthread_id[STDOUT_FILENO] , SIGKILL ); -kill_stdin_thread: - kill ( task_start->io_pthread_id[STDIN_FILENO] , SIGKILL ); -return_label: - return SLURM_SUCCESS ; -} - -int wait_on_io_threads ( task_start_t * task_start ) -{ - info ( "%i: err pid: %i " , task_start -> local_task_id , task_start->io_pthread_id[STDERR_FILENO] ) ; - info ( "%i: out pid: %i " , task_start -> local_task_id , task_start->io_pthread_id[STDOUT_FILENO] ) ; - info ( "%i: in pid: %i " , task_start -> local_task_id , task_start->io_pthread_id[STDIN_FILENO] ) ; - /* threads have been detatched*/ - waitpid ( task_start->io_pthread_id[STDERR_FILENO] , NULL , 0 ) ; - info ( "%i: errexit pid: %i " , task_start -> local_task_id , task_start->io_pthread_id[STDERR_FILENO] ) ; - waitpid ( task_start->io_pthread_id[STDOUT_FILENO] , NULL , 0 ) ; - info ( "%i: outexit pid: %i " , task_start -> local_task_id , task_start->io_pthread_id[STDOUT_FILENO] ) ; - /* waitpid ( task_start->io_pthread_id[STDIN_FILENO] , NULL ) ;*/ - kill ( task_start->io_pthread_id[STDIN_FILENO] , SIGKILL ); - info ( "%i: inexit pid: %i " , task_start -> local_task_id , 
task_start->io_pthread_id[STDIN_FILENO] ) ; - /* thread join on stderr or stdout signifies task termination we should kill the stdin thread */ - info ( "leaving wait_on_io_threads" ) ; - return SLURM_SUCCESS ; -} - -int iotype_init_pipes ( int * pipes ) -{ - return SLURM_SUCCESS ; -} diff --git a/src/slurmd/interconnect.h b/src/slurmd/interconnect.h index d083909c33d..405bdb00420 100644 --- a/src/slurmd/interconnect.h +++ b/src/slurmd/interconnect.h @@ -1,9 +1,11 @@ /*****************************************************************************\ - * interconnect.h - + * src/slurmd/interconnect.h - general interconnect routines for slurmd + * $Id$ ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Kevin Tew <tew1@llnl.gov> et. al. + * Written by Kevin Tew <tew1@llnl.gov> + * modified by Mark Grondona <mgrondona@llnl.gov> * UCRL-CODE-2002-040. * * This file is part of SLURM, a resource management program. @@ -20,41 +22,34 @@ * details. * * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., + * with SLURM; if not, write to the Free Software Foundation, Inc., * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
\*****************************************************************************/ -#ifndef _SLURMD_INTERCONNECT_H_ -#define _SLURMD_INTERCONNECT_H_ +#ifndef _INTERCONNECT_H_ +#define _INTERCONNECT_H_ #include <src/common/slurm_protocol_api.h> -#include <src/slurmd/task_mgr.h> +#include <src/slurmd/job.h> -/* interconnect_init - * called by launch_tasks to initialize the interconnect - * IN launch_msg - launch_tasks_msg - * RET int - return_code +/* + * initialize interconnect on node */ -int interconnect_init ( launch_tasks_request_msg_t * launch_msg ); +int interconnect_init(slurmd_job_t *job); -/* fan_out_task_launch - * called by launch_tasks to do the task fan out - * IN launch_msg - launch_tasks_msg - * RET int - return_code +/* + * finalize and detach from interconnect on node */ -int fan_out_task_launch ( launch_tasks_request_msg_t * launch_msg ); +int interconnect_fini(slurmd_job_t *job); -/* interconnect_set_capabilities - * called by fan_out_task_launch to set interconnect capabilities - * IN task_start - task_start structure - * RET int - return_code +/* + * attach process to interconnect */ -int interconnect_set_capabilities ( task_start_t * task_start ) ; +int interconnect_attach(slurmd_job_t *job, int taskid); /* * Set environment variables needed. */ -int interconnect_env(char ***env, uint16_t *envc, int nodeid, int nnodes, - int procid, int nprocs) ; +int interconnect_env(slurmd_job_t *job, int taskid); -#endif +#endif /* _INTERCONNECT_H */ diff --git a/src/slurmd/io.c b/src/slurmd/io.c index dae35c43044..5ca27c814df 100644 --- a/src/slurmd/io.c +++ b/src/slurmd/io.c @@ -1,9 +1,10 @@ /*****************************************************************************\ - * io.c - + * src/slurmd/io.c - I/O handling routines for slurmd + * $Id$ ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
- * Written by Kevin Tew <tew1@llnl.gov> et. al. + * Written by Mark Grondona <mgrondona@llnl.gov>. * UCRL-CODE-2002-040. * * This file is part of SLURM, a resource management program. @@ -20,455 +21,890 @@ * details. * * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., + * with SLURM; if not, write to the Free Software Foundation, Inc., * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. \*****************************************************************************/ -#include <stdlib.h> -#include <sys/types.h> -#include <pwd.h> -#include <grp.h> -#include <sys/wait.h> -#include <errno.h> +#if HAVE_CONFIG_H +# include <config.h> +#endif + +#if HAVE_UNISTD_H +# include <unistd.h> +#endif + +#if HAVE_STRING_H +# include <string.h> +#endif + +#if HAVE_STDLIB_H +# include <stdlib.h> +#endif + #include <unistd.h> -#include <string.h> -#include <pthread.h> +#include <errno.h> +#include <src/common/eio.h> +#include <src/common/cbuf.h> #include <src/common/log.h> +#include <src/common/fd.h> #include <src/common/list.h> -#include <src/common/pack.h> #include <src/common/xmalloc.h> -#include <src/common/slurm_protocol_api.h> -#include <src/common/slurm_errno.h> -#include <src/common/util_signals.h> -#include <src/slurmd/task_mgr.h> -#include <src/slurmd/shmem_struct.h> -#include <src/slurmd/circular_buffer.h> +#include <src/slurmd/job.h> +#include <src/slurmd/shm.h> #include <src/slurmd/io.h> -#include <src/slurmd/pipes.h> - -/* global variables */ -int connect_io_stream(task_start_t * task_start, int out_or_err); -int send_io_stream_header(task_start_t * task_start, int out_or_err); -ssize_t read_EINTR(int fd, void *buf, size_t count); - - -/****************************************************************** - *task launch method call hierarchy - * - *launch_tasks() - * interconnect_init() - * fan_out_task_launch() (pthread_create) - * task_exec_thread() (fork) for task exec - * 
task_exec_thread() (pthread_create) for io piping - ******************************************************************/ -int forward_io(task_start_t * task_start) -{ - pthread_attr_t pthread_attr; - -#define STDIN_OUT_SOCK 0 -#define SIG_STDERR_SOCK 1 - - /* open stdout */ - connect_io_stream(task_start, STDIN_OUT_SOCK); - /* open stderr */ - connect_io_stream(task_start, SIG_STDERR_SOCK); - - /* spawn io pipe threads */ - /* set detatch state */ - pthread_attr_init(&pthread_attr); - - if (pthread_create - (&task_start->io_pthread_id[STDIN_FILENO], NULL, - stdin_io_pipe_thread, task_start)) - goto return_label; - if (pthread_create - (&task_start->io_pthread_id[STDOUT_FILENO], NULL, - stdout_io_pipe_thread, task_start)) - goto kill_stdin_thread; - if (pthread_create - (&task_start->io_pthread_id[STDERR_FILENO], NULL, - stderr_io_pipe_thread, task_start)) - goto kill_stdout_thread; - - - - goto return_label; - - kill_stdout_thread: - pthread_kill(task_start->io_pthread_id[STDOUT_FILENO], SIGKILL); - kill_stdin_thread: - pthread_kill(task_start->io_pthread_id[STDIN_FILENO], SIGKILL); - return_label: - return SLURM_SUCCESS; + +typedef enum slurmd_io_tupe { + TASK_STDERR, + TASK_STDOUT, + TASK_STDIN, + CLIENT_STDERR, + CLIENT_STDOUT, +} slurmd_io_type_t; + +static char *slurmd_io_str[] = +{ + "domain socket", + "task stderr", + "task stdout", + "task stdin", + "client stderr", + "client stdout" +}; + + +struct io_info { +#ifndef NDEBUG +#define IO_MAGIC 0x10101 + int magic; +#endif + uint32_t id; + cbuf_t buf; + List readers; + List writers; + slurmd_io_type_t type; + unsigned eof:1; + unsigned disconnected:1; +}; + + +static int _io_init_pipes(task_info_t *t); +static void _io_prepare_clients(slurmd_job_t *); +static void _io_prepare_tasks(slurmd_job_t *); +static void * _io_thr(void *); +static int _io_write_header(struct io_info *, srun_info_t *); +static void _io_connect_objs(io_obj_t *, io_obj_t *); +static int _validate_io_list(List objList); +static int 
_shutdown_task_obj(struct io_info *t); + +static struct io_obj * _io_obj_create(int fd, void *arg); +static struct io_info * _io_info_create(uint32_t id); +static struct io_obj * _io_obj(int fd, uint id, int type); +static void * _io_thr(void *arg); + + +/* Slurmd I/O objects: + * N task stderr, stdout objs (read-only) + * N*M client stderr, stdout objs (read-write) (possibly a file) + * N task stdin objs (write only) (possibly a file) + */ + +static bool _readable(io_obj_t *); +static bool _writable(io_obj_t *); +static int _write(io_obj_t *, List); +static int _task_read(io_obj_t *, List); +static int _client_read(io_obj_t *, List); +static int _task_error(io_obj_t *, List); +static int _client_error(io_obj_t *, List); + + +struct io_operations task_out_ops = { + readable: &_readable, + handle_read: &_task_read, + handle_error: &_task_error +}; + +struct io_operations task_in_ops = { + writable: &_writable, + handle_write: &_write, + handle_error: &_task_error, +}; + +struct io_operations client_ops = { + readable: &_readable, + writable: &_writable, + handle_read: &_client_read, + handle_write: &_write, + handle_error: &_client_error, +}; + +int +io_spawn_handler(slurmd_job_t *job) +{ + pthread_attr_t attr; + + if (io_init_pipes(job) == SLURM_FAILURE) { + error("io_handler: init_pipes failed: %m"); + return SLURM_FAILURE; + } + + /* create task IO objects and append these to the objs list + * + * XXX check for errors? 
+ */ + _io_prepare_tasks(job); + + /* open 2*ntask initial connections or files for stdout/err + * append these to objs list + */ + _io_prepare_clients(job); + + if ((errno = pthread_attr_init(&attr)) != 0) + error("pthread_attr_init: %m"); + +#ifdef PTHREAD_SCOPE_SYSTEM + if ((errno = pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM)) != 0) + error("pthread_attr_setscope: %m"); +#endif + xassert(_validate_io_list(job->objs)); + + return pthread_create(&job->ioid, &attr, &_io_thr, (void *)job); } -int wait_on_io_threads(task_start_t * task_start) +static int +_xclose(int fd) { - /* threads have been detatched */ - pthread_join(task_start->io_pthread_id[STDERR_FILENO], NULL); - info("errexit"); - pthread_join(task_start->io_pthread_id[STDOUT_FILENO], NULL); - info("outexit"); - /*pthread_join ( task_start->io_pthread_id[STDIN_FILENO] , NULL ) ; */ - pthread_cancel(task_start->io_pthread_id[STDIN_FILENO]); - info("inexit"); - /* thread join on stderr or stdout signifies task termination we should kill the stdin thread */ - return SLURM_SUCCESS; + int rc; + do rc = close(fd); + while (rc == -1 && errno == EINTR); + return rc; } -void *stdin_io_pipe_thread(void *arg) +/* Close child fds in parent */ +static void +_io_finalize(task_info_t *t) { - task_start_t *task_start = (task_start_t *) arg; - int bytes_read; - int bytes_written; - int local_errno; - circular_buffer_t *cir_buf; + if (_xclose(t->pin[0] ) < 0) + error("close(stdin) : %m"); + if (_xclose(t->pout[1]) < 0) + error("close(stdout): %m"); + if (_xclose(t->perr[1]) < 0) + error("close(stderr): %m"); +} + +void +io_close_all(slurmd_job_t *job) +{ + int i; + for (i = 0; i < job->ntasks; i++) + _io_finalize(job->task[i]); +} + +static void * +_io_thr(void *arg) +{ + slurmd_job_t *job = (slurmd_job_t *) arg; + log_reinit(); + io_handle_events(job->objs); + verbose("IO handler exited"); + return (void *)1; +} - init_circular_buffer(&cir_buf); +static void +_io_prepare_tasks(slurmd_job_t *job) +{ + int i; + 
srun_info_t *srun; + task_info_t *t; - posix_signal_pipe_ignore(); + srun = list_peek(job->sruns); - while (true) { - if ((cir_buf->write_size == 0)) { - info("stdin cir_buf->write_size == 0 this shouldn't happen"); - continue; + for (i = 0; i < job->ntasks; i++) { + t = job->task[i]; + + t->in = _io_obj(t->pin[1], t->gid, TASK_STDIN ); + list_append(job->objs, (void *)t->in ); + + t->out = _io_obj(t->pout[0], t->gid, TASK_STDOUT); + list_append(job->objs, (void *)t->out); + + t->err = _io_obj(t->perr[0], t->gid, TASK_STDERR); + list_append(job->objs, (void *)t->err); + } +} + +#if 0 +/* + * create initial file objs for N tasks + */ +static void +_io_prepare_files(slurmd_job_t *job) +{ + int i, fd; + int err_flags = O_WRONLY | O_CREAT | O_EXCL; + int out_flags = O_WRONLY | O_CREAT | O_EXCL; + int in_flags = O_RDONLY | O_CREAT | O_EXCL; + char *filename; + io_obj_t *obj; + + if (job->outf) { + if + for (i = 0; i < job->ntasks; i++) { + char *buf[4096]; + snprintf(buf, 4096, job->outf, i); + if (open(buf, out_flags) < 0) + error("can't open file `%s': %m", buf); + } + } +} +#endif + +/* + * create initial client objs for N tasks + */ +static void +_io_prepare_clients(slurmd_job_t *job) +{ + int i, sock; + io_obj_t *obj; + srun_info_t *srun; - if ((bytes_read = - slurm_read_stream(task_start->sockets[STDIN_OUT_SOCK], - cir_buf->tail, - cir_buf->write_size)) <= 0) { - local_errno = errno; - if (bytes_read == 0) { - info("0 returned EOF on socket "); - break; - } else if (bytes_read == -1) { - switch (local_errno) { - case EBADF: - case EPIPE: - case ECONNREFUSED: - case ECONNRESET: - case ENOTCONN: - break; - default: - info("error reading stdin stream for task %i, %m errno: %i , bytes read %i ", task_start->launch_msg->global_task_ids[task_start->local_task_id], local_errno, bytes_read); - error("uncaught errno %i", - local_errno); - break; - } - } else { - info("bytes_read: %i don't know what to do with this return code ", bytes_read); - } - } else { - 
cir_buf_write_update(cir_buf, bytes_read); + xassert(list_count(job->sruns) == 1); + + srun = list_peek(job->sruns); + + /* create sockets for stdout/err + */ + for (i = 0; i < job->ntasks; i++) { + task_info_t *t = job->task[i]; + + sock = (int) slurm_open_stream(&srun->ioaddr); + if (sock < 1) { + error("connect io: %m"); + return; } + fd_set_nonblocking(sock); + fd_set_close_on_exec(sock); + obj = _io_obj(sock, t->gid, CLIENT_STDOUT); + _io_write_header(obj->arg, srun); + list_append(job->objs, obj); + + _io_connect_objs(t->out, obj); + _io_connect_objs(obj, t->in ); + + sock = (int) slurm_open_stream(&srun->ioaddr); + fd_set_nonblocking(sock); + fd_set_close_on_exec(sock); + obj = _io_obj(sock, t->gid, CLIENT_STDERR); + _io_write_header(obj->arg, srun); + list_append(job->objs, obj); + + _io_connect_objs(t->err, obj); + } +} - /* debug */ - //write ( 1 , "stdin-", 6 ) ; - //write ( 1 , cir_buf->head , cir_buf->read_size ) ; - info("%i stdin bytes read", bytes_read); - /* debug */ - - while (true) { - - if ((bytes_written = - write(task_start->pipes[CHILD_IN_WR], - cir_buf->head, - cir_buf->read_size)) <= 0) { - if ((bytes_written == SLURM_PROTOCOL_ERROR) - && (errno == EINTR)) { - continue; - } else { - - local_errno = errno; - info("error sending stdin stream for task %i, %m errno: %i , bytes read %i ", task_start->launch_msg->global_task_ids[task_start->local_task_id], local_errno, bytes_read); - goto stdin_return; - } - } else { - cir_buf_read_update(cir_buf, - bytes_written); - break; - } +static void +_io_connect_objs(io_obj_t *obj1, io_obj_t *obj2) +{ + struct io_info *src = (struct io_info *) obj1->arg; + struct io_info *dst = (struct io_info *) obj2->arg; + xassert(src->magic == IO_MAGIC); + xassert(dst->magic == IO_MAGIC); + list_append(src->readers, dst); + list_append(dst->writers, src); +} + +static int +_validate_task_out(struct io_info *t, int type) +{ + ListIterator i; + struct io_info *r; + int retval = 1; + + xassert(t->magic == IO_MAGIC); + + 
if (t->writers) + retval = 0; + + i = list_iterator_create(t->readers); + while ((r = list_next(i))) { + if (r->type != type) { + fatal("_validate_io: %s reader is %s", + slurmd_io_str[t->type], + slurmd_io_str[r->type]); } } - stdin_return: - free_circular_buffer(cir_buf); - close(task_start->pipes[CHILD_IN_WR]); - pthread_exit(NULL); + list_iterator_destroy(i); + + return retval; } -#define RECONNECT_RETRY_TIME 1 -void *stdout_io_pipe_thread(void *arg) +static int +_validate_task_in(struct io_info *t) { - task_start_t *task_start = (task_start_t *) arg; - int bytes_read; - int sock_bytes_written; - int local_errno; - int attempt_reconnect = false; - time_t last_reconnect_try = 0; - circular_buffer_t *cir_buf; + ListIterator i; + struct io_info *r; + int retval = 1; - init_circular_buffer(&cir_buf); + xassert(t->magic == IO_MAGIC); - posix_signal_pipe_ignore(); + if (t->readers) + retval = 0; - while (true) { - if ((cir_buf->write_size == 0)) { - info("stdout cir_buf->write_size == 0 this shouldn't happen"); - continue; + i = list_iterator_create(t->writers); + while ((r = list_next(i)) != NULL) { + if (r->magic != IO_MAGIC) { + error("_validate_io: %s writer is invalid", + slurmd_io_str[t->type]); + return 0; + } + if (r->type != CLIENT_STDOUT) { + error("_validate_io: %s writer is %s", + slurmd_io_str[t->type], + slurmd_io_str[r->type]); + retval = 0; } + } + list_iterator_destroy(i); + + return retval; +} - /* read stdout code */ - if ((bytes_read = - read_EINTR(task_start->pipes[CHILD_OUT_RD], - cir_buf->tail, - cir_buf->write_size)) <= 0) { - local_errno = errno; - info("error reading stdout stream for task %i, %m errno: %i , bytes read %i ", task_start->launch_msg->global_task_ids[task_start->local_task_id], local_errno, bytes_read); - goto stdout_return; - } else { - cir_buf_write_update(cir_buf, bytes_read); + +static int +_validate_client_stdout(struct io_info *client) +{ + ListIterator i; + struct io_info *t; + int retval = 1; + + xassert(client->magic 
== IO_MAGIC); + + i = list_iterator_create(client->readers); + while ((t = list_next(i))) { + if (t->type != TASK_STDIN) { + error("_validate_io: client stdin reader is %s", + slurmd_io_str[t->type]); + retval = 0; } + } + list_iterator_destroy(i); + + i = list_iterator_create(client->writers); + while ((t = list_next(i))) { + if (t->type != TASK_STDOUT) { + error("_validate_io: client stdout writer is %s", + slurmd_io_str[t->type]); + retval = 0; + } + } + list_iterator_destroy(i); + + return retval; +} - /* debug */ - write(1, cir_buf->head, cir_buf->read_size); - info("%i stdout bytes read", bytes_read); - /* debug */ - - /* reconnect code */ - if (attempt_reconnect) { - time_t curr_time = time(NULL); - if (difftime(curr_time, last_reconnect_try) > - RECONNECT_RETRY_TIME) { - slurm_close_stream(task_start-> - sockets - [STDIN_OUT_SOCK]); - if ((task_start->sockets[STDIN_OUT_SOCK] = - slurm_open_stream(& - (task_start-> - io_streams_dest))) - == SLURM_PROTOCOL_ERROR) { - local_errno = errno; - info("error reconnecting socket to srun to pipe stdout errno %i", local_errno); - last_reconnect_try = time(NULL); - continue; - } - attempt_reconnect = false; - } else { - continue; - } +static int +_validate_client_stderr(struct io_info *client) +{ + ListIterator i; + struct io_info *t; + int retval = 1; + + xassert(client->magic == IO_MAGIC); + + if (client->readers) + retval = 0; + + i = list_iterator_create(client->writers); + while ((t = list_next(i))) { + if (t->type != TASK_STDERR) { + error("_validate_io: client stderr writer is %s", + slurmd_io_str[t->type]); + retval = 0; } + } + list_iterator_destroy(i); - /* write out socket code */ - if ((sock_bytes_written = - slurm_write_stream(task_start-> - sockets[STDIN_OUT_SOCK], - cir_buf->head, - cir_buf->read_size)) == - SLURM_PROTOCOL_ERROR) { - local_errno = errno; - switch (local_errno) { - case EBADF: - case EPIPE: - case ECONNREFUSED: - case ECONNRESET: - case ENOTCONN: - info("std out connection losti %i", - 
local_errno); - attempt_reconnect = true; - slurm_close_stream(task_start-> - sockets - [STDIN_OUT_SOCK]); - break; - default: - info("error sending stdout stream for task %i, errno %i", task_start->launch_msg->global_task_ids[task_start->local_task_id], local_errno); - error("uncaught errno %i", local_errno); - break; - } - continue; + return retval; +} + +static int +_validate_io_list(List objList) +{ + io_obj_t *obj; + int retval = 1; + ListIterator i = list_iterator_create(objList); + while ((obj = list_next(i))) { + struct io_info *io = (struct io_info *) obj->arg; + switch (io->type) { + case TASK_STDOUT: + xassert(_validate_task_out(io, CLIENT_STDOUT)); + break; + case TASK_STDERR: + xassert(_validate_task_out(io, CLIENT_STDERR)); + break; + case TASK_STDIN: + xassert(_validate_task_in(io)); + break; + case CLIENT_STDERR: + xassert(_validate_client_stderr(io)); + break; + case CLIENT_STDOUT: + xassert(_validate_client_stdout(io)); + break; } - cir_buf_read_update(cir_buf, sock_bytes_written); } + list_iterator_destroy(i); + return retval; +} + +static int +find_obj(struct io_info *obj, struct io_info *key) +{ + xassert(obj != NULL); + xassert(key != NULL); - stdout_return: - free_circular_buffer(cir_buf); - slurm_close_stream(task_start->sockets[STDIN_OUT_SOCK]); - close(task_start->pipes[CHILD_OUT_RD]); - pthread_exit(NULL); + return (obj == key); } -void *stderr_io_pipe_thread(void *arg) + +static void +_io_disconnect_client(struct io_info *client) { - task_start_t *task_start = (task_start_t *) arg; - int bytes_read; - int sock_bytes_written; - int local_errno; - int attempt_reconnect = false; - time_t last_reconnect_try = 0; - circular_buffer_t *cir_buf; + struct io_info *t; + int n; + + xassert(client->magic == IO_MAGIC); + client->disconnected = 1; + + /* delete client from its writer->readers list + * (a client should have only one writer) + */ + t = list_peek(client->writers); + + xassert(!t || t->type == TASK_STDERR || t->type == TASK_STDOUT); + if 
(t && list_count(t->readers) > 1) { + n = list_delete_all(t->readers, (ListFindF)find_obj, client); + if (n <= 0) + error("deleting client from readers"); + } - init_circular_buffer(&cir_buf); + if (!client->readers) + return; + + /* delete STDOUT client from its reader->writers list + * (a client obj should have only one reader) + */ + t = list_peek(client->readers); + if (t) { + n = list_delete_all(t->writers, (ListFindF)find_obj, client); + if (n <= 0) + error("deleting client from readers"); + } +} - posix_signal_pipe_ignore(); - while (true) { - if ((cir_buf->write_size == 0)) { - info("stderr cir_buf->write_size == 0 this shouldn't happen"); - continue; - } +io_obj_t * +_io_obj(int fd, uint32_t id, int type) +{ + struct io_info *io = _io_info_create(id); + struct io_obj *obj = _io_obj_create(fd, (void *)io); + + xassert(io->magic == IO_MAGIC); + xassert(type >= 0); + + io->type = type; + switch (type) { + case TASK_STDERR: + case TASK_STDOUT: + obj->ops = &task_out_ops; + io->readers = list_create(NULL); + break; + case TASK_STDIN: + obj->ops = &task_in_ops; + io->buf = cbuf_create(512, 10240); + io->writers = list_create(NULL); + break; + case CLIENT_STDOUT: + io->readers = list_create(NULL); + case CLIENT_STDERR: + obj->ops = &client_ops; + io->buf = cbuf_create(16, 1048576); + io->writers = list_create(NULL); + break; + default: + error("io: unknown I/O obj type %d", type); + } + return obj; +} - /* read stderr code */ - if ((bytes_read = - read(task_start->pipes[CHILD_ERR_RD], cir_buf->tail, - cir_buf->write_size)) <= 0) { - debug("bytes_read: %i , errno: %i", bytes_read, - errno); - if ((bytes_read == SLURM_PROTOCOL_ERROR) - && (errno == EINTR)) { - continue; - } else { - - local_errno = errno; - info("error reading stderr stream for task %i, errno %i , bytes read %i ", task_start->launch_msg->global_task_ids[task_start->local_task_id], local_errno, bytes_read); - goto stderr_return; - } - } else { - cir_buf_write_update(cir_buf, bytes_read); - } +void 
+io_obj_destroy(io_obj_t *obj) +{ + struct io_info *io = (struct io_info *) obj->arg; + + xassert(obj != NULL); + xassert(io != NULL); + xassert(io->magic == IO_MAGIC); + + switch (io->type) { + case TASK_STDERR: + case TASK_STDOUT: + list_destroy(io->readers); + break; + case TASK_STDIN: + cbuf_destroy(io->buf); + list_destroy(io->writers); + break; + case CLIENT_STDOUT: + list_destroy(io->readers); + case CLIENT_STDERR: + cbuf_destroy(io->buf); + list_destroy(io->writers); + break; + default: + error("unknown IO object type: %ld", io->type); + } - /* debug */ - /* - info ( "%i stderr bytes read", bytes_read ) ; - write ( 2 , cir_buf->head , cir_buf->read_size ) ; - */ - /* debug */ - - /* reconnect code */ - if (attempt_reconnect) { - time_t curr_time = time(NULL); - if (difftime(curr_time, last_reconnect_try) > - RECONNECT_RETRY_TIME) { - slurm_close_stream(task_start-> - sockets - [SIG_STDERR_SOCK]); - if ((task_start->sockets[SIG_STDERR_SOCK] = - slurm_open_stream(& - (task_start-> - io_streams_dest))) - == SLURM_PROTOCOL_ERROR) { - local_errno = errno; - info("error reconnecting socket to srun to pipe stderr errno %i", local_errno); - last_reconnect_try = time(NULL); - continue; - } - attempt_reconnect = false; - } else { - continue; - } - } + xassert(io->magic = ~IO_MAGIC); + xfree(io); + xfree(obj); +} - /* write out socket code */ - if ((sock_bytes_written = - slurm_write_stream(task_start-> - sockets[SIG_STDERR_SOCK], - cir_buf->head, - cir_buf->read_size)) == - SLURM_PROTOCOL_ERROR) { - local_errno = errno; - switch (local_errno) { - case EBADF: - case EPIPE: - case ECONNREFUSED: - case ECONNRESET: - case ENOTCONN: - info("std err connection lost %i ", - local_errno); - attempt_reconnect = true; - slurm_close_stream(task_start-> - sockets - [SIG_STDERR_SOCK]); - break; - default: - info("error sending stderr stream for task %i , %m errno: %i", task_start->launch_msg->global_task_ids[task_start->local_task_id], local_errno); - error("uncaught errno %i", 
local_errno); - break; - } - continue; +static io_obj_t * +_io_obj_create(int fd, void *arg) +{ + io_obj_t *obj = xmalloc(sizeof(*obj)); + obj->fd = fd; + obj->arg = arg; + obj->ops = NULL; + return obj; +} + +static struct io_info * +_io_info_create(uint32_t id) +{ + struct io_info *io = (struct io_info *) xmalloc(sizeof(*io)); + xassert(io->magic = IO_MAGIC); + io->id = id; + io->buf = NULL; + io->type = -1; + return io; +} + +int +io_init_pipes(slurmd_job_t *job) +{ + int i; + for (i = 0; i < job->ntasks; i++) { + if (_io_init_pipes(job->task[i]) == SLURM_FAILURE) { + error("init_pipes <task %d> failed", i); + return SLURM_FAILURE; } - cir_buf_read_update(cir_buf, sock_bytes_written); } + return SLURM_SUCCESS; +} - stderr_return: - free_circular_buffer(cir_buf); - slurm_close_stream(task_start->sockets[SIG_STDERR_SOCK]); - close(task_start->pipes[CHILD_ERR_RD]); - pthread_exit(NULL); +static int +_io_write_header(struct io_info *client, srun_info_t *srun) +{ + slurm_io_stream_header_t hdr; + char *buf; + int retval; + int size = sizeof(hdr); + Buf buffer = init_buf(size); + + hdr.version = SLURM_PROTOCOL_VERSION; + memcpy(hdr.key, srun->key->data, SLURM_SSL_SIGNATURE_LENGTH); + hdr.task_id = client->id; + hdr.type = client->type == CLIENT_STDOUT ? 0 : 1; + + pack_io_stream_header(&hdr, buffer); + + /* XXX Shouldn't have to jump through these hoops to + * support slurm Buf type. Need a better way to do this + */ + size = buffer->processed; + buf = xfer_buf_data(buffer); + retval = cbuf_write(client->buf, buf, size, NULL); + xfree(buf); + return retval; } -int connect_io_stream(task_start_t * task_start, int out_or_err) +static int +_io_init_pipes(task_info_t *t) { - int local_errno; - if ((task_start->sockets[out_or_err] = - slurm_open_stream(&(task_start->io_streams_dest))) == - SLURM_PROTOCOL_ERROR) { - local_errno = errno; - info("error opening socket to srun to pipe %s %m errno: %i", out_or_err ? 
"stdout" : "stderr", local_errno); - return SLURM_PROTOCOL_ERROR; - } else { - return send_io_stream_header(task_start, out_or_err); + if ( (pipe(t->pin) < 0) + || (pipe(t->pout) < 0) + || (pipe(t->perr) < 0) ) { + error("io_init_pipes: pipe: %m"); + return SLURM_FAILURE; } + fd_set_close_on_exec(t->pin[1]); + fd_set_close_on_exec(t->pout[0]); + fd_set_close_on_exec(t->perr[0]); + + fd_set_nonblocking(t->pin[1]); + fd_set_nonblocking(t->pout[0]); + fd_set_nonblocking(t->perr[0]); + + return SLURM_SUCCESS; } -int send_io_stream_header(task_start_t * task_start, int out_or_err) +/* prepare for child I/O: + * dup stdin,stdout,stderr onto appropriate pipes and + * close write end of stdin, and read end of stdout/err + */ +int +io_prepare_child(task_info_t *t) { - slurm_io_stream_header_t io_header; - Buf buffer; - int rc; + if (dup2(t->pin[0], STDIN_FILENO ) < 0) { + error("dup2(stdin): %m"); + return SLURM_FAILURE; + } - buffer = init_buf (sizeof(slurm_io_stream_header_t)); - if (out_or_err == STDIN_OUT_SOCK) { - init_io_stream_header(&io_header, - task_start->launch_msg->credential-> - signature, - task_start->launch_msg-> - global_task_ids[task_start->local_task_id], - SLURM_IO_STREAM_INOUT); - pack_io_stream_header(&io_header, buffer); - rc = slurm_write_stream(task_start->sockets[STDIN_OUT_SOCK], - get_buf_data(buffer), get_buf_offset(buffer)); - } else { - - init_io_stream_header(&io_header, - task_start->launch_msg->credential-> - signature, - task_start->launch_msg-> - global_task_ids[task_start->local_task_id], - SLURM_IO_STREAM_SIGERR); - pack_io_stream_header(&io_header, buffer); - rc = slurm_write_stream(task_start->sockets[SIG_STDERR_SOCK], - get_buf_data(buffer), get_buf_offset(buffer)); + if (dup2(t->pout[1], STDOUT_FILENO) < 0) { + error("dup2(stdout): %m"); + return SLURM_FAILURE; } - free_buf(buffer); + if (dup2(t->perr[1], STDERR_FILENO) < 0) { + error("dup2(stderr): %m"); + return SLURM_FAILURE; + } + + /* ignore errors on close */ + close(t->pin[1] 
); + close(t->pout[0]); + close(t->perr[0]); + return SLURM_SUCCESS; +} + +static bool +_readable(io_obj_t *obj) +{ + bool rc; + struct io_info *io = (struct io_info *) obj->arg; + + xassert(io->magic == IO_MAGIC); + + if ((rc = (!io->disconnected && !io->eof && (obj->fd > 0)))) + debug3("readable %s", slurmd_io_str[io->type]); + + return rc; +} + +static bool +_writable(io_obj_t *obj) +{ + bool rc; + struct io_info *io = (struct io_info *) obj->arg; + + xassert(io->magic == IO_MAGIC); + + rc = (!io->disconnected + && ((cbuf_used(io->buf) > 0) || io->eof)); + if (rc) + debug3("writable %s", slurmd_io_str[io->type]); return rc; } +static int +_write(io_obj_t *obj, List objs) +{ + struct io_info *io = (struct io_info *) obj->arg; + int n; + + xassert(io->magic == IO_MAGIC); + xassert(io->type >= 0); + + if (io->disconnected) + return 0; + + verbose("Need to write %ld bytes to %s %d", + cbuf_used(io->buf), slurmd_io_str[io->type], io->id); + + + if (io->eof && (cbuf_used(io->buf) == 0)) { + if (close(obj->fd) < 0) + error("close: %m"); + obj->fd = -1; + if (io->type == CLIENT_STDERR || io->type == CLIENT_STDOUT) + _io_disconnect_client(io); + else + _shutdown_task_obj(io); + list_delete_all(objs, (ListFindF)find_obj, obj); + return 0; + } + + while ((n = cbuf_read_fd(io->buf, obj->fd, -1)) < 0) { + int local_errno = errno; + if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) + continue; + error("task <%ld> write failed: %s", io->id, + slurm_strerror(local_errno)); + return -1; + } + + verbose("Wrote %d bytes to %s %d", + n, slurmd_io_str[io->type], io->id); + + return 0; +} + +/* */ +static int +_shutdown_task_obj(struct io_info *t) +{ + List l; + ListIterator i; + struct io_info *r; + + l = (t->type == TASK_STDIN) ? t->writers : t->readers; + + i = list_iterator_create(l); + while ((r = list_next(i))) { + List rlist = (t->type == TASK_STDIN) ? 
r->readers : r->writers; + r->eof = 1; + list_delete_all(rlist, (ListFindF) find_obj, t); + } + list_iterator_destroy(i); + + return 0; +} -ssize_t read_EINTR(int fd, void *buf, size_t count) +static int +_task_read(io_obj_t *obj, List objs) { - ssize_t bytes_read; - while (true) { - if ((bytes_read = read(fd, buf, count)) <= 0) { - debug("bytes_read: %i , %m errno: %i", bytes_read, - errno); - if ((bytes_read == SLURM_PROTOCOL_ERROR) - && (errno == EINTR)) { - continue; - } + struct io_info *r, *t; + char buf[4096]; /* XXX Configurable? */ + ssize_t n, len = sizeof(buf); + ListIterator i; + + t = (struct io_info *) obj->arg; + + xassert(t->magic == IO_MAGIC); + xassert((t->type == TASK_STDOUT) || (t->type == TASK_STDERR)); + xassert(_validate_io_list(objs)); + + again: + if ((n = read(obj->fd, (void *) buf, len)) < 0) { + if (errno == EINTR) + goto again; + if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) { + error("%s %d: read returned EAGAIN", + slurmd_io_str[t->type], t->id); + return 0; } - return bytes_read; + error("Unable to read from task %ld fd %d errno %d %m", + t->id, obj->fd, errno); + return -1; + } + verbose("read %d bytes from %s %d", + n, slurmd_io_str[t->type], t->id); + + if (n == 0) { /* got eof */ + verbose("got eof on task %ld", t->id); + _shutdown_task_obj(t); + close(obj->fd); + obj->fd = -1; + if (list_delete_all(objs, (ListFindF) find_obj, obj) <= 0) + error("Unable to remove task object from list"); + return 0; + } + + /* copy buf to all readers */ + i = list_iterator_create(t->readers); + while((r = list_next(i))) { + n = cbuf_write(r->buf, (void *) buf, n, NULL); + verbose("wrote %ld bytes into %s buf", n, + slurmd_io_str[r->type]); + } + list_iterator_destroy(i); + + return 0; +} + +static int +_task_error(io_obj_t *obj, List objs) +{ + struct io_info *t = (struct io_info *) obj->arg; + xassert(t->magic == IO_MAGIC); + + error("error on %s %d", slurmd_io_str[t->type], t->id); + _shutdown_task_obj(t); + obj->fd = -1; + 
list_delete_all(objs, (ListFindF) find_obj, obj); + + xassert(_validate_io_list(objs)); + return -1; +} + +static int +_client_read(io_obj_t *obj, List objs) +{ + struct io_info *client = (struct io_info *) obj->arg; + struct io_info *reader; + char buf[1024]; /* XXX Configurable? */ + ssize_t n, len = sizeof(buf); + ListIterator i; + + xassert(client->magic == IO_MAGIC); + xassert(_validate_io_list(objs)); + xassert((client->type == CLIENT_STDOUT) + || (client->type == CLIENT_STDERR)); + + again: + if ((n = read(obj->fd, (void *) buf, len)) < 0) { + if (errno == EINTR) + goto again; + if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) + fatal("client read"); + error("read from client %ld: %m", client->id); + return -1; + } + + debug("read %d bytes from %s %d", n, slurmd_io_str[client->type], + client->id); + + if (n == 0) { /* got eof, disconnect this client */ + verbose("client %d closed connection", client->id); + if (!client->disconnected) + _io_disconnect_client(client); + xassert(_validate_io_list(objs)); + return 0; + } + + if (client->type == CLIENT_STDERR) { + /* unsigned long int signo = strtoul(buf, NULL, 10); */ + /* return kill(client->id, signo); */ + return 0; + } + + /* copy buf to all readers + * XXX Client should never have more than one reader, + * unless we choose to support this? 
+ */ + i = list_iterator_create(client->readers); + while((reader = list_next(i))) { + n = cbuf_write(reader->buf, (void *) buf, n, NULL); } + list_iterator_destroy(i); + + return 0; +} + +static int +_client_error(io_obj_t *obj, List objs) +{ + struct io_info *io = (struct io_info *) obj->arg; + + xassert(io->magic == IO_MAGIC); + + error("%s task %d", slurmd_io_str[io->type], io->id); + return 0; } + diff --git a/src/slurmd/io.h b/src/slurmd/io.h index 52691f9b5c2..d15c4ca519d 100644 --- a/src/slurmd/io.h +++ b/src/slurmd/io.h @@ -1,9 +1,10 @@ /*****************************************************************************\ - * io.h - + * src/slurmd/io.h - slurmd IO routines + * $Id$ ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Kevin Tew <tew1@llnl.gov> et. al. + * Written by Mark Grondona <mgrondona@llnl.gov>. * UCRL-CODE-2002-040. * * This file is part of SLURM, a resource management program. @@ -20,61 +21,79 @@ * details. * * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., + * with SLURM; if not, write to the Free Software Foundation, Inc., * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
\*****************************************************************************/ -#ifndef _SLURMD_IO_H_ -#define _SLURMD_IO_H_ -#include <src/slurmd/task_mgr.h> +#ifndef _IO_H +#define _IO_H -/* file descriptor defines */ - -#define MAX_TASKS_PER_LAUNCH 64 - -enum { - CHILD_IN_PIPE = 0, - CHILD_IN_RD_PIPE = 0, - CHILD_IN_WR_PIPE = 1, - CHILD_OUT_PIPE = 2, - CHILD_OUT_RD_PIPE = 2, - CHILD_OUT_WR_PIPE = 3, - CHILD_ERR_PIPE = 4, - CHILD_ERR_RD_PIPE = 4, - CHILD_ERR_WR_PIPE = 5 -}; - -/* prototypes */ -enum { - STDIN_OUT_SOCK = 0, - SIG_STDERR_SOCK = 1 -}; +#include <src/slurmd/job.h> +#include <src/common/eio.h> +/* + * Spawn IO handling thread. + * Initializes IO pipes, creates IO objects and appends them to job->objs, + * and opens 2*ntask initial connections for stdout/err, also appending these + * to job->objs list. + */ +int io_spawn_handler(slurmd_job_t *job); -/* forward_io - * controlling thread for io forwarding or io piping - * IN task_arg - task_arg structure containing per task launch info - * RET int - return_code +/* + * Frees memory associated with the given IO object */ -int forward_io(task_start_t * task_arg); +void io_obj_destroy(io_obj_t *obj); -/* individual io piping threads called by forward_io */ -void *stdin_io_pipe_thread(void *arg); -void *stdout_io_pipe_thread(void *arg); -void *stderr_io_pipe_thread(void *arg); +int io_init_pipes(slurmd_job_t *job); +int io_prepare_child(task_info_t *t); -/* wait_on_io_threads - * called by exec_task_thread parent proccess to insure streams have been flushed before returning task exit status - * IN task_arg - task_arg structure containing per task launch info - * RET int - return_code - */ -int wait_on_io_threads(task_start_t * task_start); +void io_close_all(slurmd_job_t *job); -int launch_task(task_start_t * task_start); -int wait_for_tasks(launch_tasks_request_msg_t * launch_msg, - task_start_t ** task_start); +/* Notes: + * + * slurmd <-+---> client (e.g. 
srun, file) + * `---> client + * + * slurmd can handle multiple client connections. Each task writes + * stdout and stderr data to the client and reads stdin and signals + * from the client streams. + * + * I/O objects: + * task stdout: R/0 pipe created by slurmd + * - buffer is null + * - readers list has at least one client reader (may be a file obj) + * - writers list is empty + * + * task stderr: R/O pipe created by slurmd + * - buffer is null + * - readers list has at least one client reader (may be a file obj) + * - writers list is empty + * + * task stdin: W/O pipe created by slurmd + * - circular buffer + * - readers list is empty + * - writers list contains only one client (may be a file obj) + * + * client stdout/in socket: + * - circular buffer for stdout data + * - readers list is one task stdin obj or empty + * - writers list is one task stdout obj + * + * client stderr/sig socket: + * - circular buffer for stderr data + * - readers list is null (data read is converted to signal) + * - writers list is one task stderr obj + * + * stdout/err file obj: + * - circular buffer for stdout/err data + * - readers list is empty + * - writers list is one task stdout/err obj + * + * stdin file obj + * - buffer is null + * - readers list is one or more task stdin obj's + * - writers list is empty + */ -int kill_launched_tasks(launch_tasks_request_msg_t * launch_msg, - task_start_t ** task_start, int i); -#endif +#endif /* !_IO_H */ diff --git a/src/slurmd/io_threads.c b/src/slurmd/io_threads.c deleted file mode 100644 index e4bf7b778dc..00000000000 --- a/src/slurmd/io_threads.c +++ /dev/null @@ -1,362 +0,0 @@ -/*****************************************************************************\ - * io_threads.c - - ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
- * Written by Kevin Tew <tew1@llnl.gov> et. al. - * UCRL-CODE-2002-040. - * - * This file is part of SLURM, a resource management program. - * For details, see <http://www.llnl.gov/linux/slurm/>. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. -\*****************************************************************************/ - -#include <stdlib.h> -#include <sys/types.h> -#include <pwd.h> -#include <grp.h> -#include <sys/wait.h> -#include <errno.h> -#include <unistd.h> -#include <string.h> -#include <pthread.h> - -#include <src/common/log.h> -#include <src/common/list.h> -#include <src/common/xmalloc.h> -#include <src/common/slurm_protocol_api.h> -#include <src/common/slurm_errno.h> -#include <src/common/util_signals.h> - -#include <src/slurmd/task_mgr.h> -#include <src/slurmd/shmem_struct.h> -#include <src/slurmd/circular_buffer.h> -#include <src/slurmd/io.h> -#include <src/slurmd/pipes.h> -#include <src/slurmd/reconnect_utils.h> - -void * stdin_io_pipe_thread ( void * arg ) -{ - task_start_t * task_start = ( task_start_t * ) arg ; - int bytes_read ; - int bytes_written ; - int local_errno ; - circular_buffer_t * cir_buf ; - - init_circular_buffer ( & cir_buf ) ; - - posix_signal_pipe_ignore ( ) ; - - while ( true ) - { - if ( ( cir_buf->write_size == 0 ) ) - { - debug3 ( "stdin cir_buf->write_size == 0 this shouldn't 
happen" ) ; - break ; - } - - if ( ( bytes_read = slurm_read_stream ( task_start->sockets[STDIN_OUT_SOCK] , cir_buf->tail , cir_buf->write_size ) ) <= 0 ) - { - sleep (1) ; - local_errno = errno ; - if ( bytes_read == 0) - { - debug3 ( "STDIN stdin 0 returned EOF on socket ") ; - continue ; - //break ; - } - else if ( bytes_read == -1 ) - { - switch ( local_errno ) - { - case EBADF: - case EPIPE: - case ECONNREFUSED: - case ECONNRESET: - case ENOTCONN: - debug3 ( "STDIN stdin connection lost %m errno: %i", local_errno ) ; - continue ; - //break ; - default: - debug3 ( "%i STDIN uncaught error reading stdin sock stream, %m errno: %i , bytes read %i ", - task_start -> launch_msg -> global_task_ids[ task_start -> local_task_id ] , local_errno , bytes_read ) ; - continue ; - //break; - } - } - else - { - debug3 ( "STDIN bytes_read: %i don't know what to do with this return code ", bytes_read ) ; - continue ; - //break ; - } - } - else - { - cir_buf_write_update ( cir_buf , bytes_read ) ; - } - - /* debug */ - /* - write ( 1 , "stdin-", 6 ) ; - write ( 1 , cir_buf->head , cir_buf->read_size ) ; - debug3 ( "%i stdin bytes read", bytes_read ) ; - */ - /* debug */ - - - if ( ( bytes_written = write_EINTR ( task_start->pipes[CHILD_IN_WR_PIPE] , cir_buf->head , cir_buf->read_size ) ) <= 0 ) - { - - local_errno = errno ; - debug3 ( "%i error sending stdin pipe stream, %m errno: %i , bytes written %i ", - task_start -> launch_msg -> global_task_ids[ task_start -> local_task_id ] , local_errno , bytes_written) ; - goto stdin_return ; - } - else - { - cir_buf_read_update ( cir_buf , bytes_written ) ; - } - } - stdin_return: - free_circular_buffer ( cir_buf ) ; - close ( task_start->pipes[CHILD_IN_WR_PIPE] ) ; - pthread_exit ( NULL ) ; -} - -#define RECONNECT_RETRY_TIME 1 -void * stdout_io_pipe_thread ( void * arg ) -{ - task_start_t * task_start = ( task_start_t * ) arg ; - int bytes_read ; - int sock_bytes_written ; - int local_errno ; - int attempt_reconnect = false ; - time_t 
last_reconnect_try = 0 ; - circular_buffer_t * cir_buf ; - - init_circular_buffer ( & cir_buf ) ; - - posix_signal_pipe_ignore ( ) ; - - while ( true ) - { - if ( ( cir_buf->write_size == 0 ) ) - { - debug3 ( "stdout cir_buf->write_size == 0 this shouldn't happen" ) ; - break ; - } - - /* read stdout code */ - if ( ( bytes_read = read_EINTR ( task_start->pipes[CHILD_OUT_RD_PIPE] , cir_buf->tail , cir_buf->write_size ) ) <= 0 ) - { - local_errno = errno ; - debug3 ( "%i error reading stdout pipe stream, %m errno: %i , bytes read %i ", - task_start -> launch_msg -> global_task_ids[ task_start -> local_task_id ] , local_errno , bytes_read ) ; - goto stdout_return ; - } - else - { - cir_buf_write_update ( cir_buf , bytes_read ) ; - } - - /* debug */ - /* - write ( 1 , cir_buf->head , cir_buf->read_size ) ; - debug3 ( "%i stdout bytes read", bytes_read ) ; - */ - /* debug */ - - /* reconnect code */ - if ( attempt_reconnect ) - { - time_t curr_time = time ( NULL ) ; - if ( difftime ( curr_time , last_reconnect_try ) > RECONNECT_RETRY_TIME ) - { - slurm_close_stream ( task_start->sockets[STDIN_OUT_SOCK] ) ; - if ( ( task_start->sockets[STDIN_OUT_SOCK] = slurm_open_stream ( & ( task_start -> io_streams_dest ) ) ) == SLURM_PROTOCOL_ERROR ) - { - local_errno = errno ; - debug3 ( "error reconnecting socket to srun to pipe stdout errno %i" , local_errno ) ; - last_reconnect_try = time ( NULL ) ; - continue ; - } - attempt_reconnect = false ; - } - else - { - continue ; - } - } - - /* write out socket code */ - if ( ( sock_bytes_written = slurm_write_stream ( task_start->sockets[STDIN_OUT_SOCK] , cir_buf->head , cir_buf->read_size ) ) <= 0 ) - { - local_errno = errno ; - if ( sock_bytes_written == 0) - { - debug3 ( "stdout 0 returned EOF on socket ") ; - break ; - } - else if ( sock_bytes_written == -1 ) - { - switch ( local_errno ) - { - case EBADF: - case EPIPE: - case ECONNREFUSED: - case ECONNRESET: - case ENOTCONN: - debug3 ( "stdout connection lost %m errno: %i", 
local_errno ) ; - attempt_reconnect = true ; - slurm_close_stream ( task_start->sockets[STDIN_OUT_SOCK] ) ; - break ; - default: - debug3 ( "%i uncaught error sending stdout sock stream, errno %i sock bytes written %i", - task_start -> launch_msg -> global_task_ids[ task_start -> local_task_id ] , local_errno , sock_bytes_written ) ; - break ; - } - } - else - { - debug3 ( "bytes_read: %i don't know what to do with this return code ", bytes_read ) ; - break ; - } - } - else - { - cir_buf_read_update ( cir_buf , sock_bytes_written ) ; - } - } - - stdout_return: - free_circular_buffer ( cir_buf ) ; - slurm_close_stream ( task_start->sockets[STDIN_OUT_SOCK] ) ; - close ( task_start->pipes[CHILD_OUT_RD_PIPE] ) ; - pthread_exit ( NULL ) ; -} - -void * stderr_io_pipe_thread ( void * arg ) -{ - task_start_t * task_start = ( task_start_t * ) arg ; - int bytes_read ; - int sock_bytes_written ; - int local_errno ; - int attempt_reconnect = false ; - time_t last_reconnect_try = 0 ; - circular_buffer_t * cir_buf ; - - init_circular_buffer ( & cir_buf ) ; - - posix_signal_pipe_ignore ( ) ; - - while ( true ) - { - if ( ( cir_buf->write_size == 0 ) ) - { - debug3 ( "stderr cir_buf->write_size == 0 this shouldn't happen" ) ; - break ; - } - - /* read stderr code */ - if ( ( bytes_read = read_EINTR ( task_start->pipes[CHILD_ERR_RD_PIPE] , cir_buf->tail , cir_buf->write_size ) ) <= 0 ) - { - local_errno = errno ; - debug3 ( "%i error reading stderr pipe stream, errno %i , bytes read %i ", - task_start -> launch_msg -> global_task_ids[ task_start -> local_task_id ] , local_errno , bytes_read ) ; - goto stderr_return ; - } - else - { - cir_buf_write_update ( cir_buf , bytes_read ) ; - } - - /* debug */ - /* - debug3 ( "%i stderr bytes read", bytes_read ) ; - write ( 2 , cir_buf->head , cir_buf->read_size ) ; - */ - /* debug */ - - /* reconnect code */ - if ( attempt_reconnect ) - { - time_t curr_time = time ( NULL ) ; - if ( difftime ( curr_time , last_reconnect_try ) > 
RECONNECT_RETRY_TIME ) - { - slurm_close_stream ( task_start->sockets[SIG_STDERR_SOCK] ) ; - if ( ( task_start->sockets[SIG_STDERR_SOCK] = slurm_open_stream ( &( task_start -> io_streams_dest ) ) ) == SLURM_PROTOCOL_ERROR ) - { - local_errno = errno ; - debug3 ( "error reconnecting socket to srun to pipe stderr errno %i" , local_errno ) ; - last_reconnect_try = time ( NULL ) ; - continue ; - } - attempt_reconnect = false ; - } - else - { - continue ; - } - } - - /* write out socket code */ - if ( ( sock_bytes_written = slurm_write_stream ( task_start->sockets[SIG_STDERR_SOCK] , cir_buf->head , cir_buf->read_size ) ) <= 0 ) - { - local_errno = errno ; - if ( sock_bytes_written == 0) - { - debug3 ( "stderr 0 returned EOF on socket ") ; - break ; - } - else if ( sock_bytes_written == -1 ) - { - switch ( local_errno ) - { - case EBADF: - case EPIPE: - case ECONNREFUSED: - case ECONNRESET: - case ENOTCONN: - debug3 ( "stderr connection lost %m errno: %i", local_errno ) ; - attempt_reconnect = true ; - slurm_close_stream ( task_start->sockets[SIG_STDERR_SOCK] ) ; - break ; - default: - debug3 ( "%i uncaught error sending stderr sock stream, %m errno: %i sock bytes %i", - task_start -> launch_msg -> global_task_ids[ task_start -> local_task_id ] , local_errno , sock_bytes_written ) ; - break ; - } - } - else - { - debug3 ( "bytes_read: %i don't know what to do with this return code ", bytes_read ) ; - break ; - } - } - else - { - cir_buf_read_update ( cir_buf , sock_bytes_written ) ; - } - } - - stderr_return: - free_circular_buffer ( cir_buf ) ; - slurm_close_stream ( task_start->sockets[SIG_STDERR_SOCK] ) ; - close ( task_start->pipes[CHILD_ERR_RD_PIPE] ) ; - pthread_exit ( NULL ) ; -} - diff --git a/src/slurmd/job.c b/src/slurmd/job.c new file mode 100644 index 00000000000..5d7c2515d23 --- /dev/null +++ b/src/slurmd/job.c @@ -0,0 +1,272 @@ +/*****************************************************************************\ + * src/slurmd/job.c - slurmd_job_t routines + * 
$Id$ + ***************************************************************************** + * Copyright (C) 2002 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Mark Grondona <mgrondona@llnl.gov>. + * UCRL-CODE-2002-040. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+\*****************************************************************************/ + +#if HAVE_CONFIG_H +# include <config.h> +#endif + +#if HAVE_STRING_H +# include <string.h> +#endif + +#include <src/common/xmalloc.h> +#include <src/common/xassert.h> +#include <src/common/xstring.h> +#include <src/common/log.h> +#include <src/common/eio.h> +#include <src/common/slurm_protocol_api.h> + +#include <src/slurmd/job.h> +#include <src/slurmd/shm.h> +#include <src/slurmd/io.h> + +static char ** _array_copy(int n, char **src); +static void _array_free(int n, char ***array); +static void _srun_info_destructor(void *arg); +static void _job_init_task_info(slurmd_job_t *job, + launch_tasks_request_msg_t *msg); + + +/* create a slurmd job structure from a launch tasks message */ +slurmd_job_t * +job_create(launch_tasks_request_msg_t *msg) +{ + slurmd_job_t *job; + srun_info_t *srun; + + xassert(msg != NULL); + + job = xmalloc(sizeof(*job)); + + job->jobid = msg->job_id; + job->stepid = msg->job_step_id; + job->uid = msg->uid; + job->nprocs = msg->nprocs; + job->nnodes = msg->nnodes; + job->nodeid = msg->srun_node_id; + job->ntasks = msg->tasks_to_launch; + + job->timelimit = msg->credential->expiration_time; + + job->envc = msg->envc; + job->env = _array_copy(job->envc, msg->env); + job->argc = msg->argc; + job->argv = _array_copy(job->argc, msg->argv); + + job->cwd = xstrdup(msg->cwd); + +#ifdef HAVE_ELAN + job->qsw_job = msg->qsw_job; +#endif + + job->objs = list_create((ListDelF) io_obj_destroy); + + srun = srun_info_create( (void *)msg->credential->signature, + msg->response_addr, + msg->streams + ); + + job->sruns = list_create((ListDelF) _srun_info_destructor); + + list_append(job->sruns, (void *) srun); + + _job_init_task_info(job, msg); + + return job; +} + +static void +_job_init_task_info(slurmd_job_t *job, launch_tasks_request_msg_t *msg) +{ + int i; + int n = job->ntasks; + srun_info_t *srun = (srun_info_t *) list_peek(job->sruns); + + job->task = (task_info_t **) 
xmalloc(n * sizeof(task_info_t *)); + + for (i = 0; i < n; i++){ + uint32_t gid = msg->global_task_ids[i]; + job->task[i] = task_info_create(i, gid); + list_append(job->task[i]->srun_list, (void *)srun); + } +} + + +/* remove job from shared memory, kill initiated tasks, etc */ +void +job_kill(slurmd_job_t *job) +{ + job_state_t *state; + + xassert(job != NULL); + + if (!(state = shm_lock_step_state(job->jobid, job->stepid))) + return; + + if (*state > SLURMD_JOB_STARTING) { + /* signal all tasks on step->task_list + * This will result in task exit msgs being sent to srun + * XXX IMPLEMENT + */ + /* job_signal_tasks(job, SIGKILL); */ + } + *state = SLURMD_JOB_ENDING; + shm_unlock_step_state(job->jobid, job->stepid); + + /* forward remaining task_exit messages? */ + /* send_exit_codes() */ +} + +/* + * Release everything owned by a job: the env/argv copies, each task_info, + * the task pointer array, the cwd string, the srun and I/O object lists, + * and finally the job structure itself. + */ +void +job_destroy(slurmd_job_t *job) +{ + int i; + + _array_free(job->envc, &job->env); + _array_free(job->argc, &job->argv); + + for (i = 0; i < job->ntasks; i++) + task_info_destroy(job->task[i]); + xfree(job->task); /* pointer array from _job_init_task_info() */ + xfree(job->cwd); /* xstrdup()'d in job_create() */ + list_destroy(job->sruns); + list_destroy(job->objs); + + xfree(job); +} + +/* + * Duplicate the first n strings of src into a newly allocated array of + * n+1 slots whose last slot is NULL. + */ +static char ** +_array_copy(int n, char **src) +{ + int i; + char **dst = xmalloc((n+1) * sizeof(char *)); + + for (i = 0; i < n; i++) + dst[i] = xstrdup(src[i]); + dst[n] = NULL; /* terminator belongs in slot n, not slot -1 */ + return dst; +} + +/* + * Free the n strings held in *array, then the array itself, and reset + * the caller's pointer to NULL. + */ +static void +_array_free(int n, char ***array) +{ + while (--n >= 0) + xfree((*array)[n]); /* index the string array, not the char *** */ + xfree(*array); + *array = NULL; +} + + +/* + * Allocate an srun_info holding a private copy of the I/O key plus the + * response and I/O addresses. keydata must point to at least + * sizeof(key->data) bytes. + */ +struct srun_info * +srun_info_create(void *keydata, slurm_addr resp_addr, slurm_addr ioaddr) +{ + struct srun_info *srun = xmalloc(sizeof(*srun)); + srun_key_t *key = xmalloc(sizeof(*key )); + + /* copy the whole key array; sizeof(*key->data) is a single byte */ + memcpy((void *) key->data, keydata, sizeof(key->data)); + + srun->key = key; + srun->ioaddr = ioaddr; + srun->resp_addr = resp_addr; + return srun; +} + +/* destructor for list routines */ +static void +_srun_info_destructor(void *arg) +{ + struct srun_info *srun = (struct srun_info *)arg; + srun_info_destroy(srun); +} + +void +srun_info_destroy(struct srun_info *srun) +{ + xfree(srun->key); + 
xfree(srun);
+}
+
+/* Allocate and initialize the state for one task.
+ *
+ * taskid  - local task id, stored in ->id   (must be >= 0)
+ * gtaskid - global task id, stored in ->gid (must be >= 0)
+ *
+ * The task starts in SLURMD_TASK_INIT with pid and all pipe fds set to -1,
+ * no I/O objects or output files, and a newly created srun_list.
+ */
+task_info_t *
+task_info_create(int taskid, int gtaskid)
+{
+	task_info_t *t = (task_info_t *) xmalloc(sizeof(*t));
+
+	xassert(taskid >= 0);
+	xassert(gtaskid >= 0);
+
+	slurm_mutex_init(&t->mutex);
+	slurm_mutex_lock(&t->mutex);
+	t->state       = SLURMD_TASK_INIT;
+	t->id          = taskid;
+	t->gid         = gtaskid;
+	t->pid         = (pid_t) -1;
+	t->pin[0]      = -1;
+	t->pin[1]      = -1;
+	t->pout[0]     = -1;
+	t->pout[1]     = -1;
+	t->perr[0]     = -1;
+	t->perr[1]     = -1;
+	t->estatus     = -1;
+	t->in          = NULL;
+	t->out         = NULL;
+	t->err         = NULL;
+	t->ofile       = NULL;	/* set explicitly, like the other pointers */
+	t->errfile     = NULL;
+	t->srun_list   = list_create(NULL);
+	slurm_mutex_unlock(&t->mutex);
+	return t;
+}
+
+
+/* Release a task_info created by task_info_create(): destroys its
+ * srun_list and mutex, then frees the structure.
+ */
+void
+task_info_destroy(task_info_t *t)
+{
+	slurm_mutex_lock(&t->mutex);
+	list_destroy(t->srun_list);
+	slurm_mutex_unlock(&t->mutex);
+	slurm_mutex_destroy(&t->mutex);
+	xfree(t);
+}
+
+/* Record this job step in shared memory via shm_insert_step().
+ * A failed insert is logged but not reported to the caller.
+ */
+void
+job_update_shm(slurmd_job_t *job)
+{
+	/* Zeroed so that fields not assigned below are never handed to
+	 * shm_insert_step() as stack garbage.
+	 * NOTE(review): which fields shm_insert_step() reads is not visible
+	 * here -- confirm.
+	 */
+	job_step_t s = {0};
+
+	s.uid       = job->uid;
+	s.jobid     = job->jobid;
+	s.stepid    = job->stepid;
+	s.ntasks    = job->ntasks;
+	s.timelimit = job->timelimit;
+
+	s.sw_id     = 0;
+
+	if (shm_insert_step(&s) == SLURM_FAILURE)
+		error("Updating shmem with new step info: %m");
+}
+
+/* Remove this job step from shared memory via shm_delete_step().
+ * A failed delete is logged but not reported to the caller.
+ */
+void
+job_delete_shm(slurmd_job_t *job)
+{
+	/* jobid and stepid are uint32_t: print them with %u, not %ld */
+	if (shm_delete_step(job->jobid, job->stepid) == SLURM_FAILURE)
+		error("deleting step: %u.%u not found in shmem",
+		      job->jobid, job->stepid);
+}
diff --git a/src/slurmd/job.h b/src/slurmd/job.h
new file mode 100644
index 00000000000..8778f854dae
--- /dev/null
+++ b/src/slurmd/job.h
@@ -0,0 +1,129 @@
+/*****************************************************************************\
+ * src/slurmd/job.h - slurmd_job_t definition
+ * $Id$
+ *****************************************************************************
+ * Copyright (C) 2002 The Regents of the University of California.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Written by Mark Grondona <mgrondona@llnl.gov>.
+ * UCRL-CODE-2002-040.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <http://www.llnl.gov/linux/slurm/>.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+\*****************************************************************************/
+
+#ifndef _JOB_H
+#define _JOB_H
+
+#ifdef HAVE_PTHREAD
+#include <pthread.h>
+#endif
+
+#include <src/common/macros.h>
+#include <src/common/slurm_protocol_api.h>
+#include <src/common/list.h>
+#include <src/common/eio.h>
+
+
+#define SLURM_KEY_SIZE SLURM_SSL_SIGNATURE_LENGTH
+typedef struct srun_key {
+	unsigned char data[SLURM_KEY_SIZE];
+} srun_key_t;
+
+typedef enum task_state {
+	SLURMD_TASK_INIT,
+	SLURMD_TASK_STARTING,
+	SLURMD_TASK_RUNNING,
+	SLURMD_TASK_COMPLETE
+} task_state_t;
+
+typedef struct task_info {
+	pthread_mutex_t mutex;	    /* mutex to protect task state */
+	task_state_t    state;	    /* task state */
+
+	int             id;	    /* local task id */
+	uint32_t        gid;	    /* global task id */
+	pid_t           pid;	    /* task pid */
+	int             pin[2];     /* stdin pipe */
+	int             pout[2];    /* stdout pipe */
+	int             perr[2];    /* stderr pipe */
+	io_obj_t       *in,
+	               *out,        /* I/O objects used in IO event loop */
+	               *err;
+	int             estatus;    /* this task's exit status */
+	char *          ofile;      /* output file (if any) */
+	char *          errfile;    /* error file (if any) */
+	List            srun_list;  /* List of srun objs for this task */
+} task_info_t;
+
+
+typedef struct srun_info {
+	srun_key_t *key;	   /* srun key for IO verification */
+	slurm_addr  resp_addr;	   /* response addr for task exit msg */
+	slurm_addr  ioaddr;	   /* Address to connect on for I/O */
+} srun_info_t;
+
+typedef struct slurmd_job {
+	uint32_t       jobid;      /* job id (a step is jobid.stepid)      */
+	uint32_t       stepid;     /* step id within the job               */
+	uint32_t       nnodes;
+	uint32_t       nprocs;
+	uint32_t       nodeid;
+	uint32_t       ntasks;     /* number of entries in task[]          */
+	uint16_t       envc;
+	uint16_t       argc;
+	char         **env;        /* environment passed to execve()       */
+	char         **argv;       /* argument vector passed to execve()   */
+	char          *cwd;        /* directory each task chdir()s to      */
+#ifdef HAVE_ELAN
+	qsw_jobinfo_t qsw_job;
+#endif
+	uid_t          uid;        /* user each task setuid()s to          */
+	time_t         timelimit;  /* copied into the shm step record      */
+	task_info_t  **task;       /* per-task state, indexed by local id  */
+	List           objs;
+	List           sruns;
+	int            unixsock;
+	pthread_t      ioid;       /* IO thread, joined once tasks are done */
+} slurmd_job_t;
+
+
+slurmd_job_t * job_create(launch_tasks_request_msg_t *msg);
+
+void job_kill(slurmd_job_t *job);
+
+void job_destroy(slurmd_job_t *job);
+
+struct srun_info * srun_info_create(void *keydata, slurm_addr resp_addr,
+		                    slurm_addr ioaddr);
+
+void srun_info_destroy(struct srun_info *srun);
+
+/* Allocate a task_info in state SLURMD_TASK_INIT with no pid, pipe fds
+ * set to -1, no I/O objects and a newly created srun list.
+ */
+struct task_info * task_info_create(int taskid, int gtaskid);
+
+/* Destroy t's srun list and mutex, then free t itself. */
+void task_info_destroy(struct task_info *t);
+
+/* Insert this job step's uid, ids, task count and time limit into shm. */
+void job_update_shm(slurmd_job_t *job);
+
+/* Remove this job step (jobid.stepid) from shm. */
+void job_delete_shm(slurmd_job_t *job);
+
+#endif /* !_JOB_H */
diff --git a/src/slurmd/locks.c b/src/slurmd/locks.c
deleted file mode 100644
index a0c1991301f..00000000000
--- a/src/slurmd/locks.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/*****************************************************************************\
- * locks.c - semaphore functions for slurmd
- *****************************************************************************
- * Copyright (C) 2002 The Regents of the University of California.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Moe Jette <jette@llnl.gov>, Randy Sanchez <rsancez@llnl.gov>
- * UCRL-CODE-2002-040.
- *
- * This file is part of SLURM, a resource management program.
- * For details, see <http://www.llnl.gov/linux/slurm/>.
- * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with SLURM; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. -\*****************************************************************************/ - -#ifdef HAVE_CONFIG_H -# include <config.h> -#endif - -#include <errno.h> -#include <pthread.h> -#include <string.h> -#include <sys/types.h> - -#include <src/slurmd/locks.h> -#include <src/common/log.h> - -pthread_mutex_t locks_mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_cond_t locks_cond = PTHREAD_COND_INITIALIZER; -slurmd_lock_flags_t slurmd_locks; - -void wr_rdlock (lock_datatype_t datatype); -void wr_rdunlock (lock_datatype_t datatype); -void wr_wrlock (lock_datatype_t datatype); -void wr_wrunlock (lock_datatype_t datatype); - -/* init_locks - create locks used for slurmd data structure access control */ -void -init_locks ( void ) -{ - /* just clear all semaphores */ - memset ((void *)&slurmd_locks, 0, sizeof (slurmd_locks) ); -} - -/* lock_slurmd - Issue the required lock requests in a well defined order - * Returns 0 on success, -1 on failure */ -void -lock_slurmd (slurmd_lock_t lock_levels) -{ - if (lock_levels.jobs == READ_LOCK) - wr_rdlock (JOB_LIST_LOCK); - else if (lock_levels.jobs == WRITE_LOCK) - wr_wrlock (JOB_LIST_LOCK); - - if (lock_levels.tasks == READ_LOCK) - wr_rdlock (TASK_LIST_LOCK); - else if (lock_levels.tasks == WRITE_LOCK) - wr_wrlock (TASK_LIST_LOCK); - - if 
(lock_levels.credentials == READ_LOCK) - wr_rdlock (CREDENTIAL_STATE_LOCK); - else if (lock_levels.credentials == WRITE_LOCK) - wr_wrlock (CREDENTIAL_STATE_LOCK); -} - -/* unlock_slurmd - Issue the required unlock requests in a well defined order */ -void -unlock_slurmd (slurmd_lock_t lock_levels) -{ - if (lock_levels.credentials == READ_LOCK) - wr_rdunlock (CREDENTIAL_STATE_LOCK); - else if (lock_levels.credentials == WRITE_LOCK) - wr_wrunlock (CREDENTIAL_STATE_LOCK); - - if (lock_levels.tasks == READ_LOCK) - wr_rdunlock (TASK_LIST_LOCK); - else if (lock_levels.tasks == WRITE_LOCK) - wr_wrunlock (TASK_LIST_LOCK); - - if (lock_levels.jobs == READ_LOCK) - wr_rdunlock (JOB_LIST_LOCK); - else if (lock_levels.jobs == WRITE_LOCK) - wr_wrunlock (JOB_LIST_LOCK); -} - -/* wr_rdlock - Issue a read lock on the specified data type */ -void -wr_rdlock (lock_datatype_t datatype) -{ - pthread_mutex_lock (&locks_mutex); - while (1) { - if ((slurmd_locks.entity [write_wait_lock (datatype)] == 0) && - (slurmd_locks.entity [write_lock (datatype)] == 0)) { - slurmd_locks.entity [read_lock (datatype)]++; - break; - } - else { /* wait for state change and retry */ - pthread_cond_wait (&locks_cond, &locks_mutex); - } - } - pthread_mutex_unlock (&locks_mutex); -} - -/* wr_rdunlock - Issue a read unlock on the specified data type */ -void -wr_rdunlock (lock_datatype_t datatype) -{ - pthread_mutex_lock (&locks_mutex); - slurmd_locks.entity [read_lock (datatype)]--; - pthread_mutex_unlock (&locks_mutex); - pthread_cond_broadcast (&locks_cond); -} - -/* wr_wrlock - Issue a write lock on the specified data type */ -void -wr_wrlock (lock_datatype_t datatype) -{ - pthread_mutex_lock (&locks_mutex); - slurmd_locks.entity [write_wait_lock (datatype)]++; - - while (1) { - if ((slurmd_locks.entity [read_lock (datatype)] == 0) && - (slurmd_locks.entity [write_lock (datatype)] == 0)) { - slurmd_locks.entity [write_lock (datatype)]++; - slurmd_locks.entity [write_wait_lock (datatype)]--; - break; - } 
- else { /* wait for state change and retry */ - pthread_cond_wait (&locks_cond, &locks_mutex); - } - } - pthread_mutex_unlock (&locks_mutex); -} - -/* wr_wrunlock - Issue a write unlock on the specified data type */ -void -wr_wrunlock (lock_datatype_t datatype) -{ - pthread_mutex_lock (&locks_mutex); - slurmd_locks.entity [write_lock (datatype)]--; - pthread_mutex_unlock (&locks_mutex); - pthread_cond_broadcast (&locks_cond); -} - -/* get_lock_values - Get the current value of all locks */ -void -get_lock_values (slurmd_lock_flags_t *lock_flags) -{ - if (lock_flags == NULL) - fatal ("get_lock_values passed null pointer"); - - memcpy ((void *)lock_flags, (void *) &slurmd_locks, sizeof (slurmd_locks) ); -} diff --git a/src/slurmd/locks.h b/src/slurmd/locks.h deleted file mode 100644 index 608b818d3fc..00000000000 --- a/src/slurmd/locks.h +++ /dev/null @@ -1,115 +0,0 @@ -/*****************************************************************************\ - * locks.h - definitions for semaphore functions for slurmd (locks.c) - ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Moe Jette <jette@llnl.gov>, Randy Sanchez <rsancez@llnl.gov> - * UCRL-CODE-2002-040. - * - * This file is part of SLURM, a resource management program. - * For details, see <http://www.llnl.gov/linux/slurm/>. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. 
- * - * You should have received a copy of the GNU General Public License along - * with SLURM; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. -\*****************************************************************************/ - -/*****************************************************************************\ - * Read/write locks are implemented by the routines in this directory by using - * a set of three (3) UNIX semaphores to lock a resource. - * - * The set of three (3) semaphores represent a reader semaphore, - * a writer semaphore and a writers waiting semaphore. - * - * The reader semaphore indicates the number of readers that currently have a - * read lock on the resource. - * The writers semaphore indicates that a writer has the resource locked. - * The writers waiting semaphore indicates the number of writers waiting to - * lock the resource. - * - * Readers cannot lock the resource until there are no writers waiting for the - * resource and the resource is not locked by a writer. - * - * Writers cannot lock the resource if the resource is locked by other writers - * or if any readers have the resource locked. - * - * Writers will have priority in locking the resource over readers because - * of the writers waiting semaphore. The writers waiting semaphore is incremented - * by a writer that is waiting to lock the resource. A reader cannot lock - * the resource until there are no writers waiting to lock the resource and - * the resource is not locked by a writer. - * - * So, if the resource is locked by an unspecified number of readers, - * and a writer trys to lock the resource, then the writer will be blocked - * until all of the previous readers have unlocked the resource. But, - * just before the writer checked to see if there were any readers locking - * the resource, the writer incremented the writers waiting semaphore, - * indicating that there is now a writer waiting to lock the resource. 
- * In the mean time, if an unspecified number of readers try to lock the - * resource after a writer (or writers) has tried to lock the resource, - * those readers will be blocked until all writers have obtained the lock on - * the resource, used the resource and unlocked the resource. The subsequent - * unspecified number of readers are blocked because they are waiting for the - * number of writers waiting semaphore to become 0, meaning that there are no - * writers waiting to lock the resource. - * - * use init_locks() to initialize the locks then - * lock_slurd() and unlock_slurmd() to get the ordering so as to - * prevent deadlock. The arguments indicate the lock type required for - * each entity (job, node, etc.) in a well defined order. - * For example: no lock on the config data structure, read lock on the job - * and node data structures, and write lock on the partition data structure - * would look like this: "{ NO_LOCK, READ_LOCK, READ_LOCK, WRITE_LOCK }" -\*****************************************************************************/ - -/* levels of locking required for each data structure */ -typedef enum { - NO_LOCK, - READ_LOCK, - WRITE_LOCK -} lock_level_t; - -/* slurmd specific data structures to lock via APIs */ -typedef struct { - lock_level_t jobs; - lock_level_t tasks; - lock_level_t credentials; -} slurmd_lock_t; - -/* Interval lock structure - * we actually use three semaphores for each data type, see macros below - * (lock_datatype_t * 3 + 0) = read_lock - * (lock_datatype_t * 3 + 1) = write_lock - * (lock_datatype_t * 3 + 2) = write_wait_lock - */ -typedef enum { - JOB_LIST_LOCK, - TASK_LIST_LOCK, - CREDENTIAL_STATE_LOCK, - ENTITY_COUNT -} lock_datatype_t; - -#define read_lock(data_type) (data_type * 3 + 0) -#define write_lock(data_type) (data_type * 3 + 1) -#define write_wait_lock(data_type) (data_type * 3 + 2) - -typedef struct { - int entity[ENTITY_COUNT * 3]; -} slurmd_lock_flags_t; - - -extern void get_lock_values (slurmd_lock_flags_t 
*lock_flags);
-extern void init_locks ( void );
-extern void lock_slurmd (slurmd_lock_t lock_levels);
-extern void unlock_slurmd (slurmd_lock_t lock_levels);
-
diff --git a/src/slurmd/mgr.c b/src/slurmd/mgr.c
new file mode 100644
index 00000000000..a2885ab179a
--- /dev/null
+++ b/src/slurmd/mgr.c
@@ -0,0 +1,362 @@
+/*****************************************************************************\
+ * src/slurmd/mgr.c - job manager functions for slurmd
+ * $Id$
+ *****************************************************************************
+ * Copyright (C) 2002 The Regents of the University of California.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Written by Mark Grondona <mgrondona@llnl.gov>.
+ * UCRL-CODE-2002-040.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <http://www.llnl.gov/linux/slurm/>.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+\*****************************************************************************/
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+
+#include <errno.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <pwd.h>
+#include <grp.h>
+#include <signal.h>
+
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+
+#include <src/common/log.h>
+
+#include <src/slurmd/mgr.h>
+#include <src/slurmd/io.h>
+#include <src/slurmd/shm.h>
+#include <src/slurmd/interconnect.h>
+
+static int _unblock_all_signals(void);
+static int _send_exit_msg(int rc, task_info_t *t);
+
+/* Launch a job step on this node
+ *
+ * Reinitializes logging, calls shm_init(), builds a slurmd_job_t from the
+ * launch request and runs it with slurmd_run_job().
+ *
+ * Returns SLURM_ERROR if shm_init() or job_create() fails.  Otherwise it
+ * does not return: the process calls exit(0) once the job has been run.
+ */
+int
+mgr_launch_tasks(launch_tasks_request_msg_t *msg)
+{
+	slurmd_job_t *job;
+
+	log_reinit();
+	if (shm_init() < 0)
+		return SLURM_ERROR;
+	if (!(job = job_create(msg))) {
+		shm_fini();	/* pair with shm_init(), as on success */
+		return SLURM_ERROR;
+	}
+	slurmd_run_job(job);
+	debug2("%ld returned from slurmd_run_job()", (long) getpid());
+	shm_fini();
+	exit(0);
+	return 0; /* not reached */
+}
+
+/* Instance of a slurmd "job" or job step:
+ * We run:
+ *  interconnect_prepare()       : prepare node for interconnect (if any)
+ *  interconnect_init()          : initialize interconnect on node
+ *  fork() N tasks --> wait() --> interconnect_fini()
+ *   \
+ *    `--> interconnect_attach() : attach each proc to interconnect
+ *         interconnect_env()    : setup child environment
+ *         exec()
+ *
+ * The step is inserted into shared memory first and deleted again before
+ * returning, whether or not the launch succeeded.
+ * NOTE(review): interconnect_prepare() is not called from this function;
+ * presumably it runs elsewhere -- confirm.
+ */
+void
+slurmd_run_job(slurmd_job_t *job)
+{
+	/* Insert job info into shared memory */
+	job_update_shm(job);
+
+	if (interconnect_init(job) == SLURM_ERROR) {
+		error("interconnect_init failed");
+		goto done;
+	}
+
+	/* initialize I/O, connect back to srun, and spawn thread for
+	 * forwarding I/O.
+	 */
+	/* Option: connect slurmd stderr to srun local task 0: stderr? */
+	if (io_spawn_handler(job) == SLURM_ERROR) {
+		error("unable to spawn io handler");
+		goto done;
+	}
+
+	job_launch_tasks(job);
+	verbose("job %u.%u complete, waiting on IO", job->jobid, job->stepid);
+	io_close_all(job);
+	pthread_join(job->ioid, NULL);
+	verbose("job %u.%u IO complete", job->jobid, job->stepid);
+
+    done:
+	interconnect_fini(job); /* ignore errors */
+	verbose("removing job %u.%u from system", job->jobid, job->stepid);
+	job_delete_shm(job);    /* again, ignore errors */
+	return;
+}
+
+/* Install `handler' for signal `signo' using sigaction(2).
+ * signo is also added to the handler's mask; no SA_* flags are set.
+ */
+static void
+xsignal(int signo, void (*handler)(int))
+{
+	struct sigaction sa;
+
+	sa.sa_handler = handler;
+	sigemptyset(&sa.sa_mask);
+	sigaddset(&sa.sa_mask, signo);
+	sa.sa_flags = 0;
+	if (sigaction(signo, &sa, NULL) < 0)
+		error("sigaction(%d): %m", signo);
+}
+
+/* Reap the tasks of `job' and send each one's exit status to its sruns.
+ *
+ * Blocks in waitpid() until every task in job->task[] has been collected.
+ * EINTR is retried; any other waitpid() failure (e.g. ECHILD) ends the
+ * wait, since it would otherwise repeat forever without making progress.
+ */
+static void
+_wait_for_all_tasks(slurmd_job_t *job)
+{
+	int waiting = job->ntasks;
+	int i;
+
+	while (waiting > 0) {
+		int status;
+		pid_t pid = waitpid(0, &status, 0);
+		if (pid < (pid_t) 0) {
+			if (errno == EINTR)
+				continue;
+			error("waitpid: %m");
+			/* job_cleanup() */
+			break;
+		}
+		for (i = 0; i < job->ntasks; i++) {
+			if (job->task[i]->pid == pid) {
+				_send_exit_msg(status, job->task[i]);
+				waiting--;
+			}
+		}
+	}
+	return;
+}
+
+/* Child side of fork(): turn this process into task `i' of `job'.
+ *
+ * Calls io_prepare_child(), redirects logging to stderr, attaches to the
+ * interconnect, switches to job->uid and its primary group, changes to
+ * job->cwd (or /tmp) and calls execve().  Never returns to the caller.
+ */
+static void
+_task_exec(slurmd_job_t *job, int i)
+{
+	struct passwd *pwd;
+	int exec_errno;
+	log_options_t opts = LOG_OPTS_STDERR_ONLY;
+
+	io_prepare_child(job->task[i]);
+
+	/*
+	 * Reinitialize slurm log facility to send errors back to client
+	 */
+	log_init("slurmd", opts, 0, NULL);
+
+	if (_unblock_all_signals() == SLURM_ERROR) {
+		error("unable to unblock signals");
+		exit(1);
+	}
+
+	/* attach to interconnect */
+	if (interconnect_attach(job, i) < 0) {
+		error("interconnect attach failed: %m");
+		exit(1);
+	}
+
+	if (interconnect_env(job, i) < 0) {
+		error("interconnect_env: %m");
+	}
+
+	if ((pwd = getpwuid(job->uid)) == NULL) {
+		error("User not found on node");
+		exit(1);
+	}
+
+	if (setgid(pwd->pw_gid) < 0) {
+		error("setgid: %m");
+		exit(1);
+	}
+
+	if (initgroups(pwd->pw_name, pwd->pw_gid) < 0) {
+		/* NOTE(review): failure is deliberately ignored, so the task
+		 * keeps whatever supplementary groups this process had --
+		 * confirm that is acceptable when slurmd runs as root.
+		 */
+		;
+		/* error("initgroups: %m"); */
+	}
+
+	if (setuid(job->uid) < 0) {
+		error("setuid: %m");
+		exit(1);
+	}
+
+	if (chdir(job->cwd) < 0) {
+		error("couldn't chdir to `%s': %m: going to /tmp instead",
+				job->cwd);
+		if (chdir("/tmp") < 0) {
+			error("couldn't chdir to /tmp either. dying.");
+			exit(1);
+		}
+	}
+
+	/* exec the cmdline */
+	execve(job->argv[0], job->argv, job->env);
+
+	/* error and clean up if execve() returns:
+	 * save errno first, error() may overwrite it before exit().
+	 */
+	exec_errno = errno;
+	error("execve(): %s: %m", job->argv[0]);
+	exit(exec_errno);
+}
+
+/* Fork one process per task of `job' and register each in shared memory.
+ *
+ * Ignores SIGPIPE, starts a new session whose id is stored in shm, then
+ * forks job->ntasks children.  Each child leaves the loop and runs
+ * _task_exec(); the parent then blocks in _wait_for_all_tasks().
+ */
+void
+job_launch_tasks(slurmd_job_t *job)
+{
+	pid_t sid;
+	int i;
+
+	debug3("%ld entered job_launch_tasks", (long) getpid());
+
+	xsignal(SIGPIPE, SIG_IGN);
+
+	if ((sid = setsid()) < (pid_t) 0)
+		error("setsid: %m");
+
+	if (shm_update_step_sid(job->jobid, job->stepid, sid) < 0)
+		error("shm_update_step_sid: %m");
+
+	debug2("invoking %u tasks for job %u.%u", job->ntasks, job->jobid,
+			job->stepid);
+
+	for (i = 0; i < job->ntasks; i++) {
+		task_t t;
+		verbose("going to fork task %d", i);
+		t.id = i;
+		t.global_id = job->task[i]->gid;
+		t.ppid = getpid();
+
+		if ((t.pid = fork()) < 0) {
+			error("fork: %m");
+			exit(1);
+			/* job_cleanup() */
+		} else if (t.pid == 0)
+			break;
+
+		/* Parent continues loop: */
+
+		job->task[i]->pid = t.pid;
+
+		debug2("%ld: forked child process %ld for task %d",
+				(long) getpid(), (long) t.pid, i);
+		debug2("going to add task %d to shm", i);
+		if (shm_add_task(job->jobid, job->stepid, &t) < 0)
+			error("shm_add_task: %m");
+		debug2("task %d added to shm", i);
+
+	}
+
+	if (i == job->ntasks)
+		_wait_for_all_tasks(job);
+	else
+		_task_exec(job, i);
+
+	return;
+}
+
+/* Send a MESSAGE_TASK_EXIT carrying status `rc' for task `t' to the
+ * response address of every srun in t->srun_list.
+ * Send failures are not checked; always returns SLURM_SUCCESS.
+ */
+static int
+_send_exit_msg(int rc, task_info_t *t)
+{
+	slurm_msg_t resp;
+	task_exit_msg_t msg;
+	ListIterator i;
+	srun_info_t *srun;
+
+	msg.return_code = rc;
+	msg.task_id = t->gid;
+	resp.data = &msg;
+	resp.msg_type = MESSAGE_TASK_EXIT;
+
+	i = list_iterator_create(t->srun_list);
+	while ((srun = list_next(i))) {
+		resp.address = srun->resp_addr;
+		slurm_send_only_node_msg(&resp);
+	}
+	list_iterator_destroy(i);
+
+	return SLURM_SUCCESS;
+}
+
+/* Remove every signal from this process's blocked-signal mask.
+ * Returns SLURM_SUCCESS, or SLURM_ERROR if sigfillset()/sigprocmask() fails.
+ */
+static int
+_unblock_all_signals(void)
+{
+	sigset_t set;
+	if (sigfillset(&set)) {
+		error("sigfillset: %m");
+		return SLURM_ERROR;
+	}
+	if (sigprocmask(SIG_UNBLOCK, &set, NULL)) {
+		error("sigprocmask: %m");
+		return SLURM_ERROR;
+	}
+	return SLURM_SUCCESS;
+}
diff --git a/src/slurmd/reconnect_utils.h b/src/slurmd/mgr.h
similarity index 56%
rename from src/slurmd/reconnect_utils.h
rename to src/slurmd/mgr.h
index 794fbd039f7..9c426ccea3f 100644
--- a/src/slurmd/reconnect_utils.h
+++ b/src/slurmd/mgr.h
@@ -1,9 +1,9 @@
 /*****************************************************************************\
- * reconnect_utils.h -
+ * src/slurmd/mgr.h - job management functions for slurmd
  *****************************************************************************
  * Copyright (C) 2002 The Regents of the University of California.
  * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Kevin Tew <tew1@llnl.gov> et. al.
+ * Written by Mark Grondona <mgrondona@llnl.gov>.
  * UCRL-CODE-2002-040.
  *
  * This file is part of SLURM, a resource management program.
@@ -20,22 +20,35 @@
  * details.
  *
  * You should have received a copy of the GNU General Public License along
- * with ConMan; if not, write to the Free Software Foundation, Inc.,
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
  * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 \*****************************************************************************/
+#ifndef _MGR_H
+#define _MGR_H
 
-#ifndef _SLURMD_RECONNECT_UTILS_H_
-#define _SLURMD_RECONNECT_UTILS_H_
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <src/common/slurm_protocol_defs.h>
 
-/* connect_io_stream
- * called by the io_threads to establish a connection to srun
+#include <src/slurmd/job.h>
+
+/* Launch a job step on this node
  */
-int connect_io_stream(task_start_t * task_start, int out_or_err);
+int mgr_launch_tasks(launch_tasks_request_msg_t *msg);
 
-/* connect_io_stream
- * called by connect_io_stream to send stream identification info
+/* Instance of a slurmd "job" or job step:
+ * We run:
+ *  interconnect_prepare()       : prepare node for interconnect (if any)
+ *  interconnect_init()          : initialize interconnect on node
+ *  fork() N tasks --> wait() --> interconnect_fini()
+ *   \
+ *    `--> interconnect_attach() : attach each proc to interconnect
+ *         interconnect_env()    : setup child environment
+ *         exec()
  */
-int send_io_stream_header(task_start_t * task_start, int out_or_err);
-ssize_t read_EINTR(int fd, void *buf, size_t count);
-ssize_t write_EINTR(int fd, void *buf, size_t count);
+void slurmd_run_job(slurmd_job_t *job);
+void job_launch_tasks(slurmd_job_t *job);
+
 #endif
diff --git a/src/slurmd/nbio.c b/src/slurmd/nbio.c
deleted file mode 100644
index 1de4de1a3a3..00000000000
--- a/src/slurmd/nbio.c
+++ /dev/null
@@ -1,741 +0,0 @@
-/*****************************************************************************\
- * nbio.c - Non-blocking I/O
- *****************************************************************************
- * Copyright (C) 2002 The Regents of the University of California.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Kevin Tew <tew1@llnl.gov> et. al.
- * UCRL-CODE-2002-040.
- *
- * This file is part of SLURM, a resource management program.
- * For details, see <http://www.llnl.gov/linux/slurm/>.
- * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. -\*****************************************************************************/ - -#include <stdlib.h> -#include <sys/types.h> -#include <pwd.h> -#include <grp.h> -#include <sys/wait.h> -#include <errno.h> -#include <unistd.h> -#include <string.h> -#include <pthread.h> - -#include <src/common/log.h> -#include <src/common/list.h> -#include <src/common/xmalloc.h> -#include <src/common/slurm_protocol_api.h> -#include <src/common/slurm_errno.h> -#include <src/common/util_signals.h> - -#include <src/slurmd/task_mgr.h> -#include <src/slurmd/shmem_struct.h> -#include <src/slurmd/circular_buffer.h> -#include <src/slurmd/reconnect_utils.h> -#include <src/slurmd/io.h> -#include <src/slurmd/pipes.h> -#include <src/slurmd/nbio.h> - -#define RECONNECT_TIMEOUT_SECONDS 1 -#define RECONNECT_TIMEOUT_MICROSECONDS 0 -typedef enum { - IN_OUT_FD, - SIG_ERR_FD, - CHILD_IN_WR_FD, - CHILD_OUT_RD_FD, - CHILD_ERR_RD_FD -} nbio_fd_t; - -typedef enum { - RD_SET, - WR_SET, - ER_SET -} nbio_set_t; - -typedef enum { - CONNECTED, - RECONNECT, - DRAIN, - DRAINED -} reconnect_state_t; - -typedef struct nbio_attr { - task_start_t *task_start; - slurm_fd_set init_set[3]; - slurm_fd_set next_set[3]; - slurm_fd fd[5]; - circular_buffer_t *in_cir_buf; - circular_buffer_t *out_cir_buf; - circular_buffer_t 
*err_cir_buf; - int flush_flag; - int die; - int reconnect_flags[2]; - time_t reconnect_timers[2]; - int max_fd; - struct timeval select_timer; -} nbio_attr_t; - -typedef struct io_debug { - char *name; - int local_task_id; - int global_task_id; -} io_debug_t; - -/* TODO - * timers on reconnect - * line oriented code - */ -int forward_io(task_start_t *tsk); -int nbio_set_init(nbio_attr_t * nbio_attr, slurm_fd_set * set_ptr); -int memcpy_sets(slurm_fd_set * init_set, slurm_fd_set * next_set); -int write_task_socket(circular_buffer_t * cir_buf, slurm_fd write_fd, - io_debug_t * dbg); -int read_task_pipe(circular_buffer_t * cir_buf, slurm_fd write_fd, - io_debug_t * dbg); -int write_task_pipe(circular_buffer_t * cir_buf, slurm_fd write_fd, - io_debug_t * dbg); -int read_task_socket(circular_buffer_t * cir_buf, slurm_fd read_fd, - io_debug_t * dbg); -int error_task_pipe(nbio_attr_t * nbio_attr, int fd_index); -int error_task_socket(nbio_attr_t * nbio_attr, int fd_index); -int set_max_fd(nbio_attr_t * nbio_attr); -int nbio_cleanup(nbio_attr_t * nbio_attr); -int reconnect(nbio_attr_t * nbio_attr); -int test_error_conditions(nbio_attr_t * nbio_attr); -int print_nbio_sets(nbio_attr_t * nbio_attr, slurm_fd_set * set_ptr); - -int forward_io(task_start_t *tsk) -{ - return do_nbio((void *)tsk); -} - -int wait_on_io_threads(task_start_t *tsk) -{ - return SLURM_SUCCESS; -} - -static void delay(struct timeval *tv) -{ - select(0, NULL, NULL, NULL, tv); -} - - -int init_io_debug(io_debug_t * io_dbg, task_start_t * task_start, - char *name) -{ - io_dbg->name = name; - io_dbg->local_task_id = task_start->local_task_id; - io_dbg->global_task_id = - task_start->launch_msg->global_task_ids[task_start-> - local_task_id]; - return SLURM_SUCCESS; -} - -int init_nbio_attr(nbio_attr_t * nbio_attr, task_start_t * task_start) -{ - int i; - nbio_attr->max_fd = 0; - nbio_attr->flush_flag = false; - nbio_attr->die = false; - nbio_attr->task_start = task_start; - nbio_attr->fd[IN_OUT_FD] = 
task_start->sockets[STDIN_OUT_SOCK]; - nbio_attr->fd[SIG_ERR_FD] = task_start->sockets[SIG_STDERR_SOCK]; - nbio_attr->fd[CHILD_IN_WR_FD] = - task_start->pipes[CHILD_IN_WR_PIPE]; - nbio_attr->fd[CHILD_OUT_RD_FD] = - task_start->pipes[CHILD_OUT_RD_PIPE]; - nbio_attr->fd[CHILD_ERR_RD_FD] = - task_start->pipes[CHILD_ERR_RD_PIPE]; - init_circular_buffer(&nbio_attr->in_cir_buf); - init_circular_buffer(&nbio_attr->out_cir_buf); - init_circular_buffer(&nbio_attr->err_cir_buf); - for (i = 0; i < 2; i++) { - nbio_attr->reconnect_flags[i] = RECONNECT; - nbio_attr->reconnect_timers[i] = 0; - } - nbio_set_init(nbio_attr, nbio_attr->init_set); - nbio_attr->select_timer.tv_sec = RECONNECT_TIMEOUT_SECONDS; - nbio_attr->select_timer.tv_usec = RECONNECT_TIMEOUT_MICROSECONDS; - return SLURM_SUCCESS; -} - -int do_nbio(void *arg) -{ - nbio_attr_t nbio_attr; - task_start_t *task_start = (task_start_t *) arg; - io_debug_t in_dbg; - io_debug_t out_dbg; - io_debug_t err_dbg; - struct timeval tv; - - debug3("do_nbio: enter"); - - /* init_io_debug(&in_dbg, task_start, "stdin"); */ - /* init_io_debug(&out_dbg, task_start, "stdout"); */ - /* init_io_debug(&err_dbg, task_start, "stderr"); */ - init_nbio_attr(&nbio_attr, task_start); - debug3("after init_nbio_attr()"); - - posix_signal_pipe_ignore(); - debug3("after posix_signal_pipe_ignore()"); - - reconnect(&nbio_attr); - - while (true) { - int rc; - - set_max_fd(&nbio_attr); - - /* print_nbio_sets(&nbio_attr, nbio_attr.init_set); */ - debug3("entering slurm_select"); - rc = slurm_select(nbio_attr.max_fd, - &nbio_attr.init_set[RD_SET], - &nbio_attr.init_set[WR_SET], - &nbio_attr.init_set[ER_SET], - &nbio_attr.select_timer); - - debug3("returned from slurm_select() with rc = %d", rc); - - /* print_nbio_sets(&nbio_attr, nbio_attr.init_set); */ - if (rc == SLURM_ERROR) { - error("select error in IO loop %m"); - nbio_set_init(&nbio_attr, nbio_attr.init_set); - continue; - } else if (rc == 0) { - reconnect(&nbio_attr); - nbio_set_init(&nbio_attr, 
nbio_attr.init_set); - /* these are here to set the write set after the fd - * numbers could have changed in reconnect */ - if (nbio_attr.out_cir_buf->read_size > 0) { - slurm_FD_SET(nbio_attr.fd[IN_OUT_FD], - &nbio_attr.init_set[WR_SET]); - } - if (nbio_attr.err_cir_buf->read_size > 0) { - slurm_FD_SET(nbio_attr.fd[SIG_ERR_FD], - &nbio_attr.init_set[WR_SET]); - } - if (test_error_conditions(&nbio_attr)) - break; - - nbio_attr.select_timer.tv_sec = - RECONNECT_TIMEOUT_SECONDS; - nbio_attr.select_timer.tv_usec = - RECONNECT_TIMEOUT_MICROSECONDS; - continue; - } else if (rc < 0) { - debug3("select has unknown error: %i", rc); - break; - } - - if (test_error_conditions(&nbio_attr)) - break; - - nbio_set_init(&nbio_attr, nbio_attr.next_set); - - /* error fd set */ - if (slurm_FD_ISSET( nbio_attr.fd[CHILD_IN_WR_FD], - &nbio_attr.init_set[ER_SET]) ) - error_task_pipe(&nbio_attr, CHILD_IN_WR_FD); - - if (slurm_FD_ISSET( nbio_attr.fd[CHILD_OUT_RD_FD], - &nbio_attr.init_set[ER_SET]) ) - error_task_pipe(&nbio_attr, CHILD_OUT_RD_FD); - - if (slurm_FD_ISSET( nbio_attr.fd[CHILD_ERR_RD_FD], - &nbio_attr.init_set[ER_SET]) ) - error_task_pipe(&nbio_attr, CHILD_ERR_RD_FD); - - if (slurm_FD_ISSET( nbio_attr.fd[IN_OUT_FD], - &nbio_attr.init_set[ER_SET]) ) - error_task_socket(&nbio_attr, IN_OUT_FD); - - if (slurm_FD_ISSET( nbio_attr.fd[SIG_ERR_FD], - &nbio_attr.init_set[ER_SET]) ) - error_task_socket(&nbio_attr, SIG_ERR_FD); - - /* read fd set */ - if (slurm_FD_ISSET - (nbio_attr.fd[IN_OUT_FD], &nbio_attr.init_set[RD_SET]) - && nbio_attr.reconnect_flags[IN_OUT_FD] == CONNECTED) { - if (read_task_socket - (nbio_attr.in_cir_buf, nbio_attr.fd[IN_OUT_FD], - &in_dbg)) { - error_task_socket(&nbio_attr, IN_OUT_FD); - } else - slurm_FD_SET(nbio_attr.fd[CHILD_IN_WR_FD], - &nbio_attr.next_set[WR_SET]); - } - if (slurm_FD_ISSET - (nbio_attr.fd[CHILD_OUT_RD_FD], - &nbio_attr.init_set[RD_SET])) { - if (read_task_pipe - (nbio_attr.out_cir_buf, - nbio_attr.fd[CHILD_OUT_RD_FD], &out_dbg)) { - 
error_task_pipe(&nbio_attr, - CHILD_OUT_RD_FD); - } else - slurm_FD_SET(nbio_attr.fd[IN_OUT_FD], - &nbio_attr.next_set[WR_SET]); - } - if (slurm_FD_ISSET - (nbio_attr.fd[CHILD_ERR_RD_FD], - &nbio_attr.init_set[RD_SET])) { - if (read_task_pipe - (nbio_attr.err_cir_buf, - nbio_attr.fd[CHILD_ERR_RD_FD], &err_dbg)) { - error_task_pipe(&nbio_attr, - CHILD_ERR_RD_FD); - } else - slurm_FD_SET(nbio_attr.fd[SIG_ERR_FD], - &nbio_attr.next_set[WR_SET]); - } - - /* write fd set */ - if (slurm_FD_ISSET - (nbio_attr.fd[CHILD_IN_WR_FD], - &nbio_attr.next_set[WR_SET])) { - if (write_task_pipe - (nbio_attr.in_cir_buf, - nbio_attr.fd[CHILD_IN_WR_FD], &in_dbg)) { - error_task_pipe(&nbio_attr, - CHILD_IN_WR_FD); - } else - slurm_FD_CLR(nbio_attr.fd[CHILD_IN_WR_FD], - &nbio_attr.next_set[WR_SET]); - } - if (slurm_FD_ISSET - (nbio_attr.fd[IN_OUT_FD], &nbio_attr.next_set[WR_SET]) - && nbio_attr.reconnect_flags[IN_OUT_FD] == CONNECTED) { - if (write_task_socket - (nbio_attr.out_cir_buf, - nbio_attr.fd[IN_OUT_FD], &out_dbg)) { - error_task_socket(&nbio_attr, IN_OUT_FD); - } else - slurm_FD_CLR(nbio_attr.fd[IN_OUT_FD], - &nbio_attr.next_set[WR_SET]); - } - if (slurm_FD_ISSET - (nbio_attr.fd[SIG_ERR_FD], &nbio_attr.next_set[WR_SET]) - && nbio_attr.reconnect_flags[IN_OUT_FD] == CONNECTED) { - if (write_task_socket - (nbio_attr.err_cir_buf, - nbio_attr.fd[SIG_ERR_FD], &err_dbg)) { - error_task_socket(&nbio_attr, SIG_ERR_FD); - } else - slurm_FD_CLR(nbio_attr.fd[SIG_ERR_FD], - &nbio_attr.next_set[WR_SET]); - } - - if (nbio_attr.flush_flag) - nbio_set_init(&nbio_attr, nbio_attr.init_set); - else - memcpy_sets(nbio_attr.init_set, - nbio_attr.next_set); - } - - nbio_cleanup(&nbio_attr); - return SLURM_SUCCESS; -} - -int memcpy_sets(slurm_fd_set * init_set, slurm_fd_set * next_set) -{ - int i; - - for (i = 0; i < 3; i++) { - memcpy(&init_set[i], &next_set[i], sizeof(slurm_fd_set)); - } - return SLURM_SUCCESS; -} - -int read_task_pipe(circular_buffer_t * cir_buf, slurm_fd read_fd, - io_debug_t * 
dbg) -{ - int bytes_read; - int local_errno; - - debug3("read_task_pipe: enter"); - - /* test for wierd state */ - if ((cir_buf->write_size == 0)) { - if (dbg) - debug3("%s write_size == 0 this shouldn't happen", - dbg->name); - slurm_seterrno_ret(ESLURMD_CIRBUF_POINTER_0); - } - - /* read stdout code */ - if ((bytes_read = read_EINTR(read_fd, cir_buf->tail, - cir_buf->write_size )) <= 0) { - if (dbg) - debug3("%d: read_EINTR: %m: bytes read %d", - dbg->global_task_id, dbg->name, local_errno, - bytes_read); - slurm_seterrno_ret(ESLURMD_PIPE_DISCONNECT); - } else { - cir_buf_write_update(cir_buf, bytes_read); - debug3("read_task_pipe fd: %d bytes_read %d", read_fd, - bytes_read); - return SLURM_SUCCESS; - } -} - - -int write_task_pipe(circular_buffer_t * cir_buf, slurm_fd write_fd, - io_debug_t * dbg) -{ - int bytes_written; - int local_errno; - - /* test for wierd state */ - if ((cir_buf->read_size == 0)) { - if (dbg) - debug3("%s read_size == 0 this shouldn't happen", - dbg->name); - slurm_seterrno_ret(ESLURMD_CIRBUF_POINTER_0); - } - - if ((bytes_written = - write_EINTR(write_fd, cir_buf->head, - cir_buf->read_size)) <= 0) { - local_errno = errno; - if (dbg) - debug3("%d: %s: write_EINTR: %m: bytes written %d", - dbg->global_task_id, bytes_written); - slurm_seterrno_ret(ESLURMD_PIPE_DISCONNECT); - } else { - cir_buf_read_update(cir_buf, bytes_written); - //debug3 ( "write_task_pipe fd: %i bytes_written %i" , write_fd , bytes_written ) ; - return SLURM_SUCCESS; - } -} - -int read_task_socket(circular_buffer_t * cir_buf, slurm_fd read_fd, - io_debug_t * dbg) -{ - int bytes_read; - int local_errno; - - /* test for wierd state */ - if ((cir_buf->write_size == 0)) { - if (dbg) - debug3 - ("%s cir_buf->write_size == 0 this shouldn't happen", - dbg->name); - slurm_seterrno_ret(ESLURMD_CIRBUF_POINTER_0); - } - - if ((bytes_read = - slurm_read_stream(read_fd, cir_buf->tail, - cir_buf->write_size)) <= 0) { - local_errno = errno; - /* test for EOF on socket */ - if 
(bytes_read == 0) { - if (dbg) - debug3("%i 0 returned EOF on socket", - dbg->global_task_id); - slurm_seterrno_ret(ESLURMD_EOF_ON_SOCKET); - } else if (bytes_read == -1) { - switch (local_errno) { - case EBADF: - case EPIPE: - case ECONNREFUSED: - case ECONNRESET: - case ENOTCONN: - if (dbg) - debug3 - ("lost %s socket connection %m errno: %i", - dbg->name, local_errno); - slurm_seterrno_ret - (ESLURMD_SOCKET_DISCONNECT); - break; - default: - if (dbg) - debug3 - ("%i error reading %s sock stream, %m errno: %i , bytes read %i ", - dbg->global_task_id, - dbg->name, local_errno, - bytes_read); - slurm_seterrno_ret - (ESLURMD_UNKNOWN_SOCKET_ERROR); - break; - } - } else { - debug3 - ("bytes_read: %i don't know what to do with this return code ", - bytes_read); - slurm_seterrno_ret(ESLURMD_UNKNOWN_SOCKET_ERROR); - } - } else { - cir_buf_write_update(cir_buf, bytes_read); - //debug3 ( "read_task_socket fd: %i bytes_read %i" , read_fd , bytes_read ) ; - return SLURM_SUCCESS; - } -} - -int write_task_socket(circular_buffer_t * cir_buf, slurm_fd write_fd, - io_debug_t * dbg) -{ - int sock_bytes_written; - int local_errno; - - debug3("write_task_socket: entered"); - /* test for wierd state */ - if ((cir_buf->read_size == 0)) { - if (dbg) - debug3 - ("%s cir_buf->read_size == 0 this shouldn't happen", - dbg->name); - slurm_seterrno_ret(ESLURMD_CIRBUF_POINTER_0); - } - - if ((sock_bytes_written = - slurm_write_stream(write_fd, cir_buf->head, - cir_buf->read_size)) <= 0) { - local_errno = errno; - /* test for EOF on socket */ - if (sock_bytes_written == 0) { - if (dbg) - debug3("%i 0 returned EOF on socket", - dbg->global_task_id); - slurm_seterrno_ret(ESLURMD_EOF_ON_SOCKET); - } else if (sock_bytes_written == -1) { - switch (local_errno) { - case EBADF: - case EPIPE: - case ECONNREFUSED: - case ECONNRESET: - case ENOTCONN: - if (dbg) - debug3 - ("lost %s socket connection %m errno: %i", - dbg->name, local_errno); - slurm_seterrno_ret - (ESLURMD_SOCKET_DISCONNECT); - break; - 
default: - if (dbg) - debug3 - ("%i error sending %s sock stream, %m errno %i, sock bytes written %i", - dbg->global_task_id, - dbg->name, local_errno, - sock_bytes_written); - slurm_seterrno_ret - (ESLURMD_UNKNOWN_SOCKET_ERROR); - break; - } - } else { - debug3 - ("bytes_read: %i don't know what to do with this return code ", - sock_bytes_written); - slurm_seterrno_ret(ESLURMD_UNKNOWN_SOCKET_ERROR); - } - } else { - cir_buf_read_update(cir_buf, sock_bytes_written); - debug3("write_task_socket fd: %i bytes_written %i", - write_fd, sock_bytes_written); - return SLURM_SUCCESS; - } -} - -int error_task_pipe(nbio_attr_t * nbio_attr, int fd_index) -{ - switch (errno) { - case ESLURMD_CIRBUF_POINTER_0: - break; - case ESLURMD_PIPE_DISCONNECT: - nbio_attr->flush_flag = true; - break; - } - return SLURM_SUCCESS; -} - -int error_task_socket(nbio_attr_t * nbio_attr, int fd_index) -{ - switch (errno) { - case ESLURMD_CIRBUF_POINTER_0: - if (nbio_attr->flush_flag) { - nbio_attr->reconnect_flags[fd_index] = DRAINED; - } else { - debug3 - ("ESLURMD_CIRBUF_POINTER_0 shouldn't have occured"); - } - break; - case ESLURMD_UNKNOWN_SOCKET_ERROR: - case ESLURMD_SOCKET_DISCONNECT: - case ESLURMD_EOF_ON_SOCKET: - if (!slurm_close_stream(nbio_attr->fd[fd_index])); - nbio_attr->fd[fd_index] = -1; - - switch (nbio_attr->reconnect_flags[fd_index]) { - case CONNECTED: - nbio_attr->reconnect_flags[fd_index] = RECONNECT; - break; - case DRAIN: - case DRAINED: - nbio_attr->die = true; - break; - case RECONNECT: - break; - default: - debug3 - ("Unknown case in error_task_socket:ESLURMD_EOF_ON_SOCKET: %i", - nbio_attr->reconnect_flags[fd_index]); - break; - } - break; - default: - debug3("Unknown case in error_task_socket: %i", - nbio_attr->reconnect_flags[fd_index]); - break; - } - return SLURM_SUCCESS; -} - -int nbio_set_init(nbio_attr_t * nbio_attr, slurm_fd_set * set_ptr) -{ - int i; - - for (i = 0; i < 3; i++) { - FD_ZERO(&set_ptr[i]); - } - - if (nbio_attr->flush_flag) { - /* write fds */ - 
slurm_FD_SET(nbio_attr->fd[IN_OUT_FD], &set_ptr[WR_SET]); - slurm_FD_SET(nbio_attr->fd[SIG_ERR_FD], &set_ptr[WR_SET]); - - /* error fds */ - slurm_FD_SET(nbio_attr->fd[IN_OUT_FD], &set_ptr[ER_SET]); - slurm_FD_SET(nbio_attr->fd[SIG_ERR_FD], &set_ptr[ER_SET]); - } - { - /* read fds */ - slurm_FD_SET(nbio_attr->fd[IN_OUT_FD], &set_ptr[RD_SET]); - slurm_FD_SET(nbio_attr->fd[CHILD_OUT_RD_FD], - &set_ptr[RD_SET]); - slurm_FD_SET(nbio_attr->fd[CHILD_ERR_RD_FD], - &set_ptr[RD_SET]); - - /* error fds */ - for (i = 0; i < 5; i++) { - slurm_FD_SET(nbio_attr->fd[i], &set_ptr[ER_SET]); - } - - } - return SLURM_SUCCESS; -} - -int set_max_fd(nbio_attr_t * nbio_attr) -{ - int i; - nbio_attr->max_fd = 0; - for (i = 0; i < 5; i++) { - nbio_attr->max_fd = - MAX(nbio_attr->max_fd, nbio_attr->fd[i]); - } - nbio_attr->max_fd++; - return SLURM_SUCCESS; -} - -int nbio_cleanup(nbio_attr_t * nbio_attr) -{ - free_circular_buffer(nbio_attr->in_cir_buf); - free_circular_buffer(nbio_attr->out_cir_buf); - free_circular_buffer(nbio_attr->err_cir_buf); - - slurm_close_stream(nbio_attr->fd[IN_OUT_FD]); - slurm_close_stream(nbio_attr->fd[SIG_ERR_FD]); - close(nbio_attr->fd[CHILD_IN_WR_FD]); - close(nbio_attr->fd[CHILD_OUT_RD_FD]); - close(nbio_attr->fd[CHILD_ERR_RD_FD]); - - return SLURM_SUCCESS; -} - -int reconnect(nbio_attr_t * nbio_attr) -{ - if (nbio_attr->reconnect_flags[IN_OUT_FD] == RECONNECT) { - if (connect_io_stream(nbio_attr->task_start, STDIN_OUT_SOCK) > 0) { - nbio_attr->fd[IN_OUT_FD] = - nbio_attr->task_start->sockets[STDIN_OUT_SOCK]; - slurm_set_stream_non_blocking(nbio_attr-> - fd[IN_OUT_FD]); - nbio_attr->reconnect_flags[IN_OUT_FD] = CONNECTED; - } - } - if (nbio_attr->reconnect_flags[SIG_ERR_FD] == RECONNECT) { - if (connect_io_stream - (nbio_attr->task_start, SIG_STDERR_SOCK) > 0) { - nbio_attr->fd[SIG_ERR_FD] = - nbio_attr->task_start-> - sockets[SIG_STDERR_SOCK]; - slurm_set_stream_non_blocking(nbio_attr-> - fd[SIG_ERR_FD]); - nbio_attr->reconnect_flags[SIG_ERR_FD] = CONNECTED; 
- } - } - return SLURM_SUCCESS; -} - -int test_error_conditions(nbio_attr_t * nbio_attr) -{ - /* task has died and io is flushed */ - if (nbio_attr->out_cir_buf->read_size == 0 - && nbio_attr->err_cir_buf->read_size == 0 - && nbio_attr->flush_flag) { - return SLURM_ERROR; - } - - if (nbio_attr->die) { - return SLURM_ERROR; - } - /* if ( waitpid ( nbio_attr -> task_start -> exec_pid , NULL , - WNOHANG ) > 0 ) - return SLURM_ERROR ; - */ - return SLURM_SUCCESS; -} - -int print_nbio_sets(nbio_attr_t * nbio_attr, slurm_fd_set * set_ptr) -{ - int i; - printf("fds "); - for (i = 0; i < 5; i++) - printf(" %i ", nbio_attr->fd[i]); - printf("\n"); - printf(" %i %i %i %i %i %i \n", - nbio_attr->in_cir_buf->read_size, - nbio_attr->in_cir_buf->write_size, - nbio_attr->out_cir_buf->read_size, - nbio_attr->out_cir_buf->write_size, - nbio_attr->err_cir_buf->read_size, - nbio_attr->err_cir_buf->write_size); - printf("--- 00000000001111111111222222222233\n"); - printf("--- 01234567890123456789012345678901\n"); - printf("rd "); - for (i = 0; i < 32; i++) - printf("%i", slurm_FD_ISSET(i, &set_ptr[RD_SET])); - printf("\n"); - printf("wr "); - for (i = 0; i < 32; i++) - printf("%i", slurm_FD_ISSET(i, &set_ptr[WR_SET])); - printf("\n"); - printf("er "); - for (i = 0; i < 32; i++) - printf("%i", slurm_FD_ISSET(i, &set_ptr[ER_SET])); - printf("\n"); - return SLURM_SUCCESS; -} diff --git a/src/slurmd/nbio.h b/src/slurmd/nbio.h deleted file mode 100644 index d7ebe243ae7..00000000000 --- a/src/slurmd/nbio.h +++ /dev/null @@ -1,32 +0,0 @@ -/*****************************************************************************\ - * nbio.h - Non-blocking I/O header for nbio.c - ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Kevin Tew <tew1@llnl.gov> et. al. - * UCRL-CODE-2002-040. 
- * - * This file is part of SLURM, a resource management program. - * For details, see <http://www.llnl.gov/linux/slurm/>. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. -\*****************************************************************************/ - -#ifndef _SLURMD_NBIO_H -#define _SLURMD_NBIO_H - -int do_nbio(void *arg); - -#endif diff --git a/src/slurmd/no_interconnect.c b/src/slurmd/no_interconnect.c index 63a7087625d..0e445b50f13 100644 --- a/src/slurmd/no_interconnect.c +++ b/src/slurmd/no_interconnect.c @@ -25,37 +25,42 @@ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
\*****************************************************************************/ -#include <src/common/slurm_protocol_api.h> -#include <src/slurmd/task_mgr.h> #include <src/slurmd/interconnect.h> +#include <src/slurmd/setenvpf.h> -/* exported module funtion to launch tasks */ -/*launch_tasks should really be named launch_job_step*/ -int launch_tasks ( launch_tasks_request_msg_t * launch_msg ) +int interconnect_init (slurmd_job_t *job) { - pthread_atfork ( NULL , NULL , pthread_fork_child_after ) ; - return interconnect_init ( launch_msg ); + return SLURM_SUCCESS; } -/* Contains interconnect specific setup instructions and then calls - * fan_out_task_launch */ -int interconnect_init ( launch_tasks_request_msg_t * launch_msg ) +int interconnect_attach (slurmd_job_t *job, int taskid) { - return fan_out_task_launch ( launch_msg ) ; -} - -int interconnect_set_capabilities ( task_start_t * task_start ) -{ - return SLURM_SUCCESS ; + return SLURM_SUCCESS; } /* * Set env variables needed for this interconnect */ -int interconnect_env(char ***env, uint16_t *envc, int nodeid, int nnodes, - int procid, int nprocs) +int interconnect_env(slurmd_job_t *job, int taskid) +{ + int cnt = job->envc; + task_info_t *t = job->task[taskid]; + + if (setenvpf(&job->env, &cnt, "SLURM_NODEID=%d", job->nodeid) < 0) + return -1; + if (setenvpf(&job->env, &cnt, "SLURM_PROCID=%d", t->gid ) < 0) + return -1; + if (setenvpf(&job->env, &cnt, "SLURM_NNODES=%d", job->nnodes) < 0) + return -1; + if (setenvpf(&job->env, &cnt, "SLURM_NPROCS=%d", job->nprocs) < 0) + return -1; + + return SLURM_SUCCESS; +} + +int interconnect_fini(slurmd_job_t *job) { - return SLURM_SUCCESS ; + return SLURM_SUCCESS; } diff --git a/src/slurmd/pipes.c b/src/slurmd/pipes.c deleted file mode 100644 index db6f0b41816..00000000000 --- a/src/slurmd/pipes.c +++ /dev/null @@ -1,101 +0,0 @@ -/*****************************************************************************\ - * pipes.c - - 
***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Kevin Tew <tew1@llnl.gov> et. al. - * UCRL-CODE-2002-040. - * - * This file is part of SLURM, a resource management program. - * For details, see <http://www.llnl.gov/linux/slurm/>. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
-\*****************************************************************************/ - -#include <unistd.h> -#include <errno.h> - -#include <src/common/slurm_errno.h> -#include <src/common/log.h> -#include <src/slurmd/pipes.h> -#include <src/slurmd/io.h> - -void setup_parent_pipes(int *pipes) -{ - close(pipes[CHILD_IN_RD_PIPE]); - close(pipes[CHILD_OUT_WR_PIPE]); - close(pipes[CHILD_ERR_WR_PIPE]); -} - -void cleanup_parent_pipes(int *pipes) -{ - close(pipes[CHILD_IN_WR_PIPE]); - close(pipes[CHILD_OUT_RD_PIPE]); - close(pipes[CHILD_ERR_RD_PIPE]); -} - -int init_parent_pipes(int *pipes) -{ - int rc; - - /* open pipes to be used in dup after fork */ - if ((rc = pipe(&pipes[CHILD_IN_PIPE]))) - slurm_seterrno_ret(ESLRUMD_PIPE_ERROR_ON_TASK_SPAWN); - if ((rc = pipe(&pipes[CHILD_OUT_PIPE]))) - slurm_seterrno_ret(ESLRUMD_PIPE_ERROR_ON_TASK_SPAWN); - if ((rc = pipe(&pipes[CHILD_ERR_PIPE]))) - slurm_seterrno_ret(ESLRUMD_PIPE_ERROR_ON_TASK_SPAWN); - - return SLURM_SUCCESS; -} - -int setup_child_pipes(int *pipes) -{ - int error_code = SLURM_SUCCESS; - - /* dup stdin */ - /* close ( STDIN_FILENO ); */ - - if (SLURM_ERROR == - (error_code |= dup2(pipes[CHILD_IN_RD_PIPE], STDIN_FILENO))) { - error("dup failed on child standard in pipe %d: %m", - pipes[CHILD_IN_RD_PIPE]); - } - close(pipes[CHILD_IN_RD_PIPE]); - close(pipes[CHILD_IN_WR_PIPE]); - - /* dup stdout */ - /* close ( STDOUT_FILENO ); */ - if (SLURM_ERROR == - (error_code |= - dup2(pipes[CHILD_OUT_WR_PIPE], STDOUT_FILENO))) { - error("dup failed on child standard out pipe %i: %m", - pipes[CHILD_OUT_WR_PIPE]); - } - close(pipes[CHILD_OUT_RD_PIPE]); - close(pipes[CHILD_OUT_WR_PIPE]); - - /* dup stderr */ - /* close ( STDERR_FILENO ); */ - if (SLURM_ERROR == - (error_code |= - dup2(pipes[CHILD_ERR_WR_PIPE], STDERR_FILENO))) { - error("dup failed on child standard err pipe %i: %m", - pipes[CHILD_ERR_WR_PIPE]); - } - close(pipes[CHILD_ERR_RD_PIPE]); - close(pipes[CHILD_ERR_WR_PIPE]); - return error_code; -} diff --git 
a/src/slurmd/pipes.h b/src/slurmd/pipes.h deleted file mode 100644 index f499f991b4a..00000000000 --- a/src/slurmd/pipes.h +++ /dev/null @@ -1,58 +0,0 @@ -/*****************************************************************************\ - * pipes.h - headers for slurmd pipes (pipes.c) - ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Kevin Tew <tew1@llnl.gov> et. al. - * UCRL-CODE-2002-040. - * - * This file is part of SLURM, a resource management program. - * For details, see <http://www.llnl.gov/linux/slurm/>. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
-\*****************************************************************************/ - -#ifndef _SLURMD_PIPES_H_ -#define _SLURMD_PIPES_H_ - -/*pipes.c*/ -/* init_parent_pipes - * initializes pipes in the parent to be used for child io ipc after fork and exec - * IN pipes - array of six file desciptors - * OUT int - return_code - */ -int init_parent_pipes(int *pipes); - -/* setup_parent_pipes - * setups the parent side of the pipes after fork - * IN pipes - array of six file desciptors - */ -void setup_parent_pipes(int *pipes); - - -/* setup_child_pipes - * setups the child side of the pipes after fork - * IN pipes - array of six file desciptors - * OUT int - return_code - */ -int setup_child_pipes(int *pipes); - -/* cleanup_parent_pipes - * cleans up the parent side of the pipes after task exit - * IN pipes - array of six file desciptors - */ -void cleanup_parent_pipes(int *pipes); - -#endif /* !_SLURMD_PIPES_H */ diff --git a/src/slurmd/reconnect_utils.c b/src/slurmd/reconnect_utils.c deleted file mode 100644 index f8c1fb30491..00000000000 --- a/src/slurmd/reconnect_utils.c +++ /dev/null @@ -1,152 +0,0 @@ -/*****************************************************************************\ - * reconnect_utils.c - - ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Kevin Tew <tew1@llnl.gov> et. al. - * UCRL-CODE-2002-040. - * - * This file is part of SLURM, a resource management program. - * For details, see <http://www.llnl.gov/linux/slurm/>. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. -\*****************************************************************************/ - -#include <stdlib.h> -#include <sys/types.h> -#include <pwd.h> -#include <grp.h> -#include <sys/wait.h> -#include <errno.h> -#include <unistd.h> -#include <string.h> -#include <pthread.h> - -#include <src/common/log.h> -#include <src/common/list.h> -#include <src/common/xmalloc.h> -#include <src/common/slurm_protocol_api.h> -#include <src/common/slurm_errno.h> -#include <src/common/util_signals.h> - -#include <src/slurmd/task_mgr.h> -#include <src/slurmd/shmem_struct.h> -#include <src/slurmd/circular_buffer.h> -#include <src/slurmd/io.h> -#include <src/slurmd/pipes.h> -#include <src/slurmd/reconnect_utils.h> - -int connect_io_stream(task_start_t * task_start, int out_or_err) -{ - if ((task_start->sockets[out_or_err] = - slurm_open_stream(&(task_start->io_streams_dest))) == - SLURM_PROTOCOL_ERROR) { - error("connect_io_stream: <%s>: %m", - out_or_err ? 
"stdout" : "stderr"); - return SLURM_PROTOCOL_ERROR; - } else - return send_io_stream_header(task_start, out_or_err); -} - -int send_io_stream_header(task_start_t * task_start, int out_or_err) -{ - slurm_io_stream_header_t io_header; - Buf buffer; - int rc; - - buffer = init_buf (sizeof(slurm_io_stream_header_t)); - if (out_or_err == STDIN_OUT_SOCK) { - init_io_stream_header(&io_header, - task_start->launch_msg->credential-> - signature, - task_start->launch_msg-> - global_task_ids[task_start->local_task_id], - SLURM_IO_STREAM_INOUT); - pack_io_stream_header(&io_header, buffer); - rc = slurm_write_stream(task_start->sockets[STDIN_OUT_SOCK], - get_buf_data(buffer), get_buf_offset(buffer)); - } else { - - init_io_stream_header(&io_header, - task_start->launch_msg->credential-> - signature, - task_start->launch_msg-> - global_task_ids[task_start-> - local_task_id], - SLURM_IO_STREAM_SIGERR); - pack_io_stream_header(&io_header, buffer); - rc = slurm_write_stream(task_start->sockets[SIG_STDERR_SOCK], - get_buf_data(buffer), get_buf_offset(buffer)); - } - - free_buf(buffer); - return rc; -} - -ssize_t read_EINTR(int fd, void *buf, size_t count) -{ - ssize_t bytes_read; - while (true) { - if ((bytes_read = read(fd, buf, count)) <= 0) { - if ((bytes_read == SLURM_PROTOCOL_ERROR) - && (errno == EINTR)) { - debug - ("read_EINTR: bytes_read: %i , fd: %i %m errno: %i", - bytes_read, fd, errno); - continue; - } - } - return bytes_read; - } -} - -ssize_t write_EINTR(int fd, void *buf, size_t count) -{ - ssize_t bytes_written; - while (true) { - if ((bytes_written = write(fd, buf, count)) <= 0) { - if ((bytes_written == SLURM_PROTOCOL_ERROR) - && (errno == EINTR)) { - debug - ("write_EINTR: bytes_written: %i , fd: %i %m errno: %i", - bytes_written, fd, errno); - continue; - } - } - return bytes_written; - } -} - -struct timeval timeval_diff(struct timeval *last, struct timeval *first) -{ - struct timeval temp; - double lastd = last->tv_sec * 1000000 + last->tv_usec; - double 
firstd = first->tv_sec * 1000000 + first->tv_usec; - double diffd = lastd - firstd; - temp.tv_sec = diffd / 1000000; - temp.tv_usec = (long long) diffd % 1000000; - return temp; -} - -double timeval_diffd(struct timeval *last, struct timeval *first, - struct timeval *remaining) -{ - double lastd = last->tv_sec * 1000000 + last->tv_usec; - double firstd = first->tv_sec * 1000000 + first->tv_usec; - double diffd = lastd - firstd; - remaining->tv_sec = diffd / 1000000; - remaining->tv_usec = (long long) diffd % 1000000; - return diffd; -} diff --git a/src/slurmd/semaphore.c b/src/slurmd/semaphore.c new file mode 100644 index 00000000000..9b9e0be9caa --- /dev/null +++ b/src/slurmd/semaphore.c @@ -0,0 +1,321 @@ +/*****************************************************************************\ + * semaphore.c - POSIX semaphore implementation via SysV semaphores + * $Id$ + ***************************************************************************** + * Copyright (C) 2002 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Chris Dunlap <cdunlap@llnl.gov>. + * UCRL-CODE-2002-040. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+\*****************************************************************************/ + +/* + * semaphore.c + * by Chris Dunlap <cdunlap@llnl.gov> + * + * Posix Semaphores implementation using System V Semaphores + * (cf. Stevens' Unix Network Programming, v2, 2e, Section 10.16) + * + * Id: semaphore.c,v 1.1.1.1 2000/10/20 21:56:06 dun Exp + */ + + +#include <errno.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdarg.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/ipc.h> +#include <sys/sem.h> +#include <stdio.h> +#include "semaphore.h" + + +#define MAX_TRIES 32 + + +sem_t * sem_open(const char *name, int oflag, ...) +{ + va_list ap; + mode_t mode; + unsigned int value; + int i, fd, errno_bak; + key_t key; + int semflag, semid; + union semun semarg; + struct sembuf semval; + struct semid_ds seminfo; + sem_t *sem; + + if (!name) { + return(SEM_FAILED); + } + + semid = -1; + + /* Create a new semaphore. + */ + if (oflag & O_CREAT) { + va_start(ap, oflag); + mode = va_arg(ap, mode_t); + value = va_arg(ap, unsigned int); + va_end(ap); + + /* Create ancillary file and map pathname into SysV IPC key. + */ + if ((fd = open(name, oflag, mode)) == -1) { + /* + * If the O_EXCL flag is specified and we return before the sem + * is actually created in the following semget(), we create a + * race-condition. + * This can present itself when two processes try to simultaneously + * open the same sem. Suppose the first process succeeds in + * opening the file. The second process will fail in its call to + * open() because of the O_EXCL flag and sem_open() will return + * SEM_FAILED. Now suppose a subsequent sem_open() call is made + * w/o the O_EXCL flag to open the (presumably existing) sem. + * If the first process has not returned from semget() by now, + * this sem_open() will return SEM_FAILED with errno=ENOENT since + * the sem does not yet exist! 
+ */ + if ((errno == EEXIST) && (oflag & O_EXCL)) { + if ((key = ftok(name, 1)) == -1) { + return(SEM_FAILED); + } + for (i=0; i<MAX_TRIES; i++) { + if (((semget(key, 0, 0)) != -1) || (errno != ENOENT)) { + break; + } + sleep(1); + } + errno = EEXIST; /* don't let semget() change errno */ + } + return(SEM_FAILED); + } + close(fd); + if ((key = ftok(name, 1)) == -1) { + return(SEM_FAILED); + } + + /* Convert Posix sem flag to SysV sem flag. + */ + semflag = IPC_CREAT | (mode & 0777); + if (oflag & O_EXCL) { + semflag |= IPC_EXCL; + } + + /* Create SysV semaphore set with one member. + * Note that semget() sets sem_otime to zero during sem creation. + */ + if ((semid = semget(key, 1, semflag | IPC_EXCL)) >= 0) { + /* + * With IPC_EXCL, we're the first to create sem, so init to 0. + */ + semarg.val = 0; + if (semctl(semid, 0, SETVAL, semarg) == -1) { + goto err; + } + /* SysV sems are normally stored as ushorts, so enforce max val. + */ + if (value > SEMVMX) { + errno = EINVAL; + goto err; + } + /* Now increment sem by 'value' w/ semop() to set sem_otime nonzero. + */ + semval.sem_num = 0; + semval.sem_op = value; + semval.sem_flg = 0; + if (semop(semid, &semval, 1) == -1) { + goto err; + } + goto end; + } + /* If the sem already exists and the caller does not specify O_EXCL, + * this is NOT an error. Instead, fall-thru to open existing sem. + */ + else if ((errno != EEXIST) || ((semflag & IPC_EXCL) != 0)) { + goto err; + } + } + + /* Open (presumably) existing semaphore. Either O_CREAT was not specified, + * or O_CREAT was specified w/o O_EXCL and the semaphore already exists. + */ + if ((key = ftok(name, 1)) == -1) { + goto err; + } + if ((semid = semget(key, 0, 0)) == -1) { + goto err; + } + + /* If sem_otime is 0, sem has not yet been initialized by its creator. + * Spin up to MAX_TRIES before giving up. + * + * DANGER, WILL ROBINSON! + * Unfortunatley, semop() on a BSD system does not appear to update the + * sem_otime member for some sick and twisted reason. 
So we'll sleep, + * cross our fingers, and hope for the best. + */ +#ifdef HAVE_BROKEN_SEM_OTIME + sleep(1); + goto end; +#endif /* HAVE_BROKEN_SEM_OTIME */ + + semarg.buf = &seminfo; + for (i=0; i<MAX_TRIES; i++) { + if (semctl(semid, 0, IPC_STAT, semarg) == -1) { + goto err; + } + if (seminfo.sem_otime != 0) { + goto end; + } + sleep(1); + } + errno = ETIMEDOUT; + /* fall-thru to 'err' */ + + +/* Clean up failed semaphore before returning. + */ +err: + errno_bak = errno; /* don't let semctl() change errno */ + if (semid != -1) { + semctl(semid, 0, IPC_RMID); + } + errno = errno_bak; + return(SEM_FAILED); + +/* SysV sem creation was successful, so create Posix sem wrapper. + */ +end: + if ((sem = malloc(sizeof(sem_t))) == NULL) { + goto err; + } + sem->id = semid; + return(sem); +} + + +int sem_close(sem_t *sem) +{ + if (sem->id < 0) { + errno = EINVAL; + return(-1); + } + sem->id = -1; + free(sem); + return(0); +} + + +int sem_unlink(const char *name) +{ + key_t key; + int semid; + + if (!name) { + return(-1); + } + if ((key = ftok(name, 1)) == -1) { + return(-1); + } + if (unlink(name) == -1) { + return(-1); + } + if ((semid = semget(key, 0, 0)) == -1) { + return(-1); + } + if (semctl(semid, 0, IPC_RMID) == -1) { + return(-1); + } + return(0); +} + + +int sem_wait(sem_t *sem) +{ + struct sembuf op; + + if (sem->id < 0) { + errno = EINVAL; + return(-1); + } + op.sem_num = 0; + op.sem_op = -1; + op.sem_flg = 0; + if (semop(sem->id, &op, 1) == -1) { + return(-1); + } + return(0); +} + + +int sem_trywait(sem_t *sem) +{ + struct sembuf op; + + if (sem->id < 0) { + errno = EINVAL; + return(-1); + } + op.sem_num = 0; + op.sem_op = -1; + op.sem_flg = IPC_NOWAIT; + if (semop(sem->id, &op, 1) == -1) { + return(-1); + } + return(0); +} + + +int sem_post(sem_t *sem) +{ + struct sembuf op; + + if (sem->id < 0) { + errno = EINVAL; + return(-1); + } + op.sem_num = 0; + op.sem_op = 1; + op.sem_flg = 0; + if (semop(sem->id, &op, 1) == -1) { + return(-1); + } + return(0); +} + + 
+int sem_getvalue(sem_t *sem, int *valp) +{ + int val; + + if (sem->id < 0) { + errno = EINVAL; + return(-1); + } + if ((val = semctl(sem->id, 0, GETVAL)) == -1) { + return(-1); + } + *valp = val; + return(0); +} diff --git a/src/slurmd/semaphore.h b/src/slurmd/semaphore.h new file mode 100644 index 00000000000..084ef13f635 --- /dev/null +++ b/src/slurmd/semaphore.h @@ -0,0 +1,93 @@ +/*****************************************************************************\ + * src/slurmd/semaphore.h - POSIX semaphore implementation via SysV semaphores + * $Id$ + ***************************************************************************** + * Copyright (C) 2002 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Chris Dunlap <Dunlap@llnl.gov>. + * UCRL-CODE-2002-040. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +\*****************************************************************************/ + +/* + * semaphore.h + * by Chris Dunlap <cdunlap@llnl.gov> + * + * Posix Semaphores implementation using System V Semaphores + * (cf. 
Stevens' Unix Network Programming, v2, 2e, Section 10.16) + * + * Id: semaphore.h,v 1.1.1.1 2000/10/02 20:56:53 dun Exp + * + */ + + +#ifndef DUN_SEMAPHORE_H +#define DUN_SEMAPHORE_H + +#include "config.h" + +#ifdef HAVE_POSIX_SEMS +#include <semaphore.h> +#else + + +typedef struct { + int id; /* SysV semaphore ID */ +} sem_t; + +#ifdef SEM_FAILED +#undef SEM_FAILED +#endif /* SEM_FAILED */ +#define SEM_FAILED ((sem_t *)(-1)) /* avoid compiler warnings */ + +#ifndef SEMVMX +#define SEMVMX 32767 /* historical SysV sem max value */ +#endif /* !SEMVMX */ + +/* Default perms for new SysV semaphores. + */ +#define SYSV_SEM_DEF_MODE (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) + +#ifndef HAVE_SEMUN_UNION +union semun { + int val; /* value for SETVAL */ + struct semid_ds *buf; /* buffer for IPC_SET and IPC_STAT */ + unsigned short int *array; /* array for GETALL and SETALL */ +}; +#endif /* !HAVE_SEMUN_UNION */ + + +sem_t * sem_open(const char *name, int oflag, ...); + +int sem_close(sem_t *sem); + +int sem_unlink(const char *name); + +int sem_wait(sem_t *sem); + +int sem_trywait(sem_t *sem); + +int sem_post(sem_t *sem); + +int sem_getvalue(sem_t *sem, int *valp); + + +#endif /* !HAVE_POSIX_SEMS */ + +#endif /* !DUN_SEMAPHORE_H */ diff --git a/src/slurmd/setenvpf.c b/src/slurmd/setenvpf.c index 1530c60d4a4..d016756534c 100644 --- a/src/slurmd/setenvpf.c +++ b/src/slurmd/setenvpf.c @@ -1,5 +1,6 @@ /*****************************************************************************\ - * setenvpf.c - add an environment variable to environment vector + * src/slurmd/setenvpf.c - add an environment variable to environment vector + * $Id$ ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -24,6 +25,10 @@ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
\*****************************************************************************/ +#if HAVE_CONFIG_H +# include <config.h> +#endif + #include <stdio.h> #include <stdarg.h> @@ -34,7 +39,7 @@ * xmalloc() extending *envp if necessary. * * envp Pointer to environment array allocated with xmalloc() - * envc Pointer to current count of environment vars + * envc Pointer to current count of environment vars * fmt printf style format (e.g. "SLURM_NPROCS=%d") * */ diff --git a/src/slurmd/setenvpf.h b/src/slurmd/setenvpf.h index 140b16009de..765b3589c84 100644 --- a/src/slurmd/setenvpf.h +++ b/src/slurmd/setenvpf.h @@ -1,3 +1,28 @@ +/*****************************************************************************\ + * src/slurmd/setenvpf.h - environment vector manipulation + ***************************************************************************** + * Copyright (C) 2002 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Mark Grondona <mgrondona@llnl.gov>. + * UCRL-CODE-2002-040. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+\*****************************************************************************/ #ifndef _SETENVPF_H #define _SETENVPF_H diff --git a/src/slurmd/shm.c b/src/slurmd/shm.c new file mode 100644 index 00000000000..d2e1c98ff40 --- /dev/null +++ b/src/slurmd/shm.c @@ -0,0 +1,763 @@ +/*****************************************************************************\ + * src/slurmd/shm.c - slurmd shared memory routines + * $Id$ + ***************************************************************************** + * Copyright (C) 2002 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Mark Grondona <mgrondona@llnl.gov>. + * UCRL-CODE-2002-040. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+\*****************************************************************************/ + +#if HAVE_CONFIG_H +# include <config.h> +#endif + +#if HAVE_SYS_IPC_H +# include <sys/ipc.h> +#endif + +#if HAVE_SYS_SHM_H +# include <sys/shm.h> +#endif + +#if HAVE_SYS_SEM_H +# include <sys/sem.h> +#endif + +#if HAVE_STRING_H +# include <string.h> +#endif + +#if HAVE_ERRNO_H +# include <errno.h> +#endif + +#include <stdio.h> +#include <stdarg.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <stdlib.h> +#include <signal.h> + +#include <src/common/list.h> +#include <src/common/log.h> +#include <src/common/xmalloc.h> +#include <src/common/xassert.h> +#include <src/common/slurm_errno.h> + +#include <src/slurmd/shm.h> + +/* We use Chris Dunlap's POSIX semaphore implementation if necessary */ +#include <src/slurmd/test/semaphore.h> + +#define MAX_JOB_STEPS 16 +#define MAX_TASKS 1024 + +#define SHM_LOCKNAME "/.slurm.lock" + +/* Increment SHM_VERSION if format changes */ +#define SHM_VERSION 0x1001 + +typedef struct shmem_struct { + int version; + int users; + job_step_t step[MAX_JOB_STEPS]; + task_t task[MAX_TASKS]; +} slurmd_shm_t; + + +/* static variables: */ +static sem_t *shm_lock; +static char *lockname; +static int shmid; +static slurmd_shm_t *slurmd_shm; + +/* static function prototypes: */ +static int _is_valid_ipc_name(const char *name); +static char *_create_ipc_name(const char *name); +static int _shm_unlink_lock(void); +static int _shm_lock_and_initialize(void); +static void _shm_lock(void); +static void _shm_unlock(void); +static void _shm_initialize(void); +static void _shm_prepend_task_to_step(job_step_t *, task_t *); +static void _shm_task_copy(task_t *, task_t *); +static void _shm_step_copy(job_step_t *, job_step_t *); +static void _shm_clear_task(task_t *); +static void _shm_clear_step(job_step_t *); +static int _shm_find_step(uint32_t, uint32_t); +static task_t * _shm_alloc_task(void); +static task_t * _shm_find_task_in_step(job_step_t *s, int taskid); + + +/* 
initialize shared memory: + * Attach if shared region already exists, otherwise create and attach +*/ +int +shm_init(void) +{ + return _shm_lock_and_initialize(); +} + +/* Detach from shared memory */ +int +shm_fini(void) +{ + int destroy = 0; + info("process %ld detaching from shm", getpid()); + xassert(slurmd_shm != NULL); + _shm_lock(); + if (--slurmd_shm->users == 0) + destroy = 1; + + /* detach segment from local memory */ + if (shmdt(slurmd_shm) < 0) { + error("shmdt: %m"); + return -1; + } + + if (destroy && (shmctl(shmid, IPC_RMID, NULL) < 0)) { + error("shmctl: %m"); + return -1; + } + _shm_unlock(); + if (destroy && (_shm_unlink_lock() < 0)) { + error("_shm_unlink_lock: %m"); + return -1; + } + + return 0; +} + +void +shm_cleanup(void) +{ + char *s; + + if ((s = _create_ipc_name(SHM_LOCKNAME))) { + info("going to destroy shm lock `%s'", s); + if (sem_unlink(s) < 0) + error("sem_unlink: %m"); + xfree(s); + } + + + +} + +static int +_is_valid_ipc_name(const char *name) +{ + if (!name) + return(0); + else if (strlen(name) <= 1) + return(2); + else if (strlen(name) >= PATH_MAX) + return(3); + else if (strcmp(name, "/.") == 0) + return(4); + else if (strcmp(name, "/..") == 0) + return(5); + else if (strrchr(name, '/') != name) + return(6); + return(1); +} + +static char * +_create_ipc_name(const char *name) +{ + char *dst, *dir, *slash; + int rc; + + if ((rc = _is_valid_ipc_name(name)) != 1) { + error("invalid ipc name: `%s' %d", name, rc); + return NULL; + } + else if (!(dst = xmalloc(PATH_MAX))) + return NULL; + +#if defined(POSIX_IPC_PREFIX) && defined(HAVE_POSIX_SEMS) + dir = POSIX_IPC_PREFIX; +#else + if (!(dir = getenv("TMPDIR")) || !strlen(dir)) + dir = "/tmp"; +#endif /* POSIX_IPC_PREFIX */ + + slash = (dir[strlen(dir) - 1] == '/') ? 
"" : "/"; + +#ifdef HAVE_SNPRINTF + snprintf(dst, PATH_MAX, "%s%s%s", dir, slash, name+1); +#else + sprintf(dst, "%s%s%s", dir, slash, name+1); +#endif /* HAVE_SNPRINTF */ + + return(dst); +} + +static int +_shm_unlink_lock() +{ + debug3("process %ld removing shm lock", getpid()); + if (sem_unlink(lockname) == -1) + return 0; + xfree(lockname); + return 1; +} + +static sem_t * +_sem_open(const char *name, int oflag, ...) +{ + sem_t *sem; + va_list ap; + mode_t mode; + unsigned int value; + + if (!(lockname = _create_ipc_name(name))) { + fatal("sem_open failed for [%s]: invalid IPC name", name); + } + + if (oflag & O_CREAT) { + va_start(ap, oflag); + mode = va_arg(ap, mode_t); + value = va_arg(ap, unsigned int); + va_end(ap); + sem = sem_open(lockname, oflag, mode, value); + } else + sem = sem_open(lockname, oflag); + + return(sem); +} + + +static void +_shm_initialize() +{ + int i; + memset(slurmd_shm, 0, sizeof(slurmd_shm_t)); + for (i = 0; i < MAX_TASKS; i++) + slurmd_shm->task[i].used = false; + for (i = 0; i < MAX_JOB_STEPS; i++) + slurmd_shm->step[i].state = SLURMD_JOB_UNUSED; + slurmd_shm->version = SHM_VERSION; +} + +int +shm_insert_step(job_step_t *step) +{ + int i = 0; + _shm_lock(); + if (_shm_find_step(step->jobid, step->stepid) >= 0) { + _shm_unlock(); + slurm_seterrno_ret(EEXIST); + } + + for (i = 0; i < MAX_JOB_STEPS; i++) { + if (slurmd_shm->step[i].state == SLURMD_JOB_UNUSED) + break; + } + if (i == MAX_JOB_STEPS) { + _shm_unlock(); + slurm_seterrno_ret(ENOSPC); + } else + _shm_step_copy(&slurmd_shm->step[i], step); + + _shm_unlock(); + return SLURM_SUCCESS; +} + +int +shm_delete_step(uint32_t jobid, uint32_t stepid) +{ + int i; + _shm_lock(); + if ((i = _shm_find_step(jobid, stepid)) < 0) { + _shm_unlock(); + slurm_seterrno_ret(ESRCH); + } + _shm_clear_step(&slurmd_shm->step[i]); + _shm_unlock(); + return 0; +} + +int +shm_update_step(job_step_t *step) +{ + int i, retval = 0; + _shm_lock(); + if ((i = _shm_find_step(step->jobid, step->stepid)) >= 
0)
+		_shm_step_copy(&slurmd_shm->step[i], step);
+	else
+		retval = -1;
+	_shm_unlock();
+	return retval;
+}
+
+/*
+ * Send `signal' to every task of step jobid,stepid that has a pid > 0,
+ * while holding the shm lock.
+ *
+ * Returns SLURM_SUCCESS if the step exists and every kill() succeeded.
+ * Otherwise fails through slurm_seterrno_ret() with ESRCH (no such
+ * step) or with the errno of the last kill() that failed.
+ */
+int
+shm_signal_step(uint32_t jobid, uint32_t stepid, uint32_t signal)
+{
+	int signo = (int) signal;
+	int retval = SLURM_SUCCESS;
+	int i;
+	job_step_t *s;
+	task_t *t;
+
+	_shm_lock();
+	if ((i = _shm_find_step(jobid, stepid)) >= 0) {
+		s = &slurmd_shm->step[i];
+		for (t = s->task_list; t; t = t->next) {
+			if (t->pid > 0 && kill(t->pid, signo) < 0) {
+				/* capture errno first: the logging call
+				 * below is free to overwrite it */
+				int err = errno;
+				error("kill %d.%d pid %ld: %m",
+				      jobid, stepid, (long)t->pid);
+				retval = err;
+			}
+		}
+	} else
+		retval = ESRCH;
+
+	_shm_unlock();
+	if (retval > 0)
+		slurm_seterrno_ret(retval);
+	else
+		return SLURM_SUCCESS;
+}
+
+
+/*
+ * Return a heap copy of step jobid,stepid and of each of its tasks, or
+ * NULL if the step is not in shared memory. Tasks are prepended one by
+ * one, so the copied list is in the reverse order of the shared list.
+ * Release the result with shm_free_step().
+ */
+job_step_t *
+shm_get_step(uint32_t jobid, uint32_t stepid)
+{
+	int i;
+	job_step_t *s = NULL;
+	task_t *t;
+
+	_shm_lock();
+	if ((i = _shm_find_step(jobid, stepid)) >= 0) {
+		s = xmalloc(sizeof(job_step_t));
+		_shm_step_copy(s, &slurmd_shm->step[i]);
+		/* _shm_step_copy() forces the state to
+		 * SLURMD_JOB_ALLOCATED and keeps the destination's old
+		 * task_list; a snapshot must carry the step's real state
+		 * and start from an empty list */
+		s->state = slurmd_shm->step[i].state;
+		s->task_list = NULL;
+		for (t = slurmd_shm->step[i].task_list; t; t = t->next) {
+			task_t *u = xmalloc(sizeof(task_t));
+			_shm_task_copy(u, t);
+			_shm_prepend_task_to_step(s, u);
+		}
+
+	}
+	_shm_unlock();
+	return s;
+}
+
+/*
+ * Free a step returned by shm_get_step(), including every task on its
+ * task_list.
+ */
+void
+shm_free_step(job_step_t *step)
+{
+	task_t *p, *t;
+	if ((t = step->task_list)) {
+		do {
+			p = t->next;
+			xfree(t);
+		} while ((t = p));
+	}
+	xfree(step);
+}
+
+/*
+ * Store `sid' as the session id of step jobid,stepid.
+ * Returns SLURM_FAILURE with errno ESRCH if the step is not found.
+ */
+int
+shm_update_step_sid(uint32_t jobid, uint32_t stepid, int sid)
+{
+	int i, retval = SLURM_SUCCESS;
+	_shm_lock();
+	if ((i = _shm_find_step(jobid, stepid)) >= 0)
+		slurmd_shm->step[i].sid = sid;
+	else {
+		slurm_seterrno(ESRCH);
+		retval = SLURM_FAILURE;
+	}
+	_shm_unlock();
+	return retval;
+}
+
+/*
+ * Return the session id of step jobid,stepid, or SLURM_FAILURE with
+ * errno ESRCH if the step is not found.
+ */
+int
+shm_step_sid(uint32_t jobid, uint32_t stepid)
+{
+	int i, sid;
+	_shm_lock();
+	if ((i = _shm_find_step(jobid, stepid)) >= 0)
+		sid = slurmd_shm->step[i].sid;
+	else {
+		slurm_seterrno(ESRCH);
+		sid = SLURM_FAILURE;
+	}
+	_shm_unlock();
+	return sid;
+}
+
+
+int
+shm_update_step_state(uint32_t jobid, uint32_t stepid, job_state_t state)
+{
+	int i, retval = SLURM_SUCCESS;
+ _shm_lock(); + if ((i = _shm_find_step(jobid, stepid)) >= 0) + slurmd_shm->step[i].state = state; + else { + slurm_seterrno(ESRCH); + retval = SLURM_FAILURE; + } + _shm_unlock(); + return retval; +} +/* Lock shared memory and return a pointer to the state field of the + * matching step inside the shared segment. On success the lock is left + * held for the caller; if no step matches, errno is set to ESRCH, the + * lock is released again and NULL is returned. + */ +job_state_t * +shm_lock_step_state(uint32_t jobid, uint32_t stepid) +{ + int i; + job_state_t *state = NULL; + _shm_lock(); + if ((i = _shm_find_step(jobid, stepid)) >= 0) + state = &slurmd_shm->step[i].state; + else { + slurm_seterrno(ESRCH); + _shm_unlock(); + } + /* caller is responsible for unlocking */ + return state; +} +/* Release the lock left held by a successful shm_lock_step_state(). + * Both arguments are currently ignored. + */ +void +shm_unlock_step_state(uint32_t jobid, uint32_t stepid) +{ + /* May support individual job locks in the future, so we + * keep the arguments above + */ + _shm_unlock(); +} + +/* Store new io and response addresses for a step and raise its + * io_update flag. While the flag is still set from an earlier update the + * call fails with errno EAGAIN; an unknown step fails with errno ESRCH. + * Returns SLURM_SUCCESS or SLURM_FAILURE. + */ +int +shm_update_step_addrs(uint32_t jobid, uint32_t stepid, + slurm_addr *ioaddr, slurm_addr *respaddr) +{ + int i, retval = SLURM_SUCCESS; + _shm_lock(); + if ((i = _shm_find_step(jobid, stepid)) >= 0) { + job_step_t *s = &slurmd_shm->step[i]; + + /* Only allow one addr update at a time */ + if (!s->io_update) { + s->ioaddr = *ioaddr; + s->respaddr = *respaddr; + s->io_update = true; + } else { + slurm_seterrno(EAGAIN); + retval = SLURM_FAILURE; + } + + } else { + slurm_seterrno(ESRCH); + retval = SLURM_FAILURE; + } + _shm_unlock(); + return retval; +} +/* Copy the stored addresses of a step into *ioaddr and *respaddr and + * clear its io_update flag, all under the shm lock. + * Returns SLURM_FAILURE with errno ESRCH if the step is not found. + */ +int +shm_step_addrs(uint32_t jobid, uint32_t stepid, + slurm_addr *ioaddr, slurm_addr *respaddr) +{ + int i, retval = SLURM_SUCCESS; + xassert(ioaddr != NULL); + xassert(respaddr != NULL); + _shm_lock(); + if ((i = _shm_find_step(jobid, stepid)) >= 0) { + job_step_t *s = &slurmd_shm->step[i]; + *ioaddr = s->ioaddr; + *respaddr = s->respaddr; + s->io_update = false; + } else { + slurm_seterrno(ESRCH); + retval = SLURM_FAILURE; + } + _shm_unlock(); + return retval; +} +/* Store newlim as the time limit of the matching step. + * Returns SLURM_FAILURE with errno ESRCH if the step is not found. + */ +int +shm_update_step_timelimit(uint32_t jobid, uint32_t stepid, time_t newlim) +{ + int i, retval = SLURM_SUCCESS; + _shm_lock(); + if ((i = _shm_find_step(jobid, stepid)) >= 0) + slurmd_shm->step[i].timelimit = newlim; + else { + 
slurm_seterrno(ESRCH);
+		retval = SLURM_FAILURE;
+	}
+	_shm_unlock();
+	return retval;
+}
+
+/*
+ * Return the time limit of step jobid,stepid, or (time_t) SLURM_FAILURE
+ * with errno ESRCH if the step is not found.
+ */
+time_t
+shm_step_timelimit(uint32_t jobid, uint32_t stepid)
+{
+	int i;
+	time_t timelimit;
+	_shm_lock();
+	if ((i = _shm_find_step(jobid, stepid)) >= 0)
+		timelimit = slurmd_shm->step[i].timelimit;
+	else {
+		slurm_seterrno(ESRCH);
+		timelimit = (time_t) SLURM_FAILURE;
+	}
+	_shm_unlock();
+	return timelimit;
+}
+
+/*
+ * Return the slot index of the in-use step jobid,stepid, or -1.
+ * The caller must hold the shm lock.
+ *
+ * Unused slots are skipped: they are zero-filled, so without the state
+ * check a lookup of job 0, step 0 would match an empty slot.
+ */
+static int
+_shm_find_step(uint32_t jobid, uint32_t stepid)
+{
+	int i;
+	for (i = 0; i < MAX_JOB_STEPS; i++) {
+		job_step_t *s = &slurmd_shm->step[i];
+		if (s->state == SLURMD_JOB_UNUSED)
+			continue;
+		if (s->jobid == jobid && s->stepid == stepid)
+			return i;
+	}
+	return -1;
+}
+
+/*
+ * Copy `task' into a free task slot and link it to step jobid,stepid.
+ *
+ * Fails through slurm_seterrno_ret() with:
+ *   ESRCH:  no such step
+ *   EEXIST: the step already has a task with this id
+ *   ENOMEM: no free task slot
+ */
+int
+shm_add_task(uint32_t jobid, uint32_t stepid, task_t *task)
+{
+	int i;
+	job_step_t *s;
+	task_t *t;
+	xassert(task != NULL);
+	_shm_lock();
+	if ((i = _shm_find_step(jobid, stepid)) < 0) {
+		_shm_unlock();
+		slurm_seterrno_ret(ESRCH);
+	}
+	s = &slurmd_shm->step[i];
+	if (_shm_find_task_in_step(s, task->id)) {
+		_shm_unlock();
+		slurm_seterrno_ret(EEXIST);
+	}
+	if (!(t = _shm_alloc_task())) {
+		_shm_unlock();
+		slurm_seterrno_ret(ENOMEM);
+	}
+	_shm_task_copy(t, task);
+	/* the copy above also overwrote `used' with the caller's value;
+	 * the slot must be marked taken or _shm_alloc_task() hands it
+	 * out again */
+	t->used = true;
+	_shm_prepend_task_to_step(s, t);
+	_shm_unlock();
+	return 0;
+}
+
+/* Push `task' on the front of the step's task list and point it back
+ * at the step.
+ */
+static void
+_shm_prepend_task_to_step(job_step_t *s, task_t *task)
+{
+	task->next = s->task_list;
+	s->task_list = task;
+	task->job_step = s;
+}
+
+/*
+ * Return the in-use task of step `s' whose local id is `taskid', or
+ * NULL if the step has no such task.
+ */
+static task_t *
+_shm_find_task_in_step(job_step_t *s, int taskid)
+{
+	task_t *t;
+	for (t = s->task_list; t; t = t->next) {
+		if (t->used && t->id == taskid)
+			return t;
+	}
+	return NULL;
+}
+
+/* Return the first task slot not marked used, or NULL if all are taken.
+ * The slot is not marked used here.
+ */
+static task_t *
+_shm_alloc_task(void)
+{
+	int i;
+	for (i = 0; i < MAX_TASKS; i++) {
+		if (!slurmd_shm->task[i].used)
+			return &slurmd_shm->task[i];
+	}
+	return NULL;
+}
+
+/* Copy *from over *to and clear the copy's list links. */
+static void
+_shm_task_copy(task_t *to, task_t *from)
+{
+	*to = *from;
+	/* next and step are not valid for copying */
+	to->next = NULL;
+	to->job_step = NULL;
+}
+
+static void
+_shm_step_copy(job_step_t *to, job_step_t *from)
+{
+	task_t *t = NULL;
+	if (to->task_list)
+
t = to->task_list; + *to = *from; + to->state = SLURMD_JOB_ALLOCATED; + to->task_list = t; /* addition of tasks is another step */ +} + +static void +_shm_clear_task(task_t *t) +{ + memset(t, 0, sizeof(*t)); +} + +static void +_shm_clear_step(job_step_t *s) +{ + task_t *p, *t = s->task_list; + do { + p = t->next; + _shm_clear_task(t); + } while ((t = p)); + + memset(s, 0, sizeof(*s)); +} + + +static int +_shm_create() +{ + int oflags = IPC_CREAT | IPC_EXCL | 0600; + key_t key = ftok(".", 'a'); + + if ((shmid = shmget(key, sizeof(slurmd_shm_t), oflags)) < 0) { + if ((shmid = shmget(key, sizeof(slurmd_shm_t), 0600)) < 0) + error("shmget: %m"); + return SLURM_ERROR; + } + + slurmd_shm = shmat(shmid, NULL, 0); + if (slurmd_shm == (void *)-1 || slurmd_shm == NULL) { + error("shmat: %m"); + return SLURM_ERROR; + } + + _shm_initialize(); + + return 1; +} + +static int +_shm_attach() +{ + int oflags = 0; + key_t key = ftok(".", 'a'); + + if ((shmid = shmget(key, sizeof(slurmd_shm_t), oflags)) < 0) + fatal("shm_attach: %m"); + + slurmd_shm = shmat(shmid, NULL, 0); + if (slurmd_shm == (void *)-1 || !slurmd_shm) + fatal("shmat: %m"); + + return 1; +} + +/* + * Create shared memory region if it doesn't exist, if it does exist, + * reinitialize it. 
+ * + */ +static int +_shm_new() +{ + if ((_shm_create() < 0) && (_shm_attach() < 0)) { + error("shm_attach: %m"); + return SLURM_FAILURE; + } + _shm_initialize(); + slurmd_shm->users = 1; /* this process is the only user so far */ + _shm_unlock(); /* the lock is opened with an initial value of 0 in + * _shm_lock_and_initialize(), so this post is the + * first time it becomes available */ + return SLURM_SUCCESS; +} +/* Open the already existing lock, attach to the existing segment and + * count this process as one more user of it. + * Returns SLURM_FAILURE if the semaphore cannot be opened or if the + * version stored in the segment differs from SHM_VERSION. + */ +static int +_shm_reopen() +{ + int retval = SLURM_SUCCESS; + + if ((shm_lock = _sem_open(SHM_LOCKNAME, 0)) == SEM_FAILED) { + error("Unable to initialize semaphore: %m"); + return SLURM_FAILURE; + } + + /* Attach to shared memory region */ + _shm_attach(); + + /* Lock and unlock semaphore to ensure data is initialized */ + _shm_lock(); + if (slurmd_shm->version != SHM_VERSION) { + error("shm_init: Wrong version in shared memory"); + retval = SLURM_FAILURE; + } else + slurmd_shm->users++; + _shm_unlock(); + + return retval; +} + + +/* get and initialize, if necessary, the shm semaphore + * if lock did not exist, assume we need to initialize shared region + */ +static int +_shm_lock_and_initialize() +{ + if (slurmd_shm && slurmd_shm->version == SHM_VERSION) { + /* we've already opened shared memory */ + _shm_lock(); + slurmd_shm->users++; + _shm_unlock(); + return SLURM_SUCCESS; + } + + shm_lock = _sem_open(SHM_LOCKNAME, O_CREAT|O_EXCL, S_IRUSR|S_IWUSR, 0); + + if (shm_lock != SEM_FAILED) /* lock didn't exist. Create shmem */ + return _shm_new(); + else /* lock exists. 
Attach to shared memory */ + return _shm_reopen(); +} +/* Acquire the shm lock. sem_wait() is retried for as long as it fails + * with EINTR; any other failure is fatal. + */ +static void +_shm_lock() +{ + restart: + if (sem_wait(shm_lock) == -1) { + if (errno == EINTR) + goto restart; + fatal("_shm_lock: %m"); + } + return; +} +/* Release the shm lock. sem_post() is retried for as long as it fails + * with EINTR; any other failure is fatal. + */ +static void +_shm_unlock() +{ + restart: + if (sem_post(shm_lock) == -1) { + if (errno == EINTR) + goto restart; + fatal("_shm_unlock: %m"); + } + return; +} diff --git a/src/slurmd/shm.h b/src/slurmd/shm.h new file mode 100644 index 00000000000..74eb5788c0f --- /dev/null +++ b/src/slurmd/shm.h @@ -0,0 +1,229 @@ +/*****************************************************************************\ + * src/slurmd/shm.h - shared memory routines for slurmd + * $Id$ + ***************************************************************************** + * Copyright (C) 2002 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Mark Grondona <mgrondona@llnl.gov>. + * UCRL-CODE-2002-040. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+\*****************************************************************************/ +#ifndef _SHM_H +#define _SHM_H + +#if HAVE_CONFIG_H +# include <config.h> +#endif + +#if HAVE_INTTYPES_H +# include <inttypes.h> +#else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +#endif /* HAVE_INTTYPES_H */ + +#if HAVE_SYS_TYPES_H +# include <sys/types.h> +#endif + +#if HAVE_UNISTD_H +# include <unistd.h> +#endif + +#include <src/common/slurm_protocol_api.h> + +#include <src/slurmd/job.h> + +/* local job states */ +typedef enum job_state { + SLURMD_JOB_UNUSED = 0, + SLURMD_JOB_ALLOCATED, + SLURMD_JOB_STARTING, + SLURMD_JOB_STARTED, + SLURMD_JOB_ENDING, + SLURMD_JOB_COMPLETE +} job_state_t; + +typedef struct task task_t; +typedef struct job_step job_step_t; + +struct task { + int used; + int id; /* local task id */ + int global_id; /* global task id */ + pid_t pid; /* pid of user process */ + pid_t ppid; /* parent pid of user process */ + pid_t mpid; /* manager pid of this task */ + /* reverse pointer back to controlling job step */ + job_step_t *job_step; + task_t *next; /* next task in this job step */ +}; + +struct job_step { + uid_t uid; + uint32_t jobid; + uint32_t stepid; + uint32_t sw_id; /* Switch/Interconnect specific id */ + int ntasks; /* number of tasks in this job */ + pid_t sid; /* Job session id */ + + int io_update; /* srun address has been updated */ + slurm_addr respaddr; /* Addr to send messages to srun on */ + slurm_addr ioaddr; /* Addr to connect to initialize IO */ + srun_key_t key; /* last key from srun client */ + + + job_state_t state; /* Job step status */ + time_t timelimit; /* job time limit */ + task_t *task_list; /* list of this step's tasks */ +}; + + +/* + * Attach to and initialize slurmd shared memory segment + * Returns -1 and sets errno on failure. + */ +int shm_init(void); + +/* + * Release slurmd shared memory segment. Deallocates segment if no + * other processes are currently attached. 
+ */ +int shm_fini(void); + +/* + * Force cleanup of any stale shared memory locks + */ +void shm_cleanup(void); + +/* + * Insert a new step into shared memory, the step passed in by address + * should be filled in with the appropriate values, excepting the + * task_list pointer (see shm_add_task() below to add tasks to a job step) + * The step passed in is neither modified nor freed. The step information is + * *copied* into shared memory + * + * Failure modes are: + * EEXIST: A step already exists in shared memory with that jobid,stepid + * ENOSPC: No step slots remain in shared memory + */ +int shm_insert_step(job_step_t *step); + +/* + * Delete the job step record from shared memory, if it exists + * + * Returns SLURM_FAILURE and sets errno if job step cannot be deleted + * ESRCH: Job step with jobid,stepid not found + */ +int shm_delete_step(uint32_t jobid, uint32_t stepid); + +/* + * Return a *copy* of the job step with jobid,stepid from shared + * memory. The copy (including its task list) must be freed with + * shm_free_step(), not a bare xfree() + * + * Returns NULL if job step is not found in shared memory. 
+ */ +job_step_t *shm_get_step(uint32_t jobid, uint32_t stepid); + +/* + * Update an existing job step to match "step" + * returns SLURM_FAILURE if job step cannot be found + */ +int shm_update_step(job_step_t *step); + +/* + * Deallocate memory used by step struct returned from shm_get_step() + */ +void shm_free_step(job_step_t *step); + +/* + * Lock shared memory and send `signal' to all tasks in step + */ +int shm_signal_step(uint32_t jobid, uint32_t stepid, uint32_t signal); + +/* + * Add a task record to a job step in memory + * + * Returns SLURM_FAILURE and following errnos if not successful: + * ESRCH: Cannot find job step + * EEXIST: A task with that id is already associated with job step + * ENOMEM: No more task slots available in shared memory + */ +int shm_add_task(uint32_t jobid, uint32_t stepid, task_t *task); + + +/* + * update job step session id + */ +int shm_update_step_sid(uint32_t jobid, uint32_t stepid, int sid); + + +/* + * update job step state + */ +int shm_update_step_state(uint32_t jobid, uint32_t stepid, job_state_t state); + + +/* + * lock and return _pointer_ to step state in shared memory + * Caller must subsequently call shm_unlock_step_state() or shared memory + * will be locked for everyone else. + * (Note: This function is different from most others in this module as + * it returns a pointer into the shared memory region instead of a copy + * of the data. Callers should remain cognizant of this fact. 
) + */ +job_state_t *shm_lock_step_state(uint32_t jobid, uint32_t stepid); + +/* unlock job step state + */ +void shm_unlock_step_state(uint32_t jobid, uint32_t stepid); + +/* + * update job step io_addr + */ +int shm_update_step_addrs(uint32_t jobid, uint32_t stepid, + slurm_addr *ioaddr, slurm_addr *respaddr); + + +/* + * Return true if ioaddr was updated + */ +bool shm_addr_updated(uint32_t jobid, uint32_t stepid); + + +/* + * Atomically return current ioaddr and reset io_update field to false + */ +int shm_step_addrs(uint32_t jobid, uint32_t stepid, + slurm_addr *ioaddr, slurm_addr *respaddr); + + +/* + * update job step timelimit + */ +int shm_update_step_timelimit(uint32_t jobid, uint32_t stepid, time_t newlim); + + +/* + * Return job step timelimit + */ +time_t shm_step_timelimit(uint32_t jobid, uint32_t stepid); + +#endif /* !_SHM_H */ diff --git a/src/slurmd/shmem_struct.c b/src/slurmd/shmem_struct.c deleted file mode 100644 index fd7de3f644f..00000000000 --- a/src/slurmd/shmem_struct.c +++ /dev/null @@ -1,263 +0,0 @@ -/*****************************************************************************\ - * shmem_struct.c - shared memory support functions - ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Kevin Tew <tew1@llnl.gov> et. al. - * UCRL-CODE-2002-040. - * - * This file is part of SLURM, a resource management program. - * For details, see <http://www.llnl.gov/linux/slurm/>. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. -\*****************************************************************************/ - -#include <stdlib.h> -#include <assert.h> -#include <pthread.h> -#include <sys/types.h> -#include <sys/shm.h> -#include <string.h> - -#include <src/common/slurm_errno.h> -#include <src/common/log.h> -#include <src/common/slurm_protocol_api.h> -#include <src/slurmd/shmem_struct.h> - -extern int errno; -static int shmem_gid; -#define SHMEM_PERMS 0600 - -/* function prototypes */ -static void clear_task(task_t * task); -static void clear_job_step(job_step_t * job_step); -static int prepend_task(slurmd_shmem_t * shmem, job_step_t * job_step, - task_t * task); - -/* gets a pointer to the slurmd shared memory segment - * if it doesn't exist, one is created - * returns - a void * pointer to the shared memory segment - */ -void *get_shmem() -{ - void *shmem_addr; - int key = ftok(".", 'a'); - - assert(key != SLURM_ERROR); - - shmem_gid = shmget(key, sizeof(slurmd_shmem_t), IPC_CREAT | SHMEM_PERMS); - - debug("shmget id = %i ", shmem_gid); - if (shmem_gid == SLURM_ERROR) - fatal("can't get shared memory segment: %m "); - - shmem_addr = shmat(shmem_gid, NULL, 0); - if (shmem_addr == (void *)SLURM_ERROR) - fatal("Unable to attach to shared memory: %m"); - - return shmem_addr; -} - -int rel_shmem(void *shmem_addr) -{ - if ((shmdt(shmem_addr)) < 0) - error("unable to release shared memory: %m"); - return shmctl(shmem_gid, IPC_RMID, NULL); -} - -/* initializes the shared memory segment, this should only be called - * once by the master slurmd after 
the initial get_shmem call. - * - * shmem - pointer to the shared memory segment returned by get_shmem ( ) - */ -void init_shmem(slurmd_shmem_t * shmem) -{ - int i; - - /* set everthing to zero */ - memset(shmem, 0, sizeof(slurmd_shmem_t)); - - /* sanity check */ - /* set all task objects to unused */ - for (i = 0; i < MAX_TASKS; i++) { - clear_task(&shmem->tasks[i]); - } - - /* set all job_step objects to unused */ - for (i = 0; i < MAX_JOB_STEPS; i++) { - clear_job_step(&shmem->job_steps[i]); - } - pthread_mutex_init(&shmem->mutex, NULL); -} - -/* runs through the job_step array looking for a unused job_step. - * upon finding one the passed src job_step is copied into the shared mem job_step array - * shmem - pointer to the shared memory segment returned by get_shmem ( ) - * job_step_t - src job_step to be added to the shared memory list - * returns - the address of the assigned job_step in the shared mem job_step array or - * the function dies on a fatal log call if the array is full - */ -job_step_t *alloc_job_step(slurmd_shmem_t * shmem, int job_id, - int job_step_id) -{ - int i; - pthread_mutex_lock(&shmem->mutex); - for (i = 0; i < MAX_JOB_STEPS; i++) { - if (shmem->job_steps[i].used == false) { - clear_job_step(&shmem->job_steps[i]); - shmem->job_steps[i].used = true; - shmem->job_steps[i].job_id = job_id; - shmem->job_steps[i].job_step_id = job_step_id; - pthread_mutex_unlock(&shmem->mutex); - return &shmem->job_steps[i]; - } - } - pthread_mutex_unlock(&shmem->mutex); - error("No available job_step slots in shmem segment"); - slurm_seterrno(ESLURMD_NO_AVAILABLE_JOB_STEP_SLOTS_IN_SHMEM); - - return (void *) SLURM_ERROR; -} - -/* runs through the task array looking for a unused task. 
- * upon finding one the passed src task is copied into the shared mem task array - * shmem - pointer to the shared memory segment returned by get_shmem ( ) - * new_task - src task to be added to the shared memory list - * returns - the address of the assigned task in the shared mem task array - * the function dies on a fatal log call if the array is full - */ -task_t *alloc_task(slurmd_shmem_t * shmem, job_step_t * job_step) -{ - int i; - pthread_mutex_lock(&shmem->mutex); - for (i = 0; i < MAX_TASKS; i++) { - if (shmem->tasks[i].used == false) { - clear_task(&shmem->tasks[i]); - shmem->tasks[i].used = true; - prepend_task(shmem, job_step, &shmem->tasks[i]); - pthread_mutex_unlock(&shmem->mutex); - return &shmem->tasks[i]; - } - } - pthread_mutex_unlock(&shmem->mutex); - error("No available task slots in shmem segment"); - slurm_seterrno(ESLURMD_NO_AVAILABLE_TASK_SLOTS_IN_SHMEM); - return (void *) SLURM_ERROR; -} - - -/* prepends a new task onto the front of a list of tasks assocuated with a job_step. 
- * it calls add_task which copies the passed task into a task array in shared memoery - * sets pointers from the task to the corresponding job_step array - * note if the task array is full, the add_task function will assert and exiti - * shmem - pointer to the shared memory segment returned by get_shmem ( ) - * job_step - job_step to receive the new task - * task - task to be prepended - */ -static int prepend_task(slurmd_shmem_t * shmem, job_step_t * job_step, - task_t * task) -{ - /* newtask next pointer gets head of the jobstep task list */ - task->next = job_step->head_task; - - /* newtask pointer becomes the new head of the jobstep task list */ - job_step->head_task = task; - - /* set back pointer from task to job_step */ - task->job_step = job_step; - - return SLURM_SUCCESS; -} - -/* clears a job_step and associated task list for future use */ -int deallocate_job_step(job_step_t * jobstep) -{ - task_t *task_ptr = jobstep->head_task; - task_t *task_temp_ptr; - while (task_ptr != NULL) { - task_temp_ptr = task_ptr->next; - clear_task(task_ptr); - task_ptr = task_temp_ptr; - } - clear_job_step(jobstep); - return SLURM_SUCCESS; -} - -/* clears a task array member for future use - */ -static void clear_task(task_t * task) -{ - task->used = false; - task->job_step = NULL; - task->next = NULL; -} - -/* clears a job_step array memeber for future use - */ -static void clear_job_step(job_step_t * job_step) -{ - job_step->used = false; - job_step->head_task = NULL; -} - -/* api call for DPCS to return a job_id given a session_id - */ -int find_job_id_for_session(slurmd_shmem_t * shmem, int session_id) -{ - int i; - pthread_mutex_lock(&shmem->mutex); - for (i = 0; i < MAX_JOB_STEPS; i++) { - if (shmem->job_steps[i].used == true) { - if (shmem->job_steps[i].session_id == session_id) - - pthread_mutex_unlock(&shmem->mutex); - return shmem->job_steps[i].job_id; - } - } - pthread_mutex_unlock(&shmem->mutex); - debug("No job_id found for session_id %i", session_id); - return 
SLURM_FAILURE; -} - -job_step_t *find_job_step(slurmd_shmem_t * shmem, int job_id, - int job_step_id) -{ - int i; - pthread_mutex_lock(&shmem->mutex); - for (i = 0; i < MAX_JOB_STEPS; i++) { - if (shmem->job_steps[i].used == true - && shmem->job_steps[i].job_id == job_id - && shmem->job_steps[i].job_step_id == job_step_id) { - debug3("found step %d.%d in slot %d", - job_id, job_step_id, i); - pthread_mutex_unlock(&shmem->mutex); - return &shmem->job_steps[i]; - } - } - debug3("find_job_step: unable to find %d.%d", job_id, job_step_id); - pthread_mutex_unlock(&shmem->mutex); - return (void *) SLURM_ERROR; -} - -task_t *find_task(job_step_t * job_step_ptr, int task_id) -{ - task_t *task_ptr = job_step_ptr->head_task; - while (task_ptr != NULL) { - if (task_ptr->task_id == task_id) { - return task_ptr; - } - } - return (void *) SLURM_ERROR; -} diff --git a/src/slurmd/shmem_struct.h b/src/slurmd/shmem_struct.h deleted file mode 100644 index 7f82be6f7f8..00000000000 --- a/src/slurmd/shmem_struct.h +++ /dev/null @@ -1,82 +0,0 @@ -#ifndef _SHMEM_STRUCT_H -#define _SHMEM_STRUCT_H - -#include <src/slurmd/task_mgr.h> - -#define MAX_TASKS 128 -#define MAX_JOB_STEPS 128 - -typedef struct job_step job_step_t; -typedef struct task task_t; -/* represents a task running on a node */ -struct task { - uint32_t task_id; /* srun assigned globally unique taskid */ - task_start_t task_start; /* task_start_message see task_mgr.h */ - - /* boolean type that is marked when this record is used */ - char used; - - job_step_t *job_step; /* reverse pointer to the cntrllng job_step */ - task_t *next; /* next task pointer in the job_step */ -}; - -/* represents a job_step consisting of a list of tasks */ -struct job_step { - uint32_t job_id; /* slurmctld assigned jobid */ - uint32_t job_step_id; /* slurmctld assigned job_step id */ - uint32_t session_id; - - /* boolean type that is marked when this record is used */ - char used; - - task_t *head_task; /* fist task in the job_step */ -}; - -/* 
shared memory structure. This structure is overlayed on top of the allocated shared ram */ -typedef struct slurmd_shmem { - pthread_mutex_t mutex; /* mutex to protect shared ram */ - task_t tasks[MAX_TASKS]; /* array of task objects */ - job_step_t job_steps[MAX_JOB_STEPS]; /* array of job_step objects */ -} slurmd_shmem_t; - -/* gets shared memory segment, allocating it if needed - */ -void *get_shmem(); - -/* should only be called once after allocation of shared ram - * Marks all task and job_step objects as unused - */ -void init_shmem(slurmd_shmem_t * shmem); - -/* detaches from shared ram and deallocates shared ram if no other - * attachments exist - */ -int rel_shmem(void *shmem_addr); - -/* allocates job step from shared memory array - */ -job_step_t *alloc_job_step(slurmd_shmem_t * shmem, int job_id, - int job_step_id); - -/* allocates task from shared memory array - */ -task_t *alloc_task(slurmd_shmem_t * shmem, job_step_t * job_step); - -/* api call for DPCS to return a job_id given a session_id - */ -int find_job_id_for_session(slurmd_shmem_t * shmem, int session_id); - -/* clears a job_step and associated task list for future use - */ -int deallocate_job_step(job_step_t * jobstep); - -/* find a particular job_step - */ -job_step_t *find_job_step(slurmd_shmem_t * shmem, int job_id, - int job_step_id); - -/* find a particular task - */ -task_t *find_task(job_step_t * job_step, int task_id); - -#endif /* _SHMEM_STRUCT_H */ diff --git a/src/slurmd/slurmd.c b/src/slurmd/slurmd.c index f05d0aa3608..6c1cb963f25 100644 --- a/src/slurmd/slurmd.c +++ b/src/slurmd/slurmd.c @@ -20,7 +20,7 @@ * details. * * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., + * with SLURM; if not, write to the Free Software Foundation, Inc., * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
\*****************************************************************************/ @@ -44,14 +44,14 @@ #include <src/common/xstring.h> #include <src/common/list.h> #include <src/common/slurm_protocol_api.h> -#include <src/common/util_signals.h> #include <src/common/log.h> +#include <src/common/fd.h> #include <src/slurmd/batch_mgr.h> #include <src/slurmd/get_mach_stat.h> #include <src/slurmd/slurmd.h> -#include <src/slurmd/task_mgr.h> -#include <src/slurmd/shmem_struct.h> +#include <src/slurmd/mgr.h> +#include <src/slurmd/shm.h> #include <src/common/signature_utils.h> #include <src/common/credential_utils.h> @@ -64,6 +64,7 @@ typedef struct slurmd_config { log_options_t log_opts; char *slurm_conf; int daemonize; + slurm_fd serverfd; } slurmd_config_t; typedef struct connection_arg { @@ -73,7 +74,6 @@ typedef struct connection_arg { time_t init_time; pid_t slurmd_pid; time_t shutdown_time = (time_t) 0; -slurmd_shmem_t *shmem_seg; char hostname[MAX_NAME_LEN]; slurm_ssl_key_ctx_t verify_ctx; List credential_state_list; @@ -138,13 +138,14 @@ int main(int argc, char *argv[]) /* send registration message to slurmctld */ send_node_registration_status_msg(); - /* block SIGHUP, SIGTERM, and SIGINT in all threads */ - /* block_some_signals(); */ - /* create attached thread to handle signals */ - if (pthread_create(&sigthr, NULL, &slurmd_handle_signals, - (void *)NULL) != 0) - fatal("pthread_create: %m"); + { /* XXX fix this properly */ + pthread_attr_t attr; + pthread_attr_init(&attr); + if (pthread_create(&sigthr, &attr, &slurmd_handle_signals, + (void *)NULL) != 0) + fatal("pthread_create: %m"); + } slurmd_msg_engine((void *)NULL); @@ -201,8 +202,7 @@ void *slurmd_handle_signals(void *args) int slurmd_init() { slurmd_pid = getpid(); - shmem_seg = get_shmem(); - init_shmem(shmem_seg); + shm_init(); slurm_ssl_init(); slurm_init_verifier(&verify_ctx, public_cert_filename()); initialize_credential_state_list(&credential_state_list); @@ -262,7 +262,7 @@ static char 
*public_cert_filename() int slurmd_destroy() { destroy_credential_state_list(credential_state_list); - rel_shmem(shmem_seg); + shm_fini(); slurm_destroy_ssl_key_ctx(&verify_ctx); slurm_ssl_destroy(); return SLURM_SUCCESS; @@ -296,15 +296,20 @@ fill_in_node_registration_status_msg(slurm_node_registration_status_msg_t * /* fill in data structure */ node_reg_msg->timestamp = time(NULL); node_reg_msg->node_name = xstrdup(hostname); + get_procs(&node_reg_msg->cpus); get_memory(&node_reg_msg->real_memory_size); get_tmp_disk(&node_reg_msg->temporary_disk_space); -/* FIXME: Need to set correct count of currently running job stepss and their ID's below */ -/* This is needed to more reliably recover from restarts of daemons */ + + /* FIXME: Need to set correct count of currently running job + * steps and their ID's below */ + /* This is needed to more reliably recover from restarts of daemons */ + node_reg_msg->job_count = 0; node_reg_msg->job_id = NULL; node_reg_msg->step_id = NULL; - info("Configuration name=%s cpus=%u real_memory=%u, tmp_disk=%u, job_count=%u", + info("Configuration name=%s cpus=%u real_memory=%u, " + "tmp_disk=%u, job_count=%u", hostname, node_reg_msg->cpus, node_reg_msg->real_memory_size, node_reg_msg->temporary_disk_space, @@ -330,6 +335,9 @@ void *slurmd_msg_engine(void *args) == SLURM_SOCKET_ERROR) fatal("slurm_init_msg_engine_port: %m"); + fd_set_close_on_exec((int) sockfd); + slurmd_conf.serverfd = sockfd; + if ((rc = pthread_attr_init(&thread_attr))) error("pthread_attr_init returned %d", rc); @@ -343,7 +351,8 @@ void *slurmd_msg_engine(void *args) xmalloc(sizeof(connection_arg_t)); /* accept needed for stream implementation - * is a no-op in mongo implementation that just passes sockfd to newsockfd + * is a no-op in mongo implementation that just passes + * sockfd to newsockfd */ if ((newsockfd = slurm_accept_msg_conn(sockfd, &cli_addr)) == SLURM_SOCKET_ERROR) { @@ -351,14 +360,15 @@ void *slurmd_msg_engine(void *args) continue; } - /* receive 
message call that must occur before thread spawn because in message - * implementation their is no connection and the message is the sign of a new connection */ + /* receive message call that must occur before thread + * spawn because in message implementation their is no + * connection and the message is the sign of a new connection + */ conn_arg->newsockfd = newsockfd; if (shutdown_time) { service_connection((void *) conn_arg); - pthread_exit((void *) 0); - + break; } if ((rc = pthread_create(&request_thread_id, @@ -369,6 +379,7 @@ void *slurmd_msg_engine(void *args) error("slurmd_msg_engine: pthread_create: %m"); service_connection((void *) conn_arg); } + } slurm_shutdown_msg_engine(sockfd); return NULL; @@ -455,35 +466,60 @@ void slurmd_req(slurm_msg_t * msg) /* rpc methods */ /******************************/ +static int _launch_tasks(launch_tasks_request_msg_t *req) +{ + pid_t pid; + + switch ((pid = fork())) { + case -1: + error("launch_tasks: fork: %m"); + return SLURM_ERROR; + break; + case 0: /* child runs job */ + slurm_shutdown_msg_engine(slurmd_conf.serverfd); + destroy_credential_state_list(credential_state_list); + slurm_destroy_ssl_key_ctx(&verify_ctx); + slurm_ssl_destroy(); + mgr_launch_tasks(req); + break; + default: + verbose("created process %ld for job %d.%d", + pid, req->job_id, req->job_step_id); + break; + } + + return SLURM_SUCCESS; +} + /* Launches tasks */ void slurm_rpc_launch_tasks(slurm_msg_t * msg) { /* init */ int rc = SLURM_SUCCESS; clock_t start_time; - launch_tasks_request_msg_t *task_desc = - (launch_tasks_request_msg_t *) msg->data; slurm_msg_t resp_msg; launch_tasks_response_msg_t task_resp; + launch_tasks_request_msg_t *req = + (launch_tasks_request_msg_t *) msg->data; start_time = clock(); info("slurmd_req: launch tasks message received"); - - slurm_print_launch_task_msg(task_desc); + slurm_print_launch_task_msg(req); /* do RPC call */ /* test credentials */ - /* rc = */ verify_credential(&verify_ctx, task_desc->credential, + 
/* rc = */ verify_credential(&verify_ctx, req->credential, credential_state_list); + if (rc == SLURM_SUCCESS) + rc = _launch_tasks(req); task_resp.node_name = hostname; - task_resp.srun_node_id = task_desc->srun_node_id; + task_resp.srun_node_id = req->srun_node_id; - resp_msg.address = task_desc->response_addr; + resp_msg.address = req->response_addr; resp_msg.data = &task_resp; resp_msg.msg_type = RESPONSE_LAUNCH_TASKS; - task_resp.return_code = rc; /* return result */ @@ -494,7 +530,6 @@ void slurm_rpc_launch_tasks(slurm_msg_t * msg) info("slurmd_req: launch authorization completed " "successfully, time=%ld", (long) (clock() - start_time)); slurm_send_only_node_msg(&resp_msg); - launch_tasks(task_desc); } } @@ -507,23 +542,17 @@ void slurm_rpc_ping(slurm_msg_t * msg) /* Kills Launched Tasks */ void slurm_rpc_kill_tasks(slurm_msg_t * msg) { - /* init */ - int error_code; - clock_t start_time; - kill_tasks_msg_t *kill_tasks_msg = (kill_tasks_msg_t *) msg->data; - - start_time = clock(); + int rc; + kill_tasks_msg_t *req = (kill_tasks_msg_t *) msg->data; - /* do RPC call */ - error_code = kill_tasks(kill_tasks_msg); + rc = shm_signal_step(req->job_id, req->job_step_id, req->signal); /* return result */ - if (error_code) { - error("slurmd_req: kill tasks error %d, time=%ld", - error_code, (long) (clock() - start_time)); - slurm_send_rc_msg(msg, error_code); + if (rc) { + error("slurmd_req: kill tasks error %d", rc); + slurm_send_rc_msg(msg, rc); } else { - info("slurmd_req: kill tasks completed successfully, time=%ld", (long) (clock() - start_time)); + verbose("slurmd_req: kill tasks completed"); slurm_send_rc_msg(msg, SLURM_SUCCESS); } } @@ -539,7 +568,7 @@ void slurm_rpc_reattach_tasks_streams(slurm_msg_t * msg) start_time = clock(); /* do RPC call */ - error_code = reattach_tasks_streams(reattach_tasks_steams_msg); + /* error_code = reattach_tasks_streams(reattach_tasks_steams_msg);*/ /* return result */ if (error_code) { @@ -556,22 +585,19 @@ void 
slurm_rpc_reattach_tasks_streams(slurm_msg_t * msg) void slurm_rpc_revoke_credential(slurm_msg_t * msg) { /* init */ - int error_code = SLURM_SUCCESS; + int rc = SLURM_SUCCESS; clock_t start_time; - revoke_credential_msg_t *revoke_credential_msg = - (revoke_credential_msg_t *) msg->data; + revoke_credential_msg_t *req = (revoke_credential_msg_t *) msg->data; start_time = clock(); /* do RPC call */ - error_code = - revoke_credential(revoke_credential_msg, - credential_state_list); + rc = revoke_credential(req, credential_state_list); /* return result */ - if (error_code) { + if (rc) { error("slurmd_req: error %m errno %d, time=%ld", - error_code, (long) (clock() - start_time)); + rc, (long) (clock() - start_time)); slurm_send_rc_msg(msg, errno); } else { info("slurmd_req: completed successfully, time=%ld", @@ -607,7 +633,7 @@ int slurmd_shutdown() return_code_msg_t *slurm_rc_msg; slurm_addr slurmd_addr; - kill_all_tasks(); + /* kill_all_tasks();*/ /* init message connection for message communication with controller */ slurm_set_addr_char(&slurmd_addr, slurm_get_slurmd_port(), @@ -647,24 +673,16 @@ int slurmd_shutdown() void slurm_rpc_launch_batch_job(slurm_msg_t * msg) { - /* init */ - int error_code = SLURM_SUCCESS; - clock_t start_time; - batch_job_launch_msg_t *batch_job_launch_msg = ( batch_job_launch_msg_t * ) msg->data ; - - start_time = clock(); + int rc; + batch_job_launch_msg_t *req = (batch_job_launch_msg_t *) msg->data ; - /* do RPC call */ - error_code = launch_batch_job(batch_job_launch_msg); + rc = SLURM_SUCCESS; /* launch_batch_job(req); */ - /* return result */ - if (error_code) { - error("slurmd_req: error %d, time=%ld", - error_code, (long) (clock() - start_time)); - slurm_send_rc_msg(msg, error_code); + if (rc) { + error("slurmd_req: error %d", rc); + slurm_send_rc_msg(msg, rc); } else { - info("slurmd_req: completed successfully, time=%ld", - (long) (clock() - start_time)); + info("slurmd_req: completed successfully"); slurm_send_rc_msg(msg, 
SLURM_SUCCESS); } } @@ -728,7 +746,7 @@ int parse_commandline_args(int argc, char **argv, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, "de:hf:l:s:", long_options, + c = getopt_long(argc, argv, "cde:hf:l:s:", long_options, &option_index); if (c == -1) break; @@ -780,6 +798,9 @@ int parse_commandline_args(int argc, char **argv, } slurmd_config->log_opts.syslog_level = errlev; break; + case 'c': + shm_cleanup(); + break; case 0: info("option %s", long_options[option_index].name); if (optarg) { @@ -797,14 +818,8 @@ int parse_commandline_args(int argc, char **argv, digit_optind = this_option_optind; info("option %c\n", c); break; - case '?': - info("?? getopt returned character code 0%o ??", - c); - break; - default: - info("?? getopt returned character code 0%o ??", - c); + info("unknown option %c", c); usage(argv[0]); exit(1); } @@ -835,7 +850,7 @@ reset_cwd(void) else { if (chdir (dir)) error ("chdir to %s error %m", dir); -debug ("chdir %s", dir); + debug ("chdir %s", dir); xfree (dir); } } diff --git a/src/slurmd/task_mgr.c b/src/slurmd/task_mgr.c deleted file mode 100644 index 410ef936bb8..00000000000 --- a/src/slurmd/task_mgr.c +++ /dev/null @@ -1,384 +0,0 @@ -/*****************************************************************************\ - * task_mgr.c - - ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Kevin Tew <tew1@llnl.gov> et. al. - * UCRL-CODE-2002-040. - * - * This file is part of SLURM, a resource management program. - * For details, see <http://www.llnl.gov/linux/slurm/>. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. -\*****************************************************************************/ - -#include <stdlib.h> -#include <sys/types.h> -#include <pwd.h> -#include <grp.h> -#include <sys/wait.h> -#include <errno.h> -#include <unistd.h> -#include <string.h> -#include <pthread.h> -#include <unistd.h> - -#include <src/common/log.h> -#include <src/common/list.h> -#include <src/common/xmalloc.h> -#include <src/common/slurm_protocol_api.h> -#include <src/common/slurm_errno.h> -#include <src/common/util_signals.h> - -#include <src/slurmd/task_mgr.h> -#include <src/slurmd/shmem_struct.h> -#include <src/slurmd/circular_buffer.h> -#include <src/slurmd/pipes.h> -#include <src/slurmd/io.h> -#include <src/slurmd/interconnect.h> - -/* global variables */ - -/* prototypes */ -int kill_task(task_t * task, int signal); -extern pid_t getsid(pid_t pid); - -int send_task_exit_msg(int task_return_code, task_start_t * task_start); - -/****************************************************************** - *task launch method call hierarchy - * - *launch_tasks() - * interconnect_init() - * fan_out_task_launch() (pthread_create) - * task_exec_thread() (fork) for task exec - * task_exec_thread() (pthread_create) for io piping - ******************************************************************/ - -int fan_out_task_launch(launch_tasks_request_msg_t * launch_msg) -{ - int i; - int session_id; - - /* shmem work - see slurmd.c shmem_seg this is probably not needed */ - slurmd_shmem_t *shmem_ptr = get_shmem(); - - /* alloc a job_step object in shmem for this 
launch_tasks request - * launch_tasks should really be named launch_job_step - */ - job_step_t *curr_job_step = - alloc_job_step(shmem_ptr, launch_msg->job_id, - launch_msg->job_step_id); - - /* task pointer that will point to shmem task structures as they - * are allocated - */ - task_t *curr_task = NULL; - - /* array of pointers used in this function to point to the - * task_start structure for each task to be launched - */ - task_start_t *task_start[launch_msg->tasks_to_launch]; - - debug3("entered fan_out_task_launch()"); - debug("msg->job_step_id = %d", launch_msg->job_step_id); - - if ((session_id = setsid()) == SLURM_ERROR) { - error("set sid failed: %m"); - if ((session_id = getsid(0)) == SLURM_ERROR) { - error("getsid also failed"); - } - } - - curr_job_step->session_id = session_id; - - - debug3("going to launch %d tasks", launch_msg->tasks_to_launch); - /* launch requested number of threads - */ - for (i = 0; i < launch_msg->tasks_to_launch; i++) { - curr_task = alloc_task(shmem_ptr, curr_job_step); - task_start[i] = &curr_task->task_start; - curr_task->task_id = launch_msg->global_task_ids[i]; - - /* fill in task_start struct */ - task_start[i]->launch_msg = launch_msg; - task_start[i]->local_task_id = i; - task_start[i]->io_streams_dest = launch_msg->streams; - - debug("going to launch task %d", i); - if (launch_task(task_start[i])) { - error("launch_task error "); - goto kill_tasks_label; - } - debug("task %i launched", i); - } - - /* wait for all the launched threads to finish - */ - wait_for_tasks(launch_msg, task_start); - - goto return_label; - - kill_tasks_label: - /* kill_launched_tasks(launch_msg, task_start, i); */ - return_label: - /* can't release if this is the same process as the main daemon ie threads - * this is needed if we use forks - * rel_shmem ( shmem_ptr ) ; */ - deallocate_job_step(curr_job_step); - return SLURM_SUCCESS; -} - - - -void *task_exec_thread(void *arg) -{ - task_start_t *task_start = (task_start_t *) arg; - 
launch_tasks_request_msg_t *launch_msg = task_start->launch_msg; - int *pipes = task_start->pipes; - int rc; - int cpid; - struct passwd *pwd; - int task_return_code; - int local_errno; - log_options_t log_opts_def = LOG_OPTS_STDERR_ONLY; - - interconnect_set_capabilities(task_start); - - /* create pipes to read child stdin, stdout, sterr */ - init_parent_pipes(task_start->pipes); - -#define FORK_ERROR -1 -#define CHILD_PROCCESS 0 - switch ((cpid = fork())) { - case FORK_ERROR: - break; - - case CHILD_PROCCESS: - /* log init stuff */ - log_init("slurmd", log_opts_def, 0, NULL); - - unblock_all_signals(); - - posix_signal_ignore(SIGTTOU); /* ignore tty output */ - posix_signal_ignore(SIGTTIN); /* ignore tty input */ - posix_signal_ignore(SIGTSTP); /* ignore user */ - - /* setup interconnect specific environment variables - */ - interconnect_env(&launch_msg->env, &launch_msg->envc, - launch_msg->srun_node_id, - launch_msg->nnodes, - launch_msg->global_task_ids[task_start->local_task_id], - launch_msg->nprocs); - - /* setup std stream pipes */ - setup_child_pipes(pipes); - - /* get passwd file info */ - if ((pwd = getpwuid(launch_msg->uid)) == NULL) { - error("user id not found in passwd file"); - _exit(SLURM_FAILURE); - } - - /* setgid and uid */ - if ((rc = setgid(pwd->pw_gid)) < 0) { - error("setgid failed: %m "); - _exit(SLURM_FAILURE); - } - - /* initgroups */ - if (( getuid() == (uid_t)0 ) && - ( initgroups(pwd->pw_name, pwd->pw_gid) ) < 0) { - error("initgroups() failed: %m"); - //_exit(SLURM_FAILURE); - } - - if ((rc = setuid(launch_msg->uid)) < 0) { - error("setuid() failed: %m"); - _exit(SLURM_FAILURE); - } - - /* run bash and cmdline */ - if ((chdir(launch_msg->cwd)) < 0) { - error("cannot chdir to `%s,' going to /tmp instead", - launch_msg->cwd); - if ((chdir("/tmp")) < 0) { - error("couldn't chdir to `/tmp' either. 
dying."); - _exit(SLURM_FAILURE); - } - } - - execve(launch_msg->argv[0], launch_msg->argv, launch_msg->env); - - /* error if execve returns - * clean up */ - error("execve(): %s: %m", launch_msg->argv[0]); - close(STDIN_FILENO); - close(STDOUT_FILENO); - close(STDERR_FILENO); - local_errno = errno; - _exit(local_errno); - break; - - default: /*parent proccess */ - debug("forked pid %ld", cpid); - task_start->exec_pid = cpid; - /* order below is very important - * deadlock can occur if you mess with it - ask me how I know :) - */ - - debug3("calling setup_parent_pipes"); - /* 1 */ setup_parent_pipes(task_start->pipes); - debug3("calling forward_io"); - /* 1.5 */ forward_io(arg); - debug3("calling waitpid(%ld)", cpid); - /* 2 */ waitpid(cpid, &task_return_code, 0); - /* 3 */ wait_on_io_threads(task_start); - - send_task_exit_msg(task_return_code, task_start); - - break; - } - return (void *) SLURM_SUCCESS; /* XXX: I think this is wrong */ -} - - -int send_task_exit_msg(int task_return_code, task_start_t * task_start) -{ - slurm_msg_t resp_msg; - task_exit_msg_t task_exit; - - /* init task_exit_message */ - task_exit.return_code = task_return_code; - task_exit.task_id = - task_start->launch_msg->global_task_ids[task_start->local_task_id]; - - /* init slurm_msg_t */ - resp_msg.address = task_start->launch_msg->response_addr; - resp_msg.data = &task_exit; - resp_msg.msg_type = MESSAGE_TASK_EXIT; - - debug("sending task exit code %d", task_return_code); - - /* send message */ - return slurm_send_only_node_msg(&resp_msg); -} - -int kill_tasks(kill_tasks_msg_t * kill_task_msg) -{ - int i = 0; - int error_code = SLURM_SUCCESS; - - /* get shmemptr - */ - slurmd_shmem_t *shmem_ptr = get_shmem(); - - task_t *task_ptr; - - /* find job step - */ - job_step_t *job_step_ptr = - find_job_step(shmem_ptr, kill_task_msg->job_id, - kill_task_msg->job_step_id); - - debug("request to kill step %d.%d with signal %d", - kill_task_msg->job_id, - kill_task_msg->job_step_id, - 
kill_task_msg->signal); - - if (job_step_ptr == (void *) SLURM_ERROR) - slurm_seterrno_ret(ESLURMD_ERROR_FINDING_JOB_STEP_IN_SHMEM); - - /* cycle through job_step and kill tasks */ - task_ptr = job_step_ptr->head_task; - - while (task_ptr != NULL) { - debug3("killing task %i of jobid %i , of job_step %i ", i, - kill_task_msg->job_id, kill_task_msg->job_step_id); - kill_task(task_ptr, kill_task_msg->signal); - task_ptr = task_ptr->next; - i++; - debug3("next task_ptr %i ", task_ptr); - } - debug3("leaving kill_tasks"); - return error_code; -} - -int kill_all_tasks() -{ - int error_code = SLURM_SUCCESS; - - /* get shmemptr */ - slurmd_shmem_t *shmem = get_shmem(); - - int i; - pthread_mutex_lock(&shmem->mutex); - for (i = 0; i < MAX_JOB_STEPS; i++) { - if (shmem->job_steps[i].used == true) { - /* cycle through job_step and kill tasks */ - task_t *task_ptr = shmem->job_steps[i].head_task; - while (task_ptr != NULL) { - kill_task(task_ptr, SIGKILL); - task_ptr = task_ptr->next; - } - } - } - pthread_mutex_unlock(&shmem->mutex); - return error_code; - -} - -int kill_task(task_t * task, int signal) -{ - debug3("killing proccess %i, with signal %i", - task->task_start.exec_pid, signal); - return kill(task->task_start.exec_pid, signal); -} - -int reattach_tasks_streams(reattach_tasks_streams_msg_t * req_msg) -{ - int i; - int error_code = SLURM_SUCCESS; - /* get shmemptr */ - slurmd_shmem_t *shmem_ptr = get_shmem(); - - /* find job step */ - job_step_t *job_step_ptr = - find_job_step(shmem_ptr, req_msg->job_id, - req_msg->job_step_id); - - /* cycle through tasks and set streams address */ - for (i = 0; i < req_msg->tasks_to_reattach; i++) { - task_t *task = - find_task(job_step_ptr, req_msg->global_task_ids[i]); - if (task != NULL) { - task->task_start.io_streams_dest = - req_msg->streams; - } else { - error("task id not found job_id %i " - "job_step_id %i global_task_id %i", - req_msg->job_id, req_msg->job_step_id, - req_msg->global_task_ids[i]); - } - } - return 
error_code; -} - -void pthread_fork_child_after(void) -{ - log_reinit(); -} diff --git a/src/slurmd/task_mgr.h b/src/slurmd/task_mgr.h deleted file mode 100644 index 306de7fe93c..00000000000 --- a/src/slurmd/task_mgr.h +++ /dev/null @@ -1,102 +0,0 @@ -/*****************************************************************************\ - * task_mgr.h - - ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Kevin Tew <tew1@llnl.gov> et. al. - * UCRL-CODE-2002-040. - * - * This file is part of SLURM, a resource management program. - * For details, see <http://www.llnl.gov/linux/slurm/>. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
-\*****************************************************************************/ - -#ifndef _TASK_MGR_H -#define _TASK_MGR_H - -#if HAVE_CONFIG_H -# include <config.h> -# if HAVE_INTTYPES_H -# include <inttypes.h> -# else -# if HAVE_STDINT_H -# include <stdint.h> -# endif -# endif /* HAVE_INTTYPES_H */ -#else /* !HAVE_CONFIG_H */ -# include <inttypes.h> -#endif /* HAVE_CONFIG_H */ - -#include <src/common/slurm_protocol_api.h> - -#define STDIN_IO_THREAD 0 -#define STDOUT_IO_THREAD 1 -#define STDERR_IO_THREAD 2 -#define STDSIG_IO_THREAD 3 -#define SLURMD_NUMBER_OF_IO_THREADS 4 -#define SLURMD_IO_MAX_BUFFER_SIZE 4096 - -/* function prototypes */ -/* launch_tasks - * called by the rpc method to initiate task launch - * IN launch_msg - launch task messge - * RET int - return_code - */ -int launch_tasks(launch_tasks_request_msg_t * launch_msg); - -/* kill_tasks - * called by the rpc method to kill a job_step or set of task launches - * IN - kill task message - * RET int - return_code - */ -int kill_tasks(kill_tasks_msg_t * kill_task_msg); - -/* kill_all_tasks - * kills all the currently running tasks used by shutdown code - * RET - return_code - */ -int kill_all_tasks(); - -/* reattach_tasks_streams - * called by the reattach tasks rpc method to change the shmem task structs to point to a new destination for streams - * IN req_msg - reattach tasks streams message - */ -int reattach_tasks_streams(reattach_tasks_streams_msg_t * req_msg); - -void *task_exec_thread(void *arg); - -void pthread_fork_before(void); -void pthread_fork_parent_after(void); -void pthread_fork_child_after(void); - -typedef struct task_start { - /*task control thread id */ - pthread_t pthread_id; - int thread_return; - /*actual exec thread id */ - int exec_pid; - int exec_thread_return; - /*io threads ids */ - pthread_t io_pthread_id[SLURMD_NUMBER_OF_IO_THREADS]; - int io_thread_return[SLURMD_NUMBER_OF_IO_THREADS]; - launch_tasks_request_msg_t *launch_msg; - int pipes[6]; - int sockets[2]; - int 
local_task_id; - char addr_update; - slurm_addr io_streams_dest; -} task_start_t; -#endif diff --git a/src/slurmd/threaded_ctrl.c b/src/slurmd/threaded_ctrl.c deleted file mode 100644 index 6759f430e5b..00000000000 --- a/src/slurmd/threaded_ctrl.c +++ /dev/null @@ -1,89 +0,0 @@ -/*****************************************************************************\ - * threaded_ctrl.c - - ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Kevin Tew <tew1@llnl.gov> et. al. - * UCRL-CODE-2002-040. - * - * This file is part of SLURM, a resource management program. - * For details, see <http://www.llnl.gov/linux/slurm/>. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
-\*****************************************************************************/ - -#include <stdlib.h> -#include <sys/types.h> -#include <pwd.h> -#include <grp.h> -#include <sys/wait.h> -#include <errno.h> -#include <unistd.h> -#include <string.h> -#include <pthread.h> - -#include <src/common/log.h> -#include <src/common/list.h> -#include <src/common/xmalloc.h> -#include <src/common/slurm_protocol_api.h> -#include <src/common/slurm_errno.h> -#include <src/common/util_signals.h> - -#include <src/slurmd/task_mgr.h> -#include <src/slurmd/shmem_struct.h> -#include <src/slurmd/circular_buffer.h> -#include <src/slurmd/io.h> -#include <src/slurmd/pipes.h> -#include <src/slurmd/reconnect_utils.h> - -/* global variables */ - -/****************************************************************** - *task launch method call hierarchy - * - *launch_tasks() - * interconnect_init() - * fan_out_task_launch() (pthread_create) - * task_exec_thread() (fork) for task exec - * task_exec_thread() (pthread_create) for io piping - ******************************************************************/ -int launch_task ( task_start_t * task_start ) -{ - return pthread_create ( & task_start -> pthread_id , NULL , task_exec_thread , ( void * ) task_start ) ; -} - -int wait_for_tasks ( launch_tasks_request_msg_t * launch_msg , task_start_t ** task_start ) -{ - int i ; - int rc ; - for ( i = 0 ; i < launch_msg->tasks_to_launch ; i ++ ) - { - rc = pthread_join( task_start[i]->pthread_id , NULL ) ; - debug3 ( "wait for tasks: thread %i pthread_id %i joined " , i , task_start[i]->pthread_id ) ; - } - return SLURM_SUCCESS ; -} - -int kill_launched_tasks ( launch_tasks_request_msg_t * launch_msg , task_start_t ** task_start , int i ) -{ - /* - int rc ; - for ( i-- ; i >= 0 ; i -- ) - { - rc = pthread_kill ( task_start[i]->pthread_id , SIGKILL ) ; - } - */ - return SLURM_SUCCESS ; -} diff --git a/src/slurmd/threaded_io.c b/src/slurmd/threaded_io.c deleted file mode 100644 index 02bd0c05d49..00000000000 
--- a/src/slurmd/threaded_io.c +++ /dev/null @@ -1,114 +0,0 @@ -/*****************************************************************************\ - * threaded_io.c - - ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Kevin Tew <tew1@llnl.gov> et. al. - * UCRL-CODE-2002-040. - * - * This file is part of SLURM, a resource management program. - * For details, see <http://www.llnl.gov/linux/slurm/>. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with ConMan; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
-\*****************************************************************************/ - -#include <stdlib.h> -#include <sys/types.h> -#include <pwd.h> -#include <grp.h> -#include <sys/wait.h> -#include <errno.h> -#include <unistd.h> -#include <string.h> -#include <pthread.h> - -#include <src/common/log.h> -#include <src/common/list.h> -#include <src/common/xmalloc.h> -#include <src/common/slurm_protocol_api.h> -#include <src/common/slurm_errno.h> -#include <src/common/util_signals.h> - -#include <src/slurmd/task_mgr.h> -#include <src/slurmd/shmem_struct.h> -#include <src/slurmd/circular_buffer.h> -#include <src/slurmd/io.h> -#include <src/slurmd/pipes.h> -#include <src/slurmd/reconnect_utils.h> - -/* global variables */ - -/****************************************************************** - *task launch method call hierarchy - * - *launch_tasks() - * interconnect_init() - * fan_out_task_launch() (pthread_create) - * task_exec_thread() (fork) for task exec - * task_exec_thread() (pthread_create) for io piping - ******************************************************************/ -int forward_io ( task_start_t * task_start ) -{ - pthread_attr_t pthread_attr ; - - //posix_signal_pipe_ignore ( ) ; - - /* open stdout*/ - connect_io_stream ( task_start , STDIN_OUT_SOCK ) ; - /* open stderr*/ - connect_io_stream ( task_start , SIG_STDERR_SOCK ) ; - - /* spawn io pipe threads */ - /* set detatch state */ - pthread_attr_init( & pthread_attr ) ; - /*pthread_attr_setdetachstate ( & pthread_attr , PTHREAD_CREATE_DETACHED ) ;*/ - if ( pthread_create ( & task_start->io_pthread_id[STDIN_FILENO] , NULL , stdin_io_pipe_thread , task_start ) ) - goto return_label; - if ( pthread_create ( & task_start->io_pthread_id[STDOUT_FILENO] , NULL , stdout_io_pipe_thread , task_start ) ) - goto kill_stdin_thread; - if ( pthread_create ( & task_start->io_pthread_id[STDERR_FILENO] , NULL , stderr_io_pipe_thread , task_start ) ) - goto kill_stdout_thread; - - - - goto return_label; - - 
kill_stdout_thread: - pthread_kill ( task_start->io_pthread_id[STDOUT_FILENO] , SIGKILL ); - kill_stdin_thread: - pthread_kill ( task_start->io_pthread_id[STDIN_FILENO] , SIGKILL ); - return_label: - return SLURM_SUCCESS ; -} - -int wait_on_io_threads ( task_start_t * task_start ) -{ - /* threads have been detatched*/ - pthread_join ( task_start->io_pthread_id[STDERR_FILENO] , NULL ) ; - info ( "%i: errexit" , task_start -> local_task_id ) ; - pthread_join ( task_start->io_pthread_id[STDOUT_FILENO] , NULL ) ; - info ( "%i: outexit" , task_start -> local_task_id ) ; - /*pthread_join ( task_start->io_pthread_id[STDIN_FILENO] , NULL ) ;*/ - pthread_cancel ( task_start->io_pthread_id[STDIN_FILENO] ); - pthread_join ( task_start->io_pthread_id[STDIN_FILENO] , NULL ) ; - info ( "%i: inexit" , task_start -> local_task_id ) ; - /* thread join on stderr or stdout signifies task termination we should kill the stdin thread */ - return SLURM_SUCCESS ; -} - -int iotype_init_pipes ( int * pipes ) -{ - return SLURM_SUCCESS ; -} -- GitLab