diff --git a/NEWS b/NEWS index ea4fc4a688d1e205d7a919871d766fcfb3dee142..d5c9f17b71675fac5ea039b36fb2fcb87954b758 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,10 @@ This file describes changes in recent versions of SLURM. It primarily documents those changes that are of interest to users and admins. +* Changes in SLURM 0.3.1 +======================== + -- Fix for slurmd spinning when stdin buffers full (gnats:434) + * Changes in SLURM 0.3.0 ======================== -- Support for AIX added (a few bugs do remain). diff --git a/src/slurmd/io.c b/src/slurmd/io.c index 9c1e7865c7730e668438abe942444173907cc4c3..f3a42c997a5720115bc068ec5673331c2bacd733 100644 --- a/src/slurmd/io.c +++ b/src/slurmd/io.c @@ -1143,17 +1143,42 @@ _obj_close(io_obj_t *obj, List objs) return SLURM_SUCCESS; } +static int +_min_free (struct io_info *reader, int *lenp) +{ + int nfree = cbuf_free (reader->buf); + if (nfree < *lenp) + *lenp = nfree; + return (0); +} + +static int +_max_readable (struct io_info *io, int max) +{ + if (!io->readers) + return (0); + /* + * Determine the maximum amount of data we will + * safely be able to read (starting at max) + */ + list_for_each (io->readers, (ListForF) _min_free, (void *) &max); + return (max); +} + static bool _readable(io_obj_t *obj) { - bool rc; struct io_info *io = (struct io_info *) obj->arg; xassert(io->magic == IO_MAGIC); - rc = (!io->disconnected && !io->eof && (obj->fd > 0)); + if (io->disconnected || io->eof || (obj->fd < 0)) + return (false); - return rc; + if (_max_readable(io, 1024) == 0) + return (false); + + return (true); } static bool @@ -1211,7 +1236,7 @@ _write(io_obj_t *obj, List objs) return 0; } - while ((n = cbuf_read_to_fd(io->buf, obj->fd, -1)) < 0) { + while ((n = cbuf_read_to_fd(io->buf, obj->fd, 1)) < 0) { switch (errno) { case EAGAIN: return 0; @@ -1412,7 +1437,6 @@ _task_error(io_obj_t *obj, List objs) _obj_close(obj, objs); return -1; } - static int _client_read(io_obj_t *obj, List objs) { @@ -1428,22 +1452,7 @@ _client_read(io_obj_t *obj, List objs) xassert(_validate_io_list(objs)); xassert(_isa_client(client)); - /* - * Determine the maximum amount of data we will - * safely be able to read - */ - if (client->readers) { - i = list_iterator_create(client->readers); - while ((reader = list_next(i))) { - if (cbuf_free(reader->buf) < len) - len = cbuf_free(reader->buf); - } - list_iterator_destroy(i); - - if (len == 0) - return 0; - } - + len = _max_readable (client, len); again: if ((n = read(obj->fd, (void *) buf, len)) < 0) { diff --git a/src/srun/io.c b/src/srun/io.c index 4535e4d8d586d0129e58f3f7246fa1dc1ee655f4..d0e000b47780fcc1318083c07f4a32a18106c642 100644 --- a/src/srun/io.c +++ b/src/srun/io.c @@ -277,6 +277,19 @@ _fd_info_init(fd_info_t *info, int taskid, int *pfd, FILE *fp, cbuf_t buf) info->buf = buf; } +static int +_stdin_buffer_space (job_t *job) +{ + int i, nfree, len = 0; + for (i = 0; i < opt.nprocs; i++) { + if ((nfree = cbuf_free (job->inbuf[i])) == 0) + return (0); + if ((len == 0) || (nfree < len)) + len = nfree; + } + return (len); +} + static nfds_t _setup_pollfds(job_t *job, struct pollfd *fds, fd_info_t *map) { @@ -284,7 +297,7 @@ _setup_pollfds(job_t *job, struct pollfd *fds, fd_info_t *map) int i; nfds_t nfds = job->niofds; /* already have n ioport fds + stdin */ - if ((job->stdinfd >= 0) && stdin_open) { + if ((job->stdinfd >= 0) && stdin_open && _stdin_buffer_space(job)) { _poll_set_rd(fds[nfds], job->stdinfd); nfds++; } @@ -386,9 +399,11 @@ _io_thr_poll(void *job_arg) } } - if ((job->stdinfd >= 0) && stdin_open) { - if (fds[i].revents) - _bcast_stdin(job->stdinfd, job); + if ( (fds[i].fd == job->stdinfd) + && (job->stdinfd >= 0) + && stdin_open + && fds[i].revents ) { + _bcast_stdin(job->stdinfd, job); ++i; }