From 638c6f45821c953f68be0d8f681ff925b6f4a824 Mon Sep 17 00:00:00 2001
From: Mark Grondona <mgrondona@llnl.gov>
Date: Thu, 13 May 2004 19:41:21 +0000
Subject: [PATCH]  o Check for available space to write stdin data before
 polling     corresponding file descriptor for readability. Otherwise, srun   
  and slurmd will spin when stdin file descriptors are full because    
 pending data will always go unread. (gnats:434)

---
 NEWS            |  4 ++++
 src/slurmd/io.c | 51 +++++++++++++++++++++++++++++--------------------
 src/srun/io.c   | 23 ++++++++++++++++++----
 3 files changed, 53 insertions(+), 25 deletions(-)

diff --git a/NEWS b/NEWS
index ea4fc4a688d..d5c9f17b716 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,10 @@
 This file describes changes in recent versions of SLURM. It primarily
 documents those changes that are of interest to users and admins. 
 
+* Changes in SLURM 0.3.1
+========================
+ -- Fix for slurmd spinning when stdin buffers full (gnats:434)
+
 * Changes in SLURM 0.3.0
 ========================
  -- Support for AIX added (a few bugs do remain).
diff --git a/src/slurmd/io.c b/src/slurmd/io.c
index 9c1e7865c77..f3a42c997a5 100644
--- a/src/slurmd/io.c
+++ b/src/slurmd/io.c
@@ -1143,17 +1143,42 @@ _obj_close(io_obj_t *obj, List objs)
 	return SLURM_SUCCESS;
 }
 
+static int 
+_min_free (struct io_info *reader, int *lenp)
+{
+	int nfree = cbuf_free (reader->buf);
+	if (nfree < *lenp)
+		*lenp = nfree;
+	return (0);
+}
+
+static int 
+_max_readable (struct io_info *io, int max)
+{
+	if (!io->readers)
+		return (0);
+	/*
+	 * Determine the maximum amount of data we will
+	 * safely be able to read (starting at max)
+	 */
+	list_for_each (io->readers, (ListForF) _min_free, (void *) &max);
+	return (max);
+}
+
 static bool 
 _readable(io_obj_t *obj)
 {
-	bool rc;
 	struct io_info *io = (struct io_info *) obj->arg;
 
 	xassert(io->magic == IO_MAGIC);
 
-	rc = (!io->disconnected && !io->eof && (obj->fd > 0));
+	if (io->disconnected || io->eof || (obj->fd < 0))
+		return (false);
 
-	return rc;
+	if (_max_readable(io, 1024) == 0)
+		return (false);
+
+	return (true);
 }
 
 static bool 
@@ -1211,7 +1236,7 @@ _write(io_obj_t *obj, List objs)
 		return 0;
 	}
 
-	while ((n = cbuf_read_to_fd(io->buf, obj->fd, -1)) < 0) {
+	while ((n = cbuf_read_to_fd(io->buf, obj->fd, 1)) < 0) {
 		switch (errno) {
 		case EAGAIN:
 			return 0; 
@@ -1412,7 +1437,6 @@ _task_error(io_obj_t *obj, List objs)
 	_obj_close(obj, objs);
 	return -1;
 }
-
 static int 
 _client_read(io_obj_t *obj, List objs)
 {
@@ -1428,22 +1452,7 @@ _client_read(io_obj_t *obj, List objs)
 	xassert(_validate_io_list(objs));
 	xassert(_isa_client(client));
 
-	/*
-	 * Determine the maximum amount of data we will
-	 * safely be able to read
-	 */
-	if (client->readers) {
-		i = list_iterator_create(client->readers);
-		while ((reader = list_next(i))) {
-			if (cbuf_free(reader->buf) < len)
-				len = cbuf_free(reader->buf);
-		}
-		list_iterator_destroy(i);
-
-		if (len == 0)
-			return 0;
-	}
-	
+	len = _max_readable (client, len);
 
    again:
 	if ((n = read(obj->fd, (void *) buf, len)) < 0) {
diff --git a/src/srun/io.c b/src/srun/io.c
index 4535e4d8d58..d0e000b4778 100644
--- a/src/srun/io.c
+++ b/src/srun/io.c
@@ -277,6 +277,19 @@ _fd_info_init(fd_info_t *info, int taskid, int *pfd, FILE *fp, cbuf_t buf)
 	info->buf    = buf;
 }
 
+static int
+_stdin_buffer_space (job_t *job)
+{
+	int i, nfree, len = 0;
+	for (i = 0; i < opt.nprocs; i++) {
+		if ((nfree = cbuf_free (job->inbuf[i])) == 0)
+			return (0);
+		if ((len == 0) || (nfree < len))
+			len = nfree;
+	}
+	return (len);
+}
+
 static nfds_t
 _setup_pollfds(job_t *job, struct pollfd *fds, fd_info_t *map)
 {
@@ -284,7 +297,7 @@ _setup_pollfds(job_t *job, struct pollfd *fds, fd_info_t *map)
 	int i;
 	nfds_t nfds = job->niofds; /* already have n ioport fds + stdin */
 
-	if ((job->stdinfd >= 0) && stdin_open) {
+	if ((job->stdinfd >= 0) && stdin_open && _stdin_buffer_space(job)) {
 		_poll_set_rd(fds[nfds], job->stdinfd);
 		nfds++;
 	}
@@ -386,9 +399,11 @@ _io_thr_poll(void *job_arg)
 			}
 		}
 
-		if ((job->stdinfd >= 0) && stdin_open) {
-		       	if (fds[i].revents)
-				_bcast_stdin(job->stdinfd, job);
+		if (  (fds[i].fd == job->stdinfd) 
+                   && (job->stdinfd >= 0) 
+                   && stdin_open 
+                   && fds[i].revents ) {
+			_bcast_stdin(job->stdinfd, job);
 			++i;
 		}
 
-- 
GitLab