From 33b11c4d24a893e24c0b3d428449ec2e83d90ce6 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Thu, 23 Mar 2006 16:28:17 +0000
Subject: [PATCH] svn merge -r7427:7577
 https://eris.llnl.gov/svn/slurm/branches/slurm-1.0

---
 NEWS                          |  5 ++++
 src/slurmd/common/stepd_api.h | 55 +++++++++++++++++++++++++++--------
 2 files changed, 48 insertions(+), 12 deletions(-)

diff --git a/NEWS b/NEWS
index 7ac64d4f83d..13ec4a67a1a 100644
--- a/NEWS
+++ b/NEWS
@@ -52,6 +52,11 @@ documents those changes that are of interest to users and admins.
  -- Remove some use of cr_enabled flag in slurmctld job record, use 
     new flag "test_only" in select_g_job_test() instead.
 
+* Changes in SLURM 1.0.8
+========================
+ -- Fix to communication between slurmd and slurmstepd to allow for partial
+    reads and writes on their communication pipes.
+
 * Changes in SLURM 1.0.7
 ========================
  -- Change in how AuthType=auth/dummy is handled for security testing.
diff --git a/src/slurmd/common/stepd_api.h b/src/slurmd/common/stepd_api.h
index 92e062cf4be..6be167636d3 100644
--- a/src/slurmd/common/stepd_api.h
+++ b/src/slurmd/common/stepd_api.h
@@ -162,21 +162,52 @@ int stepd_suspend(int fd);
  */
 int stepd_resume(int fd);
 
-#define safe_read(fd, ptr, size) do {					\
-		if (read(fd, ptr, size) != size) {			\
-			debug("%s:%d: %s: read (%d bytes) failed: %m",	\
-			      __FILE__, __LINE__, __CURRENT_FUNC__,	\
-			      (int)size);				\
-			goto rwfail;					\
+#define safe_read(fd, buf, size) do {	/* read exactly size bytes, else goto caller's rwfail label */ \
+		const int sr_total = (int)(size);	/* size is evaluated only once */ \
+		int sr_left = sr_total;		/* bytes still to be read */ \
+		char *sr_ptr = (char *)(buf);	/* char *: void * arithmetic is a GNU extension */ \
+		while (sr_left > 0) {					\
+			int sr_rc = read(fd, sr_ptr, sr_left);		\
+			if (sr_rc == 0) {	/* EOF before all bytes arrived */ \
+				debug("%s:%d: %s: safe_read (%d of %d) EOF", \
+				      __FILE__, __LINE__, __CURRENT_FUNC__, \
+				      sr_left, sr_total);		\
+				goto rwfail;				\
+			} else if (sr_rc < 0) {				\
+				debug("%s:%d: %s: safe_read (%d of %d) failed: %m", \
+				      __FILE__, __LINE__, __CURRENT_FUNC__, \
+				      sr_left, sr_total);		\
+				goto rwfail;				\
+			} else {	/* short read: advance and retry */ \
+				sr_ptr += sr_rc;			\
+				sr_left -= sr_rc;			\
+				if (sr_left > 0)			\
+					debug3("%s:%d: %s: safe_read (%d of %d) partial read", \
+					       __FILE__, __LINE__, __CURRENT_FUNC__, \
+					       sr_left, sr_total);	\
+			}						\
 		}							\
 	} while (0)
 
-#define safe_write(fd, ptr, size) do {					\
-		if (write(fd, ptr, size) != size) {			\
-			debug("%s:%d: %s: write (%d bytes) failed: %m",	\
-			      __FILE__, __LINE__, __CURRENT_FUNC__,	\
-			      (int)size);				\
-			goto rwfail;					\
+#define safe_write(fd, buf, size) do {	/* write exactly size bytes, else goto caller's rwfail label */ \
+		const int sw_total = (int)(size);	/* size is evaluated only once */ \
+		int sw_left = sw_total;		/* bytes still to be written */ \
+		const char *sw_ptr = (const char *)(buf); /* char *: void * arithmetic is a GNU extension */ \
+		while (sw_left > 0) {					\
+			int sw_rc = write(fd, sw_ptr, sw_left);		\
+			if (sw_rc < 0) {				\
+				debug("%s:%d: %s: safe_write (%d of %d) failed: %m", \
+				      __FILE__, __LINE__, __CURRENT_FUNC__, \
+				      sw_left, sw_total);		\
+				goto rwfail;				\
+			} else {	/* short write: advance and retry */ \
+				sw_ptr += sw_rc;			\
+				sw_left -= sw_rc;			\
+				if (sw_left > 0)			\
+					debug3("%s:%d: %s: safe_write (%d of %d) partial write", \
+					       __FILE__, __LINE__, __CURRENT_FUNC__, \
+					       sw_left, sw_total);	\
+			}						\
 		}							\
 	} while (0)
 
-- 
GitLab