diff --git a/NEWS b/NEWS
index 7ac64d4f83d67da1868debe18cf56d96f1b58435..13ec4a67a1a98e426113fc470f9ee991712c78a8 100644
--- a/NEWS
+++ b/NEWS
@@ -52,6 +52,11 @@ documents those changes that are of interest to users and admins.
  -- Remove some use of cr_enabled flag in slurmctld job record, use new
     flag "test_only" in select_g_job_test() instead.
 
+* Changes in SLURM 1.0.8
+========================
+ -- Fix to communication between slurmd and slurmstepd to allow for partial
+    reads and writes on their communication pipes.
+
 * Changes in SLURM 1.0.7
 ========================
  -- Change in how AuthType=auth/dummy is handled for security testing.
diff --git a/src/slurmd/common/stepd_api.h b/src/slurmd/common/stepd_api.h
index 92e062cf4bee7cf84dff5a6ae8ff79343d15fee8..6be167636d306bf22715a8b536f0edec93d289ec 100644
--- a/src/slurmd/common/stepd_api.h
+++ b/src/slurmd/common/stepd_api.h
@@ -162,21 +162,91 @@ int stepd_suspend(int fd);
  */
 int stepd_resume(int fd);
 
-#define safe_read(fd, ptr, size) do {					\
-		if (read(fd, ptr, size) != size) {			\
-			debug("%s:%d: %s: read (%d bytes) failed: %m",	\
-			      __FILE__, __LINE__, __CURRENT_FUNC__,	\
-			      (int)size);				\
-			goto rwfail;					\
+/*
+ * safe_read - read exactly "size" bytes from descriptor "fd" into "buf",
+ *	calling read() repeatedly until the request is satisfied.
+ * IN fd   - file descriptor to read from
+ * OUT buf - destination buffer, at least "size" bytes long
+ * IN size - number of bytes wanted (converted to int)
+ *
+ * On EOF or on any read() error a debug message is logged and control
+ * jumps to the label "rwfail", which the calling function must define.
+ * NOTE: read() errors are not retried, not even EINTR.
+ * Each argument is evaluated exactly once, and the macro's locals carry
+ * an "sr_" prefix so they cannot capture caller names such as "ptr".
+ */
+#define safe_read(fd, buf, size) do {					\
+		int sr_fd_ = (fd);					\
+		int sr_size_ = (int)(size);				\
+		int sr_left_ = sr_size_;				\
+		/* char *, since arithmetic on void * is not ISO C */	\
+		char *sr_ptr_ = (char *)(buf);				\
+		int sr_rc_;						\
+		while (sr_left_ > 0) {					\
+			sr_rc_ = read(sr_fd_, sr_ptr_, sr_left_);	\
+			if (sr_rc_ == 0) {				\
+				debug("%s:%d: %s: safe_read (%d of %d) EOF", \
+				      __FILE__, __LINE__, __CURRENT_FUNC__, \
+				      sr_left_, sr_size_);		\
+				goto rwfail;				\
+			} else if (sr_rc_ < 0) {			\
+				debug("%s:%d: %s: safe_read (%d of %d) failed: %m", \
+				      __FILE__, __LINE__, __CURRENT_FUNC__, \
+				      sr_left_, sr_size_);		\
+				goto rwfail;				\
+			} else {					\
+				sr_ptr_ += sr_rc_;			\
+				sr_left_ -= sr_rc_;			\
+				if (sr_left_ > 0)			\
+					debug3("%s:%d: %s: safe_read (%d of %d) partial read", \
+					       __FILE__, __LINE__, __CURRENT_FUNC__, \
+					       sr_left_, sr_size_);	\
+			}						\
 		}							\
 	} while (0)
 
-#define safe_write(fd, ptr, size) do {					\
-		if (write(fd, ptr, size) != size) {			\
-			debug("%s:%d: %s: write (%d bytes) failed: %m",	\
-			      __FILE__, __LINE__, __CURRENT_FUNC__,	\
-			      (int)size);				\
-			goto rwfail;					\
+/*
+ * safe_write - write exactly "size" bytes from "buf" to descriptor "fd",
+ *	calling write() repeatedly until everything has been written.
+ * IN fd   - file descriptor to write to
+ * IN buf  - source buffer holding at least "size" bytes
+ * IN size - number of bytes to write (converted to int)
+ *
+ * On any write() error, or if write() reports that zero bytes were
+ * written, a debug message is logged and control jumps to the label
+ * "rwfail", which the calling function must define.
+ * NOTE: write() errors are not retried, not even EINTR.
+ * Each argument is evaluated exactly once, and the macro's locals carry
+ * an "sw_" prefix so they cannot capture caller names such as "ptr".
+ */
+#define safe_write(fd, buf, size) do {					\
+		int sw_fd_ = (fd);					\
+		int sw_size_ = (int)(size);				\
+		int sw_left_ = sw_size_;				\
+		/* char *, since arithmetic on void * is not ISO C */	\
+		const char *sw_ptr_ = (const char *)(buf);		\
+		int sw_rc_;						\
+		while (sw_left_ > 0) {					\
+			sw_rc_ = write(sw_fd_, sw_ptr_, sw_left_);	\
+			if (sw_rc_ < 0) {				\
+				debug("%s:%d: %s: safe_write (%d of %d) failed: %m", \
+				      __FILE__, __LINE__, __CURRENT_FUNC__, \
+				      sw_left_, sw_size_);		\
+				goto rwfail;				\
+			} else if (sw_rc_ == 0) {			\
+				/* no progress: fail rather than spin forever */ \
+				debug("%s:%d: %s: safe_write (%d of %d) wrote 0 bytes", \
+				      __FILE__, __LINE__, __CURRENT_FUNC__, \
+				      sw_left_, sw_size_);		\
+				goto rwfail;				\
+			} else {					\
+				sw_ptr_ += sw_rc_;			\
+				sw_left_ -= sw_rc_;			\
+				if (sw_left_ > 0)			\
+					debug3("%s:%d: %s: safe_write (%d of %d) partial write", \
+					       __FILE__, __LINE__, __CURRENT_FUNC__, \
+					       sw_left_, sw_size_);	\
+			}						\
 		}							\
 	} while (0)
 