From 6f9d1a62e3ffd44e2203863f7986d06fdc436da9 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Fri, 5 Feb 2010 22:11:11 +0000 Subject: [PATCH] Modify communications between slurmd and slurmstepd so that a v2.2 slurmd will work with an earlier slurmstepd. --- src/common/stepd_api.c | 22 ++++++++++++++++------ src/slurmd/slurmstepd/req.c | 8 +++++++- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/common/stepd_api.c b/src/common/stepd_api.c index 19db6469592..9bef125a4a2 100644 --- a/src/common/stepd_api.c +++ b/src/common/stepd_api.c @@ -292,18 +292,28 @@ stepd_get_info(int fd) info = xmalloc(sizeof(slurmstepd_info_t)); safe_write(fd, &req, sizeof(int)); - safe_read(fd, &protocol_version, sizeof(uint16_t)); + safe_read(fd, &info->uid, sizeof(uid_t)); safe_read(fd, &info->jobid, sizeof(uint32_t)); safe_read(fd, &info->stepid, sizeof(uint32_t)); - safe_read(fd, &info->nodeid, sizeof(uint32_t)); - safe_read(fd, &info->job_mem_limit, sizeof(uint32_t)); - if (protocol_version >= SLURM_2_2_PROTOCOL_VERSION) + + safe_read(fd, &protocol_version, sizeof(uint16_t)); + if (protocol_version >= SLURM_2_2_PROTOCOL_VERSION) { + safe_read(fd, &info->nodeid, sizeof(uint32_t)); + safe_read(fd, &info->job_mem_limit, sizeof(uint32_t)); safe_read(fd, &info->step_mem_limit, sizeof(uint32_t)); - else + } else { + info->nodeid = protocol_version << 16; + safe_read(fd, &protocol_version, sizeof(uint16_t)); + info->nodeid |= protocol_version; + safe_read(fd, &info->job_mem_limit, sizeof(uint32_t)); info->step_mem_limit = info->job_mem_limit; - + verbose("Old version slurmstepd for step %u.%u", + info->jobid, info->stepid); + verbose("modeid = %u mem limit = %u", info->nodeid, info->job_mem_limit); + } return info; + rwfail: xfree(info); return NULL; diff --git a/src/slurmd/slurmstepd/req.c b/src/slurmd/slurmstepd/req.c index e36e257c31c..6a82076164c 100644 --- a/src/slurmd/slurmstepd/req.c +++ b/src/slurmd/slurmstepd/req.c @@ -556,10 +556,16 @@ 
_handle_info(int fd, slurmd_job_t *job) { uint16_t protocol_version = SLURM_PROTOCOL_VERSION; - safe_write(fd, &protocol_version, sizeof(uint16_t)); safe_write(fd, &job->uid, sizeof(uid_t)); safe_write(fd, &job->jobid, sizeof(uint32_t)); safe_write(fd, &job->stepid, sizeof(uint32_t)); + + /* protocol_version was added in SLURM version 2.2, + * so it needed to be added later in the data sent + * for backward compatibility (so that it doesn't + * get confused for a huge UID, job ID or step ID; + * we should be safe in avoiding huge node IDs). */ + safe_write(fd, &protocol_version, sizeof(uint16_t)); safe_write(fd, &job->nodeid, sizeof(uint32_t)); safe_write(fd, &job->job_mem, sizeof(uint32_t)); safe_write(fd, &job->step_mem, sizeof(uint32_t)); -- GitLab