diff --git a/src/common/stepd_api.c b/src/common/stepd_api.c index afc368f72daa868edc3913408d1fb25cb8c1a57b..23eaa9af8ce2a251af180bec0ea48ba2bdebea7b 100644 --- a/src/common/stepd_api.c +++ b/src/common/stepd_api.c @@ -861,36 +861,66 @@ stepd_completion(int fd, uint16_t protocol_version, step_complete_msg_t *sent) int errnum = 0; Buf buffer; int len = 0; - int version = SLURM_PROTOCOL_VERSION; buffer = init_buf(0); debug("Entering stepd_completion, range_first = %d, range_last = %d", sent->range_first, sent->range_last); - safe_write(fd, &req, sizeof(int)); - safe_write(fd, &version, sizeof(int)); - safe_write(fd, &sent->range_first, sizeof(int)); - safe_write(fd, &sent->range_last, sizeof(int)); - safe_write(fd, &sent->step_rc, sizeof(int)); - /* - * We must not use setinfo over a pipe with slurmstepd here - * Indeed, slurmd does a large use of getinfo over a pipe - * with slurmstepd and doing the reverse can result in a deadlock - * scenario with slurmstepd : - * slurmd(lockforread,write)/slurmstepd(write,lockforread) - * Do pack/unpack instead to be sure of independances of - * slurmd and slurmstepd - */ - jobacctinfo_pack(sent->jobacct, SLURM_PROTOCOL_VERSION, - PROTOCOL_TYPE_SLURM, buffer); - len = get_buf_offset(buffer); - safe_write(fd, &len, sizeof(int)); - safe_write(fd, get_buf_data(buffer), len); - free_buf(buffer); - /* Receive the return code and errno */ - safe_read(fd, &rc, sizeof(int)); - safe_read(fd, &errnum, sizeof(int)); + if (protocol_version >= SLURM_14_11_PROTOCOL_VERSION) { + safe_write(fd, &req, sizeof(int)); + safe_write(fd, &sent->range_first, sizeof(int)); + safe_write(fd, &sent->range_last, sizeof(int)); + safe_write(fd, &sent->step_rc, sizeof(int)); + + /* + * We must not use setinfo over a pipe with slurmstepd here + * Indeed, slurmd does a large use of getinfo over a pipe + * with slurmstepd and doing the reverse can result in + * a deadlock scenario with slurmstepd : + * slurmd(lockforread,write)/slurmstepd(write,lockforread) + * Do 
pack/unpack instead to be sure of independances of + * slurmd and slurmstepd + */ + jobacctinfo_pack(sent->jobacct, protocol_version, + PROTOCOL_TYPE_SLURM, buffer); + len = get_buf_offset(buffer); + safe_write(fd, &len, sizeof(int)); + safe_write(fd, get_buf_data(buffer), len); + free_buf(buffer); + + /* Receive the return code and errno */ + safe_read(fd, &rc, sizeof(int)); + safe_read(fd, &errnum, sizeof(int)); + } else { + int version = SLURM_PROTOCOL_VERSION; + + safe_write(fd, &req, sizeof(int)); + safe_write(fd, &version, sizeof(int)); + safe_write(fd, &sent->range_first, sizeof(int)); + safe_write(fd, &sent->range_last, sizeof(int)); + safe_write(fd, &sent->step_rc, sizeof(int)); + + /* + * We must not use setinfo over a pipe with slurmstepd here + * Indeed, slurmd does a large use of getinfo over a pipe + * with slurmstepd and doing the reverse can result in + * a deadlock scenario with slurmstepd : + * slurmd(lockforread,write)/slurmstepd(write,lockforread) + * Do pack/unpack instead to be sure of independances of + * slurmd and slurmstepd + */ + jobacctinfo_pack(sent->jobacct, protocol_version, + PROTOCOL_TYPE_SLURM, buffer); + len = get_buf_offset(buffer); + safe_write(fd, &len, sizeof(int)); + safe_write(fd, get_buf_data(buffer), len); + free_buf(buffer); + + /* Receive the return code and errno */ + safe_read(fd, &rc, sizeof(int)); + safe_read(fd, &errnum, sizeof(int)); + } errno = errnum; return rc; @@ -1019,3 +1049,88 @@ rwfail: *pids_array = NULL; return SLURM_ERROR; } + +/* + * Get the memory limits of the step + * Returns SLURM_SUCCESS if successful. On error returns SLURM_ERROR. 
+ */ +extern int stepd_get_mem_limits(int fd, uint16_t protocol_version, + slurmstepd_mem_info_t *stepd_mem_info) +{ + int req = REQUEST_STEP_MEM_LIMITS; + + xassert(stepd_mem_info); + memset(stepd_mem_info, 0, sizeof(slurmstepd_mem_info_t)); + + if (protocol_version >= SLURM_14_11_PROTOCOL_VERSION) { + safe_write(fd, &req, sizeof(int)); + + safe_read(fd, &stepd_mem_info->job_mem_limit, sizeof(uint32_t)); + safe_read(fd, &stepd_mem_info->step_mem_limit, + sizeof(uint32_t)); + } else { + slurmstepd_info_t *step_info = stepd_get_info(fd); + if (!step_info) /* stepd_get_info() failed */ + goto rwfail; + stepd_mem_info->job_mem_limit = step_info->job_mem_limit; + stepd_mem_info->step_mem_limit = step_info->step_mem_limit; + xfree(step_info); + } + + return SLURM_SUCCESS; +rwfail: + return SLURM_ERROR; +} + +/* + * Get the uid of the step + * Returns uid of the running step if successful. On error returns (uid_t) -1. + */ +extern uid_t stepd_get_uid(int fd, uint16_t protocol_version) +{ + int req = REQUEST_STEP_UID; + uid_t uid = -1; + + if (protocol_version >= SLURM_14_11_PROTOCOL_VERSION) { + safe_write(fd, &req, sizeof(int)); + + safe_read(fd, &uid, sizeof(uid_t)); + } else { + slurmstepd_info_t *step_info = stepd_get_info(fd); + if (!step_info) /* stepd_get_info() failed */ + goto rwfail; + uid = step_info->uid; + xfree(step_info); + } + + return uid; +rwfail: + return (uid_t) -1; +} + +/* + * Get the nodeid of the stepd + * Returns nodeid of the running stepd if successful. On error returns NO_VAL. 
+ */ +extern uint32_t stepd_get_nodeid(int fd, uint16_t protocol_version) +{ + int req = REQUEST_STEP_NODEID; + uint32_t nodeid = NO_VAL; + + if (protocol_version >= SLURM_14_11_PROTOCOL_VERSION) { + safe_write(fd, &req, sizeof(int)); + + safe_read(fd, &nodeid, sizeof(uint32_t)); + } else { + slurmstepd_info_t *step_info = stepd_get_info(fd); + if (!step_info) /* stepd_get_info() failed */ + goto rwfail; + nodeid = step_info->nodeid; + xfree(step_info); + } + + return nodeid; +rwfail: + return NO_VAL; +} + diff --git a/src/common/stepd_api.h b/src/common/stepd_api.h index 42fa4d6f71d576489ffeebb614b6a4f70c3668ff..fde67cf7643107f16741e8bae1aba1dcefcc05ec 100644 --- a/src/common/stepd_api.h +++ b/src/common/stepd_api.h @@ -55,7 +55,7 @@ typedef enum { REQUEST_SIGNAL_TASK_GLOBAL, REQUEST_SIGNAL_CONTAINER, REQUEST_STATE, - REQUEST_INFO, + REQUEST_INFO, /* Defunct, See REQUEST_STEP_MEM_LIMITS|UID|NODEID */ REQUEST_ATTACH, REQUEST_PID_IN_CONTAINER, REQUEST_DAEMON_PID, @@ -68,6 +68,9 @@ typedef enum { REQUEST_STEP_RECONFIGURE, REQUEST_STEP_STAT, REQUEST_STEP_COMPLETION_V2, + REQUEST_STEP_MEM_LIMITS, + REQUEST_STEP_UID, + REQUEST_STEP_NODEID } step_msg_t; typedef enum { @@ -87,6 +90,12 @@ typedef struct { uint32_t step_mem_limit; /* step's memory limit, MB */ } slurmstepd_info_t; +typedef struct { + uint32_t job_mem_limit; /* job's memory limit, MB */ + uint32_t nodeid; + uint32_t step_mem_limit; /* step's memory limit, MB */ +} slurmstepd_mem_info_t; + typedef struct { int id; /* local task id */ uint32_t gtid; /* global task id */ @@ -258,5 +267,23 @@ int stepd_task_info(int fd, uint16_t protocol_version, int stepd_list_pids(int fd, uint16_t protocol_version, uint32_t **pids_array, uint32_t *pids_count); +/* + * Get the memory limits of the step + * Returns SLURM_SUCCESS if successful. On error returns SLURM_ERROR. 
+ */ +extern int stepd_get_mem_limits(int fd, uint16_t protocol_version, + slurmstepd_mem_info_t *stepd_mem_info); + +/* + * Get the uid of the step + * Returns uid of the running step if successful. On error returns (uid_t) -1. 
+ */ +extern uid_t stepd_get_uid(int fd, uint16_t protocol_version); + +/* + * Get the nodeid of the stepd + * Returns nodeid of the running stepd if successful. On error returns NO_VAL. + */ +extern uint32_t stepd_get_nodeid(int fd, uint16_t protocol_version); #endif /* _STEPD_API_H */ diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c index 63e83df17172898e86fe9fc12eacb86bfefd7068..10999192669a124911bcd0ad51dafc5e2e3d3de1 100644 --- a/src/slurmd/slurmd/req.c +++ b/src/slurmd/slurmd/req.c @@ -202,8 +202,7 @@ static bool _steps_completed_now(uint32_t jobid); static int _valid_sbcast_cred(file_bcast_msg_t *req, uid_t req_uid, uint16_t block_no, uint32_t *job_id); static void _wait_state_completed(uint32_t jobid, int max_delay); -static slurmstepd_info_t *_get_job_step_info(uint32_t jobid); -static long _get_job_uid(uint32_t jobid); +static uid_t _get_job_uid(uint32_t jobid); static gids_t *_gids_cache_lookup(char *user, gid_t gid); @@ -1737,7 +1736,7 @@ _rpc_job_notify(slurm_msg_t *msg) { job_notify_msg_t *req = msg->data; uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, NULL); - long job_uid; + uid_t job_uid; List steps; ListIterator i; step_loc_t *stepd = NULL; @@ -1936,7 +1935,7 @@ _load_job_limits(void) step_loc_t *stepd; int fd; job_mem_limits_t *job_limits_ptr; - slurmstepd_info_t *stepd_info; + slurmstepd_mem_info_t stepd_mem_info; if (!job_limits_list) job_limits_list = list_create(_job_limits_free); @@ -1955,16 +1954,21 @@ _load_job_limits(void) if (fd == -1) continue; /* step completed */ - stepd_info = stepd_get_info(fd); + /* on error zero the limits: adds no entry, still closes fd */ + if (stepd_get_mem_limits(fd, stepd->protocol_version, + &stepd_mem_info) != SLURM_SUCCESS) + memset(&stepd_mem_info, 0, sizeof(stepd_mem_info)); - if (stepd_info && - (stepd_info->job_mem_limit || stepd_info->step_mem_limit)) { + if ((stepd_mem_info.job_mem_limit + || stepd_mem_info.step_mem_limit)) { /* create entry for this job */ job_limits_ptr = xmalloc(sizeof(job_mem_limits_t)); job_limits_ptr->job_id = stepd->jobid; job_limits_ptr->step_id = stepd->stepid; - 
job_limits_ptr->job_mem = stepd_info->job_mem_limit; - job_limits_ptr->step_mem = stepd_info->step_mem_limit; + job_limits_ptr->job_mem = + stepd_mem_info.job_mem_limit; + job_limits_ptr->step_mem = + stepd_mem_info.step_mem_limit; #if _LIMIT_INFO info("RecLim step:%u.%u job_mem:%u step_mem:%u", job_limits_ptr->job_id, job_limits_ptr->step_id, @@ -1973,7 +1977,6 @@ _load_job_limits(void) #endif list_append(job_limits_list, job_limits_ptr); } - xfree(stepd_info); close(fd); } list_iterator_destroy(step_iter); @@ -2356,7 +2359,7 @@ _signal_jobstep(uint32_t jobid, uint32_t stepid, uid_t req_uid, uint32_t signal) { int fd, rc = SLURM_SUCCESS; - slurmstepd_info_t *step; + uid_t uid; uint16_t protocol_version; fd = stepd_connect(conf->spooldir, conf->node_name, jobid, stepid, @@ -2366,16 +2369,18 @@ _signal_jobstep(uint32_t jobid, uint32_t stepid, uid_t req_uid, jobid, stepid); return ESLURM_INVALID_JOB_ID; } - if ((step = stepd_get_info(fd)) == NULL) { - debug("signal for nonexistent job %u.%u requested", + + if ((uid = stepd_get_uid(fd, protocol_version)) == (uid_t) -1) { + debug("_signal_jobstep: couldn't read from the " + "step %u.%u: %m", jobid, stepid); - close(fd); - return ESLURM_INVALID_JOB_ID; + rc = ESLURM_INVALID_JOB_ID; + goto done2; } - if ((req_uid != step->uid) && (!_slurm_authorized_user(req_uid))) { + if ((req_uid != uid) && (!_slurm_authorized_user(req_uid))) { debug("kill req from uid %ld for job %u.%u owned by uid %ld", - (long) req_uid, jobid, stepid, (long) step->uid); + (long) req_uid, jobid, stepid, (long) uid); rc = ESLURM_USER_ID_MISSING; /* or bad in this case */ goto done2; } @@ -2400,7 +2405,6 @@ _signal_jobstep(uint32_t jobid, uint32_t stepid, uid_t req_uid, rc = ESLURMD_JOB_NOTRUNNING; done2: - xfree(step); close(fd); return rc; } @@ -2434,7 +2438,7 @@ _rpc_checkpoint_tasks(slurm_msg_t *msg) uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, NULL); checkpoint_tasks_msg_t *req = (checkpoint_tasks_msg_t *) msg->data; uint16_t protocol_version; - 
slurmstepd_info_t *stepd_info = NULL; + uid_t uid; fd = stepd_connect(conf->spooldir, conf->node_name, req->job_id, req->job_step_id, &protocol_version); @@ -2445,17 +2449,18 @@ _rpc_checkpoint_tasks(slurm_msg_t *msg) goto done; } - if (!(stepd_info = stepd_get_info(fd))) { - debug("checkpoint couldn't read from the step %u.%u: %m", + if ((uid = stepd_get_uid(fd, protocol_version)) == (uid_t) -1) { + debug("_rpc_checkpoint_tasks: couldn't read from the " + "step %u.%u: %m", req->job_id, req->job_step_id); rc = ESLURM_INVALID_JOB_ID; goto done2; } - if ((req_uid != stepd_info->uid) && (!_slurm_authorized_user(req_uid))) { + if ((req_uid != uid) && (!_slurm_authorized_user(req_uid))) { debug("checkpoint req from uid %ld for job %u.%u owned by " "uid %ld", (long) req_uid, req->job_id, req->job_step_id, - (long) stepd_info->uid); + (long) uid); rc = ESLURM_USER_ID_MISSING; /* or bad in this case */ goto done2; } @@ -2466,7 +2471,6 @@ _rpc_checkpoint_tasks(slurm_msg_t *msg) rc = ESLURMD_JOB_NOTRUNNING; done2: - xfree(stepd_info); close(fd); done: slurm_send_rc_msg(msg, rc); @@ -2478,8 +2482,7 @@ _rpc_terminate_tasks(slurm_msg_t *msg) kill_tasks_msg_t *req = (kill_tasks_msg_t *) msg->data; int rc = SLURM_SUCCESS; int fd; - uid_t req_uid; - slurmstepd_info_t *stepd_info; + uid_t req_uid, uid; uint16_t protocol_version; debug3("Entering _rpc_terminate_tasks"); @@ -2492,19 +2495,19 @@ _rpc_terminate_tasks(slurm_msg_t *msg) goto done; } - if (!(stepd_info = stepd_get_info(fd))) { - debug("checkpoint couldn't read from the step %u.%u: %m", + if ((uid = stepd_get_uid(fd, protocol_version)) == (uid_t) -1) { + debug("terminate_tasks couldn't read from the step %u.%u: %m", req->job_id, req->job_step_id); rc = ESLURM_INVALID_JOB_ID; goto done2; } req_uid = g_slurm_auth_get_uid(msg->auth_cred, NULL); - if ((req_uid != stepd_info->uid) + if ((req_uid != uid) && (!_slurm_authorized_user(req_uid))) { debug("kill req from uid %ld for job %u.%u owned by uid %ld", (long) req_uid, req->job_id, 
req->job_step_id, - (long) stepd_info->uid); + (long) uid); rc = ESLURM_USER_ID_MISSING; /* or bad in this case */ goto done2; } @@ -2514,7 +2517,6 @@ _rpc_terminate_tasks(slurm_msg_t *msg) rc = ESLURMD_JOB_NOTRUNNING; done2: - xfree(stepd_info); close(fd); done: slurm_send_rc_msg(msg, rc); @@ -2648,9 +2650,7 @@ _rpc_stat_jobacct(slurm_msg_t *msg) slurm_msg_t resp_msg; job_step_stat_t *resp = NULL; int fd; - uid_t req_uid; - long job_uid; - slurmstepd_info_t *stepd_info = NULL; + uid_t req_uid, uid; uint16_t protocol_version; debug3("Entering _rpc_stat_jobacct"); @@ -2667,35 +2667,22 @@ _rpc_stat_jobacct(slurm_msg_t *msg) return ESLURM_INVALID_JOB_ID; } - stepd_info = stepd_get_info(fd); - if (!stepd_info) { - error("stat_jobacct For invalid job_id: %u", - req->job_id); - close(fd); - if (msg->conn_fd >= 0) - slurm_send_rc_msg(msg, ESLURM_INVALID_JOB_ID); - return ESLURM_INVALID_JOB_ID; - } - - job_uid = stepd_info->uid; - xfree(stepd_info); - - if (job_uid < 0) { - error("stat_jobacct for invalid job_id: %u", - req->job_id); + if ((uid = stepd_get_uid(fd, protocol_version)) == (uid_t) -1) { + debug("stat_jobacct couldn't read from the step %u.%u: %m", + req->job_id, req->step_id); close(fd); if (msg->conn_fd >= 0) slurm_send_rc_msg(msg, ESLURM_INVALID_JOB_ID); - return ESLURM_INVALID_JOB_ID; + return ESLURM_INVALID_JOB_ID; } /* * check that requesting user ID is the SLURM UID or root */ - if ((req_uid != job_uid) && (!_slurm_authorized_user(req_uid))) { + if ((req_uid != uid) && (!_slurm_authorized_user(req_uid))) { error("stat_jobacct from uid %ld for job %u " "owned by uid %ld", - (long) req_uid, req->job_id, job_uid); + (long) req_uid, req->job_id, (long) uid); if (msg->conn_fd >= 0) { slurm_send_rc_msg(msg, ESLURM_USER_ID_MISSING); @@ -2743,7 +2730,7 @@ _rpc_list_pids(slurm_msg_t *msg) job_step_pids_t *resp = NULL; int fd; uid_t req_uid; - long job_uid; + uid_t job_uid; uint16_t protocol_version = 0; debug3("Entering _rpc_list_pids"); @@ -2768,7 +2755,7 @@ 
_rpc_list_pids(slurm_msg_t *msg) && (!_slurm_authorized_user(req_uid))) { error("stat_pid from uid %ld for job %u " "owned by uid %ld", - (long) req_uid, req->job_id, job_uid); + (long) req_uid, req->job_id, (long) job_uid); if (msg->conn_fd >= 0) { slurm_send_rc_msg(msg, ESLURM_USER_ID_MISSING); @@ -3161,9 +3148,9 @@ _rpc_reattach_tasks(slurm_msg_t *msg) uint32_t len; int fd; uid_t req_uid; - slurmstepd_info_t *step = NULL; slurm_addr_t *cli = &msg->orig_addr; uint32_t nodeid = (uint32_t)NO_VAL; + uid_t uid = -1; uint16_t protocol_version; slurm_msg_t_copy(&resp_msg, msg); @@ -3175,22 +3162,26 @@ _rpc_reattach_tasks(slurm_msg_t *msg) rc = ESLURM_INVALID_JOB_ID; goto done; } - if ((step = stepd_get_info(fd)) == NULL) { - debug("reattach for nonexistent job %u.%u requested", + + if ((uid = stepd_get_uid(fd, protocol_version)) == (uid_t) -1) { + debug("_rpc_reattach_tasks couldn't read from the " + "step %u.%u: %m", req->job_id, req->job_step_id); rc = ESLURM_INVALID_JOB_ID; goto done2; } - nodeid = step->nodeid; + + nodeid = stepd_get_nodeid(fd, protocol_version); + debug2("_rpc_reattach_tasks: nodeid %u in the job step", nodeid); req_uid = g_slurm_auth_get_uid(msg->auth_cred, NULL); - if ((req_uid != step->uid) && (!_slurm_authorized_user(req_uid))) { + if ((req_uid != uid) && (!_slurm_authorized_user(req_uid))) { error("uid %ld attempt to attach to job %u.%u owned by %ld", (long) req_uid, req->job_id, req->job_step_id, - (long) step->uid); + (long) uid); rc = EPERM; - goto done3; + goto done2; } memset(resp, 0, sizeof(reattach_tasks_response_msg_t)); @@ -3222,7 +3213,7 @@ _rpc_reattach_tasks(slurm_msg_t *msg) slurm_cred_get_signature(req->cred, (char **)(&job_cred_sig), &len); if (len != SLURM_IO_KEY_SIZE) { error("Incorrect slurm cred signature length"); - goto done3; + goto done2; } resp->gtids = NULL; @@ -3232,10 +3223,9 @@ _rpc_reattach_tasks(slurm_msg_t *msg) &resp_msg.address, job_cred_sig, resp); if (rc != SLURM_SUCCESS) { debug2("stepd_attach call failed"); - goto 
done3; + goto done2; } -done3: - xfree(step); + done2: close(fd); done: @@ -3250,12 +3240,12 @@ done: slurm_free_reattach_tasks_response_msg(resp); } -static slurmstepd_info_t *_get_job_step_info(uint32_t jobid) +static uid_t _get_job_uid(uint32_t jobid) { List steps; ListIterator i; step_loc_t *stepd; - slurmstepd_info_t *stepd_info = NULL; + uid_t uid = -1; int fd; steps = stepd_available(conf->spooldir, conf->node_name); @@ -3273,11 +3263,11 @@ static slurmstepd_info_t *_get_job_step_info(uint32_t jobid) stepd->jobid, stepd->stepid); continue; } + uid = stepd_get_uid(fd, stepd->protocol_version); - stepd_info = stepd_get_info(fd); close(fd); - if (!stepd_info) { - debug("stepd_get_info failed %u.%u: %m", + if (uid == (uid_t) -1) { + debug("stepd_get_uid failed %u.%u: %m", stepd->jobid, stepd->stepid); continue; } @@ -3286,20 +3276,6 @@ static slurmstepd_info_t *_get_job_step_info(uint32_t jobid) list_iterator_destroy(i); list_destroy(steps); - return stepd_info; -} - -static long -_get_job_uid(uint32_t jobid) -{ - slurmstepd_info_t *stepd_info = NULL; - long uid = -1; - - if ((stepd_info = _get_job_step_info(jobid))) { - uid = (long)stepd_info->uid; - xfree(stepd_info); - } - return uid; } @@ -3546,7 +3522,7 @@ _rpc_signal_job(slurm_msg_t *msg) { signal_job_msg_t *req = msg->data; uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, NULL); - long job_uid; + uid_t job_uid; List steps; ListIterator i; step_loc_t *stepd = NULL; diff --git a/src/slurmd/slurmstepd/req.c b/src/slurmd/slurmstepd/req.c index 48f2b4906d12820dd656099661de84a22a5a7b21..15cadf2955b74fc47a219cbfd4f7d345c95d8d49 100644 --- a/src/slurmd/slurmstepd/req.c +++ b/src/slurmd/slurmstepd/req.c @@ -79,6 +79,9 @@ static void *_handle_accept(void *arg); static int _handle_request(int fd, stepd_step_rec_t *job, uid_t uid, gid_t gid); static int _handle_state(int fd, stepd_step_rec_t *job); static int _handle_info(int fd, stepd_step_rec_t *job); +static int _handle_mem_limits(int fd, stepd_step_rec_t *job); +static 
int _handle_uid(int fd, stepd_step_rec_t *job); +static int _handle_nodeid(int fd, stepd_step_rec_t *job); static int _handle_signal_task_local(int fd, stepd_step_rec_t *job, uid_t uid); static int _handle_signal_container(int fd, stepd_step_rec_t *job, uid_t uid); static int _handle_checkpoint_tasks(int fd, stepd_step_rec_t *job, uid_t uid); @@ -494,6 +497,18 @@ _handle_request(int fd, stepd_step_rec_t *job, uid_t uid, gid_t gid) debug("Handling REQUEST_INFO"); rc = _handle_info(fd, job); break; + case REQUEST_STEP_MEM_LIMITS: + debug("Handling REQUEST_STEP_MEM_LIMITS"); + rc = _handle_mem_limits(fd, job); + break; + case REQUEST_STEP_UID: + debug("Handling REQUEST_STEP_UID"); + rc = _handle_uid(fd, job); + break; + case REQUEST_STEP_NODEID: + debug("Handling REQUEST_STEP_NODEID"); + rc = _handle_nodeid(fd, job); + break; case REQUEST_ATTACH: debug("Handling REQUEST_ATTACH"); rc = _handle_attach(fd, job, uid); @@ -587,6 +602,37 @@ rwfail: return SLURM_FAILURE; } +static int +_handle_mem_limits(int fd, stepd_step_rec_t *job) +{ + safe_write(fd, &job->job_mem, sizeof(uint32_t)); + safe_write(fd, &job->step_mem, sizeof(uint32_t)); + + return SLURM_SUCCESS; +rwfail: + return SLURM_FAILURE; +} + +static int +_handle_uid(int fd, stepd_step_rec_t *job) +{ + safe_write(fd, &job->uid, sizeof(uid_t)); + + return SLURM_SUCCESS; +rwfail: + return SLURM_FAILURE; +} + +static int +_handle_nodeid(int fd, stepd_step_rec_t *job) +{ + safe_write(fd, &job->nodeid, sizeof(uint32_t)); + + return SLURM_SUCCESS; +rwfail: + return SLURM_FAILURE; +} + static int _handle_signal_task_local(int fd, stepd_step_rec_t *job, uid_t uid) { @@ -1293,7 +1339,6 @@ _handle_completion(int fd, stepd_step_rec_t *job, uid_t uid) char* buf; int len; Buf buffer; - int version; /* For future use */ bool lock_set = false; debug("_handle_completion for job %u.%u", @@ -1311,7 +1356,6 @@ _handle_completion(int fd, stepd_step_rec_t *job, uid_t uid) return SLURM_SUCCESS; } - safe_read(fd, &version, sizeof(int)); 
safe_read(fd, &first, sizeof(int)); safe_read(fd, &last, sizeof(int)); safe_read(fd, &step_rc, sizeof(int));