Skip to content
Snippets Groups Projects
Commit f641d70c authored by Christopher J. Morrone's avatar Christopher J. Morrone
Browse files

Fix for handling reissued job credentials due to a job requeue.

parent 4caeb0db
No related branches found
No related tags found
No related merge requests found
......@@ -489,6 +489,7 @@ slurm_cred_verify(slurm_cred_ctx_t ctx, slurm_cred_t cred,
slurm_cred_arg_t *arg)
{
time_t now = time(NULL);
int errnum;
xassert(ctx != NULL);
xassert(cred != NULL);
......@@ -511,6 +512,8 @@ slurm_cred_verify(slurm_cred_ctx_t ctx, slurm_cred_t cred,
goto error;
}
slurm_cred_handle_reissue(ctx, cred);
if (_credential_revoked(ctx, cred)) {
slurm_seterrno(ESLURMD_CREDENTIAL_REVOKED);
goto error;
......@@ -542,8 +545,10 @@ slurm_cred_verify(slurm_cred_ctx_t ctx, slurm_cred_t cred,
return SLURM_SUCCESS;
error:
errnum = slurm_get_errno();
slurm_mutex_unlock(&ctx->mutex);
slurm_mutex_unlock(&cred->mutex);
slurm_seterrno(errnum);
return SLURM_ERROR;
}
......@@ -1169,10 +1174,29 @@ static char * timestr (const time_t *tp, char *buf, size_t n)
return (buf);
}
/*
 * Detect a reissued credential for a job whose earlier credential was
 * revoked, and discard the stale job state so that "cred" is treated
 * like a brand new credential by whatever runs next.
 *
 * ctx  IN - credential context holding the per-job state records
 * cred IN - credential being presented; only jobid and ctime are read
 *
 * Nothing happens unless a job state record exists for cred->jobid,
 * that record carries a non-zero revoke time, and the credential was
 * created strictly after that revoke time.
 *
 * NOTE(review): no locking is performed here; callers presumably
 * serialize access to "ctx" themselves -- confirm against call sites.
 */
extern void
slurm_cred_handle_reissue(slurm_cred_ctx_t ctx, slurm_cred_t cred)
{
	job_state_t *state = _find_job_state(ctx, cred->jobid);

	/* No record for this job: nothing to purge. */
	if (state == NULL)
		return;

	/* Job was never revoked: the existing record is still current. */
	if (!state->revoked)
		return;

	/* Credential is not newer than the revocation: not a reissue. */
	if (cred->ctime <= state->revoked)
		return;

	info("reissued job credential for job %u", state->jobid);

	/* A zero expiration marks the record as expired, which lets
	 * _clear_expired_job_states() drop it from the cred context. */
	state->expiration = 0;
	_clear_expired_job_states(ctx);
}
extern bool
slurm_cred_revoked(slurm_cred_ctx_t ctx, slurm_cred_t cred)
{
job_state_t *j = _find_job_state(ctx, cred->jobid);
job_state_t *j = _find_job_state(ctx, cred->jobid);
if ((j == NULL) || (j->revoked == (time_t)0))
return false;
......@@ -1180,12 +1204,6 @@ slurm_cred_revoked(slurm_cred_ctx_t ctx, slurm_cred_t cred)
if (cred->ctime <= j->revoked)
return true;
/* if we are re-running the job, the new job credential is newer
* than the revoke time (see "scontrol requeue"), purge the old
* job record so this looks like a new job */
info("re-creating job credential records for job %u", j->jobid);
j->expiration = 0;
_clear_expired_job_states(ctx);
return false;
}
......@@ -1196,9 +1214,12 @@ _credential_revoked(slurm_cred_ctx_t ctx, slurm_cred_t cred)
_clear_expired_job_states(ctx);
if (!(j = _find_job_state(ctx, cred->jobid)))
if (!(j = _find_job_state(ctx, cred->jobid))) {
(void) _insert_job_state(ctx, cred->jobid);
else if (j->revoked) {
return false;
}
if (cred->ctime <= j->revoked) {
char buf[64];
debug ("cred for %d revoked. expires at %s",
j->jobid, timestr (&j->expiration, buf, 64));
......
......@@ -163,7 +163,13 @@ slurm_cred_t slurm_cred_faker(slurm_cred_arg_t *arg);
/*
* Verify the signed credential `cred,' and return cred contents in
* the cred_arg structure. The credential is cached and cannot be reused.
*
*
* Will perform at least the following checks:
* - Credential signature is valid
* - Credential has not expired
* - If the credential is a reissue, the old credential is purged
* - Credential has not been revoked
* - Credential has not been replayed
*/
int slurm_cred_verify(slurm_cred_ctx_t ctx, slurm_cred_t cred,
slurm_cred_arg_t *arg);
......@@ -175,6 +181,14 @@ int slurm_cred_verify(slurm_cred_ctx_t ctx, slurm_cred_t cred,
*/
int slurm_cred_rewind(slurm_cred_ctx_t ctx, slurm_cred_t cred);
/*
* Check to see if this credential is a reissue of an existing credential
* (this can happen, for instance, with "scontrol requeue"). If
* this credential is a reissue, then the old credential is cleared
* from the cred context "ctx".
*/
void slurm_cred_handle_reissue(slurm_cred_ctx_t ctx, slurm_cred_t cred);
/*
* Revoke all credentials for job id jobid
* time IN - the time the job termination was requested by slurmctld
......
......@@ -688,19 +688,10 @@ _rpc_launch_tasks(slurm_msg_t *msg)
req->job_step_id, req->uid, req->gid, host, port);
first_job_run = !slurm_cred_jobid_cached(conf->vctx, req->job_id);
/* NOTE: slurm_cred_revoked() will create a new job credential
* if this credential is issued after any previous credential
* for the job was revoked. This occurs when a job is requeued.
* Do this before running _check_job_credential(). */
if (slurm_cred_revoked(conf->vctx, req->cred)) {
info("Job credential revoked for %u", jobid);
errnum = ESLURMD_CREDENTIAL_REVOKED;
goto done;
}
if (_check_job_credential(req->cred, jobid, stepid, req_uid,
req->tasks_to_launch[nodeid],
&step_hset) < 0) {
errnum = ESLURMD_INVALID_JOB_CREDENTIAL;
errnum = errno;
error("Invalid job credential from %ld@%s: %m",
(long) req_uid, host);
goto done;
......@@ -795,6 +786,7 @@ _rpc_batch_job(slurm_msg_t *msg)
rc = ESLURM_USER_ID_MISSING; /* or bad in this case */
goto done;
}
slurm_cred_handle_reissue(conf->vctx, req->cred);
if (slurm_cred_revoked(conf->vctx, req->cred)) {
error("Job %u already killed, do not launch batch job",
req->job_id);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment