diff --git a/src/common/slurm_cred.c b/src/common/slurm_cred.c index 8ba0422e0c8a422d2eabc55a2d034602a21be34f..00f52fe6647d9d1d6bb1e81410f98faa4d8676f8 100644 --- a/src/common/slurm_cred.c +++ b/src/common/slurm_cred.c @@ -489,6 +489,7 @@ slurm_cred_verify(slurm_cred_ctx_t ctx, slurm_cred_t cred, slurm_cred_arg_t *arg) { time_t now = time(NULL); + int errnum; xassert(ctx != NULL); xassert(cred != NULL); @@ -511,6 +512,8 @@ slurm_cred_verify(slurm_cred_ctx_t ctx, slurm_cred_t cred, goto error; } + slurm_cred_handle_reissue(ctx, cred); + if (_credential_revoked(ctx, cred)) { slurm_seterrno(ESLURMD_CREDENTIAL_REVOKED); goto error; @@ -542,8 +545,10 @@ slurm_cred_verify(slurm_cred_ctx_t ctx, slurm_cred_t cred, return SLURM_SUCCESS; error: + errnum = slurm_get_errno(); slurm_mutex_unlock(&ctx->mutex); slurm_mutex_unlock(&cred->mutex); + slurm_seterrno(errnum); return SLURM_ERROR; } @@ -1169,10 +1174,29 @@ static char * timestr (const time_t *tp, char *buf, size_t n) return (buf); } +extern void +slurm_cred_handle_reissue(slurm_cred_ctx_t ctx, slurm_cred_t cred) +{ + job_state_t *j = _find_job_state(ctx, cred->jobid); + + if (j != NULL && j->revoked && cred->ctime > j->revoked) { + /* The credential has been reissued. Purge the + old record so that "cred" will look like a new + credential to any ensuing commands. */ + info("reissued job credential for job %u", j->jobid); + + /* Setting j->expiration to zero will make + _clear_expired_job_states() remove this job credential + from the cred context. 
*/ + j->expiration = 0; + _clear_expired_job_states(ctx); + } +} + extern bool slurm_cred_revoked(slurm_cred_ctx_t ctx, slurm_cred_t cred) { - job_state_t *j = _find_job_state(ctx, cred->jobid); + job_state_t *j = _find_job_state(ctx, cred->jobid); if ((j == NULL) || (j->revoked == (time_t)0)) return false; @@ -1180,12 +1204,6 @@ slurm_cred_revoked(slurm_cred_ctx_t ctx, slurm_cred_t cred) if (cred->ctime <= j->revoked) return true; - /* if we are re-running the job, the new job credential is newer - * than the revoke time (see "scontrol requeue"), purge the old - * job record so this looks like a new job */ - info("re-creating job credential records for job %u", j->jobid); - j->expiration = 0; - _clear_expired_job_states(ctx); return false; } @@ -1196,9 +1214,12 @@ _credential_revoked(slurm_cred_ctx_t ctx, slurm_cred_t cred) _clear_expired_job_states(ctx); - if (!(j = _find_job_state(ctx, cred->jobid))) + if (!(j = _find_job_state(ctx, cred->jobid))) { (void) _insert_job_state(ctx, cred->jobid); - else if (j->revoked) { + return false; + } + + if (cred->ctime <= j->revoked) { char buf[64]; debug ("cred for %d revoked. expires at %s", j->jobid, timestr (&j->expiration, buf, 64)); diff --git a/src/common/slurm_cred.h b/src/common/slurm_cred.h index 3077fbaec30794ff16f557e263148a3bf397fbc0..372eb69de6222e1ead0ee6761433065c8a7d6d94 100644 --- a/src/common/slurm_cred.h +++ b/src/common/slurm_cred.h @@ -163,7 +163,13 @@ slurm_cred_t slurm_cred_faker(slurm_cred_arg_t *arg); /* * Verify the signed credential `cred,' and return cred contents in * the cred_arg structure. The credential is cached and cannot be reused. 
- * + * + * Will perform at least the following checks: + * - Credential signature is valid + * - Credential has not expired + * - If the credential is a reissue, the old credential is purged + * - Credential has not been revoked + * - Credential has not been replayed */ int slurm_cred_verify(slurm_cred_ctx_t ctx, slurm_cred_t cred, slurm_cred_arg_t *arg); @@ -175,6 +181,14 @@ int slurm_cred_verify(slurm_cred_ctx_t ctx, slurm_cred_t cred, */ int slurm_cred_rewind(slurm_cred_ctx_t ctx, slurm_cred_t cred); +/* + * Check to see if this credential is a reissue of an existing credential + * (this can happen, for instance, with "scontrol restart"). If + * this credential is a reissue, then the old credential is cleared + * from the cred context "ctx". + */ +void slurm_cred_handle_reissue(slurm_cred_ctx_t ctx, slurm_cred_t cred); + /* * Revoke all credentials for job id jobid * time IN - the time the job terminiation was requested by slurmctld diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c index 6ba12f6f8fb16bdbfecb7e3d27f363f3675ce04c..f60978910c7530433ed83715b43adf65f55bd715 100644 --- a/src/slurmd/slurmd/req.c +++ b/src/slurmd/slurmd/req.c @@ -688,19 +688,10 @@ _rpc_launch_tasks(slurm_msg_t *msg) req->job_step_id, req->uid, req->gid, host, port); first_job_run = !slurm_cred_jobid_cached(conf->vctx, req->job_id); - /* NOTE: slurm_cred_revoked() will create a new job credential - * if this credential is issued after any previous credential - * for the job was revoked. This occurs when a job is requeued. - * Do this before running _check_job_credential(). 
*/ - if (slurm_cred_revoked(conf->vctx, req->cred)) { - info("Job credential revoked for %u", jobid); - errnum = ESLURMD_CREDENTIAL_REVOKED; - goto done; - } if (_check_job_credential(req->cred, jobid, stepid, req_uid, req->tasks_to_launch[nodeid], &step_hset) < 0) { - errnum = ESLURMD_INVALID_JOB_CREDENTIAL; + errnum = errno; error("Invalid job credential from %ld@%s: %m", (long) req_uid, host); goto done; @@ -795,6 +786,7 @@ _rpc_batch_job(slurm_msg_t *msg) rc = ESLURM_USER_ID_MISSING; /* or bad in this case */ goto done; } + slurm_cred_handle_reissue(conf->vctx, req->cred); if (slurm_cred_revoked(conf->vctx, req->cred)) { error("Job %u already killed, do not launch batch job", req->job_id);