diff --git a/src/plugins/auth/munge/auth_munge.c b/src/plugins/auth/munge/auth_munge.c index c0b9622cb7f0dffb99afcc69abb720ab825d2e06..139d76b46058a82f412584f2803c8592f67d2ac3 100644 --- a/src/plugins/auth/munge/auth_munge.c +++ b/src/plugins/auth/munge/auth_munge.c @@ -73,8 +73,8 @@ #include "src/common/slurm_xlator.h" #define MUNGE_ERRNO_OFFSET 1000 -#define RETRY_COUNT 3 -#define RETRY_USEC 10000 +#define RETRY_COUNT 10 +#define RETRY_USEC 100000 /* * These variables are required by the generic plugin interface. If they @@ -181,7 +181,7 @@ slurm_auth_create( void *argv[], char *socket ) { int retry = RETRY_COUNT; slurm_auth_credential_t *cred = NULL; - munge_err_t e = EMUNGE_SUCCESS; + munge_err_t err = EMUNGE_SUCCESS; munge_ctx_t ctx = munge_ctx_create(); SigFunc *ohandler; @@ -208,6 +208,13 @@ slurm_auth_create( void *argv[], char *socket ) return NULL; } +#ifdef SLURM_MUNGE_TTL + /* Default munge credential lifetime is 5 minutes. Lower values can + * improve performance of munged (less records to test for replay). + * The value of SLURM_MUNGE_TTL should be in seconds. */ + (void) munge_ctx_set(ctx, MUNGE_OPT_TTL, SLURM_MUNGE_TTL); +#endif + cred = xmalloc(sizeof(*cred)); cred->verified = false; cred->m_str = NULL; @@ -226,19 +233,20 @@ slurm_auth_create( void *argv[], char *socket ) ohandler = xsignal(SIGALRM, (SigFunc *)SIG_BLOCK); again: - e = munge_encode(&cred->m_str, ctx, cred->buf, cred->len); - if (e != EMUNGE_SUCCESS) { - if ((e == EMUNGE_SOCKET) && retry--) { - error ("Munge encode failed: %s (retrying ...)", - munge_ctx_strerror(ctx)); + err = munge_encode(&cred->m_str, ctx, cred->buf, cred->len); + if (err != EMUNGE_SUCCESS) { + if ((err == EMUNGE_SOCKET) && retry--) { + debug("Munge encode failed: %s (retrying ...)", + munge_ctx_strerror(ctx)); usleep(RETRY_USEC); /* Likely munged too busy */ goto again; } - + if (err == EMUNGE_SOCKET) + error("If munged is up, restart with --num-threads=10"); error("Munge encode failed: %s", munge_ctx_strerror(ctx)); xfree( cred ); cred = NULL; - plugin_errno = e + MUNGE_ERRNO_OFFSET; + plugin_errno = err + MUNGE_ERRNO_OFFSET; } else if ((bad_cred_test > 0) && cred->m_str) { int i = ((int) time(NULL)) % strlen(cred->m_str); cred->m_str[i]++; /* random position in credential */ @@ -493,7 +501,7 @@ static int _decode_cred(slurm_auth_credential_t *c, char *socket) { int retry = RETRY_COUNT; - munge_err_t e; + munge_err_t err; munge_ctx_t ctx; if (c == NULL) @@ -517,24 +525,26 @@ _decode_cred(slurm_auth_credential_t *c, char *socket) again: c->buf = NULL; - e = munge_decode(c->m_str, ctx, &c->buf, &c->len, &c->uid, &c->gid); - if (e != EMUNGE_SUCCESS) { + err = munge_decode(c->m_str, ctx, &c->buf, &c->len, &c->uid, &c->gid); + if (err != EMUNGE_SUCCESS) { if (c->buf) { free(c->buf); c->buf = NULL; } - if ((e == EMUNGE_SOCKET) && retry--) { - error ("Munge decode failed: %s (retrying ...)", - munge_ctx_strerror(ctx)); + if ((err == EMUNGE_SOCKET) && retry--) { + debug("Munge decode failed: %s (retrying ...)", + munge_ctx_strerror(ctx)); usleep(RETRY_USEC); /* Likely munged too busy */ goto again; } + if (err == EMUNGE_SOCKET) + error("If munged is up, restart with --num-threads=10"); #ifdef MULTIPLE_SLURMD /* In multple slurmd mode this will happen all the * time since we are authenticating with the same * munged. */ - if (e != EMUNGE_CRED_REPLAYED) { + if (err != EMUNGE_CRED_REPLAYED) { #endif /* * Print any valid credential data @@ -542,15 +552,15 @@ _decode_cred(slurm_auth_credential_t *c, char *socket) error ("Munge decode failed: %s", munge_ctx_strerror(ctx)); _print_cred(ctx); - if (e == EMUNGE_CRED_REWOUND) + if (err == EMUNGE_CRED_REWOUND) error("Check for out of sync clocks"); - c->cr_errno = e + MUNGE_ERRNO_OFFSET; + c->cr_errno = err + MUNGE_ERRNO_OFFSET; #ifdef MULTIPLE_SLURMD } else { debug2("We had a replayed cred, " "but this is expected in multiple " "slurmd mode."); - e = 0; + err = 0; } #endif goto done; @@ -560,7 +570,7 @@ _decode_cred(slurm_auth_credential_t *c, char *socket) done: munge_ctx_destroy(ctx); - return e ? SLURM_ERROR : SLURM_SUCCESS; + return err ? SLURM_ERROR : SLURM_SUCCESS; } diff --git a/src/plugins/crypto/munge/crypto_munge.c b/src/plugins/crypto/munge/crypto_munge.c index 303bccce353af6ef18320b2449d23e9a18d4c73f..63ff683f887cd15c5877f31adde6e50851037ea6 100644 --- a/src/plugins/crypto/munge/crypto_munge.c +++ b/src/plugins/crypto/munge/crypto_munge.c @@ -65,8 +65,8 @@ #include "src/common/xmalloc.h" #include "src/common/xstring.h" -#define RETRY_COUNT 3 -#define RETRY_USEC 10000 +#define RETRY_COUNT 10 +#define RETRY_USEC 100000 /* * These variables are required by the generic plugin interface. If they @@ -205,14 +205,26 @@ crypto_sign(void * key, char *buffer, int buf_size, char **sig_pp, int retry = RETRY_COUNT; char *cred; munge_err_t err; + munge_ctx_t ctx = (munge_ctx_t) key; + +#ifdef SLURM_MUNGE_TTL + /* Default munge credential lifetime is 5 minutes. Lower values can + * improve performance of munged (less records to test for replay). + * The value of SLURM_MUNGE_TTL should be in seconds. */ + (void) munge_ctx_set(ctx, MUNGE_OPT_TTL, SLURM_MUNGE_TTL); +#endif again: - err = munge_encode(&cred, (munge_ctx_t) key, buffer, buf_size); + err = munge_encode(&cred, ctx, buffer, buf_size); if (err != EMUNGE_SUCCESS) { if ((err == EMUNGE_SOCKET) && retry--) { + debug("Munge encode failed: %s (retrying ...)", + munge_ctx_strerror(ctx)); usleep(RETRY_USEC); /* Likely munged too busy */ goto again; } + if (err == EMUNGE_SOCKET) /* Also see MUNGE_OPT_TTL above */ + error("If munged is up, restart with --num-threads=10"); return err; } @@ -233,19 +245,21 @@ crypto_verify_sign(void * key, char *buffer, unsigned int buf_size, int buf_out_size; int rc = 0; munge_err_t err; + munge_ctx_t ctx = (munge_ctx_t) key; again: - err = munge_decode(signature, (munge_ctx_t) key, - &buf_out, &buf_out_size, + err = munge_decode(signature, ctx, &buf_out, &buf_out_size, &uid, &gid); if (err != EMUNGE_SUCCESS) { if ((err == EMUNGE_SOCKET) && retry--) { - error ("Munge decode failed: %s (retrying ...)", - munge_ctx_strerror((munge_ctx_t) key)); + debug("Munge decode failed: %s (retrying ...)", + munge_ctx_strerror(ctx)); usleep(RETRY_USEC); /* Likely munged too busy */ goto again; } + if (err == EMUNGE_SOCKET) + error("If munged is up, restart with --num-threads=10"); #ifdef MULTIPLE_SLURMD if (err != EMUNGE_CRED_REPLAYED) {