From 8fd8a7817afc43865b5f4515d66de8c18dcd43bf Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Thu, 4 Jun 2009 23:43:42 +0000 Subject: [PATCH] Add credential to sbcast RPC to prevent moving files to nodes that have not been allocated to the user svn merge -r17682:17727 https://eris.llnl.gov/svn/slurm/branches/sbcast.cred --- NEWS | 3 + doc/html/authplugins.shtml | 46 ++-- slurm/slurm.h.in | 34 ++- src/api/allocate.c | 44 ++- src/api/step_io.c | 2 +- src/common/slurm_auth.c | 4 +- src/common/slurm_cred.c | 282 +++++++++++++++++--- src/common/slurm_cred.h | 36 ++- src/common/slurm_protocol_defs.c | 19 +- src/common/slurm_protocol_defs.h | 18 +- src/common/slurm_protocol_pack.c | 72 +++++ src/plugins/auth/authd/auth_authd.c | 6 +- src/plugins/auth/munge/auth_munge.c | 4 +- src/plugins/auth/none/auth_none.c | 4 +- src/plugins/crypto/munge/crypto_munge.c | 2 +- src/plugins/crypto/openssl/crypto_openssl.c | 2 +- src/sbcast/agent.c | 8 +- src/sbcast/sbcast.c | 25 +- src/sbcast/sbcast.h | 4 +- src/slurmctld/proc_req.c | 69 +++++ src/slurmd/slurmd/req.c | 74 ++++- src/slurmd/slurmstepd/slurmstepd_job.c | 2 +- testsuite/expect/test14.6 | 8 + 23 files changed, 661 insertions(+), 107 deletions(-) diff --git a/NEWS b/NEWS index 2f19ab204da..7e5974988ed 100644 --- a/NEWS +++ b/NEWS @@ -30,6 +30,9 @@ documents those changes that are of interest to users and admins. -- BLUEGENE - closed some corner cases where a block could had been removed while a job was waiting for it to become ready because an underlying part of the block was put into an error state. + -- Modify sbcast logic to prevent a user from moving files to nodes they + have not been allocated (this would be possible in previous versions + only by hacking the sbcast code). 
* Changes in SLURM 2.0.2 ======================== diff --git a/doc/html/authplugins.shtml b/doc/html/authplugins.shtml index 8d04e00f8e6..02c221822c5 100644 --- a/doc/html/authplugins.shtml +++ b/doc/html/authplugins.shtml @@ -5,7 +5,7 @@ <h2> Overview</h2> <p> This document describes SLURM authentication plugins and the API that defines them. It is intended as a resource to programmers wishing to write their own SLURM -authentication plugins. This is version 0 of the API.</p> +authentication plugins. This is version 100 of the API.</p> <p>SLURM authentication plugins are SLURM plugins that implement the SLURM authentication API described herein. They must conform to the SLURM Plugin API with the following specifications:</p> @@ -18,7 +18,6 @@ authentication service. This may be used for testing purposes, but is not suitab production use due to lack of effective security.</li> <li><b>authd</b>—Brett Chun's Linux authd.</li> <li><b>munge</b>—LLNL's Munge protocol (recommended plugin for production use).</li> -<li><b>krb5</b>—Kerberos 5 (expected to be available in 2009).</li> </ul> <p>The <span class="commandline">plugin_name</span> and <span class="commandline">plugin_version</span> symbols required by the SLURM Plugin API require no specialization for authentication. @@ -66,13 +65,18 @@ or equal to the symbol SLURM_AUTH_FIRST_LOCAL_ERROR.</p> <h2>API Functions</h2> <p>The following functions must appear. Functions which are not implemented should be stubbed.</p> -<p class="commandline">void *slurm_auth_alloc (void);</p> + +<p class="commandline">void *slurm_auth_create (void **argv, char *auth_info);</p> <p style="margin-left:.2in"><b>Description</b>: Allocates from the free store an anonymous credential object and returns a pointer to it. The pointer should be valid until passed to <span class="commandline">slurm_auth_destroy()</span> for disposal. 
SLURM will not pass credentials to the API which have not been allocated by this function.</p> -<p style="margin-left:.2in"><b>Arguments</b>: None.</p> +<p style="margin-left:.2in"><b>Arguments</b>:<br> +<span class="commandline">argv</span> (input) plugin specific +information, timeouts for authd<br> +<span class="commandline">auth_info</span> (input) plugin specific +identification of the server.</p> <p style="margin-left:.2in"><b>Returns</b>: A pointer to a newly allocated credential if successful. On failure, the plugin should return NULL and set its errno to an appropriate value to indicate the reason for failure.</p> @@ -87,40 +91,50 @@ be NULL.</p> the plugin should return SLURM_ERROR and set the errno to an appropriate value to indicate the reason for failure.</p> <p class="footer"><a href="#top">top</a></p> -<p class="commandline">int slurm_auth_verify (void *cr );</p> + +<p class="commandline">int slurm_auth_verify (void *cr, char *auth_info );</p> <p style="margin-left:.2in"><b>Description</b>: Verifies that a credential is in order and correctly identifies the associated user. It also verifies that the credential has not expired. If verification is successful, the return values of -<span class="commandline">slurm_auth_get_uid()</span> and <span class="commandline">slurm_auth_get_gid()</span> +<span class="commandline">slurm_auth_get_uid()</span> and +<span class="commandline">slurm_auth_get_gid()</span> in subsequent calls must correspond to the actual verified system UID and GID of the user associated with the credential. Verification must fail if the credential has not previously been activated, even if a credential implementation cannot exist in an unactivated state. 
A credential's valid term is defined at activation and verification must fail if the credential has expired, even if it would otherwise be valid.</p> -<p style="margin-left:.2in"><b>Arguments</b>: <span class="commandline">cr</span> - (input) pointer to the credential which is to be verified. Cannot -be NULL.</p> +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline">cr</span> (input) pointer to the credential +which is to be verified. Cannot be NULL.<br> +<span class="commandline">auth_info</span> (input) plugin specific +identification of the server.</p> <p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if the credential is verified to be in order and has not expired. If the credential cannot be verified, or if the credential has expired, the function should return SLURM_ERROR and set its errno to an appropriate value to indicate the reason for failure.</p> -<p class="commandline">uid_t slurm_auth_get_uid (void *cr);<br> -gid_t slurm_auth_get_gid (void *cr);</p> + +<p class="commandline">uid_t slurm_auth_get_uid (void *cr, char *auth_info);<br> +gid_t slurm_auth_get_gid (void *cr, char *auth_info);</p> <p style="margin-left:.2in"><b>Description</b>: Extracts the numerical UID (GID) of the user corresponding to the given credential. SLURM considers this value -trustworthy only if the credential has been successfully verified using <span class="commandline">slurm_auth_verify()</span>. +trustworthy only if the credential has been successfully verified using +<span class="commandline">slurm_auth_verify()</span>. An unverified credential does not immediately give rise to an error condition in these functions, since this would require a plugin to distinguish between a verified and an unverified credential, which may be computationally expensive. 
A plugin may consider the lack of verification as an error.</p> -<p style="margin-left:.2in"><b>Arguments</b>: <span class="commandline">cr</span> - (input) pointer to the credential containing the desired identification. -Cannot be NULL.</p> +<p style="margin-left:.2in"><b>Arguments</b>:<br> + <span class="commandline">cr</span> (input) pointer to the credential +containing the desired identification. Cannot be NULL.<br> +<span class="commandline">auth_info</span> (input) plugin specific +identification of the server.</p> <p style="margin-left:.2in"><b>Returns</b>: If successful, the Linux UID (GID) associated with the credential. In case of error, SLURM_AUTH_NOBODY should be returned and errno set appropriately to indicate the cause of the failure.</p> + <p class="footer"><a href="#top">top</a></p> + <p class="commandline">int slurm_auth_pack (void *cr, Buf buf);</p> <p style="margin-left:.2in"><b>Description</b>: Marshals a credential into a buffer for transmission according to the SLURM packing protocol. All authentication plugins @@ -198,6 +212,6 @@ plugin that transmitted it. It is at the discretion of the plugin author whether to maintain data format compatibility across different versions of the plugin.</p> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 8 June 2004</p> +<p style="text-align:center;">Last modified 3 June 2009</p> <!--#include virtual="footer.txt"--> diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 78e354b4d3f..fc28ad9dcc9 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -4,8 +4,7 @@ * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008-2009 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Morris Jette <jette1@llnl.gov>, - * Joey Ekstrom <ekstrom1@llnl.gov> et. al. + * Written by Morris Jette <jette1@llnl.gov>, et. al. * CODE-OCEC-09-009. All rights reserved. 
* * This file is part of SLURM, a resource management program. @@ -156,12 +155,17 @@ BEGIN_C_DECLS #endif /* Define allocation_msg_thread_t below to avoid including extraneous - slurm headers */ + * slurm headers */ #ifndef __allocation_msg_thread_t_defined # define __allocation_msg_thread_t_defined typedef struct allocation_msg_thread *allocation_msg_thread_t; #endif +#ifndef __sbcast_cred_t_defined +# define __sbcast_cred_t_defined + typedef struct sbcast_cred *sbcast_cred_t; /* opaque data type */ +#endif + /*****************************************************************************\ * DEFINITIONS FOR VERSION MANAGEMENT \*****************************************************************************/ @@ -1301,6 +1305,14 @@ typedef struct slurm_update_node_msg { typedef struct partition_info update_part_msg_t; +typedef struct job_sbcast_cred_msg { + uint32_t job_id; /* assigned job id */ + slurm_addr *node_addr; /* network addresses */ + uint32_t node_cnt; /* count of nodes */ + char *node_list; /* assigned list of nodes */ + sbcast_cred_t sbcast_cred; /* opaque data structure */ +} job_sbcast_cred_msg_t; + /* Opaque data type for slurm_step_ctx_* functions */ typedef struct slurm_step_ctx_struct *slurm_step_ctx; typedef struct slurm_step_ctx_struct slurm_step_ctx_t; @@ -1409,6 +1421,7 @@ extern void slurm_free_job_alloc_info_response_msg PARAMS(( */ extern int slurm_allocation_lookup PARAMS(( uint32_t job_id, job_alloc_info_response_msg_t **info)); + /* * slurm_allocation_lookup_lite - retrieve minor info for an existing * resource allocation @@ -1483,9 +1496,20 @@ extern void slurm_free_submit_response_response_msg PARAMS(( * IN job_desc_msg - description of resource allocation request * RET 0 on success, otherwise return -1 and set errno to indicate the error */ -extern int slurm_job_will_run PARAMS(( - job_desc_msg_t * job_desc_msg)); +extern int slurm_job_will_run PARAMS((job_desc_msg_t * job_desc_msg)); + +/* + * slurm_sbcast_lookup - retrieve info for an 
existing resource allocation + * including a credential needed for sbcast + * IN jobid - job allocation identifier + * OUT info - job allocation information including a credential for sbcast + * RET 0 on success, otherwise return -1 and set errno to indicate the error + * NOTE: free the "resp" using slurm_free_sbcast_cred_msg + */ +extern int slurm_sbcast_lookup PARAMS((uint32_t jobid, + job_sbcast_cred_msg_t **info)); +extern void slurm_free_sbcast_cred_msg PARAMS((job_sbcast_cred_msg_t * msg)); /*****************************************************************************\ * JOB/STEP SIGNALING FUNCTIONS diff --git a/src/api/allocate.c b/src/api/allocate.c index 81d623314b2..1c351538551 100644 --- a/src/api/allocate.c +++ b/src/api/allocate.c @@ -2,7 +2,8 @@ * allocate.c - allocate nodes for a job or step with supplied contraints * $Id$ ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008-2009 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov>. * CODE-OCEC-09-009. All rights reserved. 
@@ -451,6 +452,47 @@ slurm_allocation_lookup_lite(uint32_t jobid, return SLURM_PROTOCOL_SUCCESS; } +/* + * slurm_sbcast_lookup - retrieve info for an existing resource allocation + * including a credential needed for sbcast + * IN jobid - job allocation identifier + * OUT info - job allocation information including a credential for sbcast + * RET 0 on success, otherwise return -1 and set errno to indicate the error + * NOTE: free the "resp" using slurm_free_sbcast_cred_msg + */ +int slurm_sbcast_lookup(uint32_t jobid, job_sbcast_cred_msg_t **info) +{ + job_alloc_info_msg_t req; + slurm_msg_t req_msg; + slurm_msg_t resp_msg; + + req.job_id = jobid; + slurm_msg_t_init(&req_msg); + slurm_msg_t_init(&resp_msg); + req_msg.msg_type = REQUEST_JOB_SBCAST_CRED; + req_msg.data = &req; + + if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0) + return SLURM_ERROR; + + switch(resp_msg.msg_type) { + case RESPONSE_SLURM_RC: + if (_handle_rc_msg(&resp_msg) < 0) + return SLURM_ERROR; + *info = NULL; + break; + case RESPONSE_JOB_SBCAST_CRED: + *info = (job_sbcast_cred_msg_t *)resp_msg.data; + return SLURM_PROTOCOL_SUCCESS; + break; + default: + slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR); + break; + } + + return SLURM_PROTOCOL_SUCCESS; +} + /* * Handle a return code message type. * if return code is nonzero, sets errno to return code and returns < 0. 
diff --git a/src/api/step_io.c b/src/api/step_io.c index db6317e3d20..38fc3b269d3 100644 --- a/src/api/step_io.c +++ b/src/api/step_io.c @@ -1012,7 +1012,7 @@ client_io_handler_create(slurm_step_io_fds_t fds, client_io_t *cio; int len; int i; - int siglen; + uint32_t siglen; char *sig; cio = (client_io_t *)xmalloc(sizeof(client_io_t)); diff --git a/src/common/slurm_auth.c b/src/common/slurm_auth.c index 469c666cac2..95880d165d1 100644 --- a/src/common/slurm_auth.c +++ b/src/common/slurm_auth.c @@ -62,7 +62,7 @@ static bool auth_dummy = false; /* for security testing */ typedef struct slurm_auth_ops { void * (*create) ( void *argv[], char *auth_info ); int (*destroy) ( void *cred ); - int (*verify) ( void *cred, void *argv[], char *auth_info ); + int (*verify) ( void *cred, char *auth_info ); uid_t (*get_uid) ( void *cred, char *auth_info ); gid_t (*get_gid) ( void *cred, char *auth_info ); int (*pack) ( void *cred, Buf buf ); @@ -413,7 +413,7 @@ g_slurm_auth_verify( void *cred, void *hosts, int timeout, char *auth_info ) return SLURM_ERROR; } - ret = (*(g_context->ops.verify))( cred, argv, auth_info ); + ret = (*(g_context->ops.verify))( cred, auth_info ); xfree( argv ); return ret; } diff --git a/src/common/slurm_cred.c b/src/common/slurm_cred.c index 530008d5c2d..4ff7048793e 100644 --- a/src/common/slurm_cred.c +++ b/src/common/slurm_cred.c @@ -1,5 +1,5 @@ /*****************************************************************************\ - * src/common/slurm_cred.c - SLURM job credential functions + * src/common/slurm_cred.c - SLURM job and sbcast credential functions ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008-2009 Lawrence Livermore National Security. @@ -17,7 +17,7 @@ * any later version. 
* * In addition, as a special exception, the copyright holders give permission - * to link the code of portions of this program with the OpenSSL library under + * to link the code of portions of this program with the OpenSSL library under * certain conditions as described in each individual source file, and * distribute linked combinations including the two. You must obey the GNU * General Public License in all respects for all of the code used other than @@ -60,32 +60,36 @@ #include "src/common/plugin.h" #include "src/common/plugrack.h" #include "src/common/select_job_res.h" +#include "src/common/slurm_cred.h" #include "src/common/slurm_protocol_api.h" -#include "src/common/xmalloc.h" #include "src/common/xassert.h" +#include "src/common/xmalloc.h" #include "src/common/xstring.h" - -#include "src/common/slurm_cred.h" +#ifndef __sbcast_cred_t_defined +# define __sbcast_cred_t_defined + typedef struct sbcast_cred *sbcast_cred_t; /* opaque data type */ +#endif /* * Default credential information expiration window. * Long enough for loading user environment, running prolog, * and dealing with the slurmd getting paged out of memory. 
*/ -#define DEFAULT_EXPIRATION_WINDOW 1800 +#define DEFAULT_EXPIRATION_WINDOW 1200 #define MAX_TIME 0x7fffffff +#define SBCAST_CACHE_SIZE 64 /* * slurm job credential state * */ typedef struct { - uint32_t jobid; /* SLURM job id for this credential */ - uint32_t stepid; /* SLURM step id for this credential */ time_t ctime; /* Time that the cred was created */ time_t expiration; /* Time at which cred is no longer good */ + uint32_t jobid; /* SLURM job id for this credential */ + uint32_t stepid; /* SLURM step id for this credential */ } cred_state_t; /* @@ -94,10 +98,10 @@ typedef struct { * */ typedef struct { - uint32_t jobid; - time_t revoked; /* Time at which credentials were revoked */ time_t ctime; /* Time that this entry was created */ time_t expiration; /* Time at which credentials can be purged */ + uint32_t jobid; /* SLURM job id for this credential */ + time_t revoked; /* Time at which credentials were revoked */ } job_state_t; @@ -109,6 +113,19 @@ enum ctx_type { SLURM_CRED_VERIFIER }; +/* + * slurm sbcast credential state + * + */ +struct sbcast_cred { + time_t ctime; /* Time that the cred was created */ + time_t expiration; /* Time at which cred is no longer good*/ + uint32_t jobid; /* SLURM job id for this credential */ + char * nodes; /* nodes for which credential is valid */ + char *signature; /* credential signature */ + unsigned int siglen; /* signature length in bytes */ +}; + /* * Credential context, slurm_cred_ctx_t: */ @@ -599,17 +616,8 @@ slurm_cred_create(slurm_cred_ctx_t ctx, slurm_cred_arg_t *arg) if (_slurm_crypto_init() < 0) return NULL; - slurm_mutex_lock(&ctx->mutex); - - xassert(ctx->magic == CRED_CTX_MAGIC); - xassert(ctx->type == SLURM_CRED_CREATOR); - cred = _slurm_cred_alloc(); - - xassert(cred != NULL); - slurm_mutex_lock(&cred->mutex); - xassert(cred->magic == CRED_MAGIC); cred->jobid = arg->jobid; @@ -644,6 +652,9 @@ slurm_cred_create(slurm_cred_ctx_t ctx, slurm_cred_arg_t *arg) #endif cred->ctime = time(NULL); + 
slurm_mutex_lock(&ctx->mutex); + xassert(ctx->magic == CRED_CTX_MAGIC); + xassert(ctx->type == SLURM_CRED_CREATOR); if (_slurm_cred_sign(ctx, cred) < 0) goto fail; @@ -669,11 +680,7 @@ slurm_cred_copy(slurm_cred_t cred) slurm_mutex_lock(&cred->mutex); rcred = _slurm_cred_alloc(); - - xassert(rcred != NULL); - slurm_mutex_lock(&rcred->mutex); - xassert(rcred->magic == CRED_MAGIC); rcred->jobid = cred->jobid; @@ -720,7 +727,6 @@ slurm_cred_faker(slurm_cred_arg_t *arg) xassert(arg != NULL); cred = _slurm_cred_alloc(); - slurm_mutex_lock(&cred->mutex); cred->jobid = arg->jobid; @@ -854,6 +860,8 @@ slurm_cred_verify(slurm_cred_ctx_t ctx, slurm_cred_t cred, xassert(ctx->type == SLURM_CRED_VERIFIER); xassert(cred->magic == CRED_MAGIC); + /* NOTE: the verification checks that the credential was + * created by SlurmUser or root */ if (_slurm_cred_verify_signature(ctx, cred) < 0) { slurm_seterrno(ESLURMD_INVALID_JOB_CREDENTIAL); goto error; @@ -1091,7 +1099,7 @@ slurm_cred_begin_expiration(slurm_cred_ctx_t ctx, uint32_t jobid) } int -slurm_cred_get_signature(slurm_cred_t cred, char **datap, int *datalen) +slurm_cred_get_signature(slurm_cred_t cred, char **datap, uint32_t *datalen) { xassert(cred != NULL); xassert(datap != NULL); @@ -1117,7 +1125,7 @@ slurm_cred_pack(slurm_cred_t cred, Buf buffer) _pack_cred(cred, buffer); xassert(cred->siglen > 0); - packmem((char *) cred->signature, (uint16_t) cred->siglen, buffer); + packmem(cred->signature, cred->siglen, buffer); slurm_mutex_unlock(&cred->mutex); @@ -1333,7 +1341,8 @@ _ctx_update_public_key(slurm_cred_ctx_t ctx, const char *path) static bool _exkey_is_valid(slurm_cred_ctx_t ctx) { - if (!ctx->exkey) return false; + if (!ctx->exkey) + return false; if (time(NULL) > ctx->exkey_exp) { debug2("old job credential key slurmd expired"); @@ -1419,7 +1428,7 @@ _slurm_cred_verify_signature(slurm_cred_ctx_t ctx, slurm_cred_t cred) Buf buffer; int rc; - debug("Checking credential with %d bytes of sig data", cred->siglen); + 
debug("Checking credential with %u bytes of sig data", cred->siglen);
 
 	buffer = init_buf(4096);
 	_pack_cred(cred, buffer);
@@ -1867,8 +1876,10 @@ _job_state_unpack(slurm_cred_ctx_t ctx, Buf buffer)
 
 		if (!j->revoked || (j->revoked && (now < j->expiration)))
 			list_append(ctx->job_list, j);
-		else
-			debug3 ("not appending expired job %u state", j->jobid);
+		else {
+			debug3 ("not appending expired job %u state",
+				j->jobid);
+		}
 	}
 
 	return;
@@ -1878,4 +1889,253 @@ _job_state_unpack(slurm_cred_ctx_t ctx, Buf buffer)
 	return;
 }
 
+/*****************************************************************************\
+ ***************** SBCAST CREDENTIAL FUNCTIONS ******************
+\*****************************************************************************/
+
+/* Pack sbcast credential without the digital signature.
+ * Both create_sbcast_cred() and extract_sbcast_cred() sign/verify the
+ * output of this function, so the two must always agree on it. */
+static void _pack_sbcast_cred(sbcast_cred_t sbcast_cred, Buf buffer)
+{
+	pack_time(sbcast_cred->ctime, buffer);
+	pack_time(sbcast_cred->expiration, buffer);
+	pack32(sbcast_cred->jobid, buffer);
+	packstr(sbcast_cred->nodes, buffer);
+}
+
+/* Create an sbcast credential for the specified job and nodes
+ *	including digital signature.
+ * IN ctx - credential context supplying the signing key
+ * IN job_id - job the credential is issued for
+ * IN nodes - node list for which the credential is valid (copied)
+ * RET the sbcast credential or NULL on error, release it with
+ *	delete_sbcast_cred() */
+sbcast_cred_t create_sbcast_cred(slurm_cred_ctx_t ctx,
+				 uint32_t job_id, char *nodes)
+{
+	Buf buffer;
+	int rc;
+	sbcast_cred_t sbcast_cred;
+	time_t now = time(NULL);
+
+	xassert(ctx);
+	if (_slurm_crypto_init() < 0)
+		return NULL;
+
+	sbcast_cred = xmalloc(sizeof(struct sbcast_cred));
+	sbcast_cred->ctime = now;
+	sbcast_cred->expiration = now + DEFAULT_EXPIRATION_WINDOW;
+	sbcast_cred->jobid = job_id;
+	sbcast_cred->nodes = xstrdup(nodes);
+
+	/* Sign the packed fields */
+	buffer = init_buf(4096);
+	_pack_sbcast_cred(sbcast_cred, buffer);
+	rc = (*(g_crypto_context->ops.crypto_sign))(ctx->key,
+			get_buf_data(buffer), get_buf_offset(buffer),
+			&sbcast_cred->signature, &sbcast_cred->siglen);
+	free_buf(buffer);
+
+	if (rc) {
+		error("sbcast_cred sign: %s",
+		      (*(g_crypto_context->ops.crypto_str_error))(rc));
+		delete_sbcast_cred(sbcast_cred);
+		return NULL;
+	}
+
+	return sbcast_cred;
+}
+
+/* Copy an sbcast credential created using create_sbcast_cred() or
+ *	unpack_sbcast_cred()
+ * RET a newly allocated copy, release it with delete_sbcast_cred() */
+sbcast_cred_t copy_sbcast_cred(sbcast_cred_t sbcast_cred)
+{
+	sbcast_cred_t rcred;
+
+	xassert(sbcast_cred);
+	rcred = xmalloc(sizeof(struct sbcast_cred));
+	rcred->ctime = sbcast_cred->ctime;
+	rcred->expiration = sbcast_cred->expiration;
+	rcred->jobid = sbcast_cred->jobid;
+	rcred->nodes = xstrdup(sbcast_cred->nodes);
+	rcred->siglen = sbcast_cred->siglen;
+	/* NOTE(review): xstrdup() stops at the first NUL byte, so this
+	 * assumes the signature is a NUL-terminated string - confirm for
+	 * every crypto plugin */
+	rcred->signature = xstrdup(sbcast_cred->signature);
+	return rcred;
+}
+
+/* Delete an sbcast credential created using create_sbcast_cred() or
+ *	unpack_sbcast_cred(); a NULL credential is ignored */
+void delete_sbcast_cred(sbcast_cred_t sbcast_cred)
+{
+	if (sbcast_cred) {
+		xfree(sbcast_cred->nodes);
+		xfree(sbcast_cred->signature);
+		xfree(sbcast_cred);
+	}
+}
+
+/* Fold a signature into the 32-bit value used as its cache key.
+ * Using two bytes at a time gives us a larger number and reduces the
+ * possibility of a duplicate value. Bytes are read as unsigned and a
+ * trailing unpaired byte is used on its own, so nothing at or past
+ * signature[siglen] is ever read. */
+static uint32_t _sbcast_sig_num(sbcast_cred_t sbcast_cred)
+{
+	const unsigned char *sig;
+	uint32_t sig_num = 0;
+	unsigned int i;
+
+	sig = (const unsigned char *) sbcast_cred->signature;
+	for (i = 0; i < sbcast_cred->siglen; i += 2) {
+		sig_num += ((uint32_t) sig[i]) << 8;
+		if ((i + 1) < sbcast_cred->siglen)
+			sig_num += sig[i + 1];
+	}
+	return sig_num;
+}
+
+/* Extract contents of an sbcast credential verifying the digital signature.
+ * NOTE: We can only perform the full credential validation once with
+ *	Munge without generating a credential replay error, so we only
+ *	verify the credential for block one. All others must have a
+ *	recent signature on file (in our cache).
+ * IN ctx - credential context supplying the verification key
+ * IN sbcast_cred - credential to check
+ * IN block_no - file block number, only block 1 is fully verified
+ * OUT job_id - job id from the credential, 0xffffffff on error
+ * OUT nodes - copy of the credential's node list, NULL on error; the
+ *	caller must xfree it
+ * RET 0 on success, -1 on error */
+int extract_sbcast_cred(slurm_cred_ctx_t ctx,
+			sbcast_cred_t sbcast_cred, uint16_t block_no,
+			uint32_t *job_id, char **nodes)
+{
+	static time_t cache_expire[SBCAST_CACHE_SIZE];
+	static uint32_t cache_value[SBCAST_CACHE_SIZE];
+	uint32_t sig_num;
+	int i, oldest_cache_inx = 0;
+	time_t now = time(NULL), oldest_cache_time = (time_t) 0;
+
+	*job_id = 0xffffffff;
+	*nodes = NULL;
+	xassert(ctx);
+
+	if (_slurm_crypto_init() < 0)
+		return -1;
+
+	if (now > sbcast_cred->expiration)
+		return -1;
+
+	sig_num = _sbcast_sig_num(sbcast_cred);
+
+	if (block_no == 1) {
+		Buf buffer;
+		int rc;
+		buffer = init_buf(4096);
+		_pack_sbcast_cred(sbcast_cred, buffer);
+		/* NOTE: the verification checks that the credential was
+		 * created by SlurmUser or root */
+		rc = (*(g_crypto_context->ops.crypto_verify_sign))(ctx->key,
+				get_buf_data(buffer), get_buf_offset(buffer),
+				sbcast_cred->signature, sbcast_cred->siglen);
+		free_buf(buffer);
+
+		if (rc) {
+			error("sbcast_cred verify: %s",
+			      (*(g_crypto_context->ops.crypto_str_error))(rc));
+			return -1;
+		}
+
+		/* add to cache, reusing the first slot that has expired */
+		for (i=0; i<SBCAST_CACHE_SIZE; i++) {
+			if (now < cache_expire[i]) {
+				if ((i == 0) ||
+				    (oldest_cache_time > cache_expire[i])) {
+					oldest_cache_inx = i;
+					oldest_cache_time = cache_expire[i];
+				}
+				continue;
+			}
+			cache_expire[i] = sbcast_cred->expiration;
+			cache_value[i] = sig_num;
+			break;
+		}
+		if (i >= SBCAST_CACHE_SIZE) {
+			error("sbcast_cred verify: cache overflow");
+
+			/* overwrite the oldest */
+			cache_expire[oldest_cache_inx] = sbcast_cred->
+							 expiration;
+			cache_value[oldest_cache_inx] = sig_num;
+		}
+	} else {
+		/* all other blocks must match an entry cached for block one */
+		for (i=0; i<SBCAST_CACHE_SIZE; i++) {
+			if ((cache_expire[i] == sbcast_cred->expiration) &&
+			    (cache_value[i] == sig_num))
+				break;	/* match */
+		}
+		if (i >= SBCAST_CACHE_SIZE) {
+			error("sbcast_cred verify: signature not in cache");
+			return -1;
+		}
+	}
+
+	*job_id = sbcast_cred->jobid;
+	*nodes = xstrdup(sbcast_cred->nodes);
+	return 0;
+}
+
+/* Pack an sbcast credential into a buffer including the digital signature */
+void pack_sbcast_cred(sbcast_cred_t sbcast_cred, Buf buffer)
+{
+	xassert(sbcast_cred);
+	xassert(sbcast_cred->siglen > 0);
+
+	_pack_sbcast_cred(sbcast_cred, buffer);
+	packmem(sbcast_cred->signature, sbcast_cred->siglen, buffer);
+}
+
+/* Unpack an sbcast credential from a buffer including the digital signature
+ * RET the sbcast credential or NULL on error, release it with
+ *	delete_sbcast_cred() */
+sbcast_cred_t unpack_sbcast_cred(Buf buffer)
+{
+	uint32_t len;
+	sbcast_cred_t sbcast_cred;
+	uint32_t uint32_tmp;
+
+	sbcast_cred = xmalloc(sizeof(struct sbcast_cred));
+	safe_unpack_time(&sbcast_cred->ctime, buffer);
+	safe_unpack_time(&sbcast_cred->expiration, buffer);
+	safe_unpack32(&sbcast_cred->jobid, buffer);
+	safe_unpackstr_xmalloc(&sbcast_cred->nodes, &uint32_tmp, buffer);
+
+	/* "sigp" must be last */
+	safe_unpackmem_xmalloc(&sbcast_cred->signature, &len, buffer);
+	/* The buffer is not guaranteed to be well formed: treat an empty
+	 * signature as an unpack failure rather than asserting on it */
+	if (len == 0)
+		goto unpack_error;
+	sbcast_cred->siglen = len;
+	return sbcast_cred;
+
+unpack_error:
+	delete_sbcast_cred(sbcast_cred);
+	return NULL;
+}
+
+/* Log the job id, node list and creation time of an sbcast credential
+ * using the info() call */
+void print_sbcast_cred(sbcast_cred_t sbcast_cred)
+{
+	info("Sbcast_cred: Jobid %u", sbcast_cred->jobid );
+	info("Sbcast_cred: Nodes %s", sbcast_cred->nodes );
+	info("Sbcast_cred: ctime %s", ctime(&sbcast_cred->ctime) );
+}
 
diff --git a/src/common/slurm_cred.h b/src/common/slurm_cred.h
index 8259b8b5219..ad046739ddd 100644
--- a/src/common/slurm_cred.h
+++ b/src/common/slurm_cred.h
@@ -1,7 +1,8 @@
 /*****************************************************************************\
- * src/common/slurm_cred.h - SLURM job credential operations
+ * src/common/slurm_cred.h - SLURM job and
sbcast credential functions ***************************************************************************** - * Copyright (C) 2002-2006 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008-2009 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Mark Grondona <grondona1@llnl.gov>. * CODE-OCEC-09-009. All rights reserved. @@ -16,7 +17,7 @@ * any later version. * * In addition, as a special exception, the copyright holders give permission - * to link the code of portions of this program with the OpenSSL library under + * to link the code of portions of this program with the OpenSSL library under * certain conditions as described in each individual source file, and * distribute linked combinations including the two. You must obey the GNU * General Public License in all respects for all of the code used other than @@ -65,6 +66,16 @@ typedef struct slurm_job_credential * slurm_cred_t; #endif +/* + * The incomplete slurm_cred_t type is also defined in slurm_protocol_defs.h + * so check to ensure that this header has not been included after + * slurm_protocol_defs.h: + */ +#ifndef __sbcast_cred_t_defined +# define __sbcast_cred_t_defined + typedef struct sbcast_cred *sbcast_cred_t; /* opaque data type */ +#endif + /* * The slurm_cred_ctx_t incomplete type */ @@ -278,13 +289,30 @@ slurm_cred_t slurm_cred_unpack(Buf buffer); * Get a pointer to the slurm credential signature * (used by slurm IO connections to verify connecting agent) */ -int slurm_cred_get_signature(slurm_cred_t cred, char **datap, int *len); +int slurm_cred_get_signature(slurm_cred_t cred, char **datap, + uint32_t *len); /* * Print a slurm job credential using the info() call */ void slurm_cred_print(slurm_cred_t cred); +/* + * Functions to create, delete, pack, and unpack an sbcast credential + * Caller of extract_sbcast_cred() must xfree returned node 
string + */ +sbcast_cred_t create_sbcast_cred(slurm_cred_ctx_t ctx, + uint32_t job_id, char *nodes); +sbcast_cred_t copy_sbcast_cred(sbcast_cred_t sbcast_cred); +void delete_sbcast_cred(sbcast_cred_t sbcast_cred); +int extract_sbcast_cred(slurm_cred_ctx_t ctx, + sbcast_cred_t sbcast_cred, uint16_t block_no, + uint32_t *job_id, char **nodes); +void pack_sbcast_cred(sbcast_cred_t sbcast_cred, Buf buffer); +sbcast_cred_t unpack_sbcast_cred(Buf buffer); +void print_sbcast_cred(sbcast_cred_t sbcast_cred); + + #ifdef DISABLE_LOCALTIME extern char * timestr (const time_t *tp, char *buf, size_t n); #endif diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index 132a9653c29..2b110b77e45 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -1166,6 +1166,22 @@ void slurm_free_resource_allocation_response_msg ( } } +/* + * slurm_free_sbcast_cred_msg - free slurm resource allocation response + * message including an sbcast credential + * IN msg - pointer to response message from slurm_sbcast_lookup() + * NOTE: buffer is loaded by slurm_allocate_resources + */ +void slurm_free_sbcast_cred_msg(job_sbcast_cred_msg_t * msg) +{ + if (msg) { + xfree(msg->node_addr); + xfree(msg->node_list); + delete_sbcast_cred(msg->sbcast_cred); + xfree(msg); + } +} + /* * slurm_free_job_alloc_info_response_msg - free slurm job allocation * info response message @@ -1532,8 +1548,9 @@ extern void slurm_free_topo_info_msg(topo_info_response_msg_t *msg) extern void slurm_free_file_bcast_msg(file_bcast_msg_t *msg) { if (msg) { - xfree(msg->fname); xfree(msg->block); + xfree(msg->fname); + delete_sbcast_cred(msg->cred); xfree(msg); } } diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index c16c2606e83..1c66b6605c6 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -57,12 +57,13 @@ #include <sys/wait.h> #include "src/common/bitstring.h" +#include "src/common/job_options.h" 
#include "src/common/list.h" #include "src/common/macros.h" +#include "src/common/slurm_cred.h" #include "src/common/slurm_protocol_common.h" -#include "src/common/switch.h" -#include "src/common/job_options.h" #include "src/common/slurm_step_layout.h" +#include "src/common/switch.h" #include "src/common/xassert.h" //#include "src/common/slurm_jobacct_common.h" @@ -160,6 +161,8 @@ typedef enum { RESPONSE_JOB_READY, REQUEST_JOB_END_TIME, REQUEST_JOB_NOTIFY, + REQUEST_JOB_SBCAST_CRED, + RESPONSE_JOB_SBCAST_CRED, REQUEST_JOB_STEP_CREATE = 5001, RESPONSE_JOB_STEP_CREATE, @@ -234,11 +237,11 @@ typedef enum { * core api configuration struct \*****************************************************************************/ typedef struct forward { - char *nodelist; /*ranged string of who to forward the - message to */ - uint16_t cnt; /* number of nodes to forward to */ - uint32_t timeout; /* original timeout increments */ - uint16_t init; /* tell me it has been set (FORWARD_INIT) */ + uint16_t cnt; /* number of nodes to forward to */ + uint16_t init; /* tell me it has been set (FORWARD_INIT) */ + char *nodelist; /* ranged string of who to forward the + * message to */ + uint32_t timeout; /* original timeout increments */ } forward_t; /*core api protocol message structures */ @@ -746,6 +749,7 @@ typedef struct file_bcast_msg { uint32_t gid; /* group for destination file */ time_t atime; /* last access time for destination file */ time_t mtime; /* last modification time for dest file */ + sbcast_cred_t cred; /* credential for the RPC */ uint32_t block_len; /* length of this data block */ char *block; /* data for this block */ } file_bcast_msg_t; diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 1141c203c8e..3d8dc983a5f 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -393,6 +393,11 @@ static int _unpack_resv_name_msg(reservation_name_msg_t ** msg, Buf buffer); static void 
_pack_topo_info_msg(topo_info_response_msg_t *msg, Buf buffer); static int _unpack_topo_info_msg(topo_info_response_msg_t **msg, Buf buffer); + +static void _pack_job_sbcast_cred_msg(job_sbcast_cred_msg_t *msg, Buf buffer); +static int _unpack_job_sbcast_cred_msg(job_sbcast_cred_msg_t **msg, + Buf buffer); + /* pack_header * packs a slurm protocol header that proceeds every slurm message * IN header - the header structure to pack @@ -523,6 +528,7 @@ pack_msg(slurm_msg_t const *msg, Buf buffer) case REQUEST_JOB_END_TIME: case REQUEST_JOB_ALLOCATION_INFO: case REQUEST_JOB_ALLOCATION_INFO_LITE: + case REQUEST_JOB_SBCAST_CRED: _pack_job_alloc_info_msg((job_alloc_info_msg_t *) msg->data, buffer); break; @@ -828,6 +834,10 @@ pack_msg(slurm_msg_t const *msg, Buf buffer) _pack_topo_info_msg( (topo_info_response_msg_t *)msg->data, buffer); break; + case RESPONSE_JOB_SBCAST_CRED: + _pack_job_sbcast_cred_msg( + (job_sbcast_cred_msg_t *)msg->data, buffer); + break; default: debug("No pack method for msg type %u", msg->msg_type); return EINVAL; @@ -900,6 +910,7 @@ unpack_msg(slurm_msg_t * msg, Buf buffer) case REQUEST_JOB_END_TIME: case REQUEST_JOB_ALLOCATION_INFO: case REQUEST_JOB_ALLOCATION_INFO_LITE: + case REQUEST_JOB_SBCAST_CRED: rc = _unpack_job_alloc_info_msg((job_alloc_info_msg_t **) & (msg->data), buffer); break; @@ -1236,6 +1247,10 @@ unpack_msg(slurm_msg_t * msg, Buf buffer) rc = _unpack_topo_info_msg( (topo_info_response_msg_t **)&msg->data, buffer); break; + case RESPONSE_JOB_SBCAST_CRED: + rc = _unpack_job_sbcast_cred_msg( + (job_sbcast_cred_msg_t **)&msg->data, buffer); + break; default: debug("No unpack method for msg type %u", msg->msg_type); return EINVAL; @@ -1897,6 +1912,57 @@ unpack_error: return SLURM_ERROR; } +static void +_pack_job_sbcast_cred_msg(job_sbcast_cred_msg_t * msg, Buf buffer) +{ + xassert(msg != NULL); + + pack32(msg->job_id, buffer); + packstr(msg->node_list, buffer); + + pack32(msg->node_cnt, buffer); + if (msg->node_cnt > 0) + 
_pack_slurm_addr_array(msg->node_addr, msg->node_cnt, buffer); + pack_sbcast_cred(msg->sbcast_cred, buffer); +} + +static int +_unpack_job_sbcast_cred_msg(job_sbcast_cred_msg_t ** msg, Buf buffer) +{ + uint32_t uint32_tmp; + job_sbcast_cred_msg_t *tmp_ptr; + + /* alloc memory for structure */ + xassert(msg != NULL); + tmp_ptr = xmalloc(sizeof(job_sbcast_cred_msg_t)); + *msg = tmp_ptr; + + /* load the data values */ + safe_unpack32(&tmp_ptr->job_id, buffer); + safe_unpackstr_xmalloc(&tmp_ptr->node_list, &uint32_tmp, buffer); + + safe_unpack32(&tmp_ptr->node_cnt, buffer); + if (tmp_ptr->node_cnt > 0) { + if (_unpack_slurm_addr_array(&(tmp_ptr->node_addr), + &uint32_tmp, buffer)) + goto unpack_error; + if (uint32_tmp != tmp_ptr->node_cnt) + goto unpack_error; + } else + tmp_ptr->node_addr = NULL; + + tmp_ptr->sbcast_cred = unpack_sbcast_cred(buffer); + if (tmp_ptr->sbcast_cred == NULL) + goto unpack_error; + + return SLURM_SUCCESS; + +unpack_error: + slurm_free_sbcast_cred_msg(tmp_ptr); + *msg = NULL; + return SLURM_ERROR; +} + static void _pack_submit_response_msg(submit_response_msg_t * msg, Buf buffer) { @@ -5056,6 +5122,7 @@ static void _pack_file_bcast(file_bcast_msg_t * msg , Buf buffer ) packstr ( msg->fname, buffer ); pack32 ( msg->block_len, buffer ); packmem ( msg->block, msg->block_len, buffer ); + pack_sbcast_cred( msg->cred, buffer ); } static int _unpack_file_bcast(file_bcast_msg_t ** msg_ptr , Buf buffer ) @@ -5084,6 +5151,11 @@ static int _unpack_file_bcast(file_bcast_msg_t ** msg_ptr , Buf buffer ) safe_unpackmem_xmalloc ( & msg->block, &uint32_tmp , buffer ) ; if ( uint32_tmp != msg->block_len ) goto unpack_error; + + msg->cred = unpack_sbcast_cred( buffer ); + if (msg->cred == NULL) + goto unpack_error; + return SLURM_SUCCESS; unpack_error: diff --git a/src/plugins/auth/authd/auth_authd.c b/src/plugins/auth/authd/auth_authd.c index f026204f5bb..af7b4142ab5 100644 --- a/src/plugins/auth/authd/auth_authd.c +++ b/src/plugins/auth/authd/auth_authd.c @@ 
-65,7 +65,7 @@ const char plugin_name[] = "Brent Chun's authd authentication plugin"; const char plugin_type[] = "auth/authd"; -const uint32_t plugin_version = 90; +const uint32_t plugin_version = 100; /* * Where to find the timeout in the argument vector. This is set @@ -179,12 +179,12 @@ slurm_auth_destroy( slurm_auth_credential_t *cred ) } int -slurm_auth_verify( slurm_auth_credential_t *cred, void *argv[], char *auth_info ) +slurm_auth_verify( slurm_auth_credential_t *cred, char *auth_info ) { int rc; time_t now; - if ( ( cred == NULL ) || ( argv == NULL ) ) { + if ( cred == NULL ) { plugin_errno = SLURM_AUTH_BADARG; return SLURM_ERROR; } diff --git a/src/plugins/auth/munge/auth_munge.c b/src/plugins/auth/munge/auth_munge.c index d21df1464b5..b6027ed4b41 100644 --- a/src/plugins/auth/munge/auth_munge.c +++ b/src/plugins/auth/munge/auth_munge.c @@ -76,7 +76,7 @@ const char plugin_name[] = "auth plugin for Munge (http://home.gna.org/munge/)"; const char plugin_type[] = "auth/munge"; -const uint32_t plugin_version = 10; +const uint32_t plugin_version = 100; static int plugin_errno = SLURM_SUCCESS; @@ -246,7 +246,7 @@ slurm_auth_destroy( slurm_auth_credential_t *cred ) * Return SLURM_SUCCESS if the credential is in order and valid. */ int -slurm_auth_verify( slurm_auth_credential_t *c, void *argv, char *socket ) +slurm_auth_verify( slurm_auth_credential_t *c, char *socket ) { if (!c) { plugin_errno = SLURM_AUTH_BADARG; diff --git a/src/plugins/auth/none/auth_none.c b/src/plugins/auth/none/auth_none.c index ee61c54d26e..6803a03dbe1 100644 --- a/src/plugins/auth/none/auth_none.c +++ b/src/plugins/auth/none/auth_none.c @@ -96,7 +96,7 @@ */ const char plugin_name[] = "Null authentication plugin"; const char plugin_type[] = "auth/none"; -const uint32_t plugin_version = 90; +const uint32_t plugin_version = 100; /* @@ -206,7 +206,7 @@ slurm_auth_destroy( slurm_auth_credential_t *cred ) * Return SLURM_SUCCESS if the credential is in order and valid. 
*/ int -slurm_auth_verify( slurm_auth_credential_t *cred, void *argv[], char *auth_info ) +slurm_auth_verify( slurm_auth_credential_t *cred, char *auth_info ) { return SLURM_SUCCESS; } diff --git a/src/plugins/crypto/munge/crypto_munge.c b/src/plugins/crypto/munge/crypto_munge.c index 2021d6cd53e..bcc2b3c1072 100644 --- a/src/plugins/crypto/munge/crypto_munge.c +++ b/src/plugins/crypto/munge/crypto_munge.c @@ -17,7 +17,7 @@ * any later version. * * In addition, as a special exception, the copyright holders give permission - * to link the code of portions of this program with the OpenSSL library under + * to link the code of portions of this program with the OpenSSL library under * certain conditions as described in each individual source file, and * distribute linked combinations including the two. You must obey the GNU * General Public License in all respects for all of the code used other than diff --git a/src/plugins/crypto/openssl/crypto_openssl.c b/src/plugins/crypto/openssl/crypto_openssl.c index 6d0e5c33ea0..b8da00bb8c0 100644 --- a/src/plugins/crypto/openssl/crypto_openssl.c +++ b/src/plugins/crypto/openssl/crypto_openssl.c @@ -17,7 +17,7 @@ * any later version. * * In addition, as a special exception, the copyright holders give permission - * to link the code of portions of this program with the OpenSSL library under + * to link the code of portions of this program with the OpenSSL library under * certain conditions as described in each individual source file, and * distribute linked combinations including the two. 
You must obey the GNU * General Public License in all respects for all of the code used other than diff --git a/src/sbcast/agent.c b/src/sbcast/agent.c index 1957cc00479..56df1fbdbbc 100644 --- a/src/sbcast/agent.c +++ b/src/sbcast/agent.c @@ -119,7 +119,7 @@ static void *_agent_thread(void *args) /* Issue the RPC to transfer the file's data */ extern void send_rpc(file_bcast_msg_t *bcast_msg, - job_alloc_info_response_msg_t *alloc_resp) + job_sbcast_cred_msg_t *sbcast_cred) { /* Preserve some data structures across calls for better performance */ static int threads_used = 0; @@ -141,12 +141,12 @@ extern void send_rpc(file_bcast_msg_t *bcast_msg, else fanout = MAX_THREADS; - span = set_span(alloc_resp->node_cnt, fanout); + span = set_span(sbcast_cred->node_cnt, fanout); - hl = hostlist_create(alloc_resp->node_list); + hl = hostlist_create(sbcast_cred->node_list); i = 0; - while (i < alloc_resp->node_cnt) { + while (i < sbcast_cred->node_cnt) { int j = 0; name = hostlist_shift(hl); if(!name) { diff --git a/src/sbcast/sbcast.c b/src/sbcast/sbcast.c index c5b446ff7d7..470c167500e 100644 --- a/src/sbcast/sbcast.c +++ b/src/sbcast/sbcast.c @@ -54,6 +54,7 @@ #include "src/common/forward.h" #include "src/common/hostlist.h" #include "src/common/log.h" +#include "src/common/slurm_cred.h" #include "src/common/slurm_protocol_api.h" #include "src/common/slurm_protocol_interface.h" #include "src/common/xmalloc.h" @@ -61,10 +62,10 @@ #include "src/sbcast/sbcast.h" /* global variables */ -int fd; /* source file descriptor */ -struct sbcast_parameters params; /* program parameters */ -struct stat f_stat; /* source file stats */ -job_alloc_info_response_msg_t *alloc_resp; /* job specification */ +int fd; /* source file descriptor */ +struct sbcast_parameters params; /* program parameters */ +struct stat f_stat; /* source file stats */ +job_sbcast_cred_msg_t *sbcast_cred; /* job alloc info and sbcast cred */ static void _bcast_file(void); static void _get_job_info(void); @@ -106,7 
+107,7 @@ int main(int argc, char *argv[]) /* transmit the file */ _bcast_file(); -/* slurm_free_resource_allocation_response_msg(alloc_resp); */ +/* slurm_free_sbcast_cred_msg(sbcast_cred); */ exit(0); } @@ -125,15 +126,18 @@ static void _get_job_info(void) jobid = (uint32_t) atol(jobid_str); verbose("jobid = %u", jobid); - if (slurm_allocation_lookup(jobid, &alloc_resp) != SLURM_SUCCESS) { + if (slurm_sbcast_lookup(jobid, &sbcast_cred) != SLURM_SUCCESS) { error("SLURM jobid %u lookup error: %s", jobid, slurm_strerror(slurm_get_errno())); exit(1); } - verbose("node_list = %s", alloc_resp->node_list); - verbose("node_cnt = %u", alloc_resp->node_cnt); - /* also see alloc_resp->node_addr (array) */ + verbose("node_cnt = %u", sbcast_cred->node_cnt); + verbose("node_list = %s", sbcast_cred->node_list); + /* also see sbcast_cred->node_addr (array) */ + + if (params.verbose) + print_sbcast_cred(sbcast_cred->sbcast_cred); /* do not bother to release the return message, * we need to preserve and use most of the information later */ @@ -198,6 +202,7 @@ static void _bcast_file(void) buffer = xmalloc(buf_size); bcast_msg.block = buffer; bcast_msg.block_len = 0; + bcast_msg.cred = sbcast_cred->sbcast_cred; if (params.preserve) { bcast_msg.atime = f_stat.st_atime; @@ -215,7 +220,7 @@ static void _bcast_file(void) if (size_read >= f_stat.st_size) bcast_msg.last_block = 1; - send_rpc(&bcast_msg, alloc_resp); + send_rpc(&bcast_msg, sbcast_cred); if (bcast_msg.last_block) break; /* end of file */ bcast_msg.block_no++; diff --git a/src/sbcast/sbcast.h b/src/sbcast/sbcast.h index 6e853df2bf4..2b7df77d2da 100644 --- a/src/sbcast/sbcast.h +++ b/src/sbcast/sbcast.h @@ -2,7 +2,7 @@ * sbcast.h - definitions used for sbcast data functions ***************************************************************************** * Copyright (C) 2006-2007 The Regents of the University of California. - * Copyright (C) 2008 Lawrence Livermore National Security. 
+ * Copyright (C) 2008-2009 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov> * CODE-OCEC-09-009. All rights reserved. @@ -64,6 +64,6 @@ extern struct sbcast_parameters params; extern void parse_command_line(int argc, char *argv[]); extern void send_rpc(file_bcast_msg_t *bcast_msg, - job_alloc_info_response_msg_t *alloc_resp); + job_sbcast_cred_msg_t *sbcast_cred); #endif diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 127059e9efd..77a41a43f31 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -107,6 +107,7 @@ inline static void _slurm_rpc_dump_partitions(slurm_msg_t * msg); inline static void _slurm_rpc_epilog_complete(slurm_msg_t * msg); inline static void _slurm_rpc_job_notify(slurm_msg_t * msg); inline static void _slurm_rpc_job_ready(slurm_msg_t * msg); +inline static void _slurm_rpc_job_sbcast_cred(slurm_msg_t * msg); inline static void _slurm_rpc_job_step_kill(slurm_msg_t * msg); inline static void _slurm_rpc_job_step_create(slurm_msg_t * msg); inline static void _slurm_rpc_job_step_get_info(slurm_msg_t * msg); @@ -234,6 +235,10 @@ void slurmctld_req (slurm_msg_t * msg) _slurm_rpc_job_alloc_info_lite(msg); slurm_free_job_alloc_info_msg(msg->data); break; + case REQUEST_JOB_SBCAST_CRED: + _slurm_rpc_job_sbcast_cred(msg); + slurm_free_job_alloc_info_msg(msg->data); + break; case REQUEST_PING: _slurm_rpc_ping(msg); /* No body to free */ @@ -1729,6 +1734,70 @@ static void _slurm_rpc_job_alloc_info_lite(slurm_msg_t * msg) } } +/* _slurm_rpc_job_sbcast_cred - process RPC to get details on existing job + * plus sbcast credential */ +static void _slurm_rpc_job_sbcast_cred(slurm_msg_t * msg) +{ + int error_code = SLURM_SUCCESS; + slurm_msg_t response_msg; + struct job_record *job_ptr; + DEF_TIMERS; + job_alloc_info_msg_t *job_info_msg = + (job_alloc_info_msg_t *) msg->data; + job_sbcast_cred_msg_t job_info_resp_msg; + 
sbcast_cred_t sbcast_cred; + /* Locks: Read config, job, read node */ + slurmctld_lock_t job_read_lock = { + READ_LOCK, READ_LOCK, READ_LOCK, NO_LOCK }; + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, NULL); + + START_TIMER; + debug2("Processing RPC: REQUEST_JOB_SBCAST_CRED from uid=%u", + (unsigned int) uid); + + /* do RPC call */ + lock_slurmctld(job_read_lock); + error_code = job_alloc_info(uid, job_info_msg->job_id, &job_ptr); + END_TIMER2("_slurm_rpc_job_alloc_info"); + + /* return result */ + if (error_code || (job_ptr == NULL)) { + unlock_slurmctld(job_read_lock); + debug2("_slurm_rpc_job_sbcast_cred: JobId=%u, uid=%u: %s", + job_info_msg->job_id, uid, + slurm_strerror(error_code)); + slurm_send_rc_msg(msg, error_code); + } else if ((sbcast_cred = create_sbcast_cred(slurmctld_config.cred_ctx, + job_ptr->job_id, job_ptr->nodes)) == NULL) { + unlock_slurmctld(job_read_lock); + error("_slurm_rpc_job_sbcast_cred JobId=%u cred create error", + job_info_msg->job_id); + slurm_send_rc_msg(msg, SLURM_ERROR); + } else { + info("_slurm_rpc_job_sbcast_cred JobId=%u NodeList=%s %s", + job_info_msg->job_id, job_ptr->nodes, TIME_STR); + + job_info_resp_msg.job_id = job_ptr->job_id; + job_info_resp_msg.node_addr = xmalloc(sizeof(slurm_addr) * + job_ptr->node_cnt); + memcpy(job_info_resp_msg.node_addr, job_ptr->node_addr, + (sizeof(slurm_addr) * job_ptr->node_cnt)); + job_info_resp_msg.node_cnt = job_ptr->node_cnt; + job_info_resp_msg.node_list = xstrdup(job_ptr->nodes); + job_info_resp_msg.sbcast_cred = sbcast_cred; + unlock_slurmctld(job_read_lock); + + slurm_msg_t_init(&response_msg); + response_msg.msg_type = RESPONSE_JOB_SBCAST_CRED; + response_msg.data = &job_info_resp_msg; + + slurm_send_node_msg(msg->conn_fd, &response_msg); + xfree(job_info_resp_msg.node_addr); + xfree(job_info_resp_msg.node_list); + delete_sbcast_cred(sbcast_cred); + } +} + /* _slurm_rpc_ping - process ping RPC */ static void _slurm_rpc_ping(slurm_msg_t * msg) { diff --git 
a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c index 9bfce0e0437..a91b481bf34 100644 --- a/src/slurmd/slurmd/req.c +++ b/src/slurmd/slurmd/req.c @@ -142,10 +142,12 @@ static int _run_epilog(uint32_t jobid, uid_t uid, char *resv_id, static bool _pause_for_job_completion(uint32_t jobid, char *nodes, int maxtime); static void _sync_messages_kill(kill_job_msg_t *req); -static int _waiter_init (uint32_t jobid); -static int _waiter_complete (uint32_t jobid); +static int _waiter_init (uint32_t jobid); +static int _waiter_complete (uint32_t jobid); static bool _steps_completed_now(uint32_t jobid); +static int _valid_sbcast_cred(file_bcast_msg_t *req, uid_t req_uid, + uint16_t block_no); static void _wait_state_completed(uint32_t jobid, int max_delay); static long _get_job_uid(uint32_t jobid); @@ -1918,12 +1920,49 @@ _init_groups(uid_t my_uid, gid_t my_gid) if (rc) { error("sbcast: Error in initgroups(%s, %ld): %m", user_name, (long)my_gid); - return -1; +// return -1; } return 0; } +/* Validate sbcast credential. 
+ * NOTE: We can only perform the full credential validation once with + * Munge without generating a credential replay error + * RET SLURM_SUCCESS or an error code */ +static int +_valid_sbcast_cred(file_bcast_msg_t *req, uid_t req_uid, uint16_t block_no) +{ + int rc = SLURM_SUCCESS; + uint32_t job_id; + char *nodes = NULL; + hostset_t hset = NULL; + + rc = extract_sbcast_cred(conf->vctx, req->cred, block_no, + &job_id, &nodes); + if (rc != 0) { + error("Security violation: Invalid sbcast_cred from uid %u", + (uint32_t) req_uid); + return ESLURMD_INVALID_JOB_CREDENTIAL; + } + + if (!(hset = hostset_create(nodes))) { + error("Unable to parse sbcast_cred hostlist %s", nodes); + rc = ESLURMD_INVALID_JOB_CREDENTIAL; + } else if (!hostset_within(hset, conf->node_name)) { + error("Security violation: sbcast_cred from %u has " + "bad hostset %s", req_uid, nodes); + rc = ESLURMD_INVALID_JOB_CREDENTIAL; + } + if (hset) + hostset_destroy(hset); + xfree(nodes); + + /* print_sbcast_cred(req->cred); */ + + return rc; +} + static int _rpc_file_bcast(slurm_msg_t *msg) { @@ -1935,16 +1974,23 @@ _rpc_file_bcast(slurm_msg_t *msg) #if 0 info("last_block=%u force=%u modes=%o", - req->last_block, req->force, req->modes); + req->last_block, req->force, req->modes); info("uid=%u gid=%u atime=%lu mtime=%lu block_len[0]=%u", - req->uid, req->gid, req->atime, req->mtime, req->block_len[0]); + req->uid, req->gid, req->atime, req->mtime, req->block_len); +#if 0 /* when the file being transferred is binary, the following line * can break the terminal output for slurmd */ - /* info("req->block[0]=%s, @ %lu", req->block[0], (unsigned long) &req->block[0]); */ + info("req->block[0]=%s, @ %lu", \ + req->block[0], (unsigned long) &req->block); +#endif #endif + if ((rc = _valid_sbcast_cred(req, req_uid, req->block_no)) != + SLURM_SUCCESS) + return rc; + info("sbcast req_uid=%u fname=%s block_no=%u", - req_uid, req->fname, req->block_no); + req_uid, req->fname, req->block_no); child = fork(); if 
(child == -1) { error("sbcast: fork failure"); @@ -1962,7 +2008,7 @@ _rpc_file_bcast(slurm_msg_t *msg) } if (setgid(req_gid) < 0) { error("sbcast: uid:%u setgid(%u): %s", req_uid, req_gid, - strerror(errno)); + strerror(errno)); exit(errno); } if (setuid(req_uid) < 0) { @@ -1989,12 +2035,13 @@ _rpc_file_bcast(slurm_msg_t *msg) offset = 0; while (req->block_len - offset) { - inx = write(fd, &req->block[offset], (req->block_len - offset)); + inx = write(fd, &req->block[offset], + (req->block_len - offset)); if (inx == -1) { if ((errno == EINTR) || (errno == EAGAIN)) continue; error("sbcast: uid:%u can't write `%s`: %s", - req_uid, req->fname, strerror(errno)); + req_uid, req->fname, strerror(errno)); close(fd); exit(errno); } @@ -2002,7 +2049,7 @@ _rpc_file_bcast(slurm_msg_t *msg) } if (req->last_block && fchmod(fd, (req->modes & 0777))) { error("sbcast: uid:%u can't chmod `%s`: %s", - req_uid, req->fname, strerror(errno)); + req_uid, req->fname, strerror(errno)); } if (req->last_block && fchown(fd, req->uid, req->gid)) { error("sbcast: uid:%u can't chown `%s`: %s", @@ -2016,7 +2063,7 @@ _rpc_file_bcast(slurm_msg_t *msg) time_buf.modtime = req->mtime; if (utime(req->fname, &time_buf)) { error("sbcast: uid:%u can't utime `%s`: %s", - req_uid, req->fname, strerror(errno)); + req_uid, req->fname, strerror(errno)); } } exit(SLURM_SUCCESS); @@ -2034,7 +2081,7 @@ _rpc_reattach_tasks(slurm_msg_t *msg) char host[MAXHOSTNAMELEN]; slurm_addr ioaddr; void *job_cred_sig; - int len; + uint32_t len; int fd; uid_t req_uid; slurmstepd_info_t *step = NULL; @@ -2802,7 +2849,6 @@ _rpc_terminate_job(slurm_msg_t *msg) return; } - /* * "revoke" all future credentials for this jobid */ diff --git a/src/slurmd/slurmstepd/slurmstepd_job.c b/src/slurmd/slurmstepd/slurmstepd_job.c index 6d3ecd96a93..998d4acc7c0 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.c +++ b/src/slurmd/slurmstepd/slurmstepd_job.c @@ -542,7 +542,7 @@ struct srun_info * srun_info_create(slurm_cred_t cred, slurm_addr 
*resp_addr, slurm_addr *ioaddr) { char *data = NULL; - int len = 0; + uint32_t len = 0; struct srun_info *srun = xmalloc(sizeof(struct srun_info)); srun_key_t *key = xmalloc(sizeof(srun_key_t)); diff --git a/testsuite/expect/test14.6 b/testsuite/expect/test14.6 index 05ee6e8dcfa..6459a1ea1ea 100755 --- a/testsuite/expect/test14.6 +++ b/testsuite/expect/test14.6 @@ -46,6 +46,10 @@ if {[test_front_end] != 0} { send_user "\nWARNING: This test is incompatable with front-end systems\n" exit 0 } +if {[slurmd_user_root] == 0} { + send_user "\nWARNING: This test is incompatable with SlurmdUser != root\n" + exit 0 +} # Delete left-over stdout/err files file delete $file_out $file_err @@ -114,6 +118,10 @@ if {[wait_for_file $file_err] == 0} { incr matches exp_continue } + -re "error:" { + send_user "\nFAILURE: unexpected error\n" + set exit_code 1 + } eof { wait } -- GitLab