Skip to content
Snippets Groups Projects
Commit 2b61d656 authored by Tim Wickberg's avatar Tim Wickberg
Browse files

Merge branch 'slurm-16.05'

parents 579f7a69 71a39bf4
No related branches found
No related tags found
No related merge requests found
...@@ -177,6 +177,7 @@ documents those changes that are of interest to users and administrators. ...@@ -177,6 +177,7 @@ documents those changes that are of interest to users and administrators.
-- Cray - add NHC_ABSOLUTELY_NO to never run NHC, even on certain edge cases -- Cray - add NHC_ABSOLUTELY_NO to never run NHC, even on certain edge cases
that it would otherwise be run on with NHC_NO. that it would otherwise be run on with NHC_NO.
-- Ignore GRES/QOS updates that maintain the same value as before. -- Ignore GRES/QOS updates that maintain the same value as before.
-- mpi/pmix - prepare temp directory for application.
* Changes in Slurm 16.05.4 * Changes in Slurm 16.05.4
========================== ==========================
......
...@@ -59,6 +59,11 @@ ...@@ -59,6 +59,11 @@
#pragma message "PMIx version mismatch: the major version seen during configuration was " VALUE(HAVE_PMIX_VER) "L but found " VALUE(PMIX_VERSION_MAJOR) " compilation will most likely fail. Please reconfigure against the new version." #pragma message "PMIx version mismatch: the major version seen during configuration was " VALUE(HAVE_PMIX_VER) "L but found " VALUE(PMIX_VERSION_MAJOR) " compilation will most likely fail. Please reconfigure against the new version."
#endif #endif
// define some additional keys
#ifndef PMIX_TDIR_RMCLEAN
#define PMIX_TDIR_RMCLEAN "pmix.tdir.rmclean"
#endif
#define PMIXP_ALLOC_KEY(kvp, key_str) \ #define PMIXP_ALLOC_KEY(kvp, key_str) \
{ \ { \
char *key = key_str; \ char *key = key_str; \
...@@ -217,31 +222,18 @@ int pmixp_libpmix_init(void) ...@@ -217,31 +222,18 @@ int pmixp_libpmix_init(void)
mode_t rights = (S_IRUSR | S_IWUSR | S_IXUSR) | (S_IRGRP | S_IWGRP | S_IXGRP); mode_t rights = (S_IRUSR | S_IWUSR | S_IXUSR) | (S_IRGRP | S_IWGRP | S_IXGRP);
pmix_info_t *kvp = NULL; pmix_info_t *kvp = NULL;
/* NOTE: we need user who owns the job to access PMIx usock if (0 != (rc = pmixp_mkdir(pmixp_info_tmpdir_lib(), rights))) {
* file. According to 'man 7 unix': PMIXP_ERROR_STD("Cannot create server tmpdir: \"%s\"",
* "... In the Linux implementation, sockets which are visible in the file system pmixp_info_tmpdir_lib());
* honor the permissions of the directory they are in... "
* Our case is the following: slurmstepd is usually running as root, user application will
* be "sudo'ed". To provide both of them with acces to the unix socket we do the following:
* 1. Owner ID is set to the job owner.
* 2. Group ID corresponds to slurmstepd.
* 3. Set 0770 access mode */
if (0 != mkdir(pmixp_info_tmpdir_lib(), rights) ) {
PMIXP_ERROR_STD("Cannot create directory \"%s\"", pmixp_info_tmpdir_lib());
return errno; return errno;
} }
/* There might be umask that will drop essential rights. Fix it explicitly. if (0 != (rc = pmixp_mkdir(pmixp_info_tmpdir_cli(), rights))) {
* TODO: is there more elegant solution? */ PMIXP_ERROR_STD("Cannot create client tmpdir: \"%s\"",
if (chmod(pmixp_info_tmpdir_lib(), rights) < 0) { pmixp_info_tmpdir_cli());
error("chown(%s): %m", pmixp_info_tmpdir_lib());
return errno; return errno;
} }
if (chown(pmixp_info_tmpdir_lib(), (uid_t) pmixp_info_jobuid(), (gid_t) -1) < 0) {
error("chown(%s): %m", pmixp_info_tmpdir_lib());
return errno;
}
/* TODO: must be deleted in future once info-key approach will harden */ /* TODO: must be deleted in future once info-key approach will harden */
setenv(PMIXP_PMIXLIB_TMPDIR, pmixp_info_tmpdir_lib(), 1); setenv(PMIXP_PMIXLIB_TMPDIR, pmixp_info_tmpdir_lib(), 1);
...@@ -296,10 +288,17 @@ int pmixp_libpmix_finalize(void) ...@@ -296,10 +288,17 @@ int pmixp_libpmix_finalize(void)
} }
rc1 = pmixp_rmdir_recursively(pmixp_info_tmpdir_lib()); rc1 = pmixp_rmdir_recursively(pmixp_info_tmpdir_lib());
if (0 == rc) { if (0 != rc1) {
/* return only one error :) */ PMIXP_ERROR_STD("Failed to remove %s\n", pmixp_info_tmpdir_lib());
rc = rc1; /* Not considering this as fatal error */
} }
rc1 = pmixp_rmdir_recursively(pmixp_info_tmpdir_cli());
if (0 != rc1) {
PMIXP_ERROR_STD("Failed to remove %s\n", pmixp_info_tmpdir_cli());
/* Not considering this as fatal error */
}
return rc; return rc;
} }
...@@ -362,27 +361,29 @@ static void _set_tmpdirs(List lresp) ...@@ -362,27 +361,29 @@ static void _set_tmpdirs(List lresp)
{ {
pmix_info_t *kvp; pmix_info_t *kvp;
char *p = NULL; char *p = NULL;
bool rmclean = true;
/* We consider two sources of the tempdir: /* We consider two sources of the tempdir:
* - SLURM's slurm.conf TmpFS option; * - SLURM's slurm.conf TmpFS option;
* - env var SLURM_PMIX_TMPDIR; * - env var SLURM_PMIX_TMPDIR;
* do we need to do anything else? * do we need to do anything else?
*/ */
p = pmixp_info_tmpdir_cli(); p = pmixp_info_tmpdir_cli_base();
if (NULL == p) {
p = PMIXP_TMPDIR_DEFAULT;
}
PMIXP_ALLOC_KEY(kvp, PMIX_TMPDIR); PMIXP_ALLOC_KEY(kvp, PMIX_TMPDIR);
PMIX_VAL_SET(&kvp->value, string, p); PMIX_VAL_SET(&kvp->value, string, p);
list_append(lresp, kvp); list_append(lresp, kvp);
p = pmixp_info_tmpdir_cli();
PMIXP_ALLOC_KEY(kvp, PMIX_NSDIR); PMIXP_ALLOC_KEY(kvp, PMIX_NSDIR);
PMIX_VAL_SET(&kvp->value, string, p); PMIX_VAL_SET(&kvp->value, string, p);
list_append(lresp, kvp); list_append(lresp, kvp);
PMIXP_ALLOC_KEY(kvp, PMIX_PROCDIR);
PMIX_VAL_SET(&kvp->value, string, p); PMIXP_ALLOC_KEY(kvp, PMIX_TDIR_RMCLEAN);
PMIX_VAL_SET(&kvp->value, flag, rmclean);
list_append(lresp, kvp); list_append(lresp, kvp);
} }
/* /*
* information about relative ranks as assigned by the RM * information about relative ranks as assigned by the RM
*/ */
......
...@@ -299,13 +299,18 @@ static int _env_set(char ***env) ...@@ -299,13 +299,18 @@ static int _env_set(char ***env)
*/ */
p = getenvp(*env, PMIXP_TMPDIR_CLI); p = getenvp(*env, PMIXP_TMPDIR_CLI);
if (NULL != p) { if (NULL != p) {
_pmixp_job_info.cli_tmpdir = xstrdup(p); _pmixp_job_info.cli_tmpdir_base = xstrdup(p);
} else { } else {
p = slurm_get_tmp_fs(); p = slurm_get_tmp_fs();
if (NULL != p) { if (NULL != p) {
_pmixp_job_info.cli_tmpdir = p; _pmixp_job_info.cli_tmpdir_base = p;
} }
} }
_pmixp_job_info.cli_tmpdir =
xstrdup_printf("%s/spmix_appdir_%d.%d",
_pmixp_job_info.cli_tmpdir_base,
pmixp_info_jobid(), pmixp_info_stepid());
/* ----------- Timeout setting ------------- */ /* ----------- Timeout setting ------------- */
/* TODO: also would be nice to have a cluster-wide setting in SLURM */ /* TODO: also would be nice to have a cluster-wide setting in SLURM */
......
...@@ -67,7 +67,7 @@ typedef struct { ...@@ -67,7 +67,7 @@ typedef struct {
uint32_t *gtids; /* global ids of tasks located on *this* node */ uint32_t *gtids; /* global ids of tasks located on *this* node */
char *task_map_packed; /* string represents packed task mapping information */ char *task_map_packed; /* string represents packed task mapping information */
int timeout; int timeout;
char *cli_tmpdir; char *cli_tmpdir, *cli_tmpdir_base;
char *lib_tmpdir; char *lib_tmpdir;
uid_t uid; uid_t uid;
gid_t gid; gid_t gid;
...@@ -98,6 +98,11 @@ static inline char *pmixp_info_tmpdir_cli(void) ...@@ -98,6 +98,11 @@ static inline char *pmixp_info_tmpdir_cli(void)
return _pmixp_job_info.cli_tmpdir; return _pmixp_job_info.cli_tmpdir;
} }
/*
 * Accessor for the base directory under which the per-step client
 * (application) temp directory is placed.
 *
 * Returns the cli_tmpdir_base pointer stored in the job info structure.
 * The pointer is returned as-is: the caller must not free it, and it may
 * be NULL if no base directory was recorded.
 */
static inline char *pmixp_info_tmpdir_cli_base(void)
{
	char *base_dir = _pmixp_job_info.cli_tmpdir_base;

	return base_dir;
}
/* Cli tempdir */ /* Cli tempdir */
static inline char *pmixp_info_tmpdir_lib(void) static inline char *pmixp_info_tmpdir_lib(void)
{ {
......
...@@ -417,3 +417,38 @@ int pmixp_fixrights(char *path, uid_t uid, mode_t mode) ...@@ -417,3 +417,38 @@ int pmixp_fixrights(char *path, uid_t uid, mode_t mode)
closedir(dp); closedir(dp);
return 0; return 0;
} }
/*
 * pmixp_mkdir - create a directory owned by the job's user.
 *
 * Creates `path` with mode `rights`, re-applies `rights` with chmod() so
 * that the process umask cannot strip any of the requested bits, and then
 * changes the owner to the job's uid (pmixp_info_jobuid()) while leaving
 * the group unchanged.
 *
 * IN path   - directory to create
 * IN rights - access mode to apply to the directory
 * RET 0 on success, otherwise the errno value of the failing call.
 *
 * NOTE: if chmod() or chown() fails, the already created directory is
 * left in place; cleanup is up to the caller.
 */
int pmixp_mkdir(char *path, mode_t rights)
{
	int err;

	/* NOTE: we need user who owns the job to access PMIx usock
	 * file. According to 'man 7 unix':
	 * "... In the Linux implementation, sockets which are visible in the
	 * file system honor the permissions of the directory they are in... "
	 * Our case is the following: slurmstepd is usually running as root,
	 * user application will be "sudo'ed". To provide both of them with
	 * access to the unix socket we do the following:
	 * 1. Owner ID is set to the job owner.
	 * 2. Group ID corresponds to slurmstepd.
	 * 3. Set 0770 access mode
	 */
	if (0 != mkdir(path, rights)) {
		/* Save errno first: the logging call may overwrite it. */
		err = errno;
		PMIXP_ERROR_STD("Cannot create directory \"%s\"", path);
		return err;
	}

	/* There might be umask that will drop essential rights.
	 * Fix it explicitly.
	 * TODO: is there more elegant solution? */
	if (chmod(path, rights) < 0) {
		err = errno;
		error("%s: chmod(%s): %m", __func__, path);
		return err;
	}

	/* (gid_t) -1 keeps the current group of the directory. */
	if (chown(path, (uid_t) pmixp_info_jobuid(), (gid_t) -1) < 0) {
		err = errno;
		error("%s: chown(%s): %m", __func__, path);
		return err;
	}
	return 0;
}
...@@ -55,5 +55,6 @@ int pmixp_stepd_send(char *nodelist, const char *address, char *data, ...@@ -55,5 +55,6 @@ int pmixp_stepd_send(char *nodelist, const char *address, char *data,
int silent); int silent);
int pmixp_rmdir_recursively(char *path); int pmixp_rmdir_recursively(char *path);
int pmixp_fixrights(char *path, uid_t uid, mode_t mode); int pmixp_fixrights(char *path, uid_t uid, mode_t mode);
int pmixp_mkdir(char *path, mode_t rights);
#endif /* PMIXP_UTILS_H*/ #endif /* PMIXP_UTILS_H*/
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment