diff --git a/NEWS b/NEWS index 41051dc639daeb28f8a9ff62121e46960d34eab0..c3af6a16dcc5e32ccb929dd188831e48d4c85a18 100644 --- a/NEWS +++ b/NEWS @@ -238,6 +238,8 @@ documents those changes that are of interest to users and administrators. -- power/cray - Disable power cap get and set operations on DOWN nodes. -- Jobs preempted with PreemptMode=REQUEUE were incorrectly recorded as REQUEUED in the accounting. + -- PMIX - Use volatile specifier to avoid flag caching. + -- PMIX - Make it possible to use %n or %h in a spool dir. * Changes in Slurm 16.05.9 ========================== diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index d91d9d92cbaba68b25b3e65eb10c04d4e89619b8..5abda4eb0d4a2efc06b46b06f1291ce8f4a1e860 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -2770,7 +2770,7 @@ char *slurm_get_job_container_plugin(void) /* slurm_get_slurmd_spooldir * RET slurmd_spooldir name, must be xfreed by caller */ -char *slurm_get_slurmd_spooldir(void) +char *slurm_get_slurmd_spooldir(char *node_name) { char *slurmd_spooldir = NULL; slurm_ctl_conf_t *conf; @@ -2778,7 +2778,11 @@ char *slurm_get_slurmd_spooldir(void) if (slurmdbd_conf) { } else { conf = slurm_conf_lock(); - slurmd_spooldir = xstrdup(conf->slurmd_spooldir); + if (!node_name) + slurmd_spooldir = xstrdup(conf->slurmd_spooldir); + else + slurmd_spooldir = slurm_conf_expand_slurmd_path( + conf->slurmd_spooldir, node_name); slurm_conf_unlock(); } return slurmd_spooldir; diff --git a/src/common/slurm_protocol_api.h b/src/common/slurm_protocol_api.h index fa69a20293a775ad29d1da0ef7661b02c27c8d0c..2f4c8c1cf03df1171099547e40eed0bab7f89a11 100644 --- a/src/common/slurm_protocol_api.h +++ b/src/common/slurm_protocol_api.h @@ -893,7 +893,7 @@ char *slurm_get_job_container_plugin(void); /* slurm_get_slurmd_spooldir * RET slurmd_spooldir name, must be xfreed by caller */ -char *slurm_get_slurmd_spooldir(void); +char *slurm_get_slurmd_spooldir(char *node_name); 
/* slurm_get_layouts * RET comma seperated list of layouts in a string, must be xfreed by caller diff --git a/src/plugins/mpi/pmi2/setup.c b/src/plugins/mpi/pmi2/setup.c index 242769189be006ff2deffb728ee7326d604ad70a..7298bbd99d82a74b16bed9dbd87ce71e12efa3f3 100644 --- a/src/plugins/mpi/pmi2/setup.c +++ b/src/plugins/mpi/pmi2/setup.c @@ -272,7 +272,10 @@ _setup_stepd_sockets(const stepd_step_rec_t *job, char ***env) } sa.sun_family = PF_UNIX; - spool = slurm_get_slurmd_spooldir(); + /* FIXME: We need to handle %n and %h in the spool dir. We have + * job->node_name here, but the other place we call + * slurm_get_slurmd_spooldir() does not, so we aren't using it. */ + spool = slurm_get_slurmd_spooldir(NULL); snprintf(sa.sun_path, sizeof(sa.sun_path), PMI2_SOCK_ADDR_FMT, spool, job->jobid, job->stepid); unlink(sa.sun_path); /* remove possible old socket */ @@ -628,7 +631,9 @@ _setup_srun_tree_info(const mpi_plugin_client_info_t *job) } else tree_info.srun_addr = NULL; - spool = slurm_get_slurmd_spooldir(); + /* FIXME: We need to handle %n and %h in the spool dir, but don't have + * the node name here */ + spool = slurm_get_slurmd_spooldir(NULL); snprintf(tree_sock_addr, 128, PMI2_SOCK_ADDR_FMT, spool, job->jobid, job->stepid); xfree(spool); diff --git a/src/plugins/mpi/pmix/pmixp_agent.c b/src/plugins/mpi/pmix/pmixp_agent.c index 333e7ac8394bf5ea5300184499690d4176201e2e..09f82c678f8f706bb77d25a518c36b59a66d56a3 100644 --- a/src/plugins/mpi/pmix/pmixp_agent.c +++ b/src/plugins/mpi/pmix/pmixp_agent.c @@ -2,7 +2,7 @@ ** pmix_agent.c - PMIx agent thread ***************************************************************************** * Copyright (C) 2014-2015 Artem Polyakov. All rights reserved. - * Copyright (C) 2015 Mellanox Technologies. All rights reserved. + * Copyright (C) 2015-2017 Mellanox Technologies. All rights reserved. * Written by Artem Y. Polyakov <artpol84@gmail.com, artemp@mellanox.com>. * * This file is part of SLURM, a resource management program. 
@@ -51,8 +51,8 @@ #define MAX_RETRIES 5 -static int _agent_is_running = 0; -static int _timer_is_running = 0; +static volatile int _agent_is_running = 0; +static volatile int _timer_is_running = 0; static eio_handle_t *_io_handle = NULL; static int _agent_spawned = 0, _timer_spawned = 0; diff --git a/src/plugins/mpi/pmix/pmixp_info.c b/src/plugins/mpi/pmix/pmixp_info.c index edc0e47204fccbb2600d2051e22f5ea346dd4c1a..ecd17d97fd49c6def5bf443988243c0f053ea1d9 100644 --- a/src/plugins/mpi/pmix/pmixp_info.c +++ b/src/plugins/mpi/pmix/pmixp_info.c @@ -286,26 +286,27 @@ err_exit: static int _env_set(char ***env) { char *p = NULL; - char *spool = slurm_get_slurmd_spooldir(); + + xassert(_pmixp_job_info.hostname); + + _pmixp_job_info.lib_tmpdir = slurm_get_slurmd_spooldir( + _pmixp_job_info.hostname); /* ----------- Temp directories settings ------------- */ - _pmixp_job_info.lib_tmpdir = xstrdup_printf("%s/pmix.%d.%d/", spool, - pmixp_info_jobid(), pmixp_info_stepid()); - xfree(spool); + xstrfmtcat(_pmixp_job_info.lib_tmpdir, "/pmix.%d.%d/", + pmixp_info_jobid(), pmixp_info_stepid()); /* save client temp directory if requested * TODO: We want to get TmpFS value as well if exists. * Need to sync with SLURM developers. 
*/ p = getenvp(*env, PMIXP_TMPDIR_CLI); - if (NULL != p) { + + if (p) _pmixp_job_info.cli_tmpdir_base = xstrdup(p); - } else { - p = slurm_get_tmp_fs(); - if (NULL != p) { - _pmixp_job_info.cli_tmpdir_base = p; - } - } + else + _pmixp_job_info.cli_tmpdir_base = slurm_get_tmp_fs(); + _pmixp_job_info.cli_tmpdir = xstrdup_printf("%s/spmix_appdir_%d.%d", _pmixp_job_info.cli_tmpdir_base, diff --git a/src/plugins/mpi/pmix/pmixp_info.h b/src/plugins/mpi/pmix/pmixp_info.h index e8e159567e88b99fb23f32219c5ceef2c043ba9c..69ea4d0a8af7b2b5649034cd1790ff63ce9490fc 100644 --- a/src/plugins/mpi/pmix/pmixp_info.h +++ b/src/plugins/mpi/pmix/pmixp_info.h @@ -272,12 +272,11 @@ static inline char *pmixp_info_job_host(int nodeid) /* namespaces list operations */ static inline char *pmixp_info_nspace_usock(const char *nspace) { - char *spool, *usock = NULL; + char *spool; debug("mpi/pmix: setup sockets"); - spool = slurm_get_slurmd_spooldir(); - xstrfmtcat(usock, "%s/stepd.%s", spool, nspace); - xfree(spool); - return usock; + spool = slurm_get_slurmd_spooldir(_pmixp_job_info.hostname); + xstrfmtcat(spool, "/stepd.%s", nspace); + return spool; } #endif /* PMIXP_INFO_H */