Export SLURM_STEPID and build the spawn environment through setup_env().

src/common/env.h
  env_t gains three fields: stepid, cli and self.
src/common/env.c
  setup_env() sets SLURM_STEPID when env->stepid is non-zero. When env->cli
  is set it prints that address into a buffer, cuts it at the first ':' and
  exports the result as SLURM_LAUNCH_NODE_IPADDR; if SLURM_GMPI is present
  in the environment it also exports GMPI_MASTER (same value) and GMPI_SLAVE
  (built the same way from env->self).
src/slurmd/mgr.c
  _set_mgr_env() is replaced by _setup_spawn_env(), which fills a temporary
  env_t (cli, self, jobid, stepid, env), calls setup_env(), takes the
  resulting environment back into job->env and frees the temporary.
  _setup_batch_env() also copies job->stepid into its env_t.
src/srun/srun.c
  srun() copies job->stepid into its env_t.

NOTE(review): SLURM_STEPID is skipped when stepid == 0. If 0 is a valid step
id, setup_env() needs a different "unset" marker; confirm with the callers.
NOTE(review): addrbuf is INET_ADDRSTRLEN bytes but receives "addr:port" text
before the port is cut off; confirm slurm_print_slurm_addr() truncates safely.

diff --git a/src/common/env.c b/src/common/env.c index 9fae63be667b0c4302c343436567c148b6618901..44083577e37159d1f0c6ad1886cb3ef9ff555317 100644 --- a/src/common/env.c +++ b/src/common/env.c @@ -219,7 +219,8 @@ int setup_env(env_t *env) int rc = SLURM_SUCCESS; char *dist = NULL; char *bgl_part_id = NULL; - + char addrbuf[INET_ADDRSTRLEN]; + if (env == NULL) return SLURM_ERROR; @@ -282,7 +283,12 @@ int setup_env(env_t *env) rc = SLURM_FAILURE; } - + if (env->stepid + && setenvf(&env->env, "SLURM_STEPID", "%u", env->stepid)) { + error("Unable to set SLURM_STEPID environment"); + rc = SLURM_FAILURE; + } + if (env->nhosts && setenvf(&env->env, "SLURM_NNODES", "%u", env->nhosts)) { error("Unable to set SLURM_NNODES environment var"); @@ -301,6 +307,31 @@ int setup_env(env_t *env) error ("Can't set SLURM_TASKS_PER_NODE env variable"); rc = SLURM_FAILURE; } + + if (env->cli) { + + slurm_print_slurm_addr (env->cli, addrbuf, INET_ADDRSTRLEN); + + /* + * XXX: Eventually, need a function for slurm_addrs that + * returns just the IP address (not addr:port) + */ + + if ((dist = strchr (addrbuf, ':')) != NULL) + *dist = '\0'; + + setenvf (&env->env, "SLURM_LAUNCH_NODE_IPADDR", "%s", addrbuf); + + if (getenvp(env->env, "SLURM_GMPI")) { + setenvf (&env->env, "GMPI_MASTER", "%s", addrbuf); + slurm_print_slurm_addr (env->self, + addrbuf, INET_ADDRSTRLEN); + if ((dist = strchr (addrbuf, ':')) != NULL) + *dist = '\0'; + setenvf (&env->env, "GMPI_SLAVE", "%s", addrbuf); + } + + } uname(&name); if (strcasecmp(name.sysname, "AIX") == 0) {
diff --git a/src/common/env.h b/src/common/env.h index c24582a0ddaf91c89edb7026429e5a91e35fba60..d2602ee0653c98f2d610ee3840da1e159bee08f8 100644 --- a/src/common/env.h +++ b/src/common/env.h @@ -48,10 +48,12 @@ typedef struct env_options { bool labelio; /* --label-output, -l */ select_jobinfo_t select_jobinfo; uint32_t jobid; /* assigned job id */ + uint32_t stepid; /* assigned step id */ int nhosts; char *nodelist; /* nodelist in string form */ char **env; /* job environment */ - + slurm_addr *cli; /* address exported as SLURM_LAUNCH_NODE_IPADDR */ + slurm_addr *self; /* address exported as GMPI_SLAVE */ } env_t;
diff --git a/src/slurmd/mgr.c b/src/slurmd/mgr.c index d390918838eef4a770aeb7467abc1c5c477c8bdd..847b49158f68a14eb06a5803539e33121ffc022e 100644 --- a/src/slurmd/mgr.c +++ b/src/slurmd/mgr.c @@ -138,7 +138,8 @@ static int _send_pending_exit_msgs(slurmd_job_t *job); static void _kill_running_tasks(slurmd_job_t *job); static void _setargs(slurmd_job_t *job); -static void _set_mgr_env(slurmd_job_t *, slurm_addr *cli, slurm_addr *self); +static void _setup_spawn_env(slurmd_job_t *, + slurm_addr *cli, slurm_addr *self); static void _random_sleep(slurmd_job_t *job); static char *_sprint_task_cnt(batch_job_launch_msg_t *msg); @@ -172,8 +173,8 @@ mgr_launch_tasks(launch_tasks_request_msg_t *msg, slurm_addr *cli, _set_job_log_prefix(job); _setargs(job); - - _set_mgr_env(job, cli, self); + + _setup_spawn_env(job, cli, self); if (_job_mgr(job) < 0) return SLURM_ERROR; @@ -254,8 +255,8 @@ mgr_spawn_task(spawn_task_request_msg_t *msg, slurm_addr *cli, _setargs(job); - _set_mgr_env(job, cli, self); - + _setup_spawn_env(job, cli, self); + if (_job_mgr(job) < 0) return SLURM_ERROR; @@ -1043,6 +1044,7 @@ _setup_batch_env(slurmd_job_t *job, batch_job_launch_msg_t *msg) env->nprocs = msg->nprocs; env->select_jobinfo = msg->select_jobinfo; env->jobid = job->jobid; + env->stepid = job->stepid; env->nhosts = hostlist_count(hl); hostlist_destroy(hl); env->nodelist = buf; @@ -1294,30 +1296,21 @@ _setargs(slurmd_job_t *job) } static void -_set_mgr_env(slurmd_job_t *job, slurm_addr *cli, slurm_addr *self) +_setup_spawn_env(slurmd_job_t *job, slurm_addr *cli, slurm_addr *self) { - char *p; - char addrbuf[INET_ADDRSTRLEN]; + env_t *env = xmalloc(sizeof(env_t)); - slurm_print_slurm_addr (cli, addrbuf, INET_ADDRSTRLEN); - - /* - * XXX: Eventually, need a function for slurm_addrs that - * returns just the IP address (not addr:port) - */ - - if ((p = strchr (addrbuf, ':')) != NULL) - *p = '\0'; - - setenvf (&job->env, "SLURM_LAUNCH_NODE_IPADDR", "%s", addrbuf); - - if (getenvp(job->env, "SLURM_GMPI")) { - setenvf (&job->env, "GMPI_MASTER", "%s", addrbuf); - slurm_print_slurm_addr (self, addrbuf, INET_ADDRSTRLEN); - if ((p = strchr (addrbuf, ':')) != NULL) *p = '\0'; - setenvf (&job->env, "GMPI_SLAVE", "%s", addrbuf); - } - + env->cli = cli; + env->self = self; + env->jobid = job->jobid; + env->stepid = job->stepid; + env->env = job->env; + + (void) setup_env(env); /* best effort: this function returns void */ + job->env = env->env; + xfree(env->task_count); + xfree(env); + return; }
diff --git a/src/srun/srun.c b/src/srun/srun.c index d0bc724be74b111ce38ff741bb8ba0cd2594bc5a..ee876e16e78ceb285aead525e14b30b3af53ffdb 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -237,6 +237,7 @@ int srun(int ac, char **av) env->jobid = job->jobid; env->nhosts = job->nhosts; env->nodelist = job->nodelist; + env->stepid = job->stepid; env->task_count = _task_count_string (job); }