diff --git a/src/common/env.c b/src/common/env.c index 44083577e37159d1f0c6ad1886cb3ef9ff555317..dc76d97261f761c8c502778c28087fe41696d921 100644 --- a/src/common/env.c +++ b/src/common/env.c @@ -231,7 +231,15 @@ int setup_env(env_t *env) } if (env->cpus_per_task - && setenvf(&env->env, "SLURM_CPUS_PER_TASK", "%u", env->cpus_per_task) ) { + && setenvf(&env->env, "SLURM_CPUS_PER_TASK", "%u", + env->cpus_per_task) ) { + error("Unable to set SLURM_CPUS_PER_TASK"); + rc = SLURM_FAILURE; + } + + if (env->cpus_on_node + && setenvf(&env->env, "SLURM_CPUS_ON_NODE", "%d", + env->cpus_on_node) ) { - error("Unable to set SLURM_CPUS_PER_TASK"); + error("Unable to set SLURM_CPUS_ON_NODE"); rc = SLURM_FAILURE; } @@ -247,8 +255,8 @@ int setup_env(env_t *env) } } - if (env->overcommit && - (setenvf(&env->env, "SLURM_OVERCOMMIT", "1"))) { + if (env->overcommit + && (setenvf(&env->env, "SLURM_OVERCOMMIT", "1"))) { error("Unable to set SLURM_OVERCOMMIT environment variable"); rc = SLURM_FAILURE; } @@ -277,15 +285,33 @@ int setup_env(env_t *env) } } - if (env->jobid + if (env->jobid >= 0 && setenvf(&env->env, "SLURM_JOBID", "%u", env->jobid)) { error("Unable to set SLURM_JOBID environment"); rc = SLURM_FAILURE; } - if (env->stepid + if (env->nodeid >= 0 + && setenvf(&env->env, "SLURM_NODEID", "%d", env->nodeid)) { + error("Unable to set SLURM_NODEID environment"); + rc = SLURM_FAILURE; + } + + if (env->procid >= 0 + && setenvf(&env->env, "SLURM_PROCID", "%d", env->procid)) { + error("Unable to set SLURM_PROCID environment"); + rc = SLURM_FAILURE; + } + + if (env->stepid >= 0 && setenvf(&env->env, "SLURM_STEPID", "%u", env->stepid)) { - error("Unable to set SLURM_JOBID environment"); + error("Unable to set SLURM_STEPID environment"); + rc = SLURM_FAILURE; + } + + if (env->gmpi >= 0 + && setenvf(&env->env, "SLURM_GMPI", "%d", env->gmpi)) { + error("Unable to set SLURM_GMPI environment"); rc = SLURM_FAILURE; } diff --git a/src/common/env.h b/src/common/env.h index 
d2602ee0653c98f2d610ee3840da1e159bee08f8..ea9eb385a794ef8cb53930fcbb18969c2b042f6c 100644 --- a/src/common/env.h +++ b/src/common/env.h @@ -54,6 +54,10 @@ typedef struct env_options { char **env; /* job environment */ slurm_addr *cli; slurm_addr *self; + int procid; + int gmpi; + int nodeid; + int cpus_on_node; } env_t; diff --git a/src/slurmd/job.h b/src/slurmd/job.h index 99318323cb3aafaa0ae191e1eedad7ca64f5a2f4..3c3dfc47b2399e27d8147dd9275ec03aff7cd902 100644 --- a/src/slurmd/job.h +++ b/src/slurmd/job.h @@ -136,6 +136,8 @@ typedef struct slurmd_job { /* communication between slurmds */ uint16_t task_flags; + slurm_addr *cli; + slurm_addr *self; } slurmd_job_t; diff --git a/src/slurmd/mgr.c b/src/slurmd/mgr.c index 847b49158f68a14eb06a5803539e33121ffc022e..7a5e7854a02ba3c506ce80f562ee3cb1ba6d5c07 100644 --- a/src/slurmd/mgr.c +++ b/src/slurmd/mgr.c @@ -174,9 +174,9 @@ mgr_launch_tasks(launch_tasks_request_msg_t *msg, slurm_addr *cli, _set_job_log_prefix(job); _setargs(job); + job->cli = cli; + job->self = self; - _setup_spawn_env(job, cli, self); - if (_job_mgr(job) < 0) return SLURM_ERROR; @@ -255,8 +255,9 @@ mgr_spawn_task(spawn_task_request_msg_t *msg, slurm_addr *cli, _set_job_log_prefix(job); _setargs(job); - - _setup_spawn_env(job, cli, self); + + job->cli = cli; + job->self = self; if (_job_mgr(job) < 0) return SLURM_ERROR; @@ -1036,25 +1037,30 @@ _setup_batch_env(slurmd_job_t *job, batch_job_launch_msg_t *msg) struct utsname name; hostlist_t hl = hostlist_create(msg->nodes); env_t *env = xmalloc(sizeof(env_t)); - int rc; + if (!hl) return SLURM_ERROR; hostlist_ranged_string(hl, 1024, buf); + env->stepid = -1; + env->gmpi = -1; + env->procid = -1; + env->nodeid = -1; env->nprocs = msg->nprocs; env->select_jobinfo = msg->select_jobinfo; env->jobid = job->jobid; - env->stepid = job->stepid; env->nhosts = hostlist_count(hl); hostlist_destroy(hl); env->nodelist = buf; env->task_count = _sprint_task_cnt(msg); env->env = job->env; - - rc = setup_env(env); + + 
setup_env(env); job->env = env->env; + env->env = NULL; xfree(env->task_count); + xfree(env); return 0; @@ -1299,21 +1305,24 @@ _setargs(slurmd_job_t *job) static void _setup_spawn_env(slurmd_job_t *job, slurm_addr *cli, slurm_addr *self) { - char *p; - char addrbuf[INET_ADDRSTRLEN]; env_t *env = xmalloc(sizeof(env_t)); - int rc; - + + env->stepid = -1; + env->gmpi = -1; + env->procid = -1; + env->nodeid = -1; + env->jobid = -1; + env->cli = cli; env->self = self; env->jobid = job->jobid; env->stepid = job->stepid; env->env = job->env; - rc = setup_env(env); + setup_env(env); job->env = env->env; - xfree(env->task_count); - + env->env = NULL; + xfree(env); return; } diff --git a/src/slurmd/smgr.c b/src/slurmd/smgr.c index 34e1ae1d23ed569097a984ce53fcab9708cd91f8..1f74a561eb9068db8aaa0b6c65c9d7e2d32fd507 100644 --- a/src/slurmd/smgr.c +++ b/src/slurmd/smgr.c @@ -93,7 +93,6 @@ static int _local_taskid(slurmd_job_t *job, pid_t pid); static int _send_exit_status(slurmd_job_t *job, pid_t pid, int status, struct rusage *rusage); static char *_signame(int signo); static void _cleanup_file_descriptors(slurmd_job_t *job); -static int _setup_env(slurmd_job_t *job, int taskid); static void _setup_spawn_io(slurmd_job_t *job); /* parallel debugger support */ @@ -371,6 +370,14 @@ _exec_all_tasks(slurmd_job_t *job) static void _exec_task(slurmd_job_t *job, int i) { + task_info_t *t = NULL; + env_t *env = xmalloc(sizeof(env_t)); + env->stepid = -1; + env->gmpi = -1; + env->procid = -1; + env->nodeid = -1; + env->jobid = -1; + if (xsignal_unblock(smgr_sigarray) < 0) { error("unable to unblock signals"); exit(1); @@ -383,10 +390,24 @@ _exec_task(slurmd_job_t *job, int i) error("Unable to attach to interconnect: %m"); exit(1); } - - if (_setup_env(job, i) < 0) - error("error establishing SLURM env vars: %m"); - + + t = job->task[i]; + + env->jobid = job->jobid; + env->stepid = job->stepid; + env->nodeid = job->nodeid; + env->cpus_on_node = job->cpus; + env->cli = job->cli; + 
env->self = job->self; + env->procid = t->gtid; + env->gmpi = t->gtid; + env->env = job->env; + + setup_env(env); + job->env = env->env; + env->env = NULL; + xfree(env); + _pdebug_stop_current(job); } @@ -597,28 +618,6 @@ _local_taskid(slurmd_job_t *job, pid_t pid) return SLURM_ERROR; } -/* - * Set task-specific environment variables - */ -static int -_setup_env(slurmd_job_t *job, int taskid) -{ - task_info_t *t = job->task[taskid]; - - if (setenvf(&job->env, "SLURM_NODEID", "%d", job->nodeid) < 0) - return -1; - if (setenvf(&job->env, "SLURM_CPUS_ON_NODE", "%d", job->cpus) < 0) - return -1; - if (setenvf(&job->env, "SLURM_PROCID", "%d", t->gtid ) < 0) - return -1; - if (getenvp(job->env, "SLURM_GMPI")) { - if (setenvf(&job->env, "GMPI_ID", "%d", t->gtid) < 0) - return -1; - } - - return SLURM_SUCCESS; -} - static void _make_tmpdir(slurmd_job_t *job) { diff --git a/src/srun/srun.c b/src/srun/srun.c index 99659163167ac00fdb73cb7c7a0269123c696d68..8e1c95ba5d08f04ff6eafe7233a0623e584a3e2b 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -115,6 +115,13 @@ int srun(int ac, char **av) log_options_t logopt = LOG_OPTS_STDERR_ONLY; + env->stepid = -1; + env->gmpi = -1; + env->procid = -1; + env->nodeid = -1; + env->cli = NULL; + env->env = NULL; + log_init(xbasename(av[0]), logopt, 0, NULL); /* set default options, process commandline arguments, and @@ -242,12 +249,11 @@ int srun(int ac, char **av) env->jobid = job->jobid; env->nhosts = job->nhosts; env->nodelist = job->nodelist; - env->stepid = job->stepid; env->task_count = _task_count_string (job); } setup_env(env); - + xfree(env); if (slurm_get_mpich_gm_dir() && getenv("GMPI_PORT") == NULL) { /* * It is possible for one to modify the mpirun command in