slurmd: resolve output file names against the job's cwd and rework
privilege handling.

  fname.c  fname_create() works on a private copy of the format string and
           prefixes job->cwd when the format is not an absolute path.
  io.c     _io_prepare_one() appends the srun to the task's srun_list if it
           is not already there; the "connecting IO back" debug message is
           only logged for a nonzero port.
  job.c    _pwd_create()/_pwd_destroy() give the job its own copy of the
           passwd entry; job_destroy() releases it.  _mkfilename() builds
           the default name through fname_create().
  mgr.c    _seteuid_and_chdir()/_setuid() are replaced by
           _drop_privileges()/_reclaim_privileges()/_become_user(); the
           chdir() to job->cwd now happens in _task_exec().

Review fixes folded into the added lines (hunk line counts are unchanged):
  * job.c: _pwd_create() returns NULL on failure, so its result is tested
    with "== NULL" rather than "< 0".
  * job.c: the comment in _job_init_task_info() names _io_prepare_one(),
    which is where this patch appends to srun_list.
  * mgr.c: uid/gid values printed with %ld are cast to long.

diff --git a/src/slurmd/fname.c b/src/slurmd/fname.c
index 161c2a92cdbb03cdfbca0f70ea3437a1d946c704..55d08a31cfc4e005551cf122076cd71a6acc371c 100644
--- a/src/slurmd/fname.c
+++ b/src/slurmd/fname.c
@@ -50,9 +50,19 @@ fname_create(slurmd_job_t *job, const char *format, int taskid)
 {
 	unsigned long int wid = 0;
 	char *name = NULL;
+	char *orig = xstrdup(format);
 	char *p, *q;
 
-	q = p = format;
+	/* If format doesn't specify an absolute pathname,
+	 * use cwd
+	 */
+	if (orig[0] != '/') {
+		xstrcat(name, job->cwd);
+		if (name[strlen(name)-1] != '/')
+			xstrcatchar(name, '/');
+	}
+
+	q = p = orig;
 	while(*p != '\0') {
 		if (*p == '%') {
 			if (isdigit(*(++p))) {
@@ -102,6 +112,7 @@ fname_create(slurmd_job_t *job, const char *format, int taskid)
 
 	if (q != p)
 		xmemcat(name, q, p);
 
+	xfree(orig);
 	return name;
 }
diff --git a/src/slurmd/io.c b/src/slurmd/io.c
index 4f02969a1f518c32eff76f1664290e44173cc786..b3e33aa933490cb2ba01983388a776c9ca4fe0c9 100644
--- a/src/slurmd/io.c
+++ b/src/slurmd/io.c
@@ -236,7 +236,6 @@ io_spawn_handler(slurmd_job_t *job)
 	 */
 	if (_io_prepare_clients(job) < 0)
 		return SLURM_FAILURE;
-
 	return 0;
 }
 
@@ -260,11 +259,10 @@ _xclose(int fd)
 static void
 _io_finalize(task_info_t *t)
 {
-	struct io_info *in = t->in->arg;
+	struct io_info *in  = t->in->arg;
 	ListIterator i;
 	struct io_info *io;
 
-
 	if (_xclose(t->pin[0] ) < 0)
 		error("close(stdin) : %m");
 	if (_xclose(t->pout[1]) < 0)
@@ -272,9 +270,8 @@ _io_finalize(task_info_t *t)
 	if (_xclose(t->perr[1]) < 0)
 		error("close(stderr): %m");
 
-	in->disconnected  = 1;
-	/* close stdin objs
-	 */
+	in->disconnected = 1;
+
 	if (!in->writers)
 		return;
 
@@ -446,6 +443,9 @@ _io_prepare_one(slurmd_job_t *j, task_info_t *t, srun_info_t *s)
 		_io_add_connecting(j, t, s, CLIENT_STDIN);
 	}
 
+	if (!list_find_first(t->srun_list, (ListFindF) find_obj, s))
+		list_append(t->srun_list, (void *) s);
+
 	return SLURM_SUCCESS;
 }
 
@@ -464,7 +464,8 @@ _io_prepare_clients(slurmd_job_t *job)
 		xassert(srun != NULL);
 
 		slurm_get_addr(&srun->ioaddr, &port, host, sizeof(host));
-		debug2("connecting IO back to %s:%d", host, ntohs(port));
+		if (port)
+			debug2("connecting IO back to %s:%d", host, ntohs(port));
 
 		/* Connect stdin/out/err to either a remote srun or
 		 * local file
diff --git a/src/slurmd/job.c b/src/slurmd/job.c
index 7c221fa1bd18da363ebce8d6145f96b9ea533b7a..0542781bf27ba15c09d8dc9d8e8ef990d47942a2 100644
--- a/src/slurmd/job.c
+++ b/src/slurmd/job.c
@@ -52,6 +52,43 @@ static void _array_free(int n, char ***array);
 static void _srun_info_destructor(void *arg);
 static void _job_init_task_info(slurmd_job_t *job, uint32_t *gids);
 
+static struct passwd *
+_pwd_create(uid_t uid)
+{
+	struct passwd *pwd = xmalloc(sizeof(*pwd));
+	struct passwd *ppwd = getpwuid(uid);
+
+	if (!ppwd) {
+		xfree(pwd);
+		return NULL;
+	}
+
+	pwd->pw_name   = xstrdup(ppwd->pw_name);
+	pwd->pw_passwd = xstrdup(ppwd->pw_passwd);
+	pwd->pw_gecos  = xstrdup(ppwd->pw_gecos);
+	pwd->pw_shell  = xstrdup(ppwd->pw_shell);
+	pwd->pw_dir    = xstrdup(ppwd->pw_dir);
+	pwd->pw_uid    = ppwd->pw_uid;
+	pwd->pw_gid    = ppwd->pw_gid;
+
+	return pwd;
+}
+
+static void
+_pwd_destroy(struct passwd *pwd)
+{
+	if (!pwd)
+		return;
+
+	xfree(pwd->pw_name);
+	xfree(pwd->pw_passwd);
+	xfree(pwd->pw_gecos);
+	xfree(pwd->pw_shell);
+	xfree(pwd->pw_dir);
+	xfree(pwd);
+
+}
+
 
 /* create a slurmd job structure from a launch tasks message */
 slurmd_job_t *
@@ -67,7 +104,7 @@ job_create(launch_tasks_request_msg_t *msg, slurm_addr *cli_addr)
 
 	debug3("entering job_create");
 
-	if ((pwd = getpwuid((uid_t)msg->uid)) < 0) {
+	if ((pwd = _pwd_create((uid_t)msg->uid)) == NULL) {
 		error("uid %ld not found on system", msg->uid);
 		return NULL;
 	}
@@ -124,12 +161,9 @@ job_create(launch_tasks_request_msg_t *msg, slurm_addr *cli_addr)
 static char *
 _mkfilename(slurmd_job_t *job, const char *name)
 {
-	char buf[256];
-
-	if (name == NULL) {
-		snprintf(buf, 256, "%s/job%u.out", job->cwd, job->jobid);
-		return xstrdup(buf);
-	} else
+	if (name == NULL)
+		return fname_create(job, "job%j.out", 0);
+	else
 		return fname_create(job, name, 0);
 }
 
@@ -139,9 +173,9 @@ job_batch_job_create(batch_job_launch_msg_t *msg)
 	struct passwd *pwd;
 	slurmd_job_t *job = xmalloc(sizeof(*job));
 	srun_info_t  *srun = NULL;
-	uint32_t      gid = 0;
+	uint32_t      global_taskid = 0;
 
-	if ((pwd = getpwuid((uid_t)msg->uid)) < 0) {
+	if ((pwd = _pwd_create((uid_t)msg->uid)) == NULL) {
 		error("uid %ld not found on system", msg->uid);
 		return NULL;
 	}
@@ -172,7 +206,7 @@ job_batch_job_create(batch_job_launch_msg_t *msg)
 	 */
 	job->argv    = (char **) xmalloc(job->argc * sizeof(char *));
 
-	_job_init_task_info(job, &gid);
+	_job_init_task_info(job, &global_taskid);
 
 	return job;
 }
@@ -188,8 +222,9 @@ _job_init_task_info(slurmd_job_t *job, uint32_t *gid)
 
 	for (i = 0; i < n; i++){
 		job->task[i] = task_info_create(i, gid[i]);
-		if (srun != NULL)
-			list_append(job->task[i]->srun_list, (void *)srun);
+		/* "srun" info is attached to task in
+		 * _io_prepare_one()
+		 */
 	}
 }
 
@@ -241,6 +276,8 @@ job_destroy(slurmd_job_t *job)
 	_array_free(job->envc, &job->env);
 	_array_free(job->argc, &job->argv);
 
+	_pwd_destroy(job->pwd);
+
 	for (i = 0; i < job->ntasks; i++)
 		task_info_destroy(job->task[i]);
 	list_destroy(job->sruns);
diff --git a/src/slurmd/mgr.c b/src/slurmd/mgr.c
index 0e41d2bb504ecdf3036fc009c5192ae829f1146b..c581119a2161d0371cd60b9877898c850c1b9b45 100644
--- a/src/slurmd/mgr.c
+++ b/src/slurmd/mgr.c
@@ -64,8 +64,9 @@ static int _run_job(slurmd_job_t *job);
 static int  _run_batch_job(slurmd_job_t *job);
 static void _exec_all_tasks(slurmd_job_t *job);
 static void _task_exec(slurmd_job_t *job, int i, bool batch);
-static int  _seteuid_and_chdir(slurmd_job_t *job);
-static int  _setuid(slurmd_job_t *job);
+static int  _drop_privileges(struct passwd *pwd);
+static int  _reclaim_privileges(struct passwd *pwd);
+static int  _become_user(slurmd_job_t *job);
 static int  _unblock_all_signals(void);
 static int  _send_exit_msg(int rc, task_info_t *t);
 static int  _complete_job(slurmd_job_t *job, int rc, int status);
@@ -87,6 +88,9 @@ mgr_launch_tasks(launch_tasks_request_msg_t *msg, slurm_addr *cli)
 
 	verbose("running job step %d.%d for %s",
 		job->jobid, job->stepid, job->pwd->pw_name);
+
+	/* Run job's tasks and wait for all tasks to exit.
+	 */
 	if (_run_job(job) < 0)
 		goto error;
 
@@ -254,20 +258,13 @@ mgr_launch_batch_job(batch_job_launch_msg_t *msg, slurm_addr *cli)
 static int
 _run_job(slurmd_job_t *job)
 {
-	int   rc = SLURM_SUCCESS;
-	int   i;
-	uid_t suid = getuid();
-	gid_t sgid = getgid();
+	int            rc   = SLURM_SUCCESS;
+	int            i    = 0;
+	struct passwd *spwd = getpwuid(geteuid());
 
 	/* Insert job info into shared memory */
 	job_update_shm(job);
 
-	/*
-	 * Need to detach from shared memory
-	 * We don't know what will happen in interconnect_init()
-	 */
-	/* shm_fini(); */
-
 	if (interconnect_init(job) == SLURM_ERROR) {
 		job_error(job, "interconnect_init: %m");
 		rc = -2;
@@ -275,23 +272,10 @@ _run_job(slurmd_job_t *job)
 		goto done;
 	}
 
-	/* Reattach to shared memory after interconnect is initialized
+	/*
+	 * Temporarily drop permissions
 	 */
-	/* job_debug(job, "%ld reattaching to shm", getpid()); */
-	/* if (shm_init() < 0) {
-		job_error(job, "unable to reattach to shm: %m");
-		rc = -1;
-		goto done;
-	}*/
-
-	/* initialize I/O, connect back to srun, and spawn thread for
-	 * forwarding I/O.
-	 */
-
-	/* Temporarily drop permissions and attempt to chdir()
-	 *
-	 */
-	if ((rc = _seteuid_and_chdir(job)) < 0)
+	if ((rc = _drop_privileges(job->pwd)) < 0)
 		goto done;
 
 	/* Option: connect slurmd stderr to srun local task 0: stderr? */
@@ -301,8 +285,8 @@ _run_job(slurmd_job_t *job)
 		goto done;
 	}
 
-	if ((seteuid(suid) < 0) || (setegid(sgid) < 0))
-		error("sete{u/g}id(%ld/%ld): %m", suid, sgid);
+	if (_reclaim_privileges(spwd) < 0)
+		error("sete{u/g}id(%ld/%ld): %m", (long) spwd->pw_uid, (long) spwd->pw_gid);
 
 	_exec_all_tasks(job);
 	job_debug2(job, "job complete, waiting on IO");
@@ -406,22 +390,24 @@ _run_batch_job(slurmd_job_t *job)
 	int    rc     = 0;
 	task_t t;
 	pid_t  sid, pid;
-	gid_t  sgid = getgid();
-	uid_t  suid = getuid();
+	struct passwd *spwd = getpwuid(getuid());
 
 	/* Temporarily drop permissions to initiate
 	 * IO thread. This will ensure that calling user
 	 * has appropriate permissions to open output
 	 * files, if any.
 	 */
-	_seteuid_and_chdir(job);
+	if (_drop_privileges(job->pwd) < 0) {
+		error("seteuid(%ld) : %m", (long) job->uid);
+		return ESLURMD_SET_UID_OR_GID_ERROR;
+	}
 
 	rc = io_spawn_handler(job);
 
 	/* seteuid/gid back to saved uid/gid
 	 */
-	if ((seteuid(suid) < 0) || (setegid(sgid) < 0)) {
-		error("set{e/g}uid(%ld/%ld) : %m", suid, sgid);
+	if (_reclaim_privileges(spwd) < 0) {
+		error("seteuid(%ld) : %m", (long) spwd->pw_uid);
 		return ESLURMD_SET_UID_OR_GID_ERROR;
 	}
 
@@ -457,7 +443,7 @@ _run_batch_job(slurmd_job_t *job)
 	job->task[0]->pid = t.pid;
 
 	if (shm_add_task(job->jobid, job->stepid, &t) < 0) {
-		job_error(job, "shm_add_task: %m");
+		error("job %d: shm_add_task: %m", job->jobid);
 		return ESLURMD_SHARED_MEMORY_ERROR;
 	}
 
@@ -509,37 +495,52 @@ _wait_for_all_tasks(slurmd_job_t *job)
 }
 
 static int
-_seteuid_and_chdir(slurmd_job_t *job)
+_drop_privileges(struct passwd *pwd)
 {
-	if (setegid(job->pwd->pw_gid) < 0) {
+	if (setegid(pwd->pw_gid) < 0) {
 		error("setegid: %m");
 		return -1;
 	}
 
-	if (initgroups(job->pwd->pw_name, job->pwd->pw_gid) < 0) {
-		;
-		/* error("initgroups: %m"); */
+	if (initgroups(pwd->pw_name, pwd->pw_gid) < 0) {
+		error("initgroups: %m");
 	}
 
-	if (seteuid(job->pwd->pw_uid) < 0) {
+	if (seteuid(pwd->pw_uid) < 0) {
 		error("seteuid: %m");
 		return -1;
 	}
 
-	if (chdir(job->cwd) < 0) {
-		error("couldn't chdir to `%s': %m: going to /tmp instead",
-		      job->cwd);
-		if (chdir("/tmp") < 0) {
-			error("couldn't chdir to /tmp either. dying.");
-			return -1;
-		}
+	return SLURM_SUCCESS;
+}
+
+static int
+_reclaim_privileges(struct passwd *pwd)
+{
+	if (seteuid(pwd->pw_uid) < 0) {
+		error("seteuid: %m");
+		return -1;
+	}
+
+	if (setegid(pwd->pw_gid) < 0) {
+		error("setegid: %m");
+		return -1;
+	}
+
+	if (initgroups(pwd->pw_name, pwd->pw_gid) < 0) {
+		error("initgroups: %m");
+		return -1;
 	}
 
 	return SLURM_SUCCESS;
 }
 
+
+
+
+
 static int
-_setuid(slurmd_job_t *job)
+_become_user(slurmd_job_t *job)
 {
 	if (setgid(job->pwd->pw_gid) < 0) {
 		error("setgid: %m");
@@ -551,7 +552,7 @@ _setuid(slurmd_job_t *job)
 		/* error("initgroups: %m"); */
 	}
 
-	if (setuid(job->uid) < 0) {
+	if (setuid(job->pwd->pw_uid) < 0) {
 		error("setuid: %m");
 		return -1;
 	}
@@ -572,7 +573,7 @@ _task_exec(slurmd_job_t *job, int i, bool batch)
 	 */
 	log_init("slurmd", opts, 0, NULL);
 
-	if ((rc = _setuid(job)) < 0)
+	if ((rc = _become_user(job)) < 0)
 		exit(rc);
 
 	if (_unblock_all_signals() == SLURM_ERROR) {
@@ -590,6 +591,16 @@ _task_exec(slurmd_job_t *job, int i, bool batch)
 		error("interconnect_env: %m");
 	}
 
+	if (chdir(job->cwd) < 0) {
+		error("couldn't chdir to `%s': %m: going to /tmp instead",
+		      job->cwd);
+		if (chdir("/tmp") < 0) {
+			error("couldn't chdir to /tmp either. dying.");
+			exit(1);
+		}
+	}
+
+
 	/* exec the cmdline */
 	execve(job->argv[0], job->argv, job->env);
 