Commit 2698ec54 authored by Mark Grondona

o mgr.c : no longer chdir() in io thread, just drop/reassert privileges

   before opening output files. (Thus, seteuid_and_chdir() renamed
   to _drop_privileges()/_reclaim_privileges())
 o io.c : add srun info objects to tasks' srun list as they attach,
   so new sruns get task exit messages.
parent 0ea9a61e
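
The drop/reclaim pattern described in the commit message boils down to: temporarily switch the daemon's effective gid/uid to the job owner, open the output files with the user's permissions, then restore the saved identity. Below is a minimal standalone sketch of that pattern, assuming a root-running daemon; open_output_as_user() and its fopen() call are illustrative assumptions, not the slurmd code:

#include <grp.h>
#include <pwd.h>
#include <stdio.h>
#include <unistd.h>

/* Sketch only: become the user just long enough to create an output
 * file, then switch back.  Order matters: drop egid before euid
 * (a non-root euid could no longer change groups), and reclaim euid
 * before egid for the same reason. */
static int open_output_as_user(const struct passwd *user, const char *path)
{
    uid_t saved_uid = geteuid();
    gid_t saved_gid = getegid();
    FILE *fp;

    if (setegid(user->pw_gid) < 0 ||
        initgroups(user->pw_name, user->pw_gid) < 0 ||
        seteuid(user->pw_uid) < 0)
        return -1;                      /* could not drop privileges */

    fp = fopen(path, "w");              /* created with the user's access rights */

    if (seteuid(saved_uid) < 0 || setegid(saved_gid) < 0)
        return -1;                      /* could not reclaim privileges */

    if (fp == NULL)
        return -1;
    fclose(fp);
    return 0;
}

Tasks themselves still chdir() and permanently setuid() to the user in _task_exec(); per this commit, only the IO thread's file setup uses the temporary euid switch.
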
@@ -50,9 +50,19 @@ fname_create(slurmd_job_t *job, const char *format, int taskid)
 {
     unsigned long int wid = 0;
     char *name = NULL;
+    char *orig = xstrdup(format);
     char *p, *q;
-    q = p = format;
+    /* If format doesn't specify an absolute pathname,
+     * use cwd
+     */
+    if (orig[0] != '/') {
+        xstrcat(name, job->cwd);
+        if (name[strlen(name)-1] != '/')
+            xstrcatchar(name, '/');
+    }
+    q = p = orig;
     while(*p != '\0') {
         if (*p == '%') {
             if (isdigit(*(++p))) {
@@ -102,6 +112,7 @@ fname_create(slurmd_job_t *job, const char *format, int taskid)
     if (q != p)
         xmemcat(name, q, p);
+    xfree(orig);
     return name;
 }
@@ -236,7 +236,6 @@ io_spawn_handler(slurmd_job_t *job)
      */
     if (_io_prepare_clients(job) < 0)
         return SLURM_FAILURE;
     return 0;
 }
@@ -260,11 +259,10 @@ _xclose(int fd)
 static void
 _io_finalize(task_info_t *t)
 {
-    struct io_info *in = t->in->arg;
+    struct io_info *in = t->in->arg;
     ListIterator i;
     struct io_info *io;

     if (_xclose(t->pin[0] ) < 0)
         error("close(stdin) : %m");
     if (_xclose(t->pout[1]) < 0)
@@ -272,9 +270,8 @@ _io_finalize(task_info_t *t)
     if (_xclose(t->perr[1]) < 0)
         error("close(stderr): %m");
-    in->disconnected = 1;
     /* close stdin objs
      */
+    in->disconnected = 1;
     if (!in->writers)
         return;
@@ -446,6 +443,9 @@ _io_prepare_one(slurmd_job_t *j, task_info_t *t, srun_info_t *s)
         _io_add_connecting(j, t, s, CLIENT_STDIN);
     }
+    if (!list_find_first(t->srun_list, (ListFindF) find_obj, s))
+        list_append(t->srun_list, (void *) s);
     return SLURM_SUCCESS;
 }
@@ -464,7 +464,8 @@ _io_prepare_clients(slurmd_job_t *job)
     xassert(srun != NULL);
     slurm_get_addr(&srun->ioaddr, &port, host, sizeof(host));
-    debug2("connecting IO back to %s:%d", host, ntohs(port));
+    if (port)
+        debug2("connecting IO back to %s:%d", host, ntohs(port));
     /* Connect stdin/out/err to either a remote srun or
      * local file
......
@@ -52,6 +52,43 @@ static void _array_free(int n, char ***array);
 static void _srun_info_destructor(void *arg);
 static void _job_init_task_info(slurmd_job_t *job, uint32_t *gids);

+static struct passwd *
+_pwd_create(uid_t uid)
+{
+    struct passwd *pwd = xmalloc(sizeof(*pwd));
+    struct passwd *ppwd = getpwuid(uid);
+
+    if (!ppwd) {
+        xfree(pwd);
+        return NULL;
+    }
+
+    pwd->pw_name = xstrdup(ppwd->pw_name);
+    pwd->pw_passwd = xstrdup(ppwd->pw_passwd);
+    pwd->pw_gecos = xstrdup(ppwd->pw_gecos);
+    pwd->pw_shell = xstrdup(ppwd->pw_shell);
+    pwd->pw_dir = xstrdup(ppwd->pw_dir);
+    pwd->pw_uid = ppwd->pw_uid;
+    pwd->pw_gid = ppwd->pw_gid;
+
+    return pwd;
+}
+
+static void
+_pwd_destroy(struct passwd *pwd)
+{
+    if (!pwd)
+        return;
+    xfree(pwd->pw_name);
+    xfree(pwd->pw_passwd);
+    xfree(pwd->pw_gecos);
+    xfree(pwd->pw_shell);
+    xfree(pwd->pw_dir);
+    xfree(pwd);
+}
+
 /* create a slurmd job structure from a launch tasks message */
 slurmd_job_t *
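
For context on the new helpers above: getpwuid(3) may return a pointer into static storage that a later password lookup overwrites, and mgr.c now performs a second lookup (getpwuid(geteuid()) for the saved daemon identity) while job->pwd must remain valid, which is presumably why job_create() deep-copies the entry with _pwd_create() and releases it with _pwd_destroy() in job_destroy(). A small self-contained illustration of the hazard (not slurmd code):

#include <pwd.h>
#include <stdio.h>
#include <unistd.h>

/* getpwuid() may reuse one static buffer for every call, so holding on
 * to the first result across a second lookup is unsafe without a copy. */
int main(void)
{
    struct passwd *first = getpwuid(0);            /* e.g. root */
    if (!first)
        return 1;
    printf("first lookup : %s\n", first->pw_name);

    struct passwd *second = getpwuid(getuid());    /* may overwrite the same buffer */
    if (!second)
        return 1;
    printf("second lookup: %s\n", second->pw_name);

    /* If the library reused its buffer, this now prints the second name. */
    printf("first again  : %s\n", first->pw_name);
    return 0;
}
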
@@ -67,7 +104,7 @@ job_create(launch_tasks_request_msg_t *msg, slurm_addr *cli_addr)
     debug3("entering job_create");

-    if ((pwd = getpwuid((uid_t)msg->uid)) < 0) {
+    if ((pwd = _pwd_create((uid_t)msg->uid)) < 0) {
         error("uid %ld not found on system", msg->uid);
         return NULL;
     }
@@ -124,12 +161,9 @@ job_create(launch_tasks_request_msg_t *msg, slurm_addr *cli_addr)
 static char *
 _mkfilename(slurmd_job_t *job, const char *name)
 {
-    char buf[256];
-    if (name == NULL) {
-        snprintf(buf, 256, "%s/job%u.out", job->cwd, job->jobid);
-        return xstrdup(buf);
-    } else
+    if (name == NULL)
+        return fname_create(job, "job%j.out", 0);
+    else
         return fname_create(job, name, 0);
 }
@@ -139,9 +173,9 @@ job_batch_job_create(batch_job_launch_msg_t *msg)
     struct passwd *pwd;
     slurmd_job_t *job = xmalloc(sizeof(*job));
     srun_info_t *srun = NULL;
-    uint32_t gid = 0;
+    uint32_t global_taskid = 0;

-    if ((pwd = getpwuid((uid_t)msg->uid)) < 0) {
+    if ((pwd = _pwd_create((uid_t)msg->uid)) < 0) {
         error("uid %ld not found on system", msg->uid);
         return NULL;
     }
@@ -172,7 +206,7 @@ job_batch_job_create(batch_job_launch_msg_t *msg)
      */
     job->argv = (char **) xmalloc(job->argc * sizeof(char *));

-    _job_init_task_info(job, &gid);
+    _job_init_task_info(job, &global_taskid);

     return job;
 }
@@ -188,8 +222,9 @@ _job_init_task_info(slurmd_job_t *job, uint32_t *gid)
     for (i = 0; i < n; i++){
         job->task[i] = task_info_create(i, gid[i]);
-        if (srun != NULL)
-            list_append(job->task[i]->srun_list, (void *)srun);
+        /* "srun" info is attached to task in
+         * io_add_connecting
+         */
     }
 }
@@ -241,6 +276,8 @@ job_destroy(slurmd_job_t *job)
     _array_free(job->envc, &job->env);
     _array_free(job->argc, &job->argv);
+    _pwd_destroy(job->pwd);
     for (i = 0; i < job->ntasks; i++)
         task_info_destroy(job->task[i]);
     list_destroy(job->sruns);
......
@@ -64,8 +64,9 @@ static int _run_job(slurmd_job_t *job);
 static int _run_batch_job(slurmd_job_t *job);
 static void _exec_all_tasks(slurmd_job_t *job);
 static void _task_exec(slurmd_job_t *job, int i, bool batch);
-static int _seteuid_and_chdir(slurmd_job_t *job);
-static int _setuid(slurmd_job_t *job);
+static int _drop_privileges(struct passwd *pwd);
+static int _reclaim_privileges(struct passwd *pwd);
+static int _become_user(slurmd_job_t *job);
 static int _unblock_all_signals(void);
 static int _send_exit_msg(int rc, task_info_t *t);
 static int _complete_job(slurmd_job_t *job, int rc, int status);
@@ -87,6 +88,9 @@ mgr_launch_tasks(launch_tasks_request_msg_t *msg, slurm_addr *cli)
     verbose("running job step %d.%d for %s",
             job->jobid, job->stepid, job->pwd->pw_name);

+    /* Run job's tasks and wait for all tasks to exit.
+     */
     if (_run_job(job) < 0)
         goto error;
@@ -254,20 +258,13 @@ mgr_launch_batch_job(batch_job_launch_msg_t *msg, slurm_addr *cli)
 static int
 _run_job(slurmd_job_t *job)
 {
-    int rc = SLURM_SUCCESS;
-    int i;
-    uid_t suid = getuid();
-    gid_t sgid = getgid();
+    int rc = SLURM_SUCCESS;
+    int i = 0;
+    struct passwd *spwd = getpwuid(geteuid());

     /* Insert job info into shared memory */
     job_update_shm(job);

-    /*
-     * Need to detach from shared memory
-     * We don't know what will happen in interconnect_init()
-     */
-    /* shm_fini(); */
     if (interconnect_init(job) == SLURM_ERROR) {
         job_error(job, "interconnect_init: %m");
         rc = -2;
@@ -275,23 +272,10 @@ _run_job(slurmd_job_t *job)
         goto done;
     }

-    /* Reattach to shared memory after interconnect is initialized
+    /*
+     * Temporarily drop permissions
      */
-    /* job_debug(job, "%ld reattaching to shm", getpid()); */
-    /* if (shm_init() < 0) {
-        job_error(job, "unable to reattach to shm: %m");
-        rc = -1;
-        goto done;
-    }*/
-
-    /* initialize I/O, connect back to srun, and spawn thread for
-     * forwarding I/O.
-     */
-
-    /* Temporarily drop permissions and attempt to chdir()
-     *
-     */
-    if ((rc = _seteuid_and_chdir(job)) < 0)
+    if ((rc = _drop_privileges(job->pwd)) < 0)
         goto done;

     /* Option: connect slurmd stderr to srun local task 0: stderr? */
@@ -301,8 +285,8 @@ _run_job(slurmd_job_t *job)
         goto done;
     }

-    if ((seteuid(suid) < 0) || (setegid(sgid) < 0))
-        error("sete{u/g}id(%ld/%ld): %m", suid, sgid);
+    if (_reclaim_privileges(spwd) < 0)
+        error("sete{u/g}id(%ld/%ld): %m", spwd->pw_uid, spwd->pw_gid);

     _exec_all_tasks(job);
     job_debug2(job, "job complete, waiting on IO");
@@ -406,22 +390,24 @@ _run_batch_job(slurmd_job_t *job)
     int rc = 0;
     task_t t;
     pid_t sid, pid;
-    gid_t sgid = getgid();
-    uid_t suid = getuid();
+    struct passwd *spwd = getpwuid(getuid());

     /* Temporarily drop permissions to initiate
      * IO thread. This will ensure that calling user
      * has appropriate permissions to open output
      * files, if any.
      */
-    _seteuid_and_chdir(job);
+    if (_drop_privileges(job->pwd) < 0) {
+        error("seteuid(%ld) : %m", job->uid);
+        return ESLURMD_SET_UID_OR_GID_ERROR;
+    }

     rc = io_spawn_handler(job);

     /* seteuid/gid back to saved uid/gid
      */
-    if ((seteuid(suid) < 0) || (setegid(sgid) < 0)) {
-        error("set{e/g}uid(%ld/%ld) : %m", suid, sgid);
+    if (_reclaim_privileges(spwd) < 0) {
+        error("seteuid(%ld) : %m", spwd->pw_uid);
         return ESLURMD_SET_UID_OR_GID_ERROR;
     }
@@ -457,7 +443,7 @@ _run_batch_job(slurmd_job_t *job)
     job->task[0]->pid = t.pid;
     if (shm_add_task(job->jobid, job->stepid, &t) < 0) {
-        job_error(job, "shm_add_task: %m");
+        error("job %d: shm_add_task: %m", job->jobid);
         return ESLURMD_SHARED_MEMORY_ERROR;
     }
@@ -509,37 +495,52 @@ _wait_for_all_tasks(slurmd_job_t *job)
 }

 static int
-_seteuid_and_chdir(slurmd_job_t *job)
+_drop_privileges(struct passwd *pwd)
 {
-    if (setegid(job->pwd->pw_gid) < 0) {
+    if (setegid(pwd->pw_gid) < 0) {
         error("setegid: %m");
         return -1;
     }

-    if (initgroups(job->pwd->pw_name, job->pwd->pw_gid) < 0) {
-        ;
-        /* error("initgroups: %m"); */
+    if (initgroups(pwd->pw_name, pwd->pw_gid) < 0) {
+        error("initgroups: %m");
     }

-    if (seteuid(job->pwd->pw_uid) < 0) {
+    if (seteuid(pwd->pw_uid) < 0) {
         error("seteuid: %m");
         return -1;
     }

-    if (chdir(job->cwd) < 0) {
-        error("couldn't chdir to `%s': %m: going to /tmp instead",
-              job->cwd);
-        if (chdir("/tmp") < 0) {
-            error("couldn't chdir to /tmp either. dying.");
-            return -1;
-        }
-    }
     return SLURM_SUCCESS;
 }

+static int
+_reclaim_privileges(struct passwd *pwd)
+{
+    if (seteuid(pwd->pw_uid) < 0) {
+        error("seteuid: %m");
+        return -1;
+    }
+
+    if (setegid(pwd->pw_gid) < 0) {
+        error("setegid: %m");
+        return -1;
+    }
+
+    if (initgroups(pwd->pw_name, pwd->pw_gid) < 0) {
+        error("initgroups: %m");
+        return -1;
+    }
+
+    return SLURM_SUCCESS;
+}
+
 static int
-_setuid(slurmd_job_t *job)
+_become_user(slurmd_job_t *job)
 {
     if (setgid(job->pwd->pw_gid) < 0) {
         error("setgid: %m");
@@ -551,7 +552,7 @@ _setuid(slurmd_job_t *job)
         /* error("initgroups: %m"); */
     }

-    if (setuid(job->uid) < 0) {
+    if (setuid(job->pwd->pw_uid) < 0) {
         error("setuid: %m");
         return -1;
     }
@@ -572,7 +573,7 @@ _task_exec(slurmd_job_t *job, int i, bool batch)
      */
     log_init("slurmd", opts, 0, NULL);

-    if ((rc = _setuid(job)) < 0)
+    if ((rc = _become_user(job)) < 0)
         exit(rc);

     if (_unblock_all_signals() == SLURM_ERROR) {
@@ -590,6 +591,16 @@ _task_exec(slurmd_job_t *job, int i, bool batch)
         error("interconnect_env: %m");
     }

+    if (chdir(job->cwd) < 0) {
+        error("couldn't chdir to `%s': %m: going to /tmp instead",
+              job->cwd);
+        if (chdir("/tmp") < 0) {
+            error("couldn't chdir to /tmp either. dying.");
+            exit(1);
+        }
+    }
+
     /* exec the cmdline */
     execve(job->argv[0], job->argv, job->env);
......