diff --git a/src/slurmd/io.c b/src/slurmd/io.c index 4c6891524ffe13c23563be8f6b10651a848566cc..61a33686feed9af5a216101039bd58fa50702164 100644 --- a/src/slurmd/io.c +++ b/src/slurmd/io.c @@ -604,8 +604,7 @@ _local_filename (char *fname, int taskid) } static int -_init_task_stdio_fds(slurmd_job_t *job, slurmd_task_info_t *task, - srun_info_t *srun) +_init_task_stdio_fds(slurmd_task_info_t *task, slurmd_job_t *job) { char *name; int single; @@ -615,11 +614,10 @@ _init_task_stdio_fds(slurmd_job_t *job, slurmd_task_info_t *task, /* * Initialize stdin */ - if ((name = _local_filename(srun->ifname, task->gtid)) != NULL) { - /* open file "name" on task's stdin */ - name = fname_create(job, srun->ifname, task->gtid); - debug3(" stdin file name = %s", name); - if ((task->stdin = open(name, O_RDONLY)) == -1) { + if (task->ifname != NULL) { + /* open file on task's stdin */ + debug3(" stdin file name = %s", task->ifname); + if ((task->stdin = open(task->ifname, O_RDONLY)) == -1) { error("Could not open stdin file: %m"); return SLURM_ERROR; } @@ -643,12 +641,11 @@ _init_task_stdio_fds(slurmd_job_t *job, slurmd_task_info_t *task, /* * Initialize stdout */ - if ((name = _local_filename(srun->ofname, task->gtid)) != NULL) { - /* open file "name" on task's stdout */ - name = fname_create(job, srun->ofname, task->gtid); - debug3(" stdout file name = %s", name); - task->stdout = open(name, O_CREAT|O_WRONLY|O_TRUNC|O_APPEND, - 0666); + if (task->ofname != NULL) { + /* open file on task's stdout */ + debug3(" stdout file name = %s", task->ofname); + task->stdout = open(task->ofname, + O_CREAT|O_WRONLY|O_TRUNC|O_APPEND, 0666); if (task->stdout == -1) { error("Could not open stdout file: %m"); return SLURM_ERROR; @@ -675,12 +672,11 @@ _init_task_stdio_fds(slurmd_job_t *job, slurmd_task_info_t *task, /* * Initialize stderr */ - if ((name = _local_filename(srun->efname, task->gtid)) != NULL) { - /* open file "name" on task's stdout */ - name = fname_create(job, srun->efname, task->gtid); 
- debug3(" stderr file name = %s", name); - task->stderr = open(name, O_CREAT|O_WRONLY|O_TRUNC|O_APPEND, - 0666); + if (task->efname != NULL) { + /* open file on task's stderr */ + debug3(" stderr file name = %s", task->efname); + task->stderr = open(task->efname, + O_CREAT|O_WRONLY|O_TRUNC|O_APPEND, 0666); if (task->stderr == -1) { error("Could not open stderr file: %m"); return SLURM_ERROR; @@ -708,14 +704,15 @@ _init_task_stdio_fds(slurmd_job_t *job, slurmd_task_info_t *task, int io_init_tasks_stdio(slurmd_job_t *job) { - srun_info_t *srun; int i; + int rc = SLURM_SUCCESS; - srun = list_peek(job->sruns); - xassert(srun != NULL); - for (i = 0; i < job->ntasks; i++) { - _init_task_stdio_fds(job, job->task[i], srun); + /* Attempt every task, but remember any failure so that the + * caller receives a defined status instead of garbage. */ + if (_init_task_stdio_fds(job->task[i], job) != SLURM_SUCCESS) + rc = SLURM_ERROR; } + return rc; } @@ -933,7 +930,6 @@ io_client_connect(slurmd_job_t *job) /* kick IO thread */ eio_signal_wakeup(job->eio); - debug3(" test 3"); return SLURM_SUCCESS; } diff --git a/src/slurmd/mgr.c b/src/slurmd/mgr.c index 11268715cebe4170c1b2b8c43db42a1a82e2f4b9..52b2a0f20c16f845acbccfe7bf95ba68671a605f 100644 --- a/src/slurmd/mgr.c +++ b/src/slurmd/mgr.c @@ -440,6 +440,7 @@ _setup_io(slurmd_job_t *job) if (_drop_privileges(job->pwd) < 0) return ESLURMD_SET_UID_OR_GID_ERROR; + /* FIXME - need to check a return code for failures */ io_init_tasks_stdio(job); if (_reclaim_privileges(spwd) < 0) diff --git a/src/slurmd/slurmd_job.c b/src/slurmd/slurmd_job.c index f1c1d49d855041d514576e8caafba23b5a32b947..5f24262e451c9fc01b282c03cbbdf38bccb4e989 100644 --- a/src/slurmd/slurmd_job.c +++ b/src/slurmd/slurmd_job.c @@ -54,7 +54,8 @@ static char ** _array_copy(int n, char **src); static void _array_free(char ***array); static void _srun_info_destructor(void *arg); -static void _job_init_task_info(slurmd_job_t *job, uint32_t *gtid); +static void _job_init_task_info(slurmd_job_t *job, uint32_t *gtid, + char *ifname, char *ofname, char *efname); static struct passwd * _pwd_create(uid_t uid) @@ -200,9 +201,6 @@ job_create(launch_tasks_request_msg_t *msg, 
slurm_addr *cli_addr) slurm_set_addr(&io_addr, msg->io_port, NULL); srun = srun_info_create(msg->cred, &resp_addr, &io_addr); - srun->ofname = xstrdup(msg->ofname); - srun->efname = xstrdup(msg->efname); - srun->ifname = xstrdup(msg->ifname); job->buffered_stdio = msg->buffered_stdio; job->task_prolog = xstrdup(msg->task_prolog); @@ -221,7 +219,8 @@ job_create(launch_tasks_request_msg_t *msg, slurm_addr *cli_addr) list_append(job->sruns, (void *) srun); - _job_init_task_info(job, msg->global_task_ids); + _job_init_task_info(job, msg->global_task_ids, + msg->ifname, msg->ofname, msg->efname); return job; } @@ -288,7 +287,10 @@ job_spawn_create(spawn_task_request_msg_t *msg, slurm_addr *cli_addr) list_append(job->sruns, (void *) srun); - _job_init_task_info(job, &(msg->global_task_id)); + job->task = (slurmd_task_info_t **) + xmalloc(sizeof(slurmd_task_info_t *)); + job->task[0] = task_info_create(0, msg->global_task_id, + NULL, NULL, NULL); return job; } @@ -297,7 +299,7 @@ job_spawn_create(spawn_task_request_msg_t *msg, slurm_addr *cli_addr) * return the default output filename for a batch job */ static char * -_mkfilename(slurmd_job_t *job, const char *name) +_batchfilename(slurmd_job_t *job, const char *name) { if (name == NULL) return fname_create(job, "slurm-%J.out", 0); @@ -312,6 +314,7 @@ job_batch_job_create(batch_job_launch_msg_t *msg) slurmd_job_t *job; srun_info_t *srun = NULL; uint32_t global_taskid = 0; + char *ofname, *efname; xassert(msg != NULL); @@ -355,9 +358,6 @@ job_batch_job_create(batch_job_launch_msg_t *msg) srun = srun_info_create(NULL, NULL, NULL); - srun->ofname = _mkfilename(job, msg->out); - srun->efname = msg->err ? 
xstrdup(msg->err) : srun->ofname; - srun->ifname = xstrdup("/dev/null"); list_append(job->sruns, (void *) srun); if (msg->argc) { @@ -371,25 +371,71 @@ job_batch_job_create(batch_job_launch_msg_t *msg) job->argv = (char **) xmalloc(job->argc * sizeof(char *)); } - _job_init_task_info(job, &global_taskid); + job->task = (slurmd_task_info_t **) + xmalloc(sizeof(slurmd_task_info_t *)); + /* stderr defaults to the stdout name; do not alias fields of msg */ + ofname = _batchfilename(job, msg->out); + efname = _batchfilename(job, msg->err ? msg->err : msg->out); + job->task[0] = task_info_create(0, global_taskid, + xstrdup("/dev/null"), + ofname, efname); return job; } +/* + * Expand a stdio file name. + * + * If "filename" is NULL it means that an eio object should be created + * for that stdio file rather than directly connecting it to a file. + * + * If "filename" is a valid task number in string form and the + * number matches "gtaskid", then NULL is returned so that an eio + * object will be used. If it is a valid number, but it does not match + * "gtaskid", then the file descriptor will be connected to /dev/null. + * A task number that is >= job->nprocs is logged as an error and NULL + * is returned. 
+ */ +static char * +_expand_stdio_filename(char *filename, int gtaskid, slurmd_job_t *job) +{ + int id; + + if (filename == NULL) + return NULL; + + id = fname_single_task_io(filename); + + if (id < 0) + return fname_create(job, filename, gtaskid); + if (id >= job->nprocs) { + error("Task ID in filename is invalid"); + return NULL; + } + + if (id == gtaskid) + return NULL; + else + return xstrdup("/dev/null"); +} + static void -_job_init_task_info(slurmd_job_t *job, uint32_t *gtid) +_job_init_task_info(slurmd_job_t *job, uint32_t *gtid, + char *ifname, char *ofname, char *efname) { int i; int n = job->ntasks; + char *in, *out, *err; job->task = (slurmd_task_info_t **) xmalloc(n * sizeof(slurmd_task_info_t *)); for (i = 0; i < n; i++){ - job->task[i] = task_info_create(i, gtid[i]); - /* "srun" info is attached to task in - * io_add_connecting - */ + in = _expand_stdio_filename(ifname, gtid[i], job); + out = _expand_stdio_filename(ofname, gtid[i], job); + err = _expand_stdio_filename(efname, gtid[i], job); + + job->task[i] = task_info_create(i, gtid[i], in, out, err); } } @@ -527,7 +573,8 @@ srun_info_destroy(struct srun_info *srun) } slurmd_task_info_t * -task_info_create(int taskid, int gtaskid) +task_info_create(int taskid, int gtaskid, + char *ifname, char *ofname, char *efname) { slurmd_task_info_t *t = (slurmd_task_info_t *) xmalloc(sizeof(*t)); @@ -540,6 +587,9 @@ task_info_create(int taskid, int gtaskid) t->id = taskid; t->gtid = gtaskid; t->pid = (pid_t) -1; + t->ifname = ifname; + t->ofname = ofname; + t->efname = efname; t->stdin = -1; t->to_stdin = -1; t->stdout = -1; diff --git a/src/slurmd/slurmd_job.h b/src/slurmd/slurmd_job.h index 33c58feecffe4009e48749ba3ed39a76c952a083..8b77520eb5b0e194f4bc8ac0db69c98132df8d4f 100644 --- a/src/slurmd/slurmd_job.h +++ b/src/slurmd/slurmd_job.h @@ -54,10 +54,6 @@ typedef struct srun_info { srun_key_t *key; /* srun key for IO verification */ slurm_addr resp_addr; /* response addr for task exit msg */ slurm_addr 
ioaddr; /* Address to connect on for I/O */ - char * ofname; /* output file (if any) */ - char * efname; /* error file (if any) */ - char * ifname; /* input file (if any) */ - } srun_info_t; typedef enum task_state { @@ -85,6 +81,9 @@ typedef struct task_info { uint32_t gtid; /* global task id */ pid_t pid; /* task pid */ + char *ifname; /* standard input file name */ + char *ofname; /* standard output file name */ + char *efname; /* standard error file name */ int stdin; /* standard input file descriptor */ int stdout; /* standard output file descriptor */ int stderr; /* standard error file descriptor */ @@ -167,7 +166,8 @@ struct srun_info * srun_info_create(slurm_cred_t cred, slurm_addr *respaddr, void srun_info_destroy(struct srun_info *srun); -slurmd_task_info_t * task_info_create(int taskid, int gtaskid); +slurmd_task_info_t * task_info_create(int taskid, int gtaskid, + char *ifname, char *ofname, char *efname); void task_info_destroy(slurmd_task_info_t *t);