From aa76e19e80e2bf0057c0f1e84558720026573de3 Mon Sep 17 00:00:00 2001 From: "Christopher J. Morrone" <morrone2@llnl.gov> Date: Thu, 13 Oct 2005 02:16:22 +0000 Subject: [PATCH] Move the stdio filenames out of the srun_info_t. With the new stdio engine, it no longer makes sense to associate the task stdio filenames with the srun info structure. Instead for each task we expand the filename format string and stick it in the slurmd_task_info_t structure. --- src/slurmd/io.c | 41 ++++++++------------- src/slurmd/mgr.c | 1 + src/slurmd/slurmd_job.c | 82 ++++++++++++++++++++++++++++++++--------- src/slurmd/slurmd_job.h | 10 ++--- 4 files changed, 87 insertions(+), 47 deletions(-) diff --git a/src/slurmd/io.c b/src/slurmd/io.c index 4c6891524ff..61a33686fee 100644 --- a/src/slurmd/io.c +++ b/src/slurmd/io.c @@ -604,8 +604,7 @@ _local_filename (char *fname, int taskid) } static int -_init_task_stdio_fds(slurmd_job_t *job, slurmd_task_info_t *task, - srun_info_t *srun) +_init_task_stdio_fds(slurmd_task_info_t *task, slurmd_job_t *job) { char *name; int single; @@ -615,11 +614,10 @@ _init_task_stdio_fds(slurmd_job_t *job, slurmd_task_info_t *task, /* * Initialize stdin */ - if ((name = _local_filename(srun->ifname, task->gtid)) != NULL) { - /* open file "name" on task's stdin */ - name = fname_create(job, srun->ifname, task->gtid); - debug3(" stdin file name = %s", name); - if ((task->stdin = open(name, O_RDONLY)) == -1) { + if (task->ifname != NULL) { + /* open file on task's stdin */ + debug3(" stdin file name = %s", task->ifname); + if ((task->stdin = open(task->ifname, O_RDONLY)) == -1) { error("Could not open stdin file: %m"); return SLURM_ERROR; } @@ -643,12 +641,11 @@ _init_task_stdio_fds(slurmd_job_t *job, slurmd_task_info_t *task, /* * Initialize stdout */ - if ((name = _local_filename(srun->ofname, task->gtid)) != NULL) { - /* open file "name" on task's stdout */ - name = fname_create(job, srun->ofname, task->gtid); - debug3(" stdout file name = %s", name); - 
task->stdout = open(name, O_CREAT|O_WRONLY|O_TRUNC|O_APPEND, - 0666); + if (task->ofname != NULL) { + /* open file on task's stdout */ + debug3(" stdout file name = %s", task->ofname); + task->stdout = open(task->ofname, + O_CREAT|O_WRONLY|O_TRUNC|O_APPEND, 0666); if (task->stdout == -1) { error("Could not open stdout file: %m"); return SLURM_ERROR; @@ -675,12 +672,11 @@ _init_task_stdio_fds(slurmd_job_t *job, slurmd_task_info_t *task, /* * Initialize stderr */ - if ((name = _local_filename(srun->efname, task->gtid)) != NULL) { - /* open file "name" on task's stdout */ - name = fname_create(job, srun->efname, task->gtid); - debug3(" stderr file name = %s", name); - task->stderr = open(name, O_CREAT|O_WRONLY|O_TRUNC|O_APPEND, - 0666); + if (task->efname != NULL) { + /* open file on task's stderr */ + debug3(" stderr file name = %s", task->efname); + task->stderr = open(task->efname, + O_CREAT|O_WRONLY|O_TRUNC|O_APPEND, 0666); if (task->stderr == -1) { error("Could not open stderr file: %m"); return SLURM_ERROR; @@ -708,14 +704,10 @@ _init_task_stdio_fds(slurmd_job_t *job, slurmd_task_info_t *task, int io_init_tasks_stdio(slurmd_job_t *job) { - srun_info_t *srun; int i; - srun = list_peek(job->sruns); - xassert(srun != NULL); - for (i = 0; i < job->ntasks; i++) { - _init_task_stdio_fds(job, job->task[i], srun); + _init_task_stdio_fds(job->task[i], job); } } @@ -933,7 +925,6 @@ io_client_connect(slurmd_job_t *job) /* kick IO thread */ eio_signal_wakeup(job->eio); - debug3(" test 3"); return SLURM_SUCCESS; } diff --git a/src/slurmd/mgr.c b/src/slurmd/mgr.c index 11268715ceb..52b2a0f20c1 100644 --- a/src/slurmd/mgr.c +++ b/src/slurmd/mgr.c @@ -440,6 +440,7 @@ _setup_io(slurmd_job_t *job) if (_drop_privileges(job->pwd) < 0) return ESLURMD_SET_UID_OR_GID_ERROR; + /* FIXME - need to check a return code for failures */ io_init_tasks_stdio(job); if (_reclaim_privileges(spwd) < 0) diff --git a/src/slurmd/slurmd_job.c b/src/slurmd/slurmd_job.c index f1c1d49d855..5f24262e451 
100644 --- a/src/slurmd/slurmd_job.c +++ b/src/slurmd/slurmd_job.c @@ -54,7 +54,8 @@ static char ** _array_copy(int n, char **src); static void _array_free(char ***array); static void _srun_info_destructor(void *arg); -static void _job_init_task_info(slurmd_job_t *job, uint32_t *gtid); +static void _job_init_task_info(slurmd_job_t *job, uint32_t *gtid, + char *ifname, char *ofname, char *efname); static struct passwd * _pwd_create(uid_t uid) @@ -200,9 +201,6 @@ job_create(launch_tasks_request_msg_t *msg, slurm_addr *cli_addr) slurm_set_addr(&io_addr, msg->io_port, NULL); srun = srun_info_create(msg->cred, &resp_addr, &io_addr); - srun->ofname = xstrdup(msg->ofname); - srun->efname = xstrdup(msg->efname); - srun->ifname = xstrdup(msg->ifname); job->buffered_stdio = msg->buffered_stdio; job->task_prolog = xstrdup(msg->task_prolog); @@ -221,7 +219,8 @@ job_create(launch_tasks_request_msg_t *msg, slurm_addr *cli_addr) list_append(job->sruns, (void *) srun); - _job_init_task_info(job, msg->global_task_ids); + _job_init_task_info(job, msg->global_task_ids, + msg->ifname, msg->ofname, msg->efname); return job; } @@ -288,7 +287,10 @@ job_spawn_create(spawn_task_request_msg_t *msg, slurm_addr *cli_addr) list_append(job->sruns, (void *) srun); - _job_init_task_info(job, &(msg->global_task_id)); + job->task = (slurmd_task_info_t **) + xmalloc(sizeof(slurmd_task_info_t *)); + job->task[0] = task_info_create(0, msg->global_task_id, + NULL, NULL, NULL); return job; } @@ -297,7 +299,7 @@ job_spawn_create(spawn_task_request_msg_t *msg, slurm_addr *cli_addr) * return the default output filename for a batch job */ static char * -_mkfilename(slurmd_job_t *job, const char *name) +_batchfilename(slurmd_job_t *job, const char *name) { if (name == NULL) return fname_create(job, "slurm-%J.out", 0); @@ -312,6 +314,7 @@ job_batch_job_create(batch_job_launch_msg_t *msg) slurmd_job_t *job; srun_info_t *srun = NULL; uint32_t global_taskid = 0; + char *ofname, *efname; xassert(msg != NULL); @@ 
-355,9 +358,6 @@ job_batch_job_create(batch_job_launch_msg_t *msg) srun = srun_info_create(NULL, NULL, NULL); - srun->ofname = _mkfilename(job, msg->out); - srun->efname = msg->err ? xstrdup(msg->err) : srun->ofname; - srun->ifname = xstrdup("/dev/null"); list_append(job->sruns, (void *) srun); if (msg->argc) { @@ -371,25 +371,69 @@ job_batch_job_create(batch_job_launch_msg_t *msg) job->argv = (char **) xmalloc(job->argc * sizeof(char *)); } - _job_init_task_info(job, &global_taskid); + job->task = (slurmd_task_info_t **) + xmalloc(sizeof(slurmd_task_info_t *)); + if (msg->err == NULL) + msg->err = msg->out; + job->task[0] = task_info_create(0, global_taskid, + xstrdup("/dev/null"), + _batchfilename(job, msg->out), + _batchfilename(job, msg->err)); return job; } +/* + * Expand a stdio file name. + * + * If "filename" is NULL it means that an eio object should be created + * for that stdio file rather than directly connecting it to a file. + * + * If the "filename" is a valid task number in string form and the + * number matches "gtaskid", then NULL is returned so that an eio + * object will be used. If it is a valid number, but it does not match + * "gtaskid", then the file descriptor will be connected to /dev/null. 
+ */ +static char * +_expand_stdio_filename(char *filename, int gtaskid, slurmd_job_t *job) +{ + int id; + + if (filename == NULL) + return NULL; + + id = fname_single_task_io(filename); + + if (id < 0) + return fname_create(job, filename, gtaskid); + if (id >= job->nprocs) { + error("Task ID in filename is invalid"); + return NULL; + } + + if (id == gtaskid) + return NULL; + else + return xstrdup("/dev/null"); +} + static void -_job_init_task_info(slurmd_job_t *job, uint32_t *gtid) +_job_init_task_info(slurmd_job_t *job, uint32_t *gtid, + char *ifname, char *ofname, char *efname) { int i; int n = job->ntasks; + char *in, *out, *err; job->task = (slurmd_task_info_t **) xmalloc(n * sizeof(slurmd_task_info_t *)); for (i = 0; i < n; i++){ - job->task[i] = task_info_create(i, gtid[i]); - /* "srun" info is attached to task in - * io_add_connecting - */ + in = _expand_stdio_filename(ifname, gtid[i], job); + out = _expand_stdio_filename(ofname, gtid[i], job); + err = _expand_stdio_filename(efname, gtid[i], job); + + job->task[i] = task_info_create(i, gtid[i], in, out, err); } } @@ -527,7 +571,8 @@ srun_info_destroy(struct srun_info *srun) } slurmd_task_info_t * -task_info_create(int taskid, int gtaskid) +task_info_create(int taskid, int gtaskid, + char *ifname, char *ofname, char *efname) { slurmd_task_info_t *t = (slurmd_task_info_t *) xmalloc(sizeof(*t)); @@ -540,6 +585,9 @@ task_info_create(int taskid, int gtaskid) t->id = taskid; t->gtid = gtaskid; t->pid = (pid_t) -1; + t->ifname = ifname; + t->ofname = ofname; + t->efname = efname; t->stdin = -1; t->to_stdin = -1; t->stdout = -1; diff --git a/src/slurmd/slurmd_job.h b/src/slurmd/slurmd_job.h index 33c58feecff..8b77520eb5b 100644 --- a/src/slurmd/slurmd_job.h +++ b/src/slurmd/slurmd_job.h @@ -54,10 +54,6 @@ typedef struct srun_info { srun_key_t *key; /* srun key for IO verification */ slurm_addr resp_addr; /* response addr for task exit msg */ slurm_addr ioaddr; /* Address to connect on for I/O */ - char * ofname; /* 
output file (if any) */ - char * efname; /* error file (if any) */ - char * ifname; /* input file (if any) */ - } srun_info_t; typedef enum task_state { @@ -85,6 +81,9 @@ typedef struct task_info { uint32_t gtid; /* global task id */ pid_t pid; /* task pid */ + char *ifname; /* standard input file name */ + char *ofname; /* standard output file name */ + char *efname; /* standard error file name */ int stdin; /* standard input file descriptor */ int stdout; /* standard output file descriptor */ int stderr; /* standard error file descriptor */ @@ -167,7 +166,8 @@ struct srun_info * srun_info_create(slurm_cred_t cred, slurm_addr *respaddr, void srun_info_destroy(struct srun_info *srun); -slurmd_task_info_t * task_info_create(int taskid, int gtaskid); +slurmd_task_info_t * task_info_create(int taskid, int gtaskid, + char *ifname, char *ofname, char *efname); void task_info_destroy(slurmd_task_info_t *t); -- GitLab