From 91337c3fea7a1f7ceef0cd80c0eecbdde8402226 Mon Sep 17 00:00:00 2001 From: Morris Jette <jette@schedmd.com> Date: Tue, 29 Aug 2017 14:08:26 -0600 Subject: [PATCH] Set SLURM_JOBID to be pack job leader This applies to job steps for MPI. Even if there is more than one pack job component in a single MPI_COMM_WORLD, they will share a common SLURM_JOBID. --- slurm/slurm.h.in | 1 + src/api/step_launch.c | 9 +++++++-- src/common/slurm_protocol_defs.h | 1 + src/common/slurm_protocol_pack.c | 4 ++++ src/plugins/launch/slurm/launch_slurm.c | 1 + src/slurmd/slurmstepd/slurmstepd_job.c | 4 +++- src/slurmd/slurmstepd/slurmstepd_job.h | 1 + src/slurmd/slurmstepd/task.c | 5 ++++- src/srun/libsrun/srun_job.c | 26 ++++++++++++++++++++----- src/srun/libsrun/srun_job.h | 1 + src/srun/srun.c | 5 ++++- 11 files changed, 48 insertions(+), 10 deletions(-) diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 66d0a9c2938..b88389a9611 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -1928,6 +1928,7 @@ typedef struct { bool no_alloc; uint32_t slurmd_debug; /* remote slurmd debug level */ uint32_t node_offset; /* Pack job node offset or NO_VAL */ + uint32_t pack_jobid; /* pack job ID or NO_VAL */ uint32_t pack_ntasks; /* total task count for entire pack job */ uint32_t pack_offset; /* Pack job offset or NO_VAL */ uint32_t task_offset; /* Pack job tsk offset or NO_VAL */ diff --git a/src/api/step_launch.c b/src/api/step_launch.c index 97598ff160a..fa6cd9ca5a7 100644 --- a/src/api/step_launch.c +++ b/src/api/step_launch.c @@ -165,6 +165,7 @@ extern void slurm_step_launch_params_t_init(slurm_step_launch_params_t *ptr) ptr->cpu_freq_max = NO_VAL; ptr->cpu_freq_gov = NO_VAL; ptr->node_offset = NO_VAL; + ptr->pack_jobid = NO_VAL; ptr->pack_ntasks = NO_VAL; ptr->pack_offset = NO_VAL; ptr->task_offset = NO_VAL; @@ -294,6 +295,7 @@ extern int slurm_step_launch(slurm_step_ctx_t *ctx, launch.cred = ctx->step_resp->cred; launch.job_step_id = ctx->step_resp->job_step_id; launch.node_offset = params->node_offset; + launch.pack_jobid = params->pack_jobid; launch.pack_ntasks = params->pack_ntasks; launch.pack_offset = params->pack_offset; launch.task_offset = params->task_offset; @@ -500,12 +502,15 @@ extern int slurm_step_launch_add(slurm_step_ctx_t *ctx, launch.spank_job_env_size = params->spank_job_env_size; launch.cred = ctx->step_resp->cred; launch.job_step_id = ctx->step_resp->job_step_id; + launch.pack_jobid = params->pack_jobid; launch.pack_ntasks = params->pack_ntasks; launch.pack_offset = params->pack_offset; launch.task_offset = params->task_offset; if (params->env == NULL) { - /* if the user didn't specify an environment, grab the - * environment of the running process */ + /* + * if the user didn't specify an environment, grab the + * environment of the running process + */ env_array_merge(&env, (const char **)environ); } else { env_array_merge(&env, (const char **)params->env); diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index 98b72fca17a..213de4f3102 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -767,6 +767,7 @@ typedef struct launch_tasks_request_msg { uint32_t job_id; uint32_t job_step_id; uint32_t node_offset; /* pack job node offset of NO_VAL */ + uint32_t pack_jobid; /* pack job ID or NO_VAL */ uint32_t pack_ntasks; /* total task count for entire pack job */ uint32_t pack_offset; /* pack job offset of NO_VAL */ uint32_t task_offset; /* pack job task ID offset of NO_VAL */ diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index a1d2dc2a0cf..7e87b22707e 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -10262,6 +10262,7 @@ _pack_launch_tasks_request_msg(launch_tasks_request_msg_t * msg, Buf buffer, pack32(msg->job_id, buffer); pack32(msg->job_step_id, buffer); pack32(msg->node_offset, buffer); + pack32(msg->pack_jobid, buffer); pack32(msg->pack_ntasks, buffer); pack32(msg->pack_offset, buffer); pack32(msg->task_offset, buffer); @@ -10549,6 +10550,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** safe_unpack32(&msg->job_id, buffer); safe_unpack32(&msg->job_step_id, buffer); safe_unpack32(&msg->node_offset, buffer); + safe_unpack32(&msg->pack_jobid, buffer); safe_unpack32(&msg->pack_ntasks, buffer); safe_unpack32(&msg->pack_offset, buffer); safe_unpack32(&msg->task_offset, buffer); @@ -10661,6 +10663,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** safe_unpack32(&msg->job_id, buffer); safe_unpack32(&msg->job_step_id, buffer); msg->node_offset = NO_VAL; + msg->pack_jobid = NO_VAL; msg->pack_ntasks = NO_VAL; msg->pack_offset = NO_VAL; msg->task_offset = NO_VAL; @@ -10792,6 +10795,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** safe_unpack32(&msg->job_id, buffer); safe_unpack32(&msg->job_step_id, buffer); msg->node_offset = NO_VAL; + msg->pack_jobid = NO_VAL; msg->pack_ntasks = NO_VAL; msg->pack_offset = NO_VAL; msg->task_offset = NO_VAL; diff --git a/src/plugins/launch/slurm/launch_slurm.c b/src/plugins/launch/slurm/launch_slurm.c index 0bd7c617d9a..82cc84fce8b 100644 --- a/src/plugins/launch/slurm/launch_slurm.c +++ b/src/plugins/launch/slurm/launch_slurm.c @@ -674,6 +674,7 @@ extern int launch_p_step_launch(srun_job_t *job, slurm_step_io_fds_t *cio_fds, launch_params.remote_input_filename = fname_remote_string(job->ifname); launch_params.remote_error_filename = fname_remote_string(job->efname); launch_params.node_offset = job->node_offset; + launch_params.pack_jobid = job->pack_jobid; launch_params.pack_ntasks = job->pack_ntasks; launch_params.pack_offset = job->pack_offset; launch_params.task_offset = job->task_offset; diff --git a/src/slurmd/slurmstepd/slurmstepd_job.c b/src/slurmd/slurmstepd/slurmstepd_job.c index ec2164b73d6..f71f654feaf 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.c +++ b/src/slurmd/slurmstepd/slurmstepd_job.c @@ -309,6 +309,7 @@ extern stepd_step_rec_t *stepd_step_rec_create(launch_tasks_request_msg_t *msg, job->array_job_id = msg->job_id; job->array_task_id = NO_VAL; job->node_offset = msg->node_offset; /* Used for env vars */ + job->pack_jobid = msg->pack_jobid; /* Used for env vars */ job->pack_ntasks = msg->pack_ntasks; /* Used for env vars */ job->pack_offset = msg->pack_offset; /* Used for env vars & labels */ job->task_offset = msg->task_offset; /* Used for env vars & labels */ @@ -472,7 +473,8 @@ batch_stepd_step_rec_create(batch_job_launch_msg_t *msg) job->stepid = msg->step_id; job->array_job_id = msg->array_job_id; job->array_task_id = msg->array_task_id; - job->pack_offset = NO_VAL; /* Used to set output labels */ + job->pack_jobid = NO_VAL; /* Used to set env vars */ + job->pack_offset = NO_VAL; /* Used to set labels and env vars */ job->job_core_spec = msg->job_core_spec; job->batch = true; diff --git a/src/slurmd/slurmstepd/slurmstepd_job.h b/src/slurmd/slurmstepd/slurmstepd_job.h index 702a65b2910..68ec5e83301 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.h +++ b/src/slurmd/slurmstepd/slurmstepd_job.h @@ -135,6 +135,7 @@ typedef struct { uint32_t nodeid; /* relative position of this node in job */ uint32_t node_offset; /* pack job node offset or NO_VAL */ uint32_t node_tasks; /* number of tasks on *this* node */ + uint32_t pack_jobid; /* pack job ID or NO_VAL */ uint32_t pack_ntasks; /* total task count for entire pack job */ uint32_t pack_offset; /* pack job offset or NO_VAL */ uint32_t task_offset; /* pack job task offset or NO_VAL */ diff --git a/src/slurmd/slurmstepd/task.c b/src/slurmd/slurmstepd/task.c index 3fa2836e789..8168cac272b 100644 --- a/src/slurmd/slurmstepd/task.c +++ b/src/slurmd/slurmstepd/task.c @@ -357,7 +357,10 @@ extern void exec_task(stepd_step_rec_t *job, int i) job->envtp->sgtids = _uint32_array_to_str(job->node_tasks, gtids); xfree(gtids); - job->envtp->jobid = job->jobid; + if (job->pack_jobid != NO_VAL) + job->envtp->jobid = job->pack_jobid; + else + job->envtp->jobid = job->jobid; job->envtp->stepid = job->stepid; job->envtp->nodeid = job->nodeid + node_offset; job->envtp->cpus_on_node = job->cpus; diff --git a/src/srun/libsrun/srun_job.c b/src/srun/libsrun/srun_job.c index 0bb321b5bec..9f1374684cd 100644 --- a/src/srun/libsrun/srun_job.c +++ b/src/srun/libsrun/srun_job.c @@ -365,7 +365,8 @@ extern srun_job_t *job_step_create_allocation( buf = hostlist_ranged_string_xmalloc(hl); count = hostlist_count(hl); hostlist_destroy(hl); - /* Don't reset the ai->nodelist because that is the + /* + * Don't reset the ai->nodelist because that is the * nodelist we want to say the allocation is under * opt_local->nodelist is what is used for the allocation. */ @@ -675,7 +676,7 @@ static void _set_step_opts(opt_t *opt_local) * the job allocation request with its requested options. */ static int _create_job_step(srun_job_t *job, bool use_all_cpus, - List srun_job_list) + List srun_job_list, uint32_t pack_jobid) { ListIterator opt_iter = NULL, job_iter; opt_t *opt_local = &opt; @@ -688,6 +689,8 @@ static int _create_job_step(srun_job_t *job, bool use_all_cpus, opt_iter = list_iterator_create(opt_list); job_iter = list_iterator_create(srun_job_list); while ((job = (srun_job_t *) list_next(job_iter))) { + if (pack_jobid) + job->pack_jobid = pack_jobid; job->stepid = NO_VAL; pack_ntasks += job->ntasks; } @@ -762,7 +765,7 @@ extern void create_srun_job(void **p_job, bool *got_alloc, srun_job_t *job = NULL; int i, max_list_offset, max_pack_offset, pack_offset = -1; opt_t *opt_local; - uint32_t my_job_id = 0; + uint32_t my_job_id = 0, pack_jobid = 0; bool begin_error_logged = false; bool core_spec_error_logged = false; #ifdef HAVE_NATIVE_CRAY @@ -924,8 +927,14 @@ extern void create_srun_job(void **p_job, bool *got_alloc, max_pack_offset, pack_offset); exit(error_exit); } + if (srun_job_list && (list_count(srun_job_list) > 1) && + opt_list && (list_count(opt_list) > 1) && + my_job_id && (opt.mpi_combine == true)) { + pack_jobid = my_job_id; + } - if (_create_job_step(job, false, srun_job_list) < 0) { + if (_create_job_step(job, false, srun_job_list, pack_jobid) + < 0) { if (*got_alloc) slurm_complete_job(my_job_id, 1); else @@ -994,6 +1003,11 @@ extern void create_srun_job(void **p_job, bool *got_alloc, job = job_create_allocation(resp, &opt); _set_step_opts(&opt); } + if (srun_job_list && (list_count(srun_job_list) > 1) && + opt_list && (list_count(opt_list) > 1) && + my_job_id && (opt.mpi_combine == true)) { + pack_jobid = my_job_id; + } /* * Become --uid user @@ -1001,7 +1015,8 @@ extern void create_srun_job(void **p_job, bool *got_alloc, if (_become_user () < 0) info("Warning: Unable to assume uid=%u", opt.uid); - if (_create_job_step(job, true, srun_job_list) < 0) { + if (_create_job_step(job, true, srun_job_list, pack_jobid) + < 0) { slurm_complete_job(my_job_id, 1); exit(error_exit); } @@ -1196,6 +1211,7 @@ static srun_job_t *_job_create_structure(allocation_info_t *ainfo, job->nodelist = xstrdup(ainfo->nodelist); job->partition = xstrdup(ainfo->partition); job->stepid = ainfo->stepid; + job->pack_jobid = NO_VAL; job->pack_ntasks = NO_VAL; job->pack_offset = NO_VAL; job->task_offset = NO_VAL; diff --git a/src/srun/libsrun/srun_job.h b/src/srun/libsrun/srun_job.h index 504f6945145..39b3430ac72 100644 --- a/src/srun/libsrun/srun_job.h +++ b/src/srun/libsrun/srun_job.h @@ -82,6 +82,7 @@ typedef struct srun_job { uint32_t jobid; /* assigned job id */ uint32_t stepid; /* assigned step id */ uint32_t node_offset; /* pack job node offset or NO_VAL */ + uint32_t pack_jobid; /* pack job leader or NO_VAL */ uint32_t pack_ntasks; /* total task count for entire pack job */ uint32_t pack_offset; /* pack job offset or NO_VAL */ uint32_t task_offset; /* pack job tsk offset or NO_VAL */ diff --git a/src/srun/srun.c b/src/srun/srun.c index 4c90c5a0c4a..d638e2ce8fb 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -390,7 +390,10 @@ static void _setup_one_job_env(opt_t *opt_local, srun_job_t *job, if (job->pack_ntasks != NO_VAL) env->ntasks = job->pack_ntasks; env->task_count = _uint16_array_to_str(job->nhosts, tasks); - env->jobid = job->jobid; + if (job->pack_jobid != NO_VAL) + env->jobid = job->pack_jobid; + else + env->jobid = job->jobid; env->stepid = job->stepid; env->account = job->account; env->qos = job->qos; -- GitLab