From 822b3da8562b51b62f41634aace827c7909c73c7 Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Wed, 16 Aug 2017 15:52:58 -0600
Subject: [PATCH] Correct some recent pack job env var work

---
 src/api/step_launch.c                  |  2 ++
 src/common/slurm_protocol_defs.h       |  1 +
 src/common/slurm_protocol_pack.c       |  4 ++++
 src/slurmd/slurmstepd/mgr.c            | 11 ++++++-----
 src/slurmd/slurmstepd/slurmstepd_job.c |  1 +
 src/slurmd/slurmstepd/task.c           |  8 ++++----
 src/srun/libsrun/srun_job.c            |  4 ++--
 7 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/src/api/step_launch.c b/src/api/step_launch.c
index e2bb17bd3e8..97116dde97d 100644
--- a/src/api/step_launch.c
+++ b/src/api/step_launch.c
@@ -292,6 +292,7 @@ extern int slurm_step_launch(slurm_step_ctx_t *ctx,
 	launch.job_step_id = ctx->step_resp->job_step_id;
 	launch.pack_ntasks = params->pack_ntasks;
 	launch.pack_offset = params->pack_offset;
+	launch.task_offset = params->task_offset;
 	if (params->env == NULL) {
 		/* if the user didn't specify an environment, grab the
 		 * environment of the running process */
@@ -495,6 +496,7 @@ extern int slurm_step_launch_add(slurm_step_ctx_t *ctx,
 	launch.job_step_id = ctx->step_resp->job_step_id;
 	launch.pack_ntasks = params->pack_ntasks;
 	launch.pack_offset = params->pack_offset;
+	launch.task_offset = params->task_offset;
 	if (params->env == NULL) {
 		/* if the user didn't specify an environment, grab the
 		 * environment of the running process */
diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h
index 55a3fc53a5f..eb8e02c8513 100644
--- a/src/common/slurm_protocol_defs.h
+++ b/src/common/slurm_protocol_defs.h
@@ -768,6 +768,7 @@ typedef struct launch_tasks_request_msg {
 	uint32_t  job_step_id;
 	uint32_t  pack_ntasks;	/* total task count for entire pack job */
 	uint32_t  pack_offset;	/* pack job offset of NO_VAL */
+	uint32_t  task_offset;	/* pack job task ID offset or NO_VAL */
 	uint32_t  nnodes;	/* number of nodes in this job step       */
 	uint32_t  ntasks;	/* number of tasks in this job step   */
 	uint16_t  ntasks_per_board;/* number of tasks to invoke on each board */
diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c
index f7ba69b422e..668735d65f7 100644
--- a/src/common/slurm_protocol_pack.c
+++ b/src/common/slurm_protocol_pack.c
@@ -10252,6 +10252,7 @@ _pack_launch_tasks_request_msg(launch_tasks_request_msg_t * msg, Buf buffer,
 		pack32(msg->job_step_id, buffer);
 		pack32(msg->pack_ntasks, buffer);
 		pack32(msg->pack_offset, buffer);
+		pack32(msg->task_offset, buffer);
 		pack32(msg->ntasks, buffer);
 		pack16(msg->ntasks_per_board, buffer);
 		pack16(msg->ntasks_per_core, buffer);
@@ -10537,6 +10538,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t **
 		safe_unpack32(&msg->job_step_id, buffer);
 		safe_unpack32(&msg->pack_ntasks, buffer);
 		safe_unpack32(&msg->pack_offset, buffer);
+		safe_unpack32(&msg->task_offset, buffer);
 		safe_unpack32(&msg->ntasks, buffer);
 		safe_unpack16(&msg->ntasks_per_board, buffer);
 		safe_unpack16(&msg->ntasks_per_core, buffer);
@@ -10647,6 +10649,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t **
 		safe_unpack32(&msg->job_step_id, buffer);
 		msg->pack_ntasks = NO_VAL;
 		msg->pack_offset = NO_VAL;
+		msg->task_offset = NO_VAL;
 		safe_unpack32(&uint32_tmp, buffer);
 		safe_unpack32(&uint32_tmp, buffer);
 		safe_unpack32(&uint32_tmp, buffer);
@@ -10776,6 +10779,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t **
 		safe_unpack32(&msg->job_step_id, buffer);
 		msg->pack_ntasks = NO_VAL;
 		msg->pack_offset = NO_VAL;
+		msg->task_offset = NO_VAL;
 		safe_unpack32(&msg->ntasks, buffer);
 		safe_unpack16(&msg->ntasks_per_board, buffer);
 		safe_unpack16(&msg->ntasks_per_core, buffer);
diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c
index e65b42abc13..0f9c7f51d35 100644
--- a/src/slurmd/slurmstepd/mgr.c
+++ b/src/slurmd/slurmstepd/mgr.c
@@ -1976,10 +1976,10 @@ _wait_for_any_task(stepd_step_rec_t *job, bool waitflag)
 	jobacctinfo_t *jobacct = NULL;
 	struct rusage rusage;
 	char **tmp_env;
-	uint32_t pack_offset = 0;
+	uint32_t task_offset = 0;
 
-	if (job->pack_offset != NO_VAL)
-		pack_offset = job->pack_offset;
+	if (job->task_offset != NO_VAL)
+		task_offset = job->task_offset;
 	do {
 		pid = wait3(&status, waitflag ? 0 : WNOHANG, &rusage);
 		if (pid == -1) {
@@ -2028,7 +2028,7 @@ _wait_for_any_task(stepd_step_rec_t *job, bool waitflag)
 			_log_task_exit(t->gtid, pid, status);
 			t->exited  = true;
 			t->estatus = status;
-			job->envtp->procid = t->gtid + pack_offset;
+			job->envtp->procid = t->gtid + task_offset;
 			job->envtp->localid = t->id;
 			job->envtp->distribution = -1;
 			job->envtp->batch_flag = job->batch;
@@ -2038,7 +2038,8 @@ _wait_for_any_task(stepd_step_rec_t *job, bool waitflag)
 			 * place or concurrent searches of the environment can
 			 * generate invalid memory references.
 			 */
-			job->envtp->env = env_array_copy((const char **) job->env);
+			job->envtp->env =
+				env_array_copy((const char **) job->env);
 			setup_env(job->envtp, false);
 			tmp_env = job->env;
 			job->env = job->envtp->env;
diff --git a/src/slurmd/slurmstepd/slurmstepd_job.c b/src/slurmd/slurmstepd/slurmstepd_job.c
index fe5dd3e8e3e..148f8210abb 100644
--- a/src/slurmd/slurmstepd/slurmstepd_job.c
+++ b/src/slurmd/slurmstepd/slurmstepd_job.c
@@ -310,6 +310,7 @@ extern stepd_step_rec_t *stepd_step_rec_create(launch_tasks_request_msg_t *msg,
 	job->array_task_id = NO_VAL;
 	job->pack_ntasks = msg->pack_ntasks;	/* Used for env vars */
 	job->pack_offset = msg->pack_offset;	/* Used for env vars & labels */
+	job->task_offset = msg->task_offset;	/* Used for env vars & labels */
 	for (i = 0; i < msg->envc; i++) {
 		/*                         1234567890123456789 */
 		if (!xstrncmp(msg->env[i], "SLURM_ARRAY_JOB_ID=", 19))
diff --git a/src/slurmd/slurmstepd/task.c b/src/slurmd/slurmstepd/task.c
index 2074e291680..f3539ef2c0e 100644
--- a/src/slurmd/slurmstepd/task.c
+++ b/src/slurmd/slurmstepd/task.c
@@ -343,10 +343,10 @@ exec_task(stepd_step_rec_t *job, int i)
 	stepd_step_task_info_t *task = job->task[i];
 	char **tmp_env;
 	int saved_errno;
-	uint32_t pack_offset = 0;
+	uint32_t task_offset = 0;
 
-	if (job->pack_offset != NO_VAL)
-		pack_offset = job->pack_offset;
+	if (job->task_offset != NO_VAL)
+		task_offset = job->task_offset;
 	if (i == 0)
 		_make_tmpdir(job);
 
@@ -360,7 +360,7 @@ exec_task(stepd_step_rec_t *job, int i)
 	job->envtp->stepid = job->stepid;
 	job->envtp->nodeid = job->nodeid;
 	job->envtp->cpus_on_node = job->cpus;
-	job->envtp->procid = task->gtid + pack_offset;
+	job->envtp->procid = task->gtid + task_offset;
 	job->envtp->localid = task->id;
 	job->envtp->task_pid = getpid();
 	job->envtp->distribution = job->task_dist;
diff --git a/src/srun/libsrun/srun_job.c b/src/srun/libsrun/srun_job.c
index baf19929978..73424a5202e 100644
--- a/src/srun/libsrun/srun_job.c
+++ b/src/srun/libsrun/srun_job.c
@@ -703,10 +703,10 @@ static int _create_job_step(srun_job_t *job, bool use_all_cpus,
 				fatal("%s: opt_list too short", __func__);
 			job->pack_offset = pack_offset;
 			if (opt.mpi_combine) {
-				pack_offset++;
 				job->pack_ntasks = pack_ntasks;
 				job->task_offset = task_offset;
-			}
+			} else
+				pack_offset++;
 			rc = create_job_step(job, use_all_cpus, opt_local);
 			if (rc < 0)
 				break;
-- 
GitLab