From b52786b3d38cb8f2cf7dcacee87b087619bc15ff Mon Sep 17 00:00:00 2001
From: "Christopher J. Morrone" <morrone2@llnl.gov>
Date: Tue, 15 Aug 2006 02:04:18 +0000
Subject: [PATCH] Set new environment variables for batch scripts, so slaunch
 will work with sbatch.

---
 src/api/step_launch.c       |  13 ++--
 src/common/env.c            | 114 ++++++++++++++++++------------------
 src/common/env.h            |  54 +++++++++++++----
 src/salloc/salloc.c         |   2 +-
 src/slurmd/slurmstepd/mgr.c |   4 ++
 testsuite/expect/test18.11  |  32 ++++------
 6 files changed, 121 insertions(+), 98 deletions(-)

diff --git a/src/api/step_launch.c b/src/api/step_launch.c
index 293d8c87f53..ee4929ccea4 100644
--- a/src/api/step_launch.c
+++ b/src/api/step_launch.c
@@ -173,18 +173,15 @@ int slurm_step_launch (slurm_step_ctx ctx,
 	env_array_merge(&env, (const char **)params->env);
 	{
 		/* FIXME - hostname and IP need to be user settable */
-		char **step_env = NULL;
 		char *launcher_hostname = xshort_hostname();
 		struct hostent *ent = gethostbyname(launcher_hostname);
 
-		step_env = env_array_create_for_step(
-			ctx->step_resp,
-			launcher_hostname,
-			ctx->launch_state->resp_port[0],
-			ent->h_addr_list[0]);
+		env_array_for_step(&env,
+				   ctx->step_resp,
+				   launcher_hostname,
+				   ctx->launch_state->resp_port[0],
+				   ent->h_addr_list[0]);
 		xfree(launcher_hostname);
-		env_array_merge(&env, (const char **) step_env);
-		env_array_free(step_env);
 	}
 	launch.envc = envcount(env);
 	launch.env = env;
diff --git a/src/common/env.c b/src/common/env.c
index cbdf7871067..1c5cc45063f 100644
--- a/src/common/env.c
+++ b/src/common/env.c
@@ -613,9 +613,11 @@ static char *_uint32_compressed_to_str(uint32_t array_len,
 }
 
 /*
- * Create an array of pointers to environment variables strings relevant
- * to a SLURM job allocation.  The array is terminated by a NULL pointer,
- * and thus is suitable for use by execle() and other env_array_* functions.
+ * Set in "dest" the environment variables relevant to a SLURM job
+ * allocation, overwriting any environment variables of the same name.
+ * If the address pointed to by "dest" is NULL, memory will automatically be
+ * xmalloc'ed.  The array is terminated by a NULL pointer, and thus is
+ * suitable for use by execle() and other env_array_* functions.
  *
  * Sets the variables:
  *	SLURM_JOB_ID
@@ -630,36 +632,37 @@ static char *_uint32_compressed_to_str(uint32_t array_len,
  *	SLURM_TASKS_PER_NODE <- poorly named, really CPUs per node
  *	? probably only needed for users...
  */
-char **
-env_array_create_for_job(const resource_allocation_response_msg_t *alloc)
+void
+env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc)
 {
-	char **ptr;
 	char *tmp;
 
-	ptr = env_array_create();
-	env_array_append(&ptr, "SLURM_JOB_ID", "%u", alloc->job_id);
-	env_array_append(&ptr, "SLURM_JOB_NUM_NODES", "%u", alloc->node_cnt);
-	env_array_append(&ptr, "SLURM_JOB_NODELIST", "%s", alloc->node_list);
+	env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", alloc->job_id);
+	env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u",
+				alloc->node_cnt);
+	env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s",
+				alloc->node_list);
 
 	tmp = _uint32_compressed_to_str((uint32_t)alloc->num_cpu_groups,
 					alloc->cpus_per_node,
 					alloc->cpu_count_reps);
-	env_array_append(&ptr, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp);
+	env_array_overwrite_fmt(dest, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp);
 
 	/* obsolete */
-	env_array_append(&ptr, "SLURM_JOBID", "%u", alloc->job_id);
-	env_array_append(&ptr, "SLURM_NNODES", "%u", alloc->node_cnt);
-	env_array_append(&ptr, "SLURM_NODELIST", "%s", alloc->node_list);
-	env_array_append(&ptr, "SLURM_TASKS_PER_NODE", "%s", tmp);
+	env_array_overwrite_fmt(dest, "SLURM_JOBID", "%u", alloc->job_id);
+	env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", alloc->node_cnt);
+	env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", alloc->node_list);
+	env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp);
 
 	xfree(tmp);
-	return ptr;
 }
 
 /*
- * Create an array of pointers to environment variables strings relevant
- * to a SLURM batch job allocation.  The array is terminated by a NULL pointer,
- * and thus is suitable for use by execle() and other env_array_* functions.
+ * Set in "dest" the environment variables strings relevant to a SLURM batch
+ * job allocation, overwriting any environment variables of the same name.
+ * If the address pointed to by "dest" is NULL, memory will automatically be
+ * xmalloc'ed.  The array is terminated by a NULL pointer, and thus is
+ * suitable for use by execle() and other env_array_* functions.
  *
  * Sets the variables:
  *	SLURM_JOB_ID
@@ -674,10 +677,9 @@ env_array_create_for_job(const resource_allocation_response_msg_t *alloc)
  *	SLURM_TASKS_PER_NODE <- poorly named, really CPUs per node
  *	? probably only needed for users...
  */
-char **
-env_array_create_for_batch_job(const batch_job_launch_msg_t *batch)
+void
+env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch)
 {
-	char **ptr;
 	char *tmp;
 	uint32_t num_nodes = 0;
 	int i;
@@ -688,29 +690,29 @@ env_array_create_for_batch_job(const batch_job_launch_msg_t *batch)
 		num_nodes += batch->cpu_count_reps[i];
 	}
 
-	ptr = env_array_create();
-	env_array_append(&ptr, "SLURM_JOB_ID", "%u", batch->job_id);
-	env_array_append(&ptr, "SLURM_JOB_NUM_NODES", "%u", num_nodes);
-	env_array_append(&ptr, "SLURM_JOB_NODELIST", "%s", batch->nodes);
+	env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", batch->job_id);
+	env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u", num_nodes);
+	env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s", batch->nodes);
 	tmp = _uint32_compressed_to_str((uint32_t)batch->num_cpu_groups,
 					batch->cpus_per_node,
 					batch->cpu_count_reps);
-	env_array_append(&ptr, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp);
+	env_array_overwrite_fmt(dest, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp);
 
 	/* OBSOLETE */
-	env_array_append(&ptr, "SLURM_JOBID", "%u", batch->job_id);
-	env_array_append(&ptr, "SLURM_NNODES", "%u", num_nodes);
-	env_array_append(&ptr, "SLURM_NODELIST", "%s", batch->nodes);
-	env_array_append(&ptr, "SLURM_TASKS_PER_NODE", "%s", tmp);
+	env_array_overwrite_fmt(dest, "SLURM_JOBID", "%u", batch->job_id);
+	env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", num_nodes);
+	env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", batch->nodes);
+	env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp);
 
 	xfree(tmp);
-	return ptr;
 }
 
 /*
- * Create an array of pointers to environment variables strings relevant
- * to a SLURM job step.  The array is terminated by a NULL pointer,
- * and thus is suitable for use by execle() and other env_array_* functions.
+ * Set in "dest the environment variables relevant to a SLURM job step,
+ * overwriting any environment variables of the same name.  If the address
+ * pointed to by "dest" is NULL, memory will automatically be xmalloc'ed.
+ * The array is terminated by a NULL pointer, and thus is suitable for
+ * use by execle() and other env_array_* functions.
  *
  * Sets variables:
  *	SLURM_STEP_ID
@@ -732,47 +734,45 @@ env_array_create_for_batch_job(const batch_job_launch_msg_t *batch)
  *	SLURM_LAUNCH_NODE_IPADDR
  *
  */
-char **
-env_array_create_for_step(const job_step_create_response_msg_t *step,
-			  const char *launcher_hostname,
-			  uint16_t launcher_port,
-			  const char *ip_addr_str)
+void
+env_array_for_step(char ***dest, 
+		   const job_step_create_response_msg_t *step,
+		   const char *launcher_hostname,
+		   uint16_t launcher_port,
+		   const char *ip_addr_str)
 {
-	char **ptr;
 	char *tmp;
 
 	tmp = _uint32_array_to_str(step->step_layout->node_cnt,
 				   step->step_layout->tasks);
-	ptr = env_array_create();
-	env_array_append(&ptr, "SLURM_STEP_ID", "%u", step->job_step_id);
-	env_array_append(&ptr, "SLURM_STEP_NUM_NODES",
+	env_array_overwrite_fmt(dest, "SLURM_STEP_ID", "%u", step->job_step_id);
+	env_array_overwrite_fmt(dest, "SLURM_STEP_NUM_NODES",
 			 "%hu", step->step_layout->node_cnt);
-	env_array_append(&ptr, "SLURM_STEP_NUM_TASKS",
+	env_array_overwrite_fmt(dest, "SLURM_STEP_NUM_TASKS",
 			 "%u", step->step_layout->task_cnt);
-	env_array_append(&ptr, "SLURM_STEP_TASKS_PER_NODE", "%s", tmp);
-	env_array_append(&ptr, "SLURM_STEP_LAUNCHER_HOSTNAME",
+	env_array_overwrite_fmt(dest, "SLURM_STEP_TASKS_PER_NODE", "%s", tmp);
+	env_array_overwrite_fmt(dest, "SLURM_STEP_LAUNCHER_HOSTNAME",
 			 "%s", launcher_hostname);
-	env_array_append(&ptr, "SLURM_STEP_LAUNCHER_PORT",
+	env_array_overwrite_fmt(dest, "SLURM_STEP_LAUNCHER_PORT",
 			 "%hu", launcher_port);
-/* 	env_array_append(&ptr, "SLURM_STEP_LAUNCHER_IPADDR", */
+/* 	env_array_overwrite_fmt(dest, "SLURM_STEP_LAUNCHER_IPADDR", */
 /* 			 "%s", ip_addr_str); */
 
 	/* OBSOLETE */
-	env_array_append(&ptr, "SLURM_STEPID", "%u", step->job_step_id);
-	env_array_append(&ptr, "SLURM_NNODES",
+	env_array_overwrite_fmt(dest, "SLURM_STEPID", "%u", step->job_step_id);
+	env_array_overwrite_fmt(dest, "SLURM_NNODES",
 			 "%hu", step->step_layout->node_cnt);
-	env_array_append(&ptr, "SLURM_NPROCS",
+	env_array_overwrite_fmt(dest, "SLURM_NPROCS",
 			 "%u", step->step_layout->task_cnt);
-	env_array_append(&ptr, "SLURM_TASKS_PER_NODE", "%s", tmp);
-	env_array_append(&ptr, "SLURM_SRUN_COMM_HOST",
+	env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp);
+	env_array_overwrite_fmt(dest, "SLURM_SRUN_COMM_HOST",
 			 "%s", launcher_hostname);
-	env_array_append(&ptr, "SLURM_SRUN_COMM_PORT",
+	env_array_overwrite_fmt(dest, "SLURM_SRUN_COMM_PORT",
 			 "%hu", launcher_port);
-/* 	env_array_append(&ptr, "SLURM_LAUNCH_NODE_IPADDR", */
+/* 	env_array_overwrite_fmt(dest, "SLURM_LAUNCH_NODE_IPADDR", */
 /* 			 "%s", ip_addr_str); */
 
 	xfree(tmp);
-	return ptr;
 }
 
 /*
diff --git a/src/common/env.h b/src/common/env.h
index 736c72403a9..4beaf6bca86 100644
--- a/src/common/env.h
+++ b/src/common/env.h
@@ -32,6 +32,7 @@
 #include <sys/utsname.h>
 
 #include "src/common/macros.h"
+#include "src/common/slurm_protocol_api.h"
 
 typedef struct env_options {
 	int nprocs;		/* --nprocs=n,      -n n	*/
@@ -79,9 +80,11 @@ int     setup_env(env_t *env);
  * Newer environment variable handling scheme
  **********************************************************************/
 /*
- * Create an array of pointers to environment variables strings relevant
- * to a SLURM job allocation.  The array is terminated by a NULL pointer,
- * and thus is suitable for use by execle() and other env_array_* functions.
+ * Set in "dest" the environment variables relevant to a SLURM job
+ * allocation, overwriting any environment variables of the same name.
+ * If the address pointed to by "dest" is NULL, memory will automatically be
+ * xmalloc'ed.  The array is terminated by a NULL pointer, and thus is
+ * suitable for use by execle() and other env_array_* functions.
  *
  * Sets the variables:
  *	SLURM_JOB_ID
@@ -92,13 +95,37 @@ int     setup_env(env_t *env);
  * Sets OBSOLETE variables:
  *	? probably only needed for users...
  */
-char **
-env_array_create_for_job(const resource_allocation_response_msg_t *alloc);
+void env_array_for_job(char ***dest,
+		       const resource_allocation_response_msg_t *alloc);
 
 /*
- * Create an array of pointers to environment variables strings relevant
- * to a SLURM job step.  The array is terminated by a NULL pointer,
- * and thus is suitable for use by execle() and other env_array_* functions.
+ * Set in "dest" the environment variables relevant to a SLURM batch
+ * job allocation, overwriting any environment variables of the same name.
+ * If the address pointed to by "dest" is NULL, memory will automatically be
+ * xmalloc'ed.  The array is terminated by a NULL pointer, and thus is
+ * suitable for use by execle() and other env_array_* functions.
+ *
+ * Sets the variables:
+ *	SLURM_JOB_ID
+ *	SLURM_JOB_NUM_NODES
+ *	SLURM_JOB_NODELIST
+ *	SLURM_JOB_CPUS_PER_NODE
+ *
+ * Sets OBSOLETE variables:
+ *	SLURM_JOBID
+ *	SLURM_NNODES
+ *	SLURM_NODELIST
+ *	SLURM_TASKS_PER_NODE <- poorly named, really CPUs per node
+ *	? probably only needed for users...
+ */
+void env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch);
+
+/*
+ * Set in "dest the environment variables relevant to a SLURM job step,
+ * overwriting any environment variables of the same name.  If the address
+ * pointed to by "dest" is NULL, memory will automatically be xmalloc'ed.
+ * The array is terminated by a NULL pointer, and thus is suitable for
+ * use by execle() and other env_array_* functions.
  *
  * Sets variables:
  *	SLURM_STEP_ID
@@ -120,11 +147,12 @@ env_array_create_for_job(const resource_allocation_response_msg_t *alloc);
  *	SLURM_LAUNCH_NODE_IPADDR
  *
  */
-char **
-env_array_create_for_step(const job_step_create_response_msg_t *step,
-			  const char *launcher_hostname,
-			  uint16_t launcher_port,
-			  const char *ip_addr_str);
+void
+env_array_for_step(char ***dest,
+		   const job_step_create_response_msg_t *step,
+		   const char *launcher_hostname,
+		   uint16_t launcher_port,
+		   const char *ip_addr_str);
 
 /*
  * Return an empty environment variable array (contains a single
diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c
index d985d616e7b..b116e21cd6b 100644
--- a/src/salloc/salloc.c
+++ b/src/salloc/salloc.c
@@ -139,7 +139,7 @@ int main(int argc, char *argv[])
 	/*
 	 * Run the user's command.
 	 */
-	env = env_array_create_for_job(alloc);
+	env_array_for_job(&env, alloc);
 	env_array_set_environment(env);
 	env_array_free(env);
 	pthread_mutex_lock(&allocation_state_lock);
diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c
index f49fe73eb4d..5e9fb4d3088 100644
--- a/src/slurmd/slurmstepd/mgr.c
+++ b/src/slurmd/slurmstepd/mgr.c
@@ -268,6 +268,10 @@ mgr_launch_batch_job_setup(batch_job_launch_msg_t *msg, slurm_addr *cli)
 		return NULL;
 	}
 	
+	/* this is the new way of setting environment variables */
+	env_array_for_batch_job(&job->env, msg);
+
+	/* this is the old way of setting environment variables */
 	job->envtp->nprocs = msg->nprocs;
 	job->envtp->overcommit = msg->overcommit;
 	job->envtp->select_jobinfo = msg->select_jobinfo;
diff --git a/testsuite/expect/test18.11 b/testsuite/expect/test18.11
index e9f2741af84..a43fac4016e 100755
--- a/testsuite/expect/test18.11
+++ b/testsuite/expect/test18.11
@@ -49,10 +49,7 @@ print_header $test_id
 # Spawn a program that includes "task_id" (%t) in stdout file names
 # and confirm they are created
 #
-for {set task_id 0} {$task_id < $task_cnt} {incr task_id} {
-	set file_out_t_glob  "test$test_id.t.$task_id.output"
-	exec $bin_rm -f $file_out_t_glob
-}
+file delete [glob -nocomplain "test$test_id.s.*.output"]
 set timeout $max_job_delay
 spawn $salloc -N1 -t1 $slaunch --remote-output=$file_out_t -n$task_cnt --overcommit $bin_id
 expect {
@@ -85,7 +82,7 @@ for {set task_id 0} {$task_id < $task_cnt} {incr task_id} {
 	if {[wait_for_file $file_out_t_glob] != 0} {
 		set exit_code 1
 	} else {
-		exec $bin_rm -f $file_out_t_glob
+		file delete $file_out_t_glob
 		incr file_cnt
 	}
 }
@@ -106,7 +103,7 @@ expect {
 		set job_id $expect_out(1,string)
 		exp_continue
 	}
-	-re "exit code $number" {
+	-re "exited with return code $number" {
 		send_user "This error is expected, no worries\n"
 		set matches 1
 		exp_continue;
@@ -135,7 +132,7 @@ if {$matches == 0} {
 
 set file_err_j_glob  "test$test_id.j.$job_id.error"
 if {[wait_for_file $file_err_j_glob] == 0} {
-	exec $bin_rm -f $file_err_j_glob
+	file delete $file_err_j_glob
 } else {
 	send_user "\nFAILURE: file format of %j in stderr failed\n"
 	set exit_code 1
@@ -175,7 +172,7 @@ if {[wait_for_file $file_out_J_glob] != 0} {
 	send_user "\nFAILURE: file format of %J in stdout failed\n"
 	set exit_code 1
 } else {
-	exec $bin_rm -f $file_out_J_glob
+	file delete $file_out_J_glob
 }
 
 #
@@ -184,7 +181,7 @@ if {[wait_for_file $file_out_J_glob] != 0} {
 #
 set node_id 0
 set file_out_n_glob  "test$test_id.n.$node_id.output"
-exec $bin_rm -f $file_out_n_glob
+file delete $file_out_n_glob
 
 set job_id   0
 spawn $salloc -N1 -t1 $slaunch --remote-output=$file_out_n -n2 --overcommit $bin_hostname
@@ -221,7 +218,7 @@ for {set node_id 0} {$node_id < 2} {incr node_id} {
 		exec $bin_sleep 1
 	}
 	if [file exists $file_out_n_glob] {
-		exec $bin_rm -f $file_out_n_glob
+		file delete $file_out_n_glob
 		incr file_cnt
 	}
 }
@@ -240,10 +237,7 @@ make_bash_script $file_in "
   $slaunch -n4 --overcommit --remote-output=$file_out_s $bin_hostname
 "
 
-for {set step_id 0} {$step_id < 4} {incr step_id} {
-	set file_out_s_glob  "test$test_id.s.$step_id.output"
-	exec $bin_rm -f $file_out_s_glob
-}
+file delete [glob -nocomplain "test$test_id.s.\[0-4].output"]
 
 if { [test_bluegene] } {
 	set node_cnt 32-2048
@@ -258,15 +252,15 @@ if { [test_bluegene] } {
 }
 
 set job_id   0
-spawn $sbatch --output=/dev/null -N$node_cnt -t1 $file_in
+set sbatch_pid [spawn $sbatch --output=/dev/null -N$node_cnt -t1 $file_in]
 expect {
 	-re "Submitted batch job ($number)" {
 		set job_id $expect_out(1,string)
 		exp_continue
 	}
 	timeout {
-		send_user "\nFAILURE: srun not responding\n"
-		kill_srun
+		send_user "\nFAILURE: sbatch not responding\n"
+		slow_kill $sbatch_pid
 		exit 1
 	}
 	eof {
@@ -294,7 +288,7 @@ for {set step_id 0} {$step_id < 3} {incr step_id} {
 		exec $bin_sleep 1
 	}
 	if [file exists $file_out_s_glob] {
-		exec $bin_rm -f $file_out_s_glob
+		file delete $file_out_s_glob
 		incr file_cnt
 	}
 }
@@ -308,7 +302,7 @@ if {$file_cnt != 2} {
 # Post-processing
 #
 if {$exit_code == 0} {
-	exec $bin_rm -f $file_in
+	file delete $file_in
 	send_user "\nSUCCESS\n"
 }
 exit $exit_code
-- 
GitLab