From b52786b3d38cb8f2cf7dcacee87b087619bc15ff Mon Sep 17 00:00:00 2001 From: "Christopher J. Morrone" <morrone2@llnl.gov> Date: Tue, 15 Aug 2006 02:04:18 +0000 Subject: [PATCH] Set new environment variables for batch scripts, so slaunch will work with sbatch. --- src/api/step_launch.c | 13 ++-- src/common/env.c | 114 ++++++++++++++++++------------------ src/common/env.h | 54 +++++++++++++---- src/salloc/salloc.c | 2 +- src/slurmd/slurmstepd/mgr.c | 4 ++ testsuite/expect/test18.11 | 32 ++++------ 6 files changed, 121 insertions(+), 98 deletions(-) diff --git a/src/api/step_launch.c b/src/api/step_launch.c index 293d8c87f53..ee4929ccea4 100644 --- a/src/api/step_launch.c +++ b/src/api/step_launch.c @@ -173,18 +173,15 @@ int slurm_step_launch (slurm_step_ctx ctx, env_array_merge(&env, (const char **)params->env); { /* FIXME - hostname and IP need to be user settable */ - char **step_env = NULL; char *launcher_hostname = xshort_hostname(); struct hostent *ent = gethostbyname(launcher_hostname); - step_env = env_array_create_for_step( - ctx->step_resp, - launcher_hostname, - ctx->launch_state->resp_port[0], - ent->h_addr_list[0]); + env_array_for_step(&env, + ctx->step_resp, + launcher_hostname, + ctx->launch_state->resp_port[0], + ent->h_addr_list[0]); xfree(launcher_hostname); - env_array_merge(&env, (const char **) step_env); - env_array_free(step_env); } launch.envc = envcount(env); launch.env = env; diff --git a/src/common/env.c b/src/common/env.c index cbdf7871067..1c5cc45063f 100644 --- a/src/common/env.c +++ b/src/common/env.c @@ -613,9 +613,11 @@ static char *_uint32_compressed_to_str(uint32_t array_len, } /* - * Create an array of pointers to environment variables strings relevant - * to a SLURM job allocation. The array is terminated by a NULL pointer, - * and thus is suitable for use by execle() and other env_array_* functions. + * Set in "dest" the environment variables relevant to a SLURM job + * allocation, overwriting any environment variables of the same name. + * If the address pointed to by "dest" is NULL, memory will automatically be + * xmalloc'ed. The array is terminated by a NULL pointer, and thus is + * suitable for use by execle() and other env_array_* functions. * * Sets the variables: * SLURM_JOB_ID @@ -630,36 +632,37 @@ static char *_uint32_compressed_to_str(uint32_t array_len, * SLURM_TASKS_PER_NODE <- poorly named, really CPUs per node * ? probably only needed for users... */ -char ** -env_array_create_for_job(const resource_allocation_response_msg_t *alloc) +void +env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc) { - char **ptr; char *tmp; - ptr = env_array_create(); - env_array_append(&ptr, "SLURM_JOB_ID", "%u", alloc->job_id); - env_array_append(&ptr, "SLURM_JOB_NUM_NODES", "%u", alloc->node_cnt); - env_array_append(&ptr, "SLURM_JOB_NODELIST", "%s", alloc->node_list); + env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", alloc->job_id); + env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u", + alloc->node_cnt); + env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s", + alloc->node_list); tmp = _uint32_compressed_to_str((uint32_t)alloc->num_cpu_groups, alloc->cpus_per_node, alloc->cpu_count_reps); - env_array_append(&ptr, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp); + env_array_overwrite_fmt(dest, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp); /* obsolete */ - env_array_append(&ptr, "SLURM_JOBID", "%u", alloc->job_id); - env_array_append(&ptr, "SLURM_NNODES", "%u", alloc->node_cnt); - env_array_append(&ptr, "SLURM_NODELIST", "%s", alloc->node_list); - env_array_append(&ptr, "SLURM_TASKS_PER_NODE", "%s", tmp); + env_array_overwrite_fmt(dest, "SLURM_JOBID", "%u", alloc->job_id); + env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", alloc->node_cnt); + env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", alloc->node_list); + env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp); xfree(tmp); - return ptr; } /* - * Create an array of pointers to environment variables strings relevant - * to a SLURM batch job allocation. The array is terminated by a NULL pointer, - * and thus is suitable for use by execle() and other env_array_* functions. + * Set in "dest" the environment variables strings relevant to a SLURM batch + * job allocation, overwriting any environment variables of the same name. + * If the address pointed to by "dest" is NULL, memory will automatically be + * xmalloc'ed. The array is terminated by a NULL pointer, and thus is + * suitable for use by execle() and other env_array_* functions. * * Sets the variables: * SLURM_JOB_ID @@ -674,10 +677,9 @@ env_array_create_for_job(const resource_allocation_response_msg_t *alloc) * SLURM_TASKS_PER_NODE <- poorly named, really CPUs per node * ? probably only needed for users... */ -char ** -env_array_create_for_batch_job(const batch_job_launch_msg_t *batch) +void +env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch) { - char **ptr; char *tmp; uint32_t num_nodes = 0; int i; @@ -688,29 +690,29 @@ env_array_create_for_batch_job(const batch_job_launch_msg_t *batch) num_nodes += batch->cpu_count_reps[i]; } - ptr = env_array_create(); - env_array_append(&ptr, "SLURM_JOB_ID", "%u", batch->job_id); - env_array_append(&ptr, "SLURM_JOB_NUM_NODES", "%u", num_nodes); - env_array_append(&ptr, "SLURM_JOB_NODELIST", "%s", batch->nodes); + env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", batch->job_id); + env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u", num_nodes); + env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s", batch->nodes); tmp = _uint32_compressed_to_str((uint32_t)batch->num_cpu_groups, batch->cpus_per_node, batch->cpu_count_reps); - env_array_append(&ptr, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp); + env_array_overwrite_fmt(dest, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp); /* OBSOLETE */ - env_array_append(&ptr, "SLURM_JOBID", "%u", batch->job_id); - env_array_append(&ptr, "SLURM_NNODES", "%u", num_nodes); - env_array_append(&ptr, "SLURM_NODELIST", "%s", batch->nodes); - env_array_append(&ptr, "SLURM_TASKS_PER_NODE", "%s", tmp); + env_array_overwrite_fmt(dest, "SLURM_JOBID", "%u", batch->job_id); + env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", num_nodes); + env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", batch->nodes); + env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp); xfree(tmp); - return ptr; } /* - * Create an array of pointers to environment variables strings relevant - * to a SLURM job step. The array is terminated by a NULL pointer, - * and thus is suitable for use by execle() and other env_array_* functions. + * Set in "dest the environment variables relevant to a SLURM job step, + * overwriting any environment variables of the same name. If the address + * pointed to by "dest" is NULL, memory will automatically be xmalloc'ed. + * The array is terminated by a NULL pointer, and thus is suitable for + * use by execle() and other env_array_* functions. * * Sets variables: * SLURM_STEP_ID @@ -732,47 +734,45 @@ env_array_create_for_batch_job(const batch_job_launch_msg_t *batch) * SLURM_LAUNCH_NODE_IPADDR * */ -char ** -env_array_create_for_step(const job_step_create_response_msg_t *step, - const char *launcher_hostname, - uint16_t launcher_port, - const char *ip_addr_str) +void +env_array_for_step(char ***dest, + const job_step_create_response_msg_t *step, + const char *launcher_hostname, + uint16_t launcher_port, + const char *ip_addr_str) { - char **ptr; char *tmp; tmp = _uint32_array_to_str(step->step_layout->node_cnt, step->step_layout->tasks); - ptr = env_array_create(); - env_array_append(&ptr, "SLURM_STEP_ID", "%u", step->job_step_id); - env_array_append(&ptr, "SLURM_STEP_NUM_NODES", + env_array_overwrite_fmt(dest, "SLURM_STEP_ID", "%u", step->job_step_id); + env_array_overwrite_fmt(dest, "SLURM_STEP_NUM_NODES", "%hu", step->step_layout->node_cnt); - env_array_append(&ptr, "SLURM_STEP_NUM_TASKS", + env_array_overwrite_fmt(dest, "SLURM_STEP_NUM_TASKS", "%u", step->step_layout->task_cnt); - env_array_append(&ptr, "SLURM_STEP_TASKS_PER_NODE", "%s", tmp); - env_array_append(&ptr, "SLURM_STEP_LAUNCHER_HOSTNAME", + env_array_overwrite_fmt(dest, "SLURM_STEP_TASKS_PER_NODE", "%s", tmp); + env_array_overwrite_fmt(dest, "SLURM_STEP_LAUNCHER_HOSTNAME", "%s", launcher_hostname); - env_array_append(&ptr, "SLURM_STEP_LAUNCHER_PORT", + env_array_overwrite_fmt(dest, "SLURM_STEP_LAUNCHER_PORT", "%hu", launcher_port); -/* env_array_append(&ptr, "SLURM_STEP_LAUNCHER_IPADDR", */ +/* env_array_overwrite_fmt(dest, "SLURM_STEP_LAUNCHER_IPADDR", */ /* "%s", ip_addr_str); */ /* OBSOLETE */ - env_array_append(&ptr, "SLURM_STEPID", "%u", step->job_step_id); - env_array_append(&ptr, "SLURM_NNODES", + env_array_overwrite_fmt(dest, "SLURM_STEPID", "%u", step->job_step_id); + env_array_overwrite_fmt(dest, "SLURM_NNODES", "%hu", step->step_layout->node_cnt); - env_array_append(&ptr, "SLURM_NPROCS", + env_array_overwrite_fmt(dest, "SLURM_NPROCS", "%u", step->step_layout->task_cnt); - env_array_append(&ptr, "SLURM_TASKS_PER_NODE", "%s", tmp); - env_array_append(&ptr, "SLURM_SRUN_COMM_HOST", + env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp); + env_array_overwrite_fmt(dest, "SLURM_SRUN_COMM_HOST", "%s", launcher_hostname); - env_array_append(&ptr, "SLURM_SRUN_COMM_PORT", + env_array_overwrite_fmt(dest, "SLURM_SRUN_COMM_PORT", "%hu", launcher_port); -/* env_array_append(&ptr, "SLURM_LAUNCH_NODE_IPADDR", */ +/* env_array_overwrite_fmt(dest, "SLURM_LAUNCH_NODE_IPADDR", */ /* "%s", ip_addr_str); */ xfree(tmp); - return ptr; } /* diff --git a/src/common/env.h b/src/common/env.h index 736c72403a9..4beaf6bca86 100644 --- a/src/common/env.h +++ b/src/common/env.h @@ -32,6 +32,7 @@ #include <sys/utsname.h> #include "src/common/macros.h" +#include "src/common/slurm_protocol_api.h" typedef struct env_options { int nprocs; /* --nprocs=n, -n n */ @@ -79,9 +80,11 @@ int setup_env(env_t *env); * Newer environment variable handling scheme **********************************************************************/ /* - * Create an array of pointers to environment variables strings relevant - * to a SLURM job allocation. The array is terminated by a NULL pointer, - * and thus is suitable for use by execle() and other env_array_* functions. + * Set in "dest" the environment variables relevant to a SLURM job + * allocation, overwriting any environment variables of the same name. + * If the address pointed to by "dest" is NULL, memory will automatically be + * xmalloc'ed. The array is terminated by a NULL pointer, and thus is + * suitable for use by execle() and other env_array_* functions. * * Sets the variables: * SLURM_JOB_ID @@ -92,13 +95,37 @@ int setup_env(env_t *env); * Sets OBSOLETE variables: * ? probably only needed for users... */ -char ** -env_array_create_for_job(const resource_allocation_response_msg_t *alloc); +void env_array_for_job(char ***dest, + const resource_allocation_response_msg_t *alloc); /* - * Create an array of pointers to environment variables strings relevant - * to a SLURM job step. The array is terminated by a NULL pointer, - * and thus is suitable for use by execle() and other env_array_* functions. + * Set in "dest" the environment variables relevant to a SLURM batch + * job allocation, overwriting any environment variables of the same name. + * If the address pointed to by "dest" is NULL, memory will automatically be + * xmalloc'ed. The array is terminated by a NULL pointer, and thus is + * suitable for use by execle() and other env_array_* functions. + * + * Sets the variables: + * SLURM_JOB_ID + * SLURM_JOB_NUM_NODES + * SLURM_JOB_NODELIST + * SLURM_JOB_CPUS_PER_NODE + * + * Sets OBSOLETE variables: + * SLURM_JOBID + * SLURM_NNODES + * SLURM_NODELIST + * SLURM_TASKS_PER_NODE <- poorly named, really CPUs per node + * ? probably only needed for users... + */ +void env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch); + +/* + * Set in "dest the environment variables relevant to a SLURM job step, + * overwriting any environment variables of the same name. If the address + * pointed to by "dest" is NULL, memory will automatically be xmalloc'ed. + * The array is terminated by a NULL pointer, and thus is suitable for + * use by execle() and other env_array_* functions. * * Sets variables: * SLURM_STEP_ID @@ -120,11 +147,12 @@ env_array_create_for_job(const resource_allocation_response_msg_t *alloc); * SLURM_LAUNCH_NODE_IPADDR * */ -char ** -env_array_create_for_step(const job_step_create_response_msg_t *step, - const char *launcher_hostname, - uint16_t launcher_port, - const char *ip_addr_str); +void +env_array_for_step(char ***dest, + const job_step_create_response_msg_t *step, + const char *launcher_hostname, + uint16_t launcher_port, + const char *ip_addr_str); /* * Return an empty environment variable array (contains a single diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index d985d616e7b..b116e21cd6b 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -139,7 +139,7 @@ int main(int argc, char *argv[]) /* * Run the user's command. */ - env = env_array_create_for_job(alloc); + env_array_for_job(&env, alloc); env_array_set_environment(env); env_array_free(env); pthread_mutex_lock(&allocation_state_lock); diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index f49fe73eb4d..5e9fb4d3088 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -268,6 +268,10 @@ mgr_launch_batch_job_setup(batch_job_launch_msg_t *msg, slurm_addr *cli) return NULL; } + /* this is the new way of setting environment variables */ + env_array_for_batch_job(&job->env, msg); + + /* this is the old way of setting environment variables */ job->envtp->nprocs = msg->nprocs; job->envtp->overcommit = msg->overcommit; job->envtp->select_jobinfo = msg->select_jobinfo; diff --git a/testsuite/expect/test18.11 b/testsuite/expect/test18.11 index e9f2741af84..a43fac4016e 100755 --- a/testsuite/expect/test18.11 +++ b/testsuite/expect/test18.11 @@ -49,10 +49,7 @@ print_header $test_id # Spawn a program that includes "task_id" (%t) in stdout file names # and confirm they are created # -for {set task_id 0} {$task_id < $task_cnt} {incr task_id} { - set file_out_t_glob "test$test_id.t.$task_id.output" - exec $bin_rm -f $file_out_t_glob -} +file delete [glob -nocomplain "test$test_id.s.*.output"] set timeout $max_job_delay spawn $salloc -N1 -t1 $slaunch --remote-output=$file_out_t -n$task_cnt --overcommit $bin_id expect { @@ -85,7 +82,7 @@ for {set task_id 0} {$task_id < $task_cnt} {incr task_id} { if {[wait_for_file $file_out_t_glob] != 0} { set exit_code 1 } else { - exec $bin_rm -f $file_out_t_glob + file delete $file_out_t_glob incr file_cnt } } @@ -106,7 +103,7 @@ expect { set job_id $expect_out(1,string) exp_continue } - -re "exit code $number" { + -re "exited with return code $number" { send_user "This error is expected, no worries\n" set matches 1 exp_continue; @@ -135,7 +132,7 @@ if {$matches == 0} { set file_err_j_glob "test$test_id.j.$job_id.error" if {[wait_for_file $file_err_j_glob] == 0} { - exec $bin_rm -f $file_err_j_glob + file delete $file_err_j_glob } else { send_user "\nFAILURE: file format of %j in stderr failed\n" set exit_code 1 @@ -175,7 +172,7 @@ if {[wait_for_file $file_out_J_glob] != 0} { send_user "\nFAILURE: file format of %J in stdout failed\n" set exit_code 1 } else { - exec $bin_rm -f $file_out_J_glob + file delete $file_out_J_glob } # @@ -184,7 +181,7 @@ if {[wait_for_file $file_out_J_glob] != 0} { # set node_id 0 set file_out_n_glob "test$test_id.n.$node_id.output" -exec $bin_rm -f $file_out_n_glob +file delete $file_out_n_glob set job_id 0 spawn $salloc -N1 -t1 $slaunch --remote-output=$file_out_n -n2 --overcommit $bin_hostname @@ -221,7 +218,7 @@ for {set node_id 0} {$node_id < 2} {incr node_id} { exec $bin_sleep 1 } if [file exists $file_out_n_glob] { - exec $bin_rm -f $file_out_n_glob + file delete $file_out_n_glob incr file_cnt } } @@ -240,10 +237,7 @@ make_bash_script $file_in " $slaunch -n4 --overcommit --remote-output=$file_out_s $bin_hostname " -for {set step_id 0} {$step_id < 4} {incr step_id} { - set file_out_s_glob "test$test_id.s.$step_id.output" - exec $bin_rm -f $file_out_s_glob -} +file delete [glob -nocomplain "test$test_id.s.\[0-4].output"] if { [test_bluegene] } { set node_cnt 32-2048 @@ -258,15 +252,15 @@ if { [test_bluegene] } { } set job_id 0 -spawn $sbatch --output=/dev/null -N$node_cnt -t1 $file_in +set sbatch_pid [spawn $sbatch --output=/dev/null -N$node_cnt -t1 $file_in] expect { -re "Submitted batch job ($number)" { set job_id $expect_out(1,string) exp_continue } timeout { - send_user "\nFAILURE: srun not responding\n" - kill_srun + send_user "\nFAILURE: sbatch not responding\n" + slow_kill $sbatch_pid exit 1 } eof { @@ -294,7 +288,7 @@ for {set step_id 0} {$step_id < 3} {incr step_id} { exec $bin_sleep 1 } if [file exists $file_out_s_glob] { - exec $bin_rm -f $file_out_s_glob + file delete $file_out_s_glob incr file_cnt } } @@ -308,7 +302,7 @@ if {$file_cnt != 2} { # Post-processing # if {$exit_code == 0} { - exec $bin_rm -f $file_in + file delete $file_in send_user "\nSUCCESS\n" } exit $exit_code -- GitLab