From 246fd377c5f6104a6cac43c36feb574dc36493ca Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Wed, 12 Nov 2008 21:27:36 +0000
Subject: [PATCH] pass the cpus_per_task to slurmd and set SLURM_CPUS_PER_TASK
 env var there rather than in sbatch. slurmd needs the value for computing
 node layout.

---
 src/common/env.c                 | 29 ++++++++++++++++-------------
 src/common/slurm_protocol_defs.h |  1 +
 src/common/slurm_protocol_pack.c |  2 ++
 src/sbatch/opt.c                 |  2 --
 src/slurmctld/job_scheduler.c    |  1 +
 src/slurmctld/proc_req.c         |  1 +
 6 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/src/common/env.c b/src/common/env.c
index 2674a8e4d9f..994c4419e9a 100644
--- a/src/common/env.c
+++ b/src/common/env.c
@@ -877,21 +877,16 @@ extern void
 env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch,
 			const char *node_name)
 {
-	char *tmp = getenvp(batch->environment, "SLURM_CPUS_PER_TASK");
+	char *tmp;
 	uint32_t num_nodes = 0;
 	uint32_t num_cpus = 0;
 	int i;
 	slurm_step_layout_t *step_layout = NULL;
-	int cpus_per_task = 1;
 	uint32_t num_tasks = batch->nprocs;
+	uint16_t cpus_per_task;
 
-	if(tmp)
-		cpus_per_task = atoi(tmp);
-
 	/* There is no explicit node count in the batch structure,
-	 * so we need to calculate the node count. We also need to
-	 * figure out the explicit cpu count so we can figure out the
-	 * cpus_per_task. */
+	 * so we need to calculate the node count. */
 	for (i = 0; i < batch->num_cpu_groups; i++) {
 		num_nodes += batch->cpu_count_reps[i];
 		num_cpus += batch->cpu_count_reps[i] * batch->cpus_per_node[i];
@@ -921,17 +916,25 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch,
 	if(num_tasks)
 		env_array_overwrite_fmt(dest, "SLURM_NPROCS", "%u",
 					num_tasks);
-	else
-		num_tasks = num_cpus / cpus_per_task;
+
+	if((batch->cpus_per_task != 0) &&
+	   (batch->cpus_per_task != (uint16_t) NO_VAL))
+		cpus_per_task = batch->cpus_per_task;
+	else
+		cpus_per_task = 1;	/* default value */
+	if (cpus_per_task > 1) {
+		env_array_overwrite_fmt(dest, "SLURM_CPUS_PER_TASK", "%u",
+					cpus_per_task);
+	}
+	num_tasks = num_cpus / cpus_per_task;
 
 	step_layout = slurm_step_layout_create(batch->nodes,
 					       batch->cpus_per_node,
 					       batch->cpu_count_reps,
 					       num_nodes,
 					       num_tasks,
-					       (uint16_t)cpus_per_task,
-					       (uint16_t)
-					       SLURM_DIST_BLOCK,
+					       cpus_per_task,
+					       (uint16_t)SLURM_DIST_BLOCK,
 					       (uint16_t)NO_VAL);
 	tmp = _uint16_array_to_str(step_layout->node_cnt,
 				   step_layout->tasks);
diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h
index 21e33556360..fe3d6e8977d 100644
--- a/src/common/slurm_protocol_defs.h
+++ b/src/common/slurm_protocol_defs.h
@@ -605,6 +605,7 @@ typedef struct batch_job_launch_msg {
 	uint32_t num_cpu_groups;/* elements in below cpu arrays */
 	uint16_t *cpus_per_node;/* cpus per node */
 	uint32_t *cpu_count_reps;/* how many nodes have same cpu count */
+	uint16_t cpus_per_task;	/* number of CPUs requested per task */
 	char *nodes;		/* list of nodes allocated to job_step */
 	char *script;		/* the actual job script, default NONE */
 	char *err;		/* pathname of stderr */
diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c
index 26e60854d59..b9a76daabf7 100644
--- a/src/common/slurm_protocol_pack.c
+++ b/src/common/slurm_protocol_pack.c
@@ -4078,6 +4078,7 @@ _pack_batch_job_launch_msg(batch_job_launch_msg_t * msg, Buf buffer)
 
 	pack8(msg->overcommit, buffer);
 	pack16(msg->acctg_freq, buffer);
+	pack16(msg->cpus_per_task, buffer);
 
 	pack32(msg->num_cpu_groups, buffer);
 	if (msg->num_cpu_groups) {
@@ -4126,6 +4127,7 @@ _unpack_batch_job_launch_msg(batch_job_launch_msg_t ** msg, Buf buffer)
 
 	safe_unpack8(&launch_msg_ptr->overcommit, buffer);
 	safe_unpack16(&launch_msg_ptr->acctg_freq, buffer);
+	safe_unpack16(&launch_msg_ptr->cpus_per_task, buffer);
 
 	safe_unpack32(&launch_msg_ptr->num_cpu_groups, buffer);
 	if (launch_msg_ptr->num_cpu_groups) {
diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c
index 6ca85bcfd56..33398f00c60 100644
--- a/src/sbatch/opt.c
+++ b/src/sbatch/opt.c
@@ -1923,8 +1923,6 @@ static bool _opt_verify(void)
 		else
 			setenvf(NULL, "SLURM_OPEN_MODE", "t");
 	}
-	if (opt.cpus_per_task > 1)
-		setenvfs("SLURM_CPUS_PER_TASK=%d", opt.cpus_per_task);
 	if (opt.dependency)
 		setenvfs("SLURM_JOB_DEPENDENCY=%s", opt.dependency);
 
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index 8e33269e841..ba200211ad7 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -526,6 +526,7 @@ extern void launch_job(struct job_record *job_ptr)
 	launch_msg_ptr->overcommit = job_ptr->details->overcommit;
 	launch_msg_ptr->open_mode = job_ptr->details->open_mode;
 	launch_msg_ptr->acctg_freq = job_ptr->details->acctg_freq;
+	launch_msg_ptr->cpus_per_task = job_ptr->details->cpus_per_task;
 
 	if (make_batch_job_cred(launch_msg_ptr, job_ptr)) {
 		error("aborting batch job %u", job_ptr->job_id);
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index ea1af5c1ae8..b03d1463e1e 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -2802,6 +2802,7 @@ int _launch_batch_step(job_desc_msg_t *job_desc_msg, uid_t uid,
 					  job_desc_msg->environment);
 	launch_msg_ptr->envc = job_desc_msg->env_size;
 	launch_msg_ptr->job_mem = job_desc_msg->job_min_memory;
+	launch_msg_ptr->cpus_per_task = job_desc_msg->cpus_per_task;
 
 	/* _max_nprocs() represents the total number of CPUs available
 	 * for this step (overcommit not supported yet). If job_desc_msg
-- 
GitLab
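The program below is a minimal standalone sketch, not SLURM source, of the cpus_per_task handling the env.c hunk introduces: fall back to 1 when the launch message leaves the field unset (0 or NO_VAL), export SLURM_CPUS_PER_TASK only when it is greater than 1, and derive the task count from the total CPU count. The fake_batch_msg struct, the effective_cpus_per_task() helper, and the local NO_VAL definition are illustrative stand-ins, not definitions taken from the SLURM headers.

/* Standalone illustration of the cpus_per_task default/NO_VAL handling
 * described above. Not SLURM code; all names here are local stand-ins. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NO_VAL (0xfffffffe)		/* stand-in for slurm.h's NO_VAL */

struct fake_batch_msg {			/* stand-in for batch_job_launch_msg_t */
	uint16_t cpus_per_task;
};

/* Pick the per-task CPU count the way the env.c hunk does: use the value
 * from the launch message unless it is 0 or NO_VAL, otherwise default to 1. */
static uint16_t effective_cpus_per_task(const struct fake_batch_msg *batch)
{
	if ((batch->cpus_per_task != 0) &&
	    (batch->cpus_per_task != (uint16_t) NO_VAL))
		return batch->cpus_per_task;
	return 1;	/* default when sbatch did not request --cpus-per-task */
}

int main(void)
{
	struct fake_batch_msg batch = { .cpus_per_task = 4 };
	uint32_t num_cpus = 16;		/* total CPUs in the allocation */
	uint16_t cpt = effective_cpus_per_task(&batch);

	/* Export the variable only when it carries information, as the
	 * patched env_array_for_batch_job() does. */
	if (cpt > 1) {
		char buf[32];
		snprintf(buf, sizeof(buf), "%u", cpt);
		setenv("SLURM_CPUS_PER_TASK", buf, 1);
	}
	printf("SLURM_CPUS_PER_TASK=%s, tasks=%u\n",
	       getenv("SLURM_CPUS_PER_TASK") ?
	       getenv("SLURM_CPUS_PER_TASK") : "(unset)",
	       num_cpus / cpt);
	return 0;
}

Compiled and run with the values shown, this prints "SLURM_CPUS_PER_TASK=4, tasks=4", mirroring how slurmd can now compute the node layout from the message field instead of relying on an environment variable set by sbatch.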