diff --git a/src/common/env.c b/src/common/env.c index 2674a8e4d9fd2e3738896bb6310a68e42bdaa299..994c4419e9a7ec41e3b4cbe6c545df09ed94814b 100644 --- a/src/common/env.c +++ b/src/common/env.c @@ -877,21 +877,16 @@ extern void env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, const char *node_name) { - char *tmp = getenvp(batch->environment, "SLURM_CPUS_PER_TASK"); + char *tmp; uint32_t num_nodes = 0; uint32_t num_cpus = 0; int i; slurm_step_layout_t *step_layout = NULL; - int cpus_per_task = 1; uint32_t num_tasks = batch->nprocs; + uint16_t cpus_per_task; - if(tmp) - cpus_per_task = atoi(tmp); - /* There is no explicit node count in the batch structure, - * so we need to calculate the node count. We also need to - * figure out the explicit cpu count so we can figure out the - * cpus_per_task. */ + * so we need to calculate the node count. */ for (i = 0; i < batch->num_cpu_groups; i++) { num_nodes += batch->cpu_count_reps[i]; num_cpus += batch->cpu_count_reps[i] * batch->cpus_per_node[i]; @@ -921,17 +916,25 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, if(num_tasks) env_array_overwrite_fmt(dest, "SLURM_NPROCS", "%u", num_tasks); - else - num_tasks = num_cpus / cpus_per_task; + + if((batch->cpus_per_task != 0) && + (batch->cpus_per_task != (uint16_t) NO_VAL)) + cpus_per_task = batch->cpus_per_task; + else + cpus_per_task = 1; /* default value */ + if (cpus_per_task > 1) { + env_array_overwrite_fmt(dest, "SLURM_CPUS_PER_TASK", "%u", + cpus_per_task); + } + num_tasks = num_cpus / cpus_per_task; step_layout = slurm_step_layout_create(batch->nodes, batch->cpus_per_node, batch->cpu_count_reps, num_nodes, num_tasks, - (uint16_t)cpus_per_task, - (uint16_t) - SLURM_DIST_BLOCK, + cpus_per_task, + (uint16_t)SLURM_DIST_BLOCK, (uint16_t)NO_VAL); tmp = _uint16_array_to_str(step_layout->node_cnt, step_layout->tasks); diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index 21e3355636086ba6f67caefacb3f8a0795bf4304..fe3d6e8977de3737e38f1c22d116254a579ba57f 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -605,6 +605,7 @@ typedef struct batch_job_launch_msg { uint32_t num_cpu_groups;/* elements in below cpu arrays */ uint16_t *cpus_per_node;/* cpus per node */ uint32_t *cpu_count_reps;/* how many nodes have same cpu count */ + uint16_t cpus_per_task; /* number of CPUs requested per task */ char *nodes; /* list of nodes allocated to job_step */ char *script; /* the actual job script, default NONE */ char *err; /* pathname of stderr */ diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 26e60854d59b5167d5a23f3dddbd0b9ce8b32ead..b9a76daabf7448bffed0e4e210b366f29d197480 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -4078,6 +4078,7 @@ _pack_batch_job_launch_msg(batch_job_launch_msg_t * msg, Buf buffer) pack8(msg->overcommit, buffer); pack16(msg->acctg_freq, buffer); + pack16(msg->cpus_per_task, buffer); pack32(msg->num_cpu_groups, buffer); if (msg->num_cpu_groups) { @@ -4126,6 +4127,7 @@ _unpack_batch_job_launch_msg(batch_job_launch_msg_t ** msg, Buf buffer) safe_unpack8(&launch_msg_ptr->overcommit, buffer); safe_unpack16(&launch_msg_ptr->acctg_freq, buffer); + safe_unpack16(&launch_msg_ptr->cpus_per_task, buffer); safe_unpack32(&launch_msg_ptr->num_cpu_groups, buffer); if (launch_msg_ptr->num_cpu_groups) { diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index 6ca85bcfd56d1ba6aa3e8799a68b65f1164b591d..33398f00c606ce51a93221f89140b627c6217509 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -1923,8 +1923,6 @@ static bool _opt_verify(void) else setenvf(NULL, "SLURM_OPEN_MODE", "t"); } - if (opt.cpus_per_task > 1) - setenvfs("SLURM_CPUS_PER_TASK=%d", opt.cpus_per_task); if (opt.dependency) setenvfs("SLURM_JOB_DEPENDENCY=%s", opt.dependency); diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 8e33269e84146db00a7fa6c2fc4b929f37f07e3f..ba200211ad776421611552178f607f6ecc37b066 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -526,6 +526,7 @@ extern void launch_job(struct job_record *job_ptr) launch_msg_ptr->overcommit = job_ptr->details->overcommit; launch_msg_ptr->open_mode = job_ptr->details->open_mode; launch_msg_ptr->acctg_freq = job_ptr->details->acctg_freq; + launch_msg_ptr->cpus_per_task = job_ptr->details->cpus_per_task; if (make_batch_job_cred(launch_msg_ptr, job_ptr)) { error("aborting batch job %u", job_ptr->job_id); diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index ea1af5c1ae89d82f7605cd8134a24efe5b4efa7b..b03d1463e1e15e04c9e65a8fe22ee0713c271ba3 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -2802,6 +2802,7 @@ int _launch_batch_step(job_desc_msg_t *job_desc_msg, uid_t uid, job_desc_msg->environment); launch_msg_ptr->envc = job_desc_msg->env_size; launch_msg_ptr->job_mem = job_desc_msg->job_min_memory; + launch_msg_ptr->cpus_per_task = job_desc_msg->cpus_per_task; /* _max_nprocs() represents the total number of CPUs available * for this step (overcommit not supported yet). If job_desc_msg