From 6ab3bfd560e253afa8746c3957d6eb9218edb49c Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Tue, 11 Nov 2008 23:13:45 +0000
Subject: [PATCH] svn merge -r15647:15652
 https://eris.llnl.gov/svn/slurm/branches/slurm-1.3

---
 NEWS                          |  3 ++
 doc/man/man1/srun.1           |  3 ++
 src/common/env.c              | 96 ++++++++++++++++++++---------------
 src/slurmctld/job_scheduler.c |  5 +-
 src/slurmctld/slurmctld.h     |  1 +
 src/slurmctld/step_mgr.c      | 28 ++++++----
 src/srun/opt.c                |  4 +-
 7 files changed, 85 insertions(+), 55 deletions(-)

diff --git a/NEWS b/NEWS
index c60dc187012..7d1cdfe8c76 100644
--- a/NEWS
+++ b/NEWS
@@ -103,6 +103,9 @@ documents those changes that are of interest to users and admins.
  -- Propagate --cpus-per-task parameter from salloc or sbatch input line
     to the SLURM_CPUS_PER_TASK environment variable in the spawned shell
     for srun to use.
+ -- Add support for srun --cpus-per-task=0. This can be used to spawn tasks
+    without allocating resources for the job step from the job's allocation
+    when running multiple job steps with the --exclusive option.
 
 * Changes in SLURM 1.3.10
 =========================
diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1
index d3d35a154ec..af7e88547f7 100644
--- a/doc/man/man1/srun.1
+++ b/doc/man/man1/srun.1
@@ -197,6 +197,9 @@ per node, a job request for 4 nodes and 3 CPUs per task may be allocated 3 or
 6 CPUs per node (1 or 2 tasks per node) depending upon resource consumption
 by other jobs. Such a job may be unable to execute more than a total of 4
 tasks.
+This option may also be useful to spawn tasks without allocating
+resources to the job step from the job's allocation when running
+multiple job steps with the \fB\-\-exclusive\fR option.
 
 .TP
 \fB\-\-comment\fR=<\fIstring\fR>
diff --git a/src/common/env.c b/src/common/env.c
index 74c51e93cf1..2674a8e4d9f 100644
--- a/src/common/env.c
+++ b/src/common/env.c
@@ -782,6 +782,7 @@ env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc,
 {
 	char *bgl_part_id = NULL, *tmp;
 	slurm_step_layout_t *step_layout = NULL;
+	uint32_t num_tasks = desc->num_tasks;
 
 	env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", alloc->job_id);
 	env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u",
@@ -815,23 +816,37 @@ env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc,
 	env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", alloc->node_cnt);
 	env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", alloc->node_list);
 
-	if(desc->num_tasks != NO_VAL) {
+	if(num_tasks == NO_VAL) {
 		/* If we know how many tasks we are going to do then
 		   we set SLURM_TASKS_PER_NODE */
-		step_layout = slurm_step_layout_create(alloc->node_list,
-						       alloc->cpus_per_node,
-						       alloc->cpu_count_reps,
-						       alloc->node_cnt,
-						       desc->num_tasks,
-						       desc->cpus_per_task,
-						       desc->task_dist,
-						       desc->plane_size);
-		tmp = _uint16_array_to_str(step_layout->node_cnt,
-					   step_layout->tasks);
-		slurm_step_layout_destroy(step_layout);
-		env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE",
-					"%s", tmp);
+		int i=0;
+		/* If no tasks were given we can figure it out here
+		 * by totalling up the cpus and then dividing by the
+		 * number of cpus per task */
+
+		num_tasks = 0;
+		for (i = 0; i < alloc->num_cpu_groups; i++) {
+			num_tasks += alloc->cpu_count_reps[i]
+				* alloc->cpus_per_node[i];
+		}
+		if((int)desc->cpus_per_task > 1
+		   && desc->cpus_per_task != (uint16_t)NO_VAL)
+			num_tasks /= desc->cpus_per_task;
+		//num_tasks = desc->num_procs;
 	}
+	//info("got %d and %d", num_tasks, desc->cpus_per_task);
+	step_layout = slurm_step_layout_create(alloc->node_list,
+					       alloc->cpus_per_node,
+					       alloc->cpu_count_reps,
+					       alloc->node_cnt,
+					       num_tasks,
+					       desc->cpus_per_task,
+					       desc->task_dist,
+					       desc->plane_size);
+	tmp = _uint16_array_to_str(step_layout->node_cnt,
+				   step_layout->tasks);
+	slurm_step_layout_destroy(step_layout);
+	env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp);
 	xfree(tmp);
 }
 
@@ -862,12 +877,17 @@ extern void
 env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch,
 			const char *node_name)
 {
-	char *tmp;
+	char *tmp = getenvp(batch->environment, "SLURM_CPUS_PER_TASK");
 	uint32_t num_nodes = 0;
 	uint32_t num_cpus = 0;
 	int i;
 	slurm_step_layout_t *step_layout = NULL;
+	int cpus_per_task = 1;
+	uint32_t num_tasks = batch->nprocs;
+
+	if(tmp)
+		cpus_per_task = atoi(tmp);
 
 	/* There is no explicit node count in the batch structure,
 	 * so we need to calculate the node count. We also need to
 	 * figure out the explicit cpu count so we can figure out the
@@ -898,34 +918,26 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch,
 	env_array_overwrite_fmt(dest, "SLURM_JOBID", "%u", batch->job_id);
 	env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", num_nodes);
 	env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", batch->nodes);
-
-	if(batch->nprocs) {
-		/* we can figure out the cpus_per_task here by
-		 * reversing what happens in sbatch */
-		int cpus_per_task = num_cpus / batch->nprocs;
-/* 		info(" we have %u / %u = %u", num_cpus, */
-/* 		     batch->nprocs, cpus_per_task); */
-		if(cpus_per_task < 1)
-			cpus_per_task = 1;
-
+	if(num_tasks)
 		env_array_overwrite_fmt(dest, "SLURM_NPROCS", "%u",
-					batch->nprocs);
-		step_layout = slurm_step_layout_create(batch->nodes,
-						       batch->cpus_per_node,
-						       batch->cpu_count_reps,
-						       num_nodes,
-						       batch->nprocs,
-						       (uint16_t)cpus_per_task,
-						       (uint16_t)
-						       SLURM_DIST_BLOCK,
-						       (uint16_t)NO_VAL);
-		tmp = _uint16_array_to_str(step_layout->node_cnt,
-					   step_layout->tasks);
-		slurm_step_layout_destroy(step_layout);
-		env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE",
-					"%s", tmp);
-		xfree(tmp);
-	}
+					num_tasks);
+	else
+		num_tasks = num_cpus / cpus_per_task;
+
+	step_layout = slurm_step_layout_create(batch->nodes,
+					       batch->cpus_per_node,
+					       batch->cpu_count_reps,
+					       num_nodes,
+					       num_tasks,
+					       (uint16_t)cpus_per_task,
+					       (uint16_t)
+					       SLURM_DIST_BLOCK,
+					       (uint16_t)NO_VAL);
+	tmp = _uint16_array_to_str(step_layout->node_cnt,
+				   step_layout->tasks);
+	slurm_step_layout_destroy(step_layout);
+	env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp);
+	xfree(tmp);
 }
 
 /*
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index 7e0e0ae8a9f..8e33269e841 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -85,7 +85,7 @@ static char ** _xduparray(uint16_t size, char ** array);
  * RET number of entries in job_queue
  * NOTE: the buffer at *job_queue must be xfreed by the caller
  */
-static int _build_user_job_list(uint32_t user_id,char* job_name,
+static int _build_user_job_list(uint32_t user_id, char* job_name,
				struct job_queue **job_queue)
 {
 	ListIterator job_iterator;
@@ -102,7 +102,8 @@ static int _build_user_job_list(uint32_t user_id, char* job_name,
 		xassert (job_ptr->magic == JOB_MAGIC);
 		if (job_ptr->user_id != user_id)
 			continue;
-		if (job_name && strcmp(job_name,job_ptr->name))
+		if (job_name && job_ptr->name &&
+		    strcmp(job_name, job_ptr->name))
 			continue;
 		if (job_buffer_size <= job_queue_size) {
 			job_buffer_size += 200;
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index b035063953c..7c63a03c0a9 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -478,6 +478,7 @@ struct step_record {
 					 * step relative to job's nodes,
 					 * see src/common/select_job_res.h */
 	uint32_t cpu_count;		/* count of step's CPUs */
+	uint16_t cpus_per_task;		/* cpus per task initiated */
 	uint16_t cyclic_alloc;		/* set for cyclic task allocation
 					 * across nodes */
 	uint16_t exclusive;		/* dedicated resources for the step */
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index 8d48d681167..6097480c58f 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -506,7 +506,8 @@ _pick_step_nodes (struct job_record *job_ptr,
 				tot_tasks = MIN(tot_tasks, usable_mem);
 			}
 			if ((avail_tasks <= 0) ||
-			    (cpus_picked_cnt >= step_spec->cpu_count))
+			    ((cpus_picked_cnt > 0) &&
+			     (cpus_picked_cnt >= step_spec->cpu_count)))
 				bit_clear(nodes_avail, i);
 			else
 				cpus_picked_cnt += avail_tasks;
@@ -907,8 +908,10 @@ extern void step_alloc_lps(struct step_record *step_ptr)
 		step_node_inx++;
 		if (job_node_inx >= select_ptr->nhosts)
 			fatal("step_alloc_lps: node index bad");
-		select_ptr->cpus_used[job_node_inx] +=
-			step_ptr->step_layout->tasks[step_node_inx];
+		if (step_ptr->cpus_per_task) {
+			select_ptr->cpus_used[job_node_inx] +=
+				step_ptr->step_layout->tasks[step_node_inx];
+		}
 		if (step_ptr->mem_per_task) {
 			select_ptr->memory_used[job_node_inx] +=
 				(step_ptr->mem_per_task *
@@ -970,8 +973,10 @@ static void _step_dealloc_lps(struct step_record *step_ptr)
 		step_node_inx++;
 		if (job_node_inx >= select_ptr->nhosts)
 			fatal("_step_dealloc_lps: node index bad");
-		if (select_ptr->cpus_used[job_node_inx] >=
-		    step_ptr->step_layout->tasks[step_node_inx]) {
+		if (step_ptr->cpus_per_task == 0)
+			;	/* no CPUs allocated */
+		else if (select_ptr->cpus_used[job_node_inx] >=
+			 step_ptr->step_layout->tasks[step_node_inx]) {
 			select_ptr->cpus_used[job_node_inx] -=
 				step_ptr->step_layout->tasks[step_node_inx];
 		} else {
@@ -1192,13 +1197,17 @@ step_create(job_step_create_request_msg_t *step_specs,
 	/* a batch script does not need switch info */
 	if (!batch_step) {
 		/* we can figure out the cpus_per_task here by
-		   reversing what happens in srun */
+		 * reversing what happens in srun, record
+		 * argument, plus save/restore in slurm v1.4 */
 		int cpus_per_task = step_specs->cpu_count /
 			step_specs->num_tasks;
-/* 		info(" we have %u / %u = %u", step_specs->cpu_count, */
-/* 		     step_specs->num_tasks, cpus_per_task); */
-		if(cpus_per_task < 1)
+		if (cpus_per_task < 1)
 			cpus_per_task = 1;
+		if (step_specs->cpu_count)
+			step_ptr->cpus_per_task = cpus_per_task;
+		else
+			step_ptr->cpus_per_task = 0;
+
 		step_ptr->step_layout =
 			step_layout_create(step_ptr,
 					   step_node_list,
@@ -2112,6 +2121,7 @@ extern int load_step_state(struct job_record *job_ptr, Buf buffer)
 	step_ptr->pre_sus_time = pre_sus_time;
 	step_ptr->tot_sus_time = tot_sus_time;
 	step_ptr->ckpt_time = ckpt_time;
+	step_ptr->cpus_per_task = 1;	/* Need to save/restore in v1.4 */
 
 	slurm_step_layout_destroy(step_ptr->step_layout);
 	step_ptr->step_layout = step_layout;
diff --git a/src/srun/opt.c b/src/srun/opt.c
index 3d54cf214e6..f32bc21fc38 100644
--- a/src/srun/opt.c
+++ b/src/srun/opt.c
@@ -822,7 +822,7 @@ static void set_options(const int argc, char **argv)
 		case (int)'c':
 			opt.cpus_set = true;
 			opt.cpus_per_task =
-				_get_int(optarg, "cpus-per-task", true);
+				_get_int(optarg, "cpus-per-task", false);
 			break;
 		case (int)'C':
 			xfree(opt.constraints);
@@ -1550,7 +1550,7 @@ static bool _opt_verify(void)
 		verified = false;
 	}
 
-	if (opt.cpus_per_task <= 0) {
+	if (opt.cpus_per_task < 0) {
 		error("%s: invalid number of cpus per task (-c %d)\n",
 		      opt.progname, opt.cpus_per_task);
 		verified = false;
--
GitLab
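
Note on the env.c hunk above: the task-count fallback it introduces can be illustrated outside of SLURM. The stand-alone sketch below is not SLURM code; the NO_VAL value, the sample allocation numbers, and the helper name derive_num_tasks are assumptions for illustration only. It mirrors the same arithmetic as the new code in env_array_for_job(): when the task count is unknown, total the CPUs described by the (cpus_per_node[], cpu_count_reps[]) pairs, then divide by cpus-per-task when that value is greater than 1 and is not the NO_VAL sentinel.

/* Stand-alone illustration of the num_tasks fallback added to
 * env_array_for_job().  All names and values here are made up. */
#include <stdint.h>
#include <stdio.h>

#define NO_VAL 0xfffffffe	/* assumed sentinel mirroring SLURM's NO_VAL */

static uint32_t derive_num_tasks(uint32_t num_tasks,
				 int num_cpu_groups,
				 const uint32_t *cpu_count_reps,
				 const uint16_t *cpus_per_node,
				 uint16_t cpus_per_task)
{
	if (num_tasks != NO_VAL)
		return num_tasks;	/* caller already knows the count */

	/* Total up the CPUs in the allocation ... */
	num_tasks = 0;
	for (int i = 0; i < num_cpu_groups; i++)
		num_tasks += cpu_count_reps[i] * cpus_per_node[i];

	/* ... then divide by cpus-per-task when it is a real value > 1 */
	if ((int)cpus_per_task > 1 && cpus_per_task != (uint16_t)NO_VAL)
		num_tasks /= cpus_per_task;
	return num_tasks;
}

int main(void)
{
	/* e.g. two groups: 2 nodes with 8 CPUs each, 1 node with 4 CPUs */
	uint32_t reps[] = {2, 1};
	uint16_t cpus[] = {8, 4};

	printf("tasks = %u\n",
	       (unsigned) derive_num_tasks(NO_VAL, 2, reps, cpus, 2));
	return 0;
}

With the sample input (20 CPUs in total, 2 CPUs per task) the program prints "tasks = 10", which is the value the patched code would export via SLURM_TASKS_PER_NODE layout creation when srun is not told an explicit task count.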