Commit c0098be5 authored by Mark Grondona

o move _compute_task_count() into job.c to allow the -c option to
  affect the number of tasks in batch job steps.
parent 683e3604
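For context, a minimal standalone sketch (not part of the commit) of the arithmetic the relocated _compute_task_count() performs when -c/--cpus-per-task is set: each group of identical nodes contributes (cpus_per_node / cpus_per_task) tasks per node, and the result never drops below the node count. The allocation values below are hypothetical.

	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical allocation: 4 nodes with 8 CPUs, 2 nodes with 4 CPUs */
		int cpus_per_node[]  = { 8, 4 };
		int cpu_count_reps[] = { 4, 2 };
		int num_cpu_groups   = 2;
		int nnodes           = 6;
		int cpus_per_task    = 2;    /* e.g. srun -c 2 */

		int i, cnt = 0;
		for (i = 0; i < num_cpu_groups; i++)
			cnt += cpu_count_reps[i] * (cpus_per_node[i] / cpus_per_task);

		/* never launch fewer tasks than allocated nodes */
		if (cnt < nnodes)
			cnt = nnodes;

		printf("tasks = %d\n", cnt);   /* 4*(8/2) + 2*(4/2) = 20 */
		return 0;
	}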
@@ -48,7 +48,9 @@ typedef struct allocation_info {
 	uint32_t jobid;
 	uint32_t stepid;
 	char *nodelist;
+	int nnodes;
 	slurm_addr *addrs;
+	int num_cpu_groups;
 	int *cpus_per_node;
 	int *cpu_count_reps;
 } allocation_info_t;
@@ -62,6 +64,27 @@ _estimate_nports(int nclients, int cli_per_port)
 	return d.rem > 0 ? d.quot + 1 : d.quot;
 }
 
+static int
+_compute_task_count(allocation_info_t *info)
+{
+	int i, cnt = 0;
+
+	if (opt.cpus_set) {
+		for (i = 0; i < info->num_cpu_groups; i++)
+			cnt += ( info->cpu_count_reps[i] *
+				 (info->cpus_per_node[i]/opt.cpus_per_task));
+	}
+
+	return (cnt < info->nnodes) ? info->nnodes : cnt;
+}
+
+static void
+_set_nprocs(allocation_info_t *info)
+{
+	if (!opt.nprocs_set)
+		opt.nprocs = _compute_task_count(info);
+}
+
 static job_t *
 _job_create_internal(allocation_info_t *info)
@@ -73,6 +96,10 @@ _job_create_internal(allocation_info_t *info)
 	hostlist_t hl;
 	job_t *job;
 
+	/* Reset nprocs if necessary
+	 */
+	_set_nprocs(info);
+
 	job = xmalloc(sizeof(*job));
 	slurm_mutex_init(&job->state_mutex);
@@ -120,7 +147,7 @@ _job_create_internal(allocation_info_t *info)
 	job->outbuf    = (cbuf_t *) xmalloc(opt.nprocs * sizeof(cbuf_t));
 	job->errbuf    = (cbuf_t *) xmalloc(opt.nprocs * sizeof(cbuf_t));
 	job->inbuf     = (cbuf_t *) xmalloc(opt.nprocs * sizeof(cbuf_t));
 	job->stdin_eof = (bool *) xmalloc(opt.nprocs * sizeof(bool));
 
 	/* nhost host states */
@@ -177,8 +204,10 @@ job_create_allocation(resource_allocation_response_msg_t *resp)
 	allocation_info_t *info = xmalloc(sizeof(*info));
 
 	info->nodelist = resp->node_list;
+	info->nnodes = resp->node_cnt;
 	info->jobid = resp->job_id;
 	info->stepid = NO_VAL;
+	info->num_cpu_groups = resp->num_cpu_groups;
 	info->cpus_per_node = resp->cpus_per_node;
 	info->cpu_count_reps = resp->cpu_count_reps;
 	info->addrs = resp->node_addr;
@@ -225,9 +254,11 @@ job_create_noalloc(void)
 	info->jobid = (uint32_t) (lrand48() % 65550L + 1L);
 	info->stepid = 0;
 	info->nodelist = opt.nodelist;
+	info->nnodes = hostlist_count(hl);
 
-	if (opt.nprocs < hostlist_count(hl))
-		opt.nprocs = hostlist_count(hl);
+	/* if (opt.nprocs < info->nnodes)
+		opt.nprocs = hostlist_count(hl);
+	*/
 
 	hostlist_destroy(hl);
 
 	info->cpus_per_node = &cpn;
...
@@ -329,25 +329,10 @@ _allocate_nodes(void)
 		}
 		sigaction(SIGINT, &old_action, NULL);
 	}
 
-	if (!opt.nprocs_set)	/* can vary by min-max node count */
-		opt.nprocs = _compute_task_count(resp);
-
 	return resp;
 }
 
-static int _compute_task_count(allocation_resp *resp)
-{
-	int i, cnt = 0;
-
-	if (opt.cpus_set) {
-		for (i=0; i<resp->num_cpu_groups; i++)
-			cnt += (resp->cpu_count_reps[i] *
-				(resp->cpus_per_node[i]/opt.cpus_per_task));
-	}
-	if (cnt < resp->node_cnt)
-		cnt = resp->node_cnt;
-	return cnt;
-}
-
 static void
 _sig_kill_alloc(int signum)
@@ -773,11 +758,17 @@ _set_batch_script_env(uint32_t jobid, uint32_t node_cnt)
 		opt.nprocs = node_cnt;
 	}
 
-	if (setenvf("SLURM_NPROCS=%u", opt.nprocs)) {
+	if (opt.nprocs_set && setenvf("SLURM_NPROCS=%u", opt.nprocs)) {
 		error("Unable to set SLURM_NPROCS environment variable");
 		return -1;
 	}
 
+	if ( (opt.cpus_per_task > 0) &&
+	     setenvf("SLURM_CPUS_PER_TASK=%u", opt.cpus_per_task)) {
+		error("Unable to set SLURM_CPUS_PER_TASK");
+		return -1;
+	}
+
 	if (opt.distribution != SRUN_DIST_UNKNOWN) {
 		dist = (opt.distribution == SRUN_DIST_BLOCK) ?
 			"block" : "cyclic";
...
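As an illustration only (not part of the commit), a program run from the batch job step could pick up the values exported by _set_batch_script_env() with ordinary getenv() calls; the fallback defaults below are hypothetical.

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		/* Variables exported into the batch script environment;
		 * fall back to 1 if unset (hypothetical default). */
		const char *nprocs = getenv("SLURM_NPROCS");
		const char *cpt    = getenv("SLURM_CPUS_PER_TASK");

		int ntasks        = nprocs ? atoi(nprocs) : 1;
		int cpus_per_task = cpt    ? atoi(cpt)    : 1;

		printf("ntasks=%d cpus_per_task=%d\n", ntasks, cpus_per_task);
		return 0;
	}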