From c623b34d4b8cd5c3ca3239df1d8e31ef86b39df4 Mon Sep 17 00:00:00 2001
From: Danny Auble <da@llnl.gov>
Date: Fri, 4 Aug 2006 15:05:45 +0000
Subject: [PATCH] svn merge -r8747:8772
 https://eris.llnl.gov/svn/slurm/branches/slurm-1.1

---
 NEWS                      |   2 +
 src/slurmctld/agent.c     |   1 +
 src/srun/allocate.c       |  25 ++++++--
 src/srun/srun.c           |   6 ++
 src/srun/srun_job.c       | 129 +++++++++++++++++++++++++++++++-------
 testsuite/expect/globals  |   2 +-
 testsuite/expect/test1.87 |  41 +++---------
 7 files changed, 146 insertions(+), 60 deletions(-)

diff --git a/NEWS b/NEWS
index 58349349abf..2f61ce9fb97 100644
--- a/NEWS
+++ b/NEWS
@@ -33,6 +33,8 @@ documents those changes that are of interest to users and admins.
  -- Fixed heterogeous cpu overcommit issue
  -- Fix bug where srun would hang if it ran on one node and that node's
     slurmd died
+ -- Fix bug where srun task layout would be bad when min-max node range is
+    specified (e.g. "srun -N1-4 ...")
 
 * Changes in SLURM 1.1.5
 ========================
diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c
index 391277b3c5d..471bacd93a0 100644
--- a/src/slurmctld/agent.c
+++ b/src/slurmctld/agent.c
@@ -791,6 +791,7 @@ static void *_thread_per_group_rpc(void *args)
 		if (job_ptr)
 			state = job_ptr->job_state;
 		unlock_slurmctld(job_read_lock);
+
 		if ((state == JOB_RUNNING)
 		    || (msg_type == SRUN_JOB_COMPLETE)
 		    || ((msg_type == SRUN_NODE_FAIL)
diff --git a/src/srun/allocate.c b/src/srun/allocate.c
index 5034c5e23bf..8bad0ba489d 100644
--- a/src/srun/allocate.c
+++ b/src/srun/allocate.c
@@ -551,10 +551,27 @@ static job_step_create_request_msg_t *
 _step_req_create(srun_job_t *j)
 {
 	job_step_create_request_msg_t *r = xmalloc(sizeof(*r));
+	hostlist_t hl;
 	r->job_id      = j->jobid;
 	r->user_id     = opt.uid;
-	r->node_count  = j->nhosts;
-	r->cpu_count   = opt.overcommit ? j->nhosts
+
+	/* get the correct number of hosts to run tasks on */
+	if(opt.nodelist) {
+		hl = hostlist_create(opt.nodelist);
+		hostlist_uniq(hl);
+		r->node_count = hostlist_count(hl);
+		hostlist_destroy(hl);
+	} else if((opt.max_nodes > 0) && (opt.max_nodes <j->nhosts))
+		r->node_count = opt.max_nodes;
+	else
+		r->node_count = j->nhosts;
+	/* info("send %d or %d? sending %d", opt.max_nodes, */
+/* 	     j->nhosts, r->node_count); */
+	if(r->node_count > j->nhosts) {
+		error("Asking for more nodes that allocated");
+		return NULL;
+	}
+	r->cpu_count   = opt.overcommit ? r->node_count
 		: (opt.nprocs*opt.cpus_per_task);
 	r->num_tasks   = opt.nprocs;
 	r->node_list   = xstrdup(opt.nodelist);
@@ -578,8 +595,8 @@ _step_req_create(srun_job_t *j)
 		r->task_dist = SLURM_DIST_CYCLIC;
 		break;
 	default:
-		r->task_dist = ((opt.nprocs <= j->nhosts)
-			? SLURM_DIST_CYCLIC : SLURM_DIST_BLOCK);
+		r->task_dist = (opt.nprocs <= r->node_count) ?
+			SLURM_DIST_CYCLIC : SLURM_DIST_BLOCK;
 		break;
 	}
 
diff --git a/src/srun/srun.c b/src/srun/srun.c
index e89e3dd302a..547149063c0 100644
--- a/src/srun/srun.c
+++ b/src/srun/srun.c
@@ -354,6 +354,10 @@ int srun(int ac, char **av)
 		info("Force Terminated job");
 		srun_job_destroy(job, 0);
 		exit(1);
+	} else if (job->state == SRUN_JOB_CANCELLED) {
+		info("Cancelling job");
+		srun_job_destroy(job, NO_VAL);
+		exit(1);
 	}
 
 	/*
@@ -365,6 +369,8 @@ int srun(int ac, char **av)
 		error ("Waiting on message thread: %m");
 	debug("done");
 
+	/* have to check if job was cancelled here just to make sure
+	   state didn't change when we were waiting for the message thread */
 	if (job->state == SRUN_JOB_CANCELLED) {
 		info("Cancelling job");
 		srun_job_destroy(job, NO_VAL);
diff --git a/src/srun/srun_job.c b/src/srun/srun_job.c
index 10e095ebbc5..78335f3234f 100644
--- a/src/srun/srun_job.c
+++ b/src/srun/srun_job.c
@@ -144,9 +144,10 @@ job_step_create_allocation(uint32_t job_id)
 {
 	srun_job_t *job = NULL;
 	allocation_info_t *ai = xmalloc(sizeof(*ai));
-	uint32_t cpn = 1;
 	hostlist_t hl = NULL;
 	char buf[8192];
+	int count = 0;
+	char *tasks_per_node = xstrdup(getenv("SLURM_TASKS_PER_NODE"));
 
 	ai->jobid          = job_id;
 	ai->stepid         = NO_VAL;
@@ -168,13 +169,21 @@ job_step_create_allocation(uint32_t job_id)
 			}
 		}
 	}
-	ai->nodelist = opt.alloc_nodelist;
+	/* hl = hostlist_create(ai->nodelist); */
+/* 	hostlist_uniq(hl); */
+/* 	ai->nnodes = hostlist_count(hl); */
+/* 	hostlist_destroy(hl); */
+/* 	info("using %s %d not %d", ai->nodelist, ai->nnodes, opt.min_nodes); */
+
 
 	if (opt.exc_nodes) {
-		hl = hostlist_create(ai->nodelist);
 		hostlist_t exc_hl = hostlist_create(opt.exc_nodes);
 		char *node_name = NULL;
-
+		if(opt.nodelist)
+			hl = hostlist_create(opt.nodelist);
+		else
+			hl = hostlist_create(ai->nodelist);
+		info("using %s or %s", opt.nodelist, ai->nodelist);
 		while ((node_name = hostlist_shift(exc_hl))) {
 			int inx = hostlist_find(hl, node_name);
 			if (inx >= 0) {
@@ -183,9 +192,15 @@ job_step_create_allocation(uint32_t job_id)
 			}
 			free(node_name);
 		}
+		if(!hostlist_count(hl)) {
+			error("Hostlist is now nothing!  Can't run job.");
+			return NULL;
+		}
 		hostlist_destroy(exc_hl);
 		hostlist_ranged_string(hl, sizeof(buf), buf);
 		hostlist_destroy(hl);
+		xfree(opt.nodelist);
+		opt.nodelist = xstrdup(buf);
 		xfree(ai->nodelist);
 		ai->nodelist = xstrdup(buf);
 	}
@@ -194,24 +209,86 @@ job_step_create_allocation(uint32_t job_id)
 	/* opt.nodelist = ai->nodelist; */
 	if(opt.nodelist) {
 		hl = hostlist_create(opt.nodelist);
+		if(!hostlist_count(hl)) {
+			error("1 Hostlist is now nothing!  Can't run job.");
+			return NULL;
+		}
 		hostlist_ranged_string(hl, sizeof(buf), buf);
+		count = hostlist_count(hl);
 		hostlist_destroy(hl);
 		xfree(ai->nodelist);
 		ai->nodelist = xstrdup(buf);
 		xfree(opt.nodelist);
 		opt.nodelist = xstrdup(buf);
 	}
-	ai->nnodes = opt.min_nodes;
-	debug("node list is now %s", ai->nodelist);
-	cpn = (opt.nprocs + ai->nnodes - 1) / ai->nnodes;
-	ai->cpus_per_node = &cpn;
-	ai->cpu_count_reps = &ai->nnodes;
+	if(opt.distribution == SLURM_DIST_ARBITRARY) {
+		if(count != opt.nprocs) {
+			error("You asked for %d tasks but specified %d nodes",
+			      opt.nprocs, count);
+			goto error;
+		}
+	}
+
+	hl = hostlist_create(ai->nodelist);
+	hostlist_uniq(hl);
+	ai->nnodes = hostlist_count(hl);
+	hostlist_destroy(hl);
+	//ai->nnodes = opt.min_nodes;
+	/* info("node list is now %s %s %d procs", */
+/* 	     ai->nodelist, opt.nodelist, */
+/* 	     opt.nprocs); */
+	if(tasks_per_node) {
+		int i = 0;
+
+		ai->num_cpu_groups = 0;
+		ai->cpus_per_node = xmalloc(sizeof(uint32_t) * ai->nnodes);
+		ai->cpu_count_reps =xmalloc(sizeof(uint32_t) * ai->nnodes);
+
+		while(tasks_per_node[i]) {
+			if(tasks_per_node[i] >= '0'
+			   && tasks_per_node[i] <= '9')
+				ai->cpus_per_node[ai->num_cpu_groups] =
+					atoi(&tasks_per_node[i]);
+			else {
+				error("problem with tasks_per_node %s",
+				      tasks_per_node);
+				goto error;
+			}
+			while(tasks_per_node[i]!='x' && tasks_per_node[i])
+				i++;
+			i++;
+			if(tasks_per_node[i] >= '0'
+			   && tasks_per_node[i] <= '9')
+				ai->cpu_count_reps[ai->num_cpu_groups] =
+					atoi(&tasks_per_node[i]);
+			else {
+				error("1 problem with tasks_per_node %s",
+				      tasks_per_node);
+				goto error;
+			}
+			while(tasks_per_node[i]!=',' && tasks_per_node[i])
+				i++;
+			if(tasks_per_node[i] == ',') {
+				i++;
+			}
+			ai->num_cpu_groups++;
+		}
+		xfree(tasks_per_node);
+	} else {
+		uint32_t cpn = (opt.nprocs + ai->nnodes - 1) / ai->nnodes;
+		info("SLURM_TASKS_PER_NODE not set! "
+		     "Guessing %d cpus per node", cpn);
+		ai->cpus_per_node = &cpn;
+		ai->cpu_count_reps = &ai->nnodes;
+	}
+	if(!opt.max_nodes)
+		opt.max_nodes = opt.min_nodes;
 
 	/*
 	 * Create job, then fill in host addresses
 	 */
 	job = _job_create_structure(ai);
-
+error:
 	xfree(ai);
 
 	return (job);
@@ -247,30 +324,34 @@ job_create_allocation(resource_allocation_response_msg_t *resp)
  * Create an srun job structure from a resource allocation response msg
 */
 static srun_job_t *
-_job_create_structure(allocation_info_t *info)
+_job_create_structure(allocation_info_t *ainfo)
 {
 	srun_job_t *job = xmalloc(sizeof(srun_job_t));
 
-	_set_nprocs(info);
+	_set_nprocs(ainfo);
 	debug2("creating job with %d tasks", opt.nprocs);
 
 	slurm_mutex_init(&job->state_mutex);
 	pthread_cond_init(&job->state_cond, NULL);
 	job->state = SRUN_JOB_INIT;
 
-	job->nodelist = xstrdup(info->nodelist);
-	job->stepid  = info->stepid;
+	job->nodelist = xstrdup(ainfo->nodelist);
+	job->stepid  = ainfo->stepid;
 
 #ifdef HAVE_FRONT_END	/* Limited job step support */
 	opt.overcommit = true;
 	job->nhosts = 1;
 #else
-	job->nhosts   = info->nnodes;
+	job->nhosts   = ainfo->nnodes;
 #endif
-
-
-	job->select_jobinfo = info->select_jobinfo;
-	job->jobid   = info->jobid;
+	if(opt.min_nodes > job->nhosts) {
+		error("Only allocated %d nodes asked for %d",
+		      job->nhosts, opt.min_nodes);
+		return NULL;
+	}
+
+	job->select_jobinfo = ainfo->select_jobinfo;
+	job->jobid   = ainfo->jobid;
 	job->ntasks  = opt.nprocs;
 	job->task_prolog = xstrdup(opt.task_prolog);
 
@@ -488,17 +569,17 @@ _estimate_nports(int nclients, int cli_per_port)
 }
 
 static int
-_compute_task_count(allocation_info_t *info)
+_compute_task_count(allocation_info_t *ainfo)
 {
 	int i, cnt = 0;
 
 	if (opt.cpus_set) {
-		for (i = 0; i < info->num_cpu_groups; i++)
-			cnt += ( info->cpu_count_reps[i] *
-				 (info->cpus_per_node[i]/opt.cpus_per_task));
+		for (i = 0; i < ainfo->num_cpu_groups; i++)
+			cnt += ( ainfo->cpu_count_reps[i] *
+				 (ainfo->cpus_per_node[i]/opt.cpus_per_task));
 	}
 
-	return (cnt < info->nnodes) ? info->nnodes : cnt;
+	return (cnt < ainfo->nnodes) ? ainfo->nnodes : cnt;
 }
 
 static void
diff --git a/testsuite/expect/globals b/testsuite/expect/globals
index f773d74f2a7..6bbe8869b8c 100755
--- a/testsuite/expect/globals
+++ b/testsuite/expect/globals
@@ -718,7 +718,7 @@ proc default_partition {} {
 	set name ""
 	set fd [open "|$scontrol --all --oneliner show partition"]
 	while {[gets $fd line] != -1} {
-		if {[regexp {^PartitionName=(\w+).*Default=YES} $line name]
+		if {[regexp {^PartitionName=(\w+).*Default=YES} $line frag name]
 		    == 1} {
 			break
 		}
diff --git a/testsuite/expect/test1.87 b/testsuite/expect/test1.87
index cb98b2872ce..e5c7d914cde 100755
--- a/testsuite/expect/test1.87
+++ b/testsuite/expect/test1.87
@@ -267,37 +267,19 @@ if {$matches < 2} {
 
 #
 # Get two nodes relative (starting at) node 3
-# Since there is only one node left, we check for a warning message
+# Since there is only one node left, we check for a error message
 #
-set matches 0
-set warning 0
+set error 0
 send "$srun -l -N2 -n2 -O --relative=3 $bin_hostname\n"
 expect {
-	-re "Warning:" {
-		set warning 1
+	-re "error:" {
+		send_user "This error is expected, no worries\n"
+		incr error
 		exp_continue
 	}
 	-re "($number): ($alpha_numeric)" {
-		if { $host_num == 0 } {
-			if {[string compare $expect_out(2,string) $host_3] == 0} {
-				incr matches
-			} else {
-				send_user "\nFAILURE: wrong node responded\n"
-				set exit_code 1
-			}
-		}
-		if { $host_num == 1 } {
-			if {[string compare $expect_out(2,string) $host_3] == 0} {
-				incr matches
-			} else {
-				send_user "\nFAILURE: wrong node responded\n"
-				set exit_code 1
-			}
-		}
-		if { $host_num > 1 } {
-			send_user "\nFAILURE: too many tasks responded\n"
-			set exit_code 1
-		}
+		send_user "\nFAILURE: running where we shouldn't be able to run\n"
+		set exit_code 1
 		exp_continue
 	}
 	-re "Unable to contact" {
@@ -316,12 +298,9 @@
 		wait
 	}
 }
-if {$matches < 2} {
-	send_user "\nFAILURE: required nodes failed to respond\n"
-	set exit_code 1
-}
-if {$warning < 1} {
-	send_user "\nFAILURE: warning message missing for inconsistent node count\n"
+
+if {$error == 0} {
+	send_user "\nFAILURE: No error for inconsistent node count\n"
 	set exit_code 1
 }
 
--
GitLab
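
A note on the SLURM_TASKS_PER_NODE handling added to job_step_create_allocation() above: the loop walks the string as comma-separated "CxR" groups (a cpu count, an 'x', then a repetition count) and fills the parallel cpus_per_node/cpu_count_reps arrays, which together form a run-length encoding of the step's task layout. The standalone sketch below mirrors that walk, assuming the variable really does use the "CxR,CxR" form the loop expects (e.g. "2x3,1x4" for 2 tasks on each of 3 nodes, then 1 task on each of 4 nodes); the names parse_tasks_per_node, cpus and reps are illustrative only and are not part of the patch.

/* Illustrative sketch only: parse a "CxR,CxR,..." string (e.g. "2x3,1x4")
 * into parallel cpus-per-node / repetition arrays, mirroring the loop the
 * patch adds to job_step_create_allocation().  Names are hypothetical. */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <ctype.h>

static int parse_tasks_per_node(const char *s, uint32_t *cpus,
				uint32_t *reps, int max_groups)
{
	int i = 0, groups = 0;

	while (s[i] && groups < max_groups) {
		if (!isdigit((unsigned char) s[i]))
			return -1;			/* malformed input */
		cpus[groups] = (uint32_t) atoi(&s[i]);	/* tasks per node */
		while (s[i] && s[i] != 'x')		/* skip to repeat count */
			i++;
		if (s[i] == 'x')
			i++;
		if (!isdigit((unsigned char) s[i]))
			return -1;
		reps[groups] = (uint32_t) atoi(&s[i]);	/* nodes in this group */
		while (s[i] && s[i] != ',')		/* skip to next group */
			i++;
		if (s[i] == ',')
			i++;
		groups++;
	}
	return groups;
}

int main(void)
{
	uint32_t cpus[16], reps[16];
	int i, n = parse_tasks_per_node("2x3,1x4", cpus, reps, 16);

	for (i = 0; i < n; i++)
		printf("%u task(s) on each of %u node(s)\n", cpus[i], reps[i]);
	return 0;
}

Keeping the counts run-length encoded means the layout code needs only one array entry per group of identically loaded nodes rather than one entry per node, which is also how the allocation response reports cpu counts elsewhere in srun.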