From acccbdfe6bb38d9826f311de226921809d59967e Mon Sep 17 00:00:00 2001 From: Danny Auble <da@llnl.gov> Date: Tue, 8 Nov 2005 18:34:46 +0000 Subject: [PATCH] hostfile support addition --- NEWS | 2 + slurm/slurm.h.in | 11 +- src/api/allocate.c | 112 +++++- src/api/spawn.c | 259 +++++++++---- src/common/dist_tasks.c | 3 - src/common/dist_tasks.h | 10 +- src/plugins/switch/federation/federation.c | 5 +- src/slurmctld/node_scheduler.c | 30 +- src/slurmctld/proc_req.c | 6 +- src/slurmctld/step_mgr.c | 22 +- src/srun/allocate.c | 148 ++++---- src/srun/allocate.h | 2 +- src/srun/launch.c | 44 ++- src/srun/msg.c | 24 +- src/srun/opt.c | 24 +- src/srun/opt.h | 7 +- src/srun/reattach.c | 8 +- src/srun/srun.c | 37 +- src/srun/srun_job.c | 416 +++++++++++++-------- src/srun/srun_job.h | 28 +- testsuite/expect/test1.47 | 6 +- testsuite/expect/test1.51 | 2 +- testsuite/expect/test1.81 | 31 +- testsuite/expect/test9.8 | 6 + 24 files changed, 839 insertions(+), 404 deletions(-) diff --git a/NEWS b/NEWS index 3c258bc0f30..c281d6c80a0 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,8 @@ documents those changes that are of interest to users and admins. ============================= -- Remove BNR libary functions and add those for PMI (not fully implemented yet) + -- Added Hostfile support for POE and srun. MP_HOSTFILE env var to set + location of hostfile. Tasks will run from list order in the file. * Changes in SLURM 0.7.0-pre3 ============================= diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index c8fef5607b3..ea1d04aca1c 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -200,7 +200,9 @@ enum select_print_mode { /* Possible task distributions across the nodes */ enum task_dist_states { SLURM_DIST_CYCLIC, /* distribute tasks 1 per node, round robin */ - SLURM_DIST_BLOCK /* distribute tasks filling node by node */ + SLURM_DIST_BLOCK, /* distribute tasks filling node by node */ + SLURM_DIST_HOSTFILE, /* distribute tasks from what hostfile says */ + SLURM_DIST_UNKNOWN /* unknown dist */ }; /* The last entry in node_states must be STATE_END, keep in sync with @@ -243,7 +245,12 @@ enum ctx_keys { SLURM_STEP_CTX_STEPID, /* get the created job step id */ SLURM_STEP_CTX_TASKS, /* get array of task count on each node */ SLURM_STEP_CTX_TID, /* get array of task IDs for specified node */ - SLURM_STEP_CTX_RESP /* get job step create response message */ + SLURM_STEP_CTX_RESP, /* get job step create response message */ + SLURM_STEP_CTX_CRED, + SLURM_STEP_CTX_SWITCH_JOB, + SLURM_STEP_CTX_NHOSTS, + SLURM_STEP_CTX_CPUS, + SLURM_STEP_CTX_HOST }; /*****************************************************************************\ diff --git a/src/api/allocate.c b/src/api/allocate.c index 6224b78cf23..b11b911e29c 100644 --- a/src/api/allocate.c +++ b/src/api/allocate.c @@ -42,8 +42,13 @@ extern pid_t getsid(pid_t pid); /* missing from <unistd.h> */ #include "src/common/read_config.h" #include "src/common/slurm_protocol_api.h" +#include "src/common/hostlist.h" +#include "src/common/xmalloc.h" + +#define BUF_SIZE 1024 static int _handle_rc_msg(slurm_msg_t *msg); +static int _nodelist_from_hostfile(job_step_create_request_msg_t *req); /* * slurm_allocate_resources - allocate resources for a job request @@ -61,7 +66,6 @@ slurm_allocate_resources (job_desc_msg_t *req, slurm_msg_t resp_msg; bool host_set = false; char host[64]; - /* * set Node and session id for this request */ @@ -205,6 +209,9 @@ slurm_job_step_create (job_step_create_request_msg_t *req, req_msg.msg_type = REQUEST_JOB_STEP_CREATE; req_msg.data = req; + + 
if(_nodelist_from_hostfile(req) == 0) + debug("nodelist was NULL"); if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0) return SLURM_ERROR; @@ -282,3 +289,106 @@ _handle_rc_msg(slurm_msg_t *msg) else return SLURM_SUCCESS; } + +static int _nodelist_from_hostfile(job_step_create_request_msg_t *req) +{ + char *hostfile = NULL; + char *hostname = NULL; + FILE *hostfilep = NULL; + char in_line[BUF_SIZE]; /* input line */ + int i, j; + int line_size; + hostlist_t hostlist = NULL; + int count; + int len = 0; + int ret = 0; + int line_num = 0; + char *nodelist = NULL; + + if (hostfile = (char *)getenv("MP_HOSTFILE")) { + if(strlen(hostfile)<1) + goto no_hostfile; + if((hostfilep = fopen(hostfile, "r")) == NULL) { + error("slurm_allocate_resources " + "error opening file %s, %m", + hostfile); + goto no_hostfile; + } + hostlist = hostlist_create(NULL); + + while (fgets (in_line, BUF_SIZE, hostfilep) != NULL) { + line_num++; + line_size = strlen(in_line); + if (line_size >= (BUF_SIZE - 1)) { + error ("Line %d, of hostfile %s too long", + line_num, hostfile); + fclose (hostfilep); + goto no_hostfile; + } + for (i = 0; i < line_size; i++) { + if (in_line[i] == '\n') { + in_line[i] = '\0'; + break; + } + if (in_line[i] == '\0') + break; + if (in_line[i] != '#') + continue; + if ((i > 0) && (in_line[i - 1] == '\\')) { + for (j = i; j < line_size; j++) { + in_line[j - 1] = in_line[j]; + } + line_size--; + continue; + } + in_line[i] = '\0'; + break; + } + + len += strlen(in_line)+1; + hostlist_push(hostlist,in_line); + if(req->num_tasks && (line_num+1)>req->num_tasks) + break; + } + fclose (hostfilep); + + nodelist = (char *)xmalloc(sizeof(char)*len); + memset(nodelist, 0, len); + + count = hostlist_count(hostlist); + if (count <= 0) { + error("Hostlist is empty!\n"); + xfree(*nodelist); + goto cleanup_hostfile; + } + + len = 0; + while (hostname = hostlist_shift(hostlist)) { + line_num = strlen(hostname)+1; + ret = sprintf(nodelist+len, + "%s,", hostname); + if (ret < 0 || ret > line_num) { + error("bad snprintf only %d printed",ret); + xfree(*nodelist); + goto cleanup_hostfile; + } + len += ret; + } + nodelist[--len] = '\0'; + debug2("Hostlist from MP_HOSTFILE = %s\n", + nodelist); + + cleanup_hostfile: + hostlist_destroy(hostlist); + + } +no_hostfile: + if(nodelist) { + if(req->node_list) + xfree(req->node_list); + req->node_list = nodelist; + req->num_tasks = count; + req->task_dist = SLURM_DIST_HOSTFILE; + } + return count; +} diff --git a/src/api/spawn.c b/src/api/spawn.c index 9dd4e80e385..0780bfa2930 100644 --- a/src/api/spawn.c +++ b/src/api/spawn.c @@ -103,6 +103,7 @@ static void _free_char_array(char ***argv_p, int cnt); static int _p_launch(slurm_msg_t *req, slurm_step_ctx ctx); static int _sock_bind_wild(int sockfd); static int _task_layout(slurm_step_ctx ctx); +static int _task_layout_hostfile(slurm_step_ctx ctx); static int _task_layout_block(slurm_step_ctx ctx); static int _task_layout_cyclic(slurm_step_ctx ctx); static void * _thread_per_node_rpc(void *args); @@ -123,29 +124,41 @@ slurm_step_ctx_create (job_step_create_request_msg_t *step_req) old_job_alloc_msg_t old_job_req; job_step_create_response_msg_t *step_resp = NULL; resource_allocation_response_msg_t *alloc_resp; - + char *temp = NULL; old_job_req.job_id = step_req->job_id; old_job_req.uid = getuid(); if (slurm_confirm_allocation(&old_job_req, &alloc_resp) < 0) return NULL; - + if ((slurm_job_step_create(step_req, &step_resp) < 0) || (step_resp == NULL)) { slurm_free_resource_allocation_response_msg(alloc_resp); return 
NULL; /* slurm errno already set */ } - + + temp = step_req->node_list; + step_req->node_list = step_resp->node_list; + step_resp->node_list = temp; + rc = xmalloc(sizeof(struct slurm_step_ctx_struct)); rc->magic = STEP_CTX_MAGIC; rc->job_id = step_req->job_id; rc->user_id = step_req->user_id; - rc->num_tasks = step_req->num_tasks; rc->task_dist = step_req->task_dist; + rc->num_tasks = step_req->num_tasks; rc->step_resp = step_resp; rc->alloc_resp = alloc_resp; - - rc->hl = hostlist_create(rc->step_resp->node_list); - rc->nhosts = hostlist_count(rc->hl); + rc->hl = hostlist_create(step_req->node_list); + +#ifdef HAVE_FRONT_END /* Limited job step support */ + /* All jobs execute through front-end on Blue Gene/L. + * Normally we would not permit execution of job steps, + * but can fake it by just allocating all tasks to + * one of the allocated nodes. */ + rc->nhosts = 1; +#else + rc->nhosts = hostlist_count(rc->hl); +#endif (void) _task_layout(rc); return rc; @@ -162,10 +175,13 @@ slurm_step_ctx_get (slurm_step_ctx ctx, int ctx_key, ...) va_list ap; int rc = SLURM_SUCCESS; uint32_t node_inx; - uint32_t *step_id_ptr; - uint32_t **array_pptr = (uint32_t **) NULL; + uint32_t *uint32_ptr; + uint32_t **uint32_array_pptr = (uint32_t **) NULL; + char **char_array_pptr = (char **) NULL; job_step_create_response_msg_t ** step_resp_pptr; - + slurm_cred_t *cred; /* Slurm job credential */ + switch_jobinfo_t *switch_job; + if ((ctx == NULL) || (ctx->magic != STEP_CTX_MAGIC)) { slurm_seterrno(EINVAL); @@ -174,35 +190,60 @@ slurm_step_ctx_get (slurm_step_ctx ctx, int ctx_key, ...) va_start(ap, ctx_key); switch (ctx_key) { - case SLURM_STEP_CTX_STEPID: - step_id_ptr = (uint32_t *) va_arg(ap, void *); - *step_id_ptr = ctx->step_resp->job_step_id; - break; - case SLURM_STEP_CTX_TASKS: - array_pptr = (uint32_t **) va_arg(ap, void *); - *array_pptr = ctx->tasks; - break; - - case SLURM_STEP_CTX_TID: - node_inx = va_arg(ap, uint32_t); - if ((node_inx < 0) || (node_inx > ctx->nhosts)) { - slurm_seterrno(EINVAL); - rc = SLURM_ERROR; - break; - } - array_pptr = (uint32_t **) va_arg(ap, void *); - *array_pptr = ctx->tids[node_inx]; - break; - - case SLURM_STEP_CTX_RESP: - step_resp_pptr = (job_step_create_response_msg_t **) - va_arg(ap, void *); - *step_resp_pptr = ctx->step_resp; + case SLURM_STEP_CTX_STEPID: + uint32_ptr = (uint32_t *) va_arg(ap, void *); + *uint32_ptr = ctx->step_resp->job_step_id; + break; + case SLURM_STEP_CTX_TASKS: + uint32_array_pptr = (uint32_t **) va_arg(ap, void *); + *uint32_array_pptr = ctx->tasks; + break; + + case SLURM_STEP_CTX_TID: + node_inx = va_arg(ap, uint32_t); + if ((node_inx < 0) || (node_inx > ctx->nhosts)) { + slurm_seterrno(EINVAL); + rc = SLURM_ERROR; break; - - default: + } + uint32_array_pptr = (uint32_t **) va_arg(ap, void *); + *uint32_array_pptr = ctx->tids[node_inx]; + break; + + case SLURM_STEP_CTX_RESP: + step_resp_pptr = (job_step_create_response_msg_t **) + va_arg(ap, void *); + *step_resp_pptr = ctx->step_resp; + break; + case SLURM_STEP_CTX_CRED: + cred = (slurm_cred_t *) va_arg(ap, void *); + *cred = ctx->step_resp->cred; + break; + case SLURM_STEP_CTX_SWITCH_JOB: + switch_job = (switch_jobinfo_t *) va_arg(ap, void *); + *switch_job = ctx->step_resp->switch_job; + break; + case SLURM_STEP_CTX_NHOSTS: + uint32_ptr = (uint32_t *) va_arg(ap, void *); + *uint32_ptr = ctx->nhosts; + break; + case SLURM_STEP_CTX_CPUS: + uint32_array_pptr = (uint32_t **) va_arg(ap, void *); + *uint32_array_pptr = ctx->cpus; + break; + case SLURM_STEP_CTX_HOST: + node_inx = 
va_arg(ap, uint32_t); + if ((node_inx < 0) || (node_inx > ctx->nhosts)) { slurm_seterrno(EINVAL); rc = SLURM_ERROR; + break; + } + char_array_pptr = (char **) va_arg(ap, void *); + *char_array_pptr = ctx->host[node_inx]; + break; + default: + slurm_seterrno(EINVAL); + rc = SLURM_ERROR; } va_end(ap); @@ -246,40 +287,39 @@ slurm_step_ctx_set (slurm_step_ctx ctx, int ctx_key, ...) va_start(ap, ctx_key); switch (ctx_key) { - case SLURM_STEP_CTX_ARGS: - if (ctx->argv) - _xfree_char_array(&ctx->argv, ctx->argc); - ctx->argc = va_arg(ap, int); - if ((ctx->argc < 1) || (ctx->argc > 1024)) { - slurm_seterrno(EINVAL); - break; - } - _xcopy_char_array(&ctx->argv, va_arg(ap, char **), - ctx->argc); - break; - - case SLURM_STEP_CTX_CHDIR: - if (ctx->cwd) - xfree(ctx->cwd); - ctx->cwd = xstrdup(va_arg(ap, char *)); - break; - - case SLURM_STEP_CTX_ENV: - ctx->env_set = 1; - if (ctx->env) - _xfree_char_array(&ctx->env, ctx->envc); - ctx->envc = va_arg(ap, int); - if ((ctx->envc < 1) || (ctx->envc > 1024)) { - slurm_seterrno(EINVAL); - break; - } - _xcopy_char_array(&ctx->env, va_arg(ap, char **), - ctx->envc); + case SLURM_STEP_CTX_ARGS: + if (ctx->argv) + _xfree_char_array(&ctx->argv, ctx->argc); + ctx->argc = va_arg(ap, int); + if ((ctx->argc < 1) || (ctx->argc > 1024)) { + slurm_seterrno(EINVAL); break; - - default: + } + _xcopy_char_array(&ctx->argv, va_arg(ap, char **), + ctx->argc); + break; + + case SLURM_STEP_CTX_CHDIR: + if (ctx->cwd) + xfree(ctx->cwd); + ctx->cwd = xstrdup(va_arg(ap, char *)); + break; + + case SLURM_STEP_CTX_ENV: + ctx->env_set = 1; + if (ctx->env) + _xfree_char_array(&ctx->env, ctx->envc); + ctx->envc = va_arg(ap, int); + if ((ctx->envc < 1) || (ctx->envc > 1024)) { slurm_seterrno(EINVAL); - rc = SLURM_ERROR; + break; + } + _xcopy_char_array(&ctx->env, va_arg(ap, char **), + ctx->envc); + break; + default: + slurm_seterrno(EINVAL); + rc = SLURM_ERROR; } va_end(ap); @@ -348,9 +388,12 @@ extern int slurm_spawn (slurm_step_ctx ctx, int *fd_array) spawn_task_request_msg_t *msg_array_ptr; int *sock_array; slurm_msg_t *req_array_ptr; - int i, rc = SLURM_SUCCESS; + int i, j, rc = SLURM_SUCCESS; uint16_t slurmd_debug = 0; char *env_var; + hostlist_t hostlist = NULL; + hostlist_iterator_t itr = NULL; + char *host = NULL; if ((ctx == NULL) || (ctx->magic != STEP_CTX_MAGIC) || @@ -390,6 +433,10 @@ extern int slurm_spawn (slurm_step_ctx ctx, int *fd_array) msg_array_ptr = xmalloc(sizeof(spawn_task_request_msg_t) * ctx->nhosts); req_array_ptr = xmalloc(sizeof(slurm_msg_t) * ctx->nhosts); + + hostlist = hostlist_create(ctx->alloc_resp->node_list); + itr = hostlist_iterator_create(hostlist); + for (i=0; i<ctx->nhosts; i++) { spawn_task_request_msg_t *r = &msg_array_ptr[i]; slurm_msg_t *m = &req_array_ptr[i]; @@ -408,7 +455,6 @@ extern int slurm_spawn (slurm_step_ctx ctx, int *fd_array) r->nprocs = ctx->num_tasks; r->switch_job = ctx->step_resp->switch_job; r->slurmd_debug = slurmd_debug; - /* Task specific message contents */ r->global_task_id = ctx->tids[i][0]; r->cpus_allocated = ctx->cpus[i]; @@ -416,14 +462,28 @@ extern int slurm_spawn (slurm_step_ctx ctx, int *fd_array) r->io_port = ntohs(sock_array[i]); m->msg_type = REQUEST_SPAWN_TASK; m->data = r; - memcpy(&m->address, &ctx->alloc_resp->node_addr[i], + + j=0; + while(host = hostlist_next(itr)) { + if(!strcmp(host,ctx->host[i])) { + free(host); + break; + } + j++; + free(host); + } + debug2("using %d %s with %d tasks\n", j, ctx->host[i], + r->nprocs); + hostlist_iterator_reset(itr); + memcpy(&m->address, 
&ctx->alloc_resp->node_addr[j], sizeof(slurm_addr)); #if _DEBUG printf("tid=%d, fd=%d, port=%u, node_id=%u\n", ctx->tids[i][0], fd_array[i], r->io_port, i); #endif } - + hostlist_iterator_destroy(itr); + hostlist_destroy(hostlist); rc = _p_launch(req_array_ptr, ctx); xfree(msg_array_ptr); @@ -433,7 +493,6 @@ extern int slurm_spawn (slurm_step_ctx ctx, int *fd_array) return rc; } - /* * slurm_spawn_kill - send the specified signal to an existing job step * IN ctx - job step context generated by slurm_step_ctx_create @@ -535,10 +594,64 @@ static int _task_layout(slurm_step_ctx ctx) if (ctx->task_dist == SLURM_DIST_CYCLIC) return _task_layout_cyclic(ctx); + else if(ctx->task_dist == SLURM_DIST_HOSTFILE) + return _task_layout_hostfile(ctx); else return _task_layout_block(ctx); } +/* use specific set run tasks on each host listed in hostfile + */ +static int _task_layout_hostfile(slurm_step_ctx ctx) +{ + int i=0, j, taskid = 0; + bool over_subscribe = false; + hostlist_iterator_t itr = NULL, itr_task = NULL; + char *host = NULL; + char *host_task = NULL; + hostlist_t job_alloc_hosts = NULL; + hostlist_t step_alloc_hosts = NULL; + + job_alloc_hosts = hostlist_create(ctx->alloc_resp->node_list); + itr = hostlist_iterator_create(job_alloc_hosts); + step_alloc_hosts = hostlist_create(ctx->step_resp->node_list); + itr_task = hostlist_iterator_create(step_alloc_hosts); + while(host = hostlist_next(itr)) { + + ctx->tasks[i] = 0; + while(host_task = hostlist_next(itr_task)) { + if(!strcmp(host, host_task)) + ctx->tasks[i]++; + } + debug2("%s got %d tasks\n", + host, + ctx->tasks[i]); + if(ctx->tasks[i] == 0) + goto reset_hosts; + ctx->tids[i] = xmalloc(sizeof(uint32_t) * ctx->tasks[i]); + hostlist_iterator_reset(itr_task); + taskid = 0; + j = 0; + while(host_task = hostlist_next(itr_task)) { + if(!strcmp(host, host_task)) { + ctx->tids[i][j] = taskid; + j++; + } + taskid++; + free(host_task); + } + i++; + reset_hosts: + hostlist_iterator_reset(itr_task); + free(host); + } + + hostlist_iterator_destroy(itr); + hostlist_iterator_destroy(itr_task); + hostlist_destroy(job_alloc_hosts); + + return SLURM_SUCCESS; +} /* to effectively deal with heterogeneous nodes, we fake a cyclic * distribution to figure out how many tasks go on each node and diff --git a/src/common/dist_tasks.c b/src/common/dist_tasks.c index d1706bf46dd..1df7deb265d 100644 --- a/src/common/dist_tasks.c +++ b/src/common/dist_tasks.c @@ -47,7 +47,6 @@ #include "src/common/log.h" #include "src/common/xmalloc.h" - /* * distribute_tasks - determine how many tasks of a job will be run on each. * node. Distribution is influenced by number of cpus on @@ -92,7 +91,6 @@ int *distribute_tasks(const char *mlist, uint16_t num_cpu_groups, i = 0; ncpus = 0; while ((this_node_name = hostlist_shift(master_hl))) { - if (hostlist_find(task_hl, this_node_name) >= 0) { if (i >= nnodes) { fatal("Internal error: duplicate nodes? 
" @@ -110,7 +108,6 @@ int *distribute_tasks(const char *mlist, uint16_t num_cpu_groups, } hostlist_destroy(master_hl); hostlist_destroy(task_hl); - if (num_tasks >= ncpus) { /* * Evenly overcommit tasks over the hosts diff --git a/src/common/dist_tasks.h b/src/common/dist_tasks.h index b347aebbea7..565a388bfb5 100644 --- a/src/common/dist_tasks.h +++ b/src/common/dist_tasks.h @@ -58,10 +58,10 @@ * NOTE: allocates memory that should be xfreed by caller */ int * distribute_tasks(const char *mlist, - uint16_t num_cpu_groups, - uint32_t *cpus_per_node, - uint32_t *cpu_count_reps, - const char *tlist, - uint32_t num_tasks); + uint16_t num_cpu_groups, + uint32_t *cpus_per_node, + uint32_t *cpu_count_reps, + const char *tlist, + uint32_t num_tasks); #endif /* !_DIST_TASKS_H */ diff --git a/src/plugins/switch/federation/federation.c b/src/plugins/switch/federation/federation.c index c16d3e7dada..48b19b4f6aa 100644 --- a/src/plugins/switch/federation/federation.c +++ b/src/plugins/switch/federation/federation.c @@ -1773,12 +1773,11 @@ fed_build_jobinfo(fed_jobinfo_t *jp, hostlist_t hl, int nprocs, int min_procs_per_node; int max_procs_per_node; - debug("Allocating windows in block mode"); + debug("Allocating windows in non-cyclic mode"); nnodes = hostlist_count(hl); full_node_cnt = nprocs % nnodes; min_procs_per_node = nprocs / nnodes; max_procs_per_node = (nprocs + nnodes - 1) / nnodes; - proc_cnt = 0; _lock(); for (i = 0; i < nnodes; i++) { @@ -1790,7 +1789,7 @@ fed_build_jobinfo(fed_jobinfo_t *jp, hostlist_t hl, int nprocs, task_cnt = max_procs_per_node; else task_cnt = min_procs_per_node; - + for (j = 0; j < task_cnt; j++) { rc = _allocate_windows(jp->tables_per_task, jp->tableinfo, diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 237479dc699..ef19cfd929f 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -1146,16 +1146,20 @@ extern void build_node_details(struct job_record *job_ptr) } job_ptr->num_cpu_groups = 0; - job_ptr->node_cnt = bit_set_count(job_ptr->node_bitmap); + + /* Use hostlist here to insure ordering of info matches that of srun */ + if ((host_list = hostlist_create(job_ptr->nodes)) == NULL) + fatal("hostlist_create error for %s: %m", this_node_name); + + job_ptr->node_cnt = hostlist_count(host_list); + xrealloc(job_ptr->cpus_per_node, (sizeof(uint32_t) * job_ptr->node_cnt)); xrealloc(job_ptr->cpu_count_reps, (sizeof(uint32_t) * job_ptr->node_cnt)); xrealloc(job_ptr->node_addr, (sizeof(slurm_addr) * job_ptr->node_cnt)); - /* Use hostlist here to insure ordering of info matches that of srun */ - if ((host_list = hostlist_create(job_ptr->nodes)) == NULL) - fatal("hostlist_create error for %s: %m", job_ptr->nodes); + job_ptr->ntask_cnt = 0; xfree(job_ptr->ntask); @@ -1176,17 +1180,16 @@ extern void build_node_details(struct job_record *job_ptr) job_ptr->ntask[cr_count++] = usable_cpus; if(error_code != SLURM_SUCCESS) { xfree(job_ptr->ntask); - free(this_node_name); - error("Invalid node %s in JobId=%u", - this_node_name, - job_ptr->job_id); + error("Unable to get extra jobinfo " + "from JobId=%u", + job_ptr->job_id); } } else if (slurmctld_conf.fast_schedule) { usable_cpus = node_ptr->config_ptr->cpus; } else { usable_cpus = node_ptr->cpus; } - + if (usable_cpus <= 0) continue; memcpy(&job_ptr->node_addr[node_inx++], @@ -1196,11 +1199,12 @@ extern void build_node_details(struct job_record *job_ptr) usable_cpus)) { cpu_inx++; job_ptr->cpus_per_node[cpu_inx] = - usable_cpus; + usable_cpus; + 
job_ptr->cpu_count_reps[cpu_inx] = 1; } else job_ptr->cpu_count_reps[cpu_inx]++; - + } else { error("Invalid node %s in JobId=%u", this_node_name, job_ptr->job_id); @@ -1217,8 +1221,8 @@ extern void build_node_details(struct job_record *job_ptr) if ((cr_enabled) && (error_code == SLURM_SUCCESS)) { error_code = select_g_update_nodeinfo(job_ptr, SELECT_CR_USED_CPUS); if(error_code != SLURM_SUCCESS) - error("Invalid node %s in JobId=%u", - this_node_name, job_ptr->job_id); + error("Unable to update nodeinfo JobId=%u", + job_ptr->job_id); } } diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 135bb940daf..71083e81675 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -1045,7 +1045,7 @@ static void _slurm_rpc_job_step_create(slurm_msg_t * msg) /* return result */ if (error_code) { unlock_slurmctld(job_write_lock); - info("_slurm_rpc_job_step_create: %s", + error("_slurm_rpc_job_step_create: %s", slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { @@ -1053,7 +1053,7 @@ static void _slurm_rpc_job_step_create(slurm_msg_t * msg) step_rec->job_ptr->job_id, step_rec->step_id, TIME_STR); job_step_resp.job_step_id = step_rec->step_id; - job_step_resp.node_list = xstrdup(step_rec->step_node_list); + job_step_resp.node_list = xstrdup(req_step_msg->node_list); job_step_resp.cred = slurm_cred; job_step_resp.switch_job = switch_copy_jobinfo( step_rec->switch_job); @@ -1279,7 +1279,7 @@ static void _slurm_rpc_old_job_alloc(slurm_msg_t * msg) slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { - debug2("_slurm_rpc_old_job_alloc JobId=%u NodeList=%s %s", + info("_slurm_rpc_old_job_alloc JobId=%u NodeList=%s %s", job_desc_msg->job_id, job_ptr->nodes, TIME_STR); /* send job_ID and node_name_ptr */ diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c index 0b2c68d40f4..9b1b9646b35 100644 --- a/src/slurmctld/step_mgr.c +++ b/src/slurmctld/step_mgr.c @@ -405,8 +405,11 @@ _pick_step_nodes (struct job_record *job_ptr, step_spec->node_list, job_ptr->job_id); goto cleanup; } - } - else if (step_spec->relative) { + if(step_spec->task_dist == SLURM_DIST_HOSTFILE) { + FREE_NULL_BITMAP(nodes_avail); + return nodes_picked; + } + } else if (step_spec->relative) { /* Remove first (step_spec->relative) nodes from * available list */ bitstr_t *relative_nodes = NULL; @@ -420,14 +423,13 @@ _pick_step_nodes (struct job_record *job_ptr, bit_not (relative_nodes); bit_and (nodes_avail, relative_nodes); bit_free (relative_nodes); - } - else { + } else { nodes_picked = bit_alloc (bit_size (nodes_avail) ); if (nodes_picked == NULL) fatal("bit_alloc malloc failure"); } - /* if user specifies step needs a specific processor count and */ + /* istep_specs->node_listf user specifies step needs a specific processor count and */ /* all nodes have the same processor count, just translate this to */ /* a node count */ if (step_spec->cpu_count && (job_ptr->num_cpu_groups == 1)) { @@ -524,7 +526,8 @@ step_create ( job_step_create_request_msg_t *step_specs, return ESLURM_ALREADY_DONE; if ((step_specs->task_dist != SLURM_DIST_CYCLIC) && - (step_specs->task_dist != SLURM_DIST_BLOCK)) + (step_specs->task_dist != SLURM_DIST_BLOCK) && + (step_specs->task_dist != SLURM_DIST_HOSTFILE)) return ESLURM_BAD_DIST; if (job_ptr->kill_on_step_done) @@ -537,7 +540,7 @@ step_create ( job_step_create_request_msg_t *step_specs, if (nodeset == NULL) return ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE ; node_count = bit_set_count(nodeset); - + if (step_specs->num_tasks == 
NO_VAL) { if (step_specs->cpu_count != NO_VAL) step_specs->num_tasks = step_specs->cpu_count; @@ -553,7 +556,10 @@ step_create ( job_step_create_request_msg_t *step_specs, fatal ("create_step_record failed with no memory"); /* set the step_record values */ - step_ptr->step_node_list = bitmap2node_name(nodeset); + /* Here is where the node list is set for the job */ + step_ptr->step_node_list = xstrdup(step_specs->node_list); + xfree(step_specs->node_list); + step_specs->node_list = bitmap2node_name(nodeset); step_ptr->step_node_bitmap = nodeset; step_ptr->cyclic_alloc = (uint16_t) (step_specs->task_dist == SLURM_DIST_CYCLIC); diff --git a/src/srun/allocate.c b/src/srun/allocate.c index 96824b85f88..29802445b65 100644 --- a/src/srun/allocate.c +++ b/src/srun/allocate.c @@ -63,8 +63,8 @@ static void _wait_for_resources(resource_allocation_response_msg_t **resp); static bool _retry(); static void _intr_handler(int signo); -static job_step_create_request_msg_t * _step_req_create(srun_job_t *j); -static void _step_req_destroy(job_step_create_request_msg_t *r); +/* static job_step_create_request_msg_t * _step_req_create(srun_job_t *j); */ +/* static void _step_req_destroy(job_step_create_request_msg_t *r); */ static sig_atomic_t destroy_job = 0; static srun_job_t *allocate_job = NULL; @@ -462,77 +462,79 @@ job_desc_msg_destroy(job_desc_msg_t *j) } } -static job_step_create_request_msg_t * -_step_req_create(srun_job_t *j) -{ - job_step_create_request_msg_t *r = xmalloc(sizeof(*r)); - r->job_id = j->jobid; - r->user_id = opt.uid; - r->node_count = j->nhosts; - r->cpu_count = opt.overcommit ? j->nhosts - : (opt.nprocs*opt.cpus_per_task); - r->num_tasks = opt.nprocs; - r->node_list = j->nodelist; - r->network = opt.network; - r->name = opt.job_name; - r->relative = false; /* XXX fix this oneday */ - - switch (opt.distribution) { - case SRUN_DIST_UNKNOWN: - r->task_dist = (opt.nprocs <= j->nhosts) ? SLURM_DIST_CYCLIC - : SLURM_DIST_BLOCK; - break; - case SRUN_DIST_CYCLIC: - r->task_dist = SLURM_DIST_CYCLIC; - break; - default: /* (opt.distribution == SRUN_DIST_BLOCK) */ - r->task_dist = SLURM_DIST_BLOCK; - break; - } - - if (slurmctld_comm_addr.port) { - r->host = xstrdup(slurmctld_comm_addr.hostname); - r->port = slurmctld_comm_addr.port; - } - - return(r); -} - -static void -_step_req_destroy(job_step_create_request_msg_t *r) -{ - if (r) { - xfree(r->host); - xfree(r); - } -} - -int -create_job_step(srun_job_t *job) -{ - job_step_create_request_msg_t *req = NULL; - job_step_create_response_msg_t *resp = NULL; - - if (!(req = _step_req_create(job))) { - error ("Unable to allocate step request message"); - return -1; - } - if ((slurm_job_step_create(req, &resp) < 0) || (resp == NULL)) { - error ("Unable to create job step: %m"); - return -1; - } - - job->stepid = resp->job_step_id; - job->cred = resp->cred; - job->switch_job = resp->switch_job; - /* - * Recreate filenames which may depend upon step id - */ - job_update_io_fnames(job); - - _step_req_destroy(req); - return 0; -} +/* static job_step_create_request_msg_t * */ +/* _step_req_create(srun_job_t *j) */ +/* { */ +/* job_step_create_request_msg_t *r = xmalloc(sizeof(*r)); */ +/* r->job_id = j->jobid; */ +/* r->user_id = opt.uid; */ +/* r->node_count = j->nhosts; */ +/* r->cpu_count = opt.overcommit ? 
j->nhosts */ +/* : (opt.nprocs*opt.cpus_per_task); */ +/* r->num_tasks = opt.nprocs; */ +/* r->node_list = xstrdup(j->nodelist); */ +/* r->network = opt.network; */ +/* r->name = opt.job_name; */ +/* r->relative = false; /\* XXX fix this oneday *\/ */ + +/* switch (opt.distribution) { */ +/* case SRUN_DIST_UNKNOWN: */ +/* r->task_dist = (opt.nprocs <= j->nhosts) ? SLURM_DIST_CYCLIC */ +/* : SLURM_DIST_BLOCK; */ +/* break; */ +/* case SRUN_DIST_CYCLIC: */ +/* r->task_dist = SLURM_DIST_CYCLIC; */ +/* break; */ +/* default: /\* (opt.distribution == SRUN_DIST_BLOCK) *\/ */ +/* r->task_dist = SLURM_DIST_BLOCK; */ +/* break; */ +/* } */ + +/* if (slurmctld_comm_addr.port) { */ +/* r->host = xstrdup(slurmctld_comm_addr.hostname); */ +/* r->port = slurmctld_comm_addr.port; */ +/* } */ + +/* return(r); */ +/* } */ + +/* static void */ +/* _step_req_destroy(job_step_create_request_msg_t *r) */ +/* { */ +/* if (r) { */ +/* xfree(r->host); */ +/* xfree(r->node_list); */ +/* xfree(r); */ +/* } */ +/* } */ + +/* int */ +/* create_job_step(srun_job_t *job) */ +/* { */ +/* job_step_create_request_msg_t *req = NULL; */ +/* job_step_create_response_msg_t *resp = NULL; */ +/* char *temp = NULL; */ + +/* if (!(req = _step_req_create(job))) { */ +/* error ("Unable to allocate step request message"); */ +/* return -1; */ +/* } */ +/* if ((slurm_job_step_create(req, &resp) < 0) || (resp == NULL)) { */ +/* error ("Unable to create job step: %m"); */ +/* return -1; */ +/* } */ + +/* job->stepid = resp->job_step_id; */ +/* job->cred = resp->cred; */ +/* job->switch_job = resp->switch_job; */ +/* /\* */ +/* * Recreate filenames which may depend upon step id */ +/* *\/ */ +/* job_update_io_fnames(job); */ + +/* _step_req_destroy(req); */ +/* return 0; */ +/* } */ void set_allocate_job(srun_job_t *job) diff --git a/src/srun/allocate.h b/src/srun/allocate.h index 7411f7006a0..fe7e8d9ff3e 100644 --- a/src/srun/allocate.h +++ b/src/srun/allocate.h @@ -83,7 +83,7 @@ uint32_t jobid_from_env(void); * * Returns -1 if job step creation failure, 0 otherwise */ -int create_job_step(srun_job_t *j); +/* int create_job_step(srun_job_t *j); */ /* set the job for debugging purpose */ void set_allocate_job(srun_job_t *job); diff --git a/src/srun/launch.c b/src/srun/launch.c index edfe7b6ef13..15da6932c0b 100644 --- a/src/srun/launch.c +++ b/src/srun/launch.c @@ -37,6 +37,7 @@ #include "src/common/log.h" #include "src/common/macros.h" +#include "src/common/hostlist.h" #include "src/common/slurm_protocol_api.h" #include "src/common/xmalloc.h" #include "src/common/xsignal.h" @@ -105,13 +106,13 @@ launch(void *arg) slurm_msg_t *req_array_ptr; launch_tasks_request_msg_t *msg_array_ptr; srun_job_t *job = (srun_job_t *) arg; - int i, my_envc; - char hostname[MAXHOSTNAMELEN]; + int i, j, my_envc; + hostlist_t hostlist = NULL; + hostlist_iterator_t itr = NULL; + char *host = NULL; update_job_state(job, SRUN_JOB_LAUNCHING); - if (gethostname(hostname, MAXHOSTNAMELEN) < 0) - error("gethostname: %m"); - + debug("going to launch %d tasks on %d hosts", opt.nprocs, job->nhosts); debug("sending to slurmd port %d", slurm_get_slurmd_port()); @@ -119,6 +120,10 @@ launch(void *arg) xmalloc(sizeof(launch_tasks_request_msg_t)*job->nhosts); req_array_ptr = xmalloc(sizeof(slurm_msg_t) * job->nhosts); my_envc = envcount(environ); + + hostlist = hostlist_create(job->nodelist); + itr = hostlist_iterator_create(hostlist); + for (i = 0; i < job->nhosts; i++) { launch_tasks_request_msg_t *r = &msg_array_ptr[i]; slurm_msg_t *m = &req_array_ptr[i]; @@ -159,11 
+164,27 @@ launch(void *arg) r->srun_node_id = (uint32_t)i; r->io_port = ntohs(job->listenport[i%job->num_listen]); r->resp_port = ntohs(job->jaddr[i%job->njfds].sin_port); + m->msg_type = REQUEST_LAUNCH_TASKS; - m->data = &msg_array_ptr[i]; - memcpy(&m->address, &job->slurmd_addr[i], sizeof(slurm_addr)); + m->data = r; + j=0; + while(host = hostlist_next(itr)) { + if(!strcmp(host,job->host[i])) { + free(host); + break; + } + j++; + free(host); + } + hostlist_iterator_reset(itr); + debug2("using %d %s with %d tasks\n", j, job->host[i], + r->nprocs); + + memcpy(&m->address, &job->slurmd_addr[j], sizeof(slurm_addr)); } - + hostlist_iterator_destroy(itr); + hostlist_destroy(hostlist); + _p_launch(req_array_ptr, job); xfree(msg_array_ptr); @@ -313,7 +334,6 @@ static void _p_launch(slurm_msg_t *req, srun_job_t *job) thd = xmalloc (job->nhosts * sizeof (thd_t)); for (i = 0; i < job->nhosts; i++) { - if (job->ntask[i] == 0) { /* No tasks for this node */ debug("Node %s is unused",job->host[i]); job->host_state[i] = SRUN_HOST_REPLIED; @@ -451,7 +471,8 @@ static void * _p_launch_task(void *arg) if (_send_msg_rc(req) < 0) { /* Has timeout */ if (errno != EINTR) - verbose("launch error on %s: %m", job->host[nodeid]); + verbose("fisrt launch error on %s: %m", + job->host[nodeid]); if ((errno != ETIMEDOUT) && (job->state == SRUN_JOB_LAUNCHING) @@ -464,7 +485,8 @@ static void * _p_launch_task(void *arg) if (errno == EINTR) verbose("launch on %s canceled", job->host[nodeid]); else - error("launch error on %s: %m", job->host[nodeid]); + error("second launch error on %s: %m", + job->host[nodeid]); _update_failed_node(job, nodeid); diff --git a/src/srun/msg.c b/src/srun/msg.c index 07950ec28e3..61ef3d85f4c 100644 --- a/src/srun/msg.c +++ b/src/srun/msg.c @@ -265,7 +265,6 @@ _process_launch_resp(srun_job_t *job, launch_tasks_response_msg_t *msg) error ("Bad launch response from %s", msg->node_name); return; } - pthread_mutex_lock(&job->task_mutex); job->host_state[msg->srun_node_id] = SRUN_HOST_REPLIED; pthread_mutex_unlock(&job->task_mutex); @@ -368,8 +367,9 @@ _launch_handler(srun_job_t *job, slurm_msg_t *resp) launch_tasks_response_msg_t *msg = resp->data; pipe_enum_t pipe_enum = PIPE_HOST_STATE; - debug2("received launch resp from %s nodeid=%d", msg->node_name, - msg->srun_node_id); + debug3("received launch resp from %s nodeid=%d", + msg->node_name, + msg->srun_node_id); if (msg->return_code != 0) { @@ -412,8 +412,10 @@ static void _confirm_launch_complete(srun_job_t *job) { int i; - + printf("job->nhosts %d\n",job->nhosts); + for (i=0; i<job->nhosts; i++) { + printf("job->nhosts %d\n",job->nhosts); if (job->host_state[i] != SRUN_HOST_REPLIED) { error ("Node %s not responding, terminating job step", job->host[i]); @@ -656,7 +658,7 @@ _handle_msg(srun_job_t *job, slurm_msg_t *msg) uid_t uid = getuid(); srun_timeout_msg_t *to; srun_node_fail_msg_t *nf; - + if ((req_uid != slurm_uid) && (req_uid != 0) && (req_uid != uid)) { error ("Security violation, slurm message from uid %u", (unsigned int) req_uid); @@ -856,21 +858,21 @@ _msg_thr_poll(srun_job_t *job) { struct pollfd *fds; int i; - + fds = xmalloc((job->njfds + 1) * sizeof(*fds)); _set_jfds_nonblocking(job); - + for (i = 0; i < job->njfds; i++) _poll_set_rd(fds[i], job->jfd[i]); _poll_set_rd(fds[i], slurmctld_fd); - + while (!_job_msg_done(job)) { if (_do_poll(job, fds, _get_next_timeout(job)) == 0) { _do_poll_timeout(job); continue; } - + for (i = 0; i < (job->njfds + 1) ; i++) { unsigned short revents = fds[i].revents; if ((revents & POLLERR) || @@ 
-882,6 +884,7 @@ _msg_thr_poll(srun_job_t *job) } } + xfree(fds); /* if we were to break out of while loop */ } @@ -1029,7 +1032,8 @@ msg_thr_create(srun_job_t *job) for (i = 0; i < job->njfds; i++) { if ((job->jfd[i] = slurm_init_msg_engine_port(0)) < 0) fatal("init_msg_engine_port: %m"); - if (slurm_get_stream_addr(job->jfd[i], &job->jaddr[i]) + if (slurm_get_stream_addr(job->jfd[i], + &job->jaddr[i]) < 0) fatal("slurm_get_stream_addr: %m"); debug("initialized job control port %d\n", diff --git a/src/srun/opt.c b/src/srun/opt.c index 58b31c91d8a..9c3b38112f4 100644 --- a/src/srun/opt.c +++ b/src/srun/opt.c @@ -156,7 +156,7 @@ static bool _under_parallel_debugger(void); static void _usage(void); static bool _valid_node_list(char **node_list_pptr); -static enum distribution_t _verify_dist_type(const char *arg); +static enum task_dist_states _verify_dist_type(const char *arg); static bool _verify_node_count(const char *arg, int *min, int *max); static int _verify_geometry(const char *arg, int *geometry); static int _verify_conn_type(const char *arg); @@ -228,17 +228,19 @@ static bool _valid_node_list(char **node_list_pptr) /* * verify that a distribution type in arg is of a known form - * returns the distribution_t or SRUN_DIST_UNKNOWN + * returns the task_dist_states or SLURM_DIST_UNKNOWN */ -static enum distribution_t _verify_dist_type(const char *arg) +static enum task_dist_states _verify_dist_type(const char *arg) { int len = strlen(arg); - enum distribution_t result = SRUN_DIST_UNKNOWN; + enum task_dist_states result = SLURM_DIST_UNKNOWN; if (strncasecmp(arg, "cyclic", len) == 0) - result = SRUN_DIST_CYCLIC; + result = SLURM_DIST_CYCLIC; else if (strncasecmp(arg, "block", len) == 0) - result = SRUN_DIST_BLOCK; + result = SLURM_DIST_BLOCK; + else if (strncasecmp(arg, "hostfile", len) == 0) + result = SLURM_DIST_HOSTFILE; return result; } @@ -461,7 +463,7 @@ static void _opt_default() opt.dependency = NO_VAL; opt.account = NULL; - opt.distribution = SRUN_DIST_UNKNOWN; + opt.distribution = SLURM_DIST_UNKNOWN; opt.ofname = NULL; opt.ifname = NULL; @@ -607,7 +609,7 @@ static void _process_env_var(env_vars_t *e, const char *val) { char *end = NULL; - enum distribution_t dt; + enum task_dist_states dt; debug2("now processing env var %s=%s", e->var, val); @@ -637,7 +639,7 @@ _process_env_var(env_vars_t *e, const char *val) case OPT_DISTRIB: dt = _verify_dist_type(val); - if (dt == SRUN_DIST_UNKNOWN) { + if (dt == SLURM_DIST_UNKNOWN) { error("\"%s=%s\" -- invalid distribution type. " "ignoring...", e->var, val); } else @@ -933,7 +935,7 @@ void set_options(const int argc, char **argv, int first) break; opt.distribution = _verify_dist_type(optarg); - if (opt.distribution == SRUN_DIST_UNKNOWN) { + if (opt.distribution == SLURM_DIST_UNKNOWN) { error("distribution type `%s' " "is not recognized", optarg); exit(1); @@ -1605,7 +1607,7 @@ static void _opt_list() info("partition : %s", opt.partition == NULL ? "default" : opt.partition); info("job name : `%s'", opt.job_name); - info("distribution : %s", format_distribution_t(opt.distribution)); + info("distribution : %s", format_task_dist_states(opt.distribution)); info("core format : %s", core_format_name (opt.core_type)); info("verbose : %d", _verbose); info("slurmd_debug : %d", opt.slurmd_debug); diff --git a/src/srun/opt.h b/src/srun/opt.h index 6d748cd4986..2a641a0cc9b 100644 --- a/src/srun/opt.h +++ b/src/srun/opt.h @@ -62,8 +62,9 @@ enum modes { enum modes mode; -#define format_distribution_t(t) (t == SRUN_DIST_BLOCK) ? 
"block" : \ - (t == SRUN_DIST_CYCLIC) ? "cyclic" : \ +#define format_task_dist_states(t) (t == SLURM_DIST_BLOCK) ? "block" : \ + (t == SLURM_DIST_CYCLIC) ? "cyclic" : \ + (t == SLURM_DIST_HOSTFILE) ? "hostfile" : \ "unknown" enum io_t { @@ -97,7 +98,7 @@ typedef struct srun_options { bool nodes_set; /* true if nodes explicitly set */ int time_limit; /* --time, -t */ char *partition; /* --partition=n, -p n */ - enum distribution_t + enum task_dist_states distribution; /* --distribution=, -m dist */ char *job_name; /* --job-name=, -J name */ unsigned int jobid; /* --jobid=jobid */ diff --git a/src/srun/reattach.c b/src/srun/reattach.c index 0234c2f0967..729faa25183 100644 --- a/src/srun/reattach.c +++ b/src/srun/reattach.c @@ -306,8 +306,12 @@ _attach_to_job(srun_job_t *job) r->job_id = job->jobid; r->job_step_id = job->stepid; r->srun_node_id = (uint32_t) i; - r->io_port = ntohs(job->listenport[i%job->num_listen]); - r->resp_port = ntohs(job->jaddr[i%job->njfds].sin_port); + r->io_port = + ntohs(job-> + listenport[i%job->num_listen]); + r->resp_port = + ntohs(job-> + jaddr[i%job->njfds].sin_port); r->cred = job->cred; diff --git a/src/srun/srun.c b/src/srun/srun.c index 2586c94205e..7228ac0e133 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -115,7 +115,7 @@ static int _run_srun_script (srun_job_t *job, char *script); int srun(int ac, char **av) { allocation_resp *resp; - srun_job_t *job; + srun_job_t *job = NULL; char *task_cnt, *bgl_part_id = NULL; int exitcode = 0; env_t *env = xmalloc(sizeof(env_t)); @@ -190,7 +190,9 @@ int srun(int ac, char **av) info ("Warning: unable to assume uid=%lu\n", opt.uid); if (_verbose) _print_job_information(resp); - job = job_create_allocation(resp); + + job = job_create_allocation(resp); + if (msg_thr_create(job) < 0) job_fatal(job, "Unable to create msg thread"); exitcode = _run_job_script(job, env); @@ -209,11 +211,13 @@ int srun(int ac, char **av) } if (job_resp_hack_for_step(resp)) /* FIXME */ exit(1); + job = job_create_allocation(resp); + job->old_job = true; sig_setup_sigmask(); - if (create_job_step(job) < 0) - exit(1); + build_step_ctx(job); + slurm_free_resource_allocation_response_msg(resp); } else if (mode == MODE_ATTACH) { @@ -226,13 +230,11 @@ int srun(int ac, char **av) exit(1); if (_verbose) _print_job_information(resp); - - job = job_create_allocation(resp); - if (create_job_step(job) < 0) { - srun_job_destroy(job, 0); - exit(1); - } - slurm_free_resource_allocation_response_msg(resp); + + job = job_create_allocation(resp); + build_step_ctx(job); + + slurm_free_resource_allocation_response_msg(resp); } /* @@ -277,10 +279,10 @@ int srun(int ac, char **av) if (sig_thr_create(job) < 0) job_fatal(job, "Unable to create signals thread: %m"); - + if (launch_thr_create(job) < 0) - job_fatal(job, "Unable to create launch thread: %m"); - + job_fatal(job, "Unable to create launch thread: %m"); + /* wait for job to terminate */ slurm_mutex_lock(&job->state_mutex); @@ -340,7 +342,8 @@ _task_count_string (srun_job_t *job) int i, last_val, last_cnt; char tmp[16]; char *str = xstrdup (""); - + if(job->ntasks == 0) + return (str); last_val = job->ntask[0]; last_cnt = 1; for (i=1; i<job->nhosts; i++) { @@ -367,7 +370,7 @@ _task_count_string (srun_job_t *job) static void _switch_standalone(srun_job_t *job) { - int cyclic = (opt.distribution == SRUN_DIST_CYCLIC); + int cyclic = (opt.distribution == SLURM_DIST_CYCLIC); if (switch_alloc_jobinfo(&job->switch_job) < 0) fatal("switch_alloc_jobinfo: %m"); @@ -816,7 +819,7 @@ static int _run_job_script 
(srun_job_t *job, env_t *env) env->jobid = job->jobid; env->nhosts = job->nhosts; env->nodelist = job->nodelist; - env->task_count = _task_count_string (job); + //env->task_count = _task_count_string (job); } if (setup_env(env) != SLURM_SUCCESS) diff --git a/src/srun/srun_job.c b/src/srun/srun_job.c index f89c4d45918..03f77cd142b 100644 --- a/src/srun/srun_job.c +++ b/src/srun/srun_job.c @@ -52,6 +52,7 @@ #include "src/srun/fname.h" #include "src/srun/attach.h" #include "src/srun/io.h" +#include "src/srun/msg.h" /* @@ -80,8 +81,11 @@ static void _dist_cyclic(srun_job_t *job); static inline int _estimate_nports(int nclients, int cli_per_port); static int _compute_task_count(allocation_info_t *info); static void _set_nprocs(allocation_info_t *info); -static srun_job_t * _job_create_internal(allocation_info_t *info); +static srun_job_t *_job_create_internal(allocation_info_t *info); +static srun_job_t *_job_create_structure(allocation_info_t *info); static void _job_fake_cred(srun_job_t *job); +static void _job_noalloc_step_create(srun_job_t *job, + allocation_info_t *info); static int _job_resp_add_nodes(bitstr_t *req_bitmap, bitstr_t *exc_bitmap, int node_cnt); static int _job_resp_bitmap(hostlist_t resp_node_hl, char *nodelist, @@ -135,34 +139,6 @@ _dist_cyclic(srun_job_t *job) } } -/* - * Create an srun job structure from a resource allocation response msg - */ -srun_job_t * -job_create_allocation(resource_allocation_response_msg_t *resp) -{ - srun_job_t *job; - allocation_info_t *i = xmalloc(sizeof(*i)); - - i->nodelist = _normalize_hostlist(resp->node_list); - i->nnodes = resp->node_cnt; - i->jobid = resp->job_id; - i->stepid = NO_VAL; - i->num_cpu_groups = resp->num_cpu_groups; - i->cpus_per_node = resp->cpus_per_node; - i->cpu_count_reps = resp->cpu_count_reps; - i->addrs = resp->node_addr; - i->select_jobinfo = select_g_copy_jobinfo(resp->select_jobinfo); - - job = _job_create_internal(i); - - xfree(i->nodelist); - xfree(i); - - return (job); -} - - /* * Create an srun job structure w/out an allocation response msg. * (i.e. 
use the command line options) @@ -203,7 +179,8 @@ job_create_noalloc(void) /* * Create job, then fill in host addresses */ - job = _job_create_internal(ai); + job = _job_create_structure(ai); + _job_noalloc_step_create(job, ai); for (i = 0; i < job->nhosts; i++) { char *nd = get_conf_node_hostname(job->host[i]); @@ -213,13 +190,243 @@ job_create_noalloc(void) } _job_fake_cred(job); - + error: xfree(ai); return (job); } +/* + * Create an srun job structure from a resource allocation response msg + */ +extern srun_job_t * +job_create_allocation(resource_allocation_response_msg_t *resp) +{ + srun_job_t *job; + allocation_info_t *i = xmalloc(sizeof(*i)); + + i->nodelist = _normalize_hostlist(resp->node_list); + i->nnodes = resp->node_cnt; + i->jobid = resp->job_id; + i->stepid = NO_VAL; + i->num_cpu_groups = resp->num_cpu_groups; + i->cpus_per_node = resp->cpus_per_node; + i->cpu_count_reps = resp->cpu_count_reps; + i->addrs = resp->node_addr; + i->select_jobinfo = select_g_copy_jobinfo(resp->select_jobinfo); + + job = _job_create_structure(i); + + xfree(i->nodelist); + xfree(i); + + return (job); +} + +/* + * Create an srun job structure from a resource allocation response msg + */ +static srun_job_t * +_job_create_structure(allocation_info_t *info) +{ + srun_job_t *job = xmalloc(sizeof(srun_job_t)); + int i, cpu_inx, cpu_cnt; + + debug2("creating job with %d tasks", opt.nprocs); + + slurm_mutex_init(&job->state_mutex); + pthread_cond_init(&job->state_cond, NULL); + job->state = SRUN_JOB_INIT; + + job->nodelist = xstrdup(info->nodelist); + job->stepid = info->stepid; + +#ifdef HAVE_FRONT_END /* Limited job step support */ + /* All jobs execute through front-end on Blue Gene/L. + * Normally we would not permit execution of job steps, + * but can fake it by just allocating all tasks to + * one of the allocated nodes. */ + job->nhosts = 1; + opt.overcommit = true; +#else + job->nhosts = info->nnodes; +#endif + + job->select_jobinfo = info->select_jobinfo; + job->jobid = info->jobid; + + job->task_prolog = xstrdup(opt.task_prolog); + job->task_epilog = xstrdup(opt.task_epilog); + /* Compute number of file descriptors / Ports needed for Job + * control info server + */ + job->njfds = _estimate_nports(opt.nprocs, 48); + debug3("njfds = %d", job->njfds); + job->jfd = (slurm_fd *) + xmalloc(job->njfds * sizeof(slurm_fd)); + job->jaddr = (slurm_addr *) + xmalloc(job->njfds * sizeof(slurm_addr)); + /* Compute number of listening sockets needed to allow + * all of the slurmds to establish IO streams with srun, without + * overstressing the TCP/IP backoff/retry algorithm + */ + job->num_listen = _estimate_nports(opt.nprocs, 64); + job->listensock = (int *) + xmalloc(job->num_listen * sizeof(int)); + job->listenport = (int *) + xmalloc(job->num_listen * sizeof(int)); + + job->hostid = xmalloc(opt.nprocs * sizeof(uint32_t)); + + slurm_mutex_init(&job->task_mutex); + + job->old_job = false; + job->removed = false; + job->signaled = false; + job->rc = -1; + + /* + * Initialize Launch and Exit timeout values + */ + job->ltimeout = 0; + job->etimeout = 0; + + + job->eio = eio_handle_create(); + job->ioservers_ready = 0; + /* "nhosts" number of IO protocol sockets */ + job->ioserver = (eio_obj_t **)xmalloc(job->nhosts*sizeof(eio_obj_t *)); + + job->slurmd_addr = xmalloc(job->nhosts * sizeof(slurm_addr)); + if (info->addrs) + memcpy( job->slurmd_addr, info->addrs, + sizeof(slurm_addr)*job->nhosts); + + job->free_incoming = list_create(NULL); /* FIXME! 
Needs destructor */ + for (i = 0; i < STDIO_MAX_FREE_BUF; i++) { + list_enqueue(job->free_incoming, alloc_io_buf()); + } + job->free_outgoing = list_create(NULL); /* FIXME! Needs destructor */ + for (i = 0; i < STDIO_MAX_FREE_BUF; i++) { + list_enqueue(job->free_outgoing, alloc_io_buf()); + } + + /* ntask task states and statii*/ + job->task_state = xmalloc(opt.nprocs * sizeof(srun_task_state_t)); + job->tstatus = xmalloc(opt.nprocs * sizeof(int)); + job->free_incoming = list_create(NULL); /* FIXME! Needs destructor */ + job->incoming_count = 0; + for (i = 0; i < STDIO_MAX_FREE_BUF; i++) { + list_enqueue(job->free_incoming, alloc_io_buf()); + } + job->free_outgoing = list_create(NULL); /* FIXME! Needs destructor */ + job->outgoing_count = 0; + for (i = 0; i < STDIO_MAX_FREE_BUF; i++) { + list_enqueue(job->free_outgoing, alloc_io_buf()); + } + + job_update_io_fnames(job); + + return (job); + + +} + +extern int build_step_ctx(srun_job_t *job) +{ + job_step_create_request_msg_t *r = NULL; + uint32_t step_id; + int i; + char *temp = NULL; + r = xmalloc(sizeof(job_step_create_request_msg_t)); + if (r == NULL) { + error("calloc error"); + return -1; + } + r->job_id = job->jobid; + r->user_id = opt.uid; + r->node_count = job->nhosts; + /* Processor count not relevant to poe */ + r->cpu_count = job->nhosts; + r->num_tasks = opt.nprocs; + r->node_list = xstrdup(job->nodelist); + switch (opt.distribution) { + case SLURM_DIST_UNKNOWN: + r->task_dist = (opt.nprocs <= job->nhosts) + ? SLURM_DIST_CYCLIC : SLURM_DIST_BLOCK; + break; + case SLURM_DIST_CYCLIC: + r->task_dist = SLURM_DIST_CYCLIC; + break; + case SLURM_DIST_HOSTFILE: + r->task_dist = SLURM_DIST_HOSTFILE; + break; + default: /* (opt.distribution == SLURM_DIST_BLOCK) */ + r->task_dist = SLURM_DIST_BLOCK; + break; + } + + r->network = xstrdup(opt.network); + if (slurmctld_comm_addr.port) { + r->host = xstrdup(slurmctld_comm_addr.hostname); + r->port = slurmctld_comm_addr.port; + } + job->step_ctx = slurm_step_ctx_create(r); + if (job->step_ctx == NULL) { + error("slurm_step_ctx_create: %s", + slurm_strerror(slurm_get_errno())); + return -1; + } + + if (slurm_step_ctx_get(job->step_ctx, SLURM_STEP_CTX_NHOSTS, + &job->nhosts) != SLURM_SUCCESS) { + error("unable to get nhosts from ctx"); + } + /* nhost host states */ + job->host_state = xmalloc(job->nhosts * sizeof(srun_host_state_t)); + + if (slurm_step_ctx_get(job->step_ctx, SLURM_STEP_CTX_CPUS, + &job->cpus) != SLURM_SUCCESS) { + error("unable to get hosts from ctx"); + } + + if (slurm_step_ctx_get(job->step_ctx, SLURM_STEP_CTX_STEPID, + &job->stepid) != SLURM_SUCCESS) { + error("unable to get step id from ctx"); + } + if (slurm_step_ctx_get(job->step_ctx, SLURM_STEP_CTX_TASKS, + &job->ntask) != SLURM_SUCCESS) { + error("unable to get step id from ctx"); + } + job->tids = xmalloc(job->nhosts * sizeof(uint32_t *)); + job->host = xmalloc(job->nhosts * sizeof(char *)); + for(i=0;i<job->nhosts;i++) { + if (slurm_step_ctx_get(job->step_ctx, + SLURM_STEP_CTX_TID, i, + &job->tids[i]) != SLURM_SUCCESS) { + error("unable to get task id %d from ctx",i); + } + if (slurm_step_ctx_get(job->step_ctx, + SLURM_STEP_CTX_HOST, i, + &temp) != SLURM_SUCCESS) { + error("unable to get host %d from ctx", i); + } else + job->host[i] = xstrdup(temp); + } + if (slurm_step_ctx_get(job->step_ctx, + SLURM_STEP_CTX_CRED, + &job->cred) != SLURM_SUCCESS) { + error("unable to get cred from ctx"); + } + if (slurm_step_ctx_get(job->step_ctx, + SLURM_STEP_CTX_SWITCH_JOB, + &job->switch_job) != SLURM_SUCCESS) { + error("unable to 
get switch_job from ctx"); + } + slurm_free_job_step_create_request_msg(r); + job_update_io_fnames(job); +} void update_job_state(srun_job_t *job, srun_job_state_t state) @@ -414,99 +621,37 @@ _set_nprocs(allocation_info_t *info) } } -static srun_job_t * -_job_create_internal(allocation_info_t *info) +void +job_update_io_fnames(srun_job_t *job) { - int i; - int cpu_cnt = 0; - int cpu_inx = 0; - hostlist_t hl; - srun_job_t *job; - eio_obj_t *obj; - - /* Reset nprocs if necessary - */ - _set_nprocs(info); - - debug2("creating job with %d tasks", opt.nprocs); - - job = xmalloc(sizeof(*job)); - - slurm_mutex_init(&job->state_mutex); - pthread_cond_init(&job->state_cond, NULL); - job->state = SRUN_JOB_INIT; + job->ifname = fname_create(job, opt.ifname); + job->ofname = fname_create(job, opt.ofname); + job->efname = opt.efname ? fname_create(job, opt.efname) : job->ofname; +} - job->signaled = false; - job->rc = -1; +static void +_job_fake_cred(srun_job_t *job) +{ + slurm_cred_arg_t arg; + arg.jobid = job->jobid; + arg.stepid = job->stepid; + arg.uid = opt.uid; + arg.hostlist = job->nodelist; + arg.ntask_cnt = 0; + arg.ntask = NULL; + job->cred = slurm_cred_faker(&arg); +} - job->nodelist = xstrdup(info->nodelist); +static void +_job_noalloc_step_create(srun_job_t *job, allocation_info_t *info) +{ + int i=0, cpu_inx=0, cpu_cnt=0; + hostlist_t hl; hl = hostlist_create(job->nodelist); -#ifdef HAVE_FRONT_END /* Limited job step support */ - /* All jobs execute through front-end on Blue Gene/L. - * Normally we would not permit execution of job steps, - * but can fake it by just allocating all tasks to - * one of the allocated nodes. */ - job->nhosts = 1; - opt.overcommit = true; -#else - job->nhosts = hostlist_count(hl); -#endif - - job->select_jobinfo = info->select_jobinfo; - job->jobid = info->jobid; - job->stepid = info->stepid; - job->old_job = false; - job->removed = false; - - /* - * Initialize Launch and Exit timeout values - */ - job->ltimeout = 0; - job->etimeout = 0; - - job->slurmd_addr = xmalloc(job->nhosts * sizeof(slurm_addr)); - if (info->addrs) - memcpy( job->slurmd_addr, info->addrs, - sizeof(slurm_addr)*job->nhosts); job->host = (char **) xmalloc(job->nhosts * sizeof(char *)); job->cpus = (int *) xmalloc(job->nhosts * sizeof(int) ); - /* Compute number of file descriptors / Ports needed for Job - * control info server - */ - job->njfds = _estimate_nports(opt.nprocs, 48); - job->jfd = (slurm_fd *) xmalloc(job->njfds * sizeof(slurm_fd)); - job->jaddr = (slurm_addr *) xmalloc(job->njfds * sizeof(slurm_addr)); - - debug3("njfds = %d", job->njfds); - - /* Compute number of listening sockets needed to allow - * all of the slurmds to establish IO streams with srun, without - * overstressing the TCP/IP backoff/retry algorithm - */ - job->num_listen = _estimate_nports(opt.nprocs, 64); - job->listensock = (int *) xmalloc(job->num_listen * sizeof(int)); - job->listenport = (int *) xmalloc(job->num_listen * sizeof(int)); - - job->eio = eio_handle_create(); - job->ioservers_ready = 0; - /* "nhosts" number of IO protocol sockets */ - job->ioserver = (eio_obj_t **)xmalloc(job->nhosts*sizeof(eio_obj_t *)); - job->free_incoming = list_create(NULL); /* FIXME! Needs destructor */ - job->incoming_count = 0; - job->free_outgoing = list_create(NULL); /* FIXME! 
Needs destructor */ - job->outgoing_count = 0; - - /* nhost host states */ - job->host_state = xmalloc(job->nhosts * sizeof(srun_host_state_t)); - - /* ntask task states and statii*/ - job->task_state = xmalloc(opt.nprocs * sizeof(srun_task_state_t)); - job->tstatus = xmalloc(opt.nprocs * sizeof(int)); - - slurm_mutex_init(&job->task_mutex); - for(i = 0; i < job->nhosts; i++) { job->host[i] = hostlist_shift(hl); @@ -517,7 +662,9 @@ _job_create_internal(allocation_info_t *info) cpu_cnt = 0; } } - + /* nhost host states */ + job->host_state = xmalloc(job->nhosts * sizeof(srun_host_state_t)); + job->hostid = xmalloc(opt.nprocs * sizeof(uint32_t)); #ifdef HAVE_FRONT_END job->ntask = (int *) xmalloc(sizeof(int *)); job->ntask[0] = opt.nprocs; @@ -540,14 +687,14 @@ _job_create_internal(allocation_info_t *info) for (i = 0; i < job->nhosts; i++) job->tids[i] = xmalloc(job->ntask[i] * sizeof(uint32_t)); - if (opt.distribution == SRUN_DIST_UNKNOWN) { + if (opt.distribution == SLURM_DIST_UNKNOWN) { if (opt.nprocs <= job->nhosts) - opt.distribution = SRUN_DIST_CYCLIC; + opt.distribution = SLURM_DIST_CYCLIC; else - opt.distribution = SRUN_DIST_BLOCK; + opt.distribution = SLURM_DIST_BLOCK; } - if (opt.distribution == SRUN_DIST_BLOCK) + if (opt.distribution == SLURM_DIST_BLOCK) _dist_block(job); else _dist_cyclic(job); @@ -555,33 +702,9 @@ _job_create_internal(allocation_info_t *info) job_update_io_fnames(job); hostlist_destroy(hl); - - return job; -} - -void -job_update_io_fnames(srun_job_t *job) -{ - job->ifname = fname_create(job, opt.ifname); - job->ofname = fname_create(job, opt.ofname); - job->efname = opt.efname ? fname_create(job, opt.efname) : job->ofname; -} - -static void -_job_fake_cred(srun_job_t *job) -{ - slurm_cred_arg_t arg; - arg.jobid = job->jobid; - arg.stepid = job->stepid; - arg.uid = opt.uid; - arg.hostlist = job->nodelist; - arg.ntask_cnt = 0; - arg.ntask = NULL; - job->cred = slurm_cred_faker(&arg); + return; } - - static char * _task_state_name(srun_task_state_t state_inx) { @@ -1021,3 +1144,4 @@ _normalize_hostlist(const char *hostlist) return xstrdup(buf); } + diff --git a/src/srun/srun_job.h b/src/srun/srun_job.h index 14af524a8ce..7c3ab8eb587 100644 --- a/src/srun/srun_job.h +++ b/src/srun/srun_job.h @@ -116,14 +116,10 @@ typedef struct srun_job { pthread_t sigid; /* signals thread tid */ pthread_t jtid; /* job control thread id */ - int njfds; /* number of job control info fds */ slurm_fd *jfd; /* job control info fd */ - slurm_addr *jaddr; /* job control info ports */ - + pthread_t ioid; /* stdio thread id */ - int num_listen; /* Number of stdio listen sockets */ int *listensock; /* Array of stdio listen sockets */ - int *listenport; /* Array of stdio listen ports */ eio_handle_t *eio; /* Event IO handle */ int ioservers_ready; /* Number of servers that established contact */ eio_obj_t **ioserver; /* Array of nhosts pointers to eio_obj_t */ @@ -156,16 +152,24 @@ typedef struct srun_job { int *tstatus; /* ntask exit statii */ srun_task_state_t *task_state; /* ntask task states */ - pthread_mutex_t task_mutex; - + switch_jobinfo_t switch_job; io_filename_t *ifname; io_filename_t *ofname; io_filename_t *efname; + forked_msg_t *forked_msg; + struct slurm_step_ctx_struct *step_ctx; + char *task_epilog; /* task-epilog */ + char *task_prolog; /* task-prolog */ + pthread_mutex_t task_mutex; + int njfds; /* number of job control info fds */ + slurm_addr *jaddr; /* job control info ports */ + int num_listen; /* Number of stdio listen sockets */ + int *listenport; /* Array of stdio 
listen ports */ /* Output streams and stdin fileno */ - forked_msg_t *forked_msg; select_jobinfo_t select_jobinfo; + } srun_job_t; extern int message_thread; @@ -175,8 +179,12 @@ void job_force_termination(srun_job_t *job); srun_job_state_t job_state(srun_job_t *job); -srun_job_t * job_create_noalloc(void); -srun_job_t * job_create_allocation(resource_allocation_response_msg_t *resp); +extern srun_job_t * job_create_noalloc(void); +extern srun_job_t * job_create_allocation( + resource_allocation_response_msg_t *resp); +extern srun_job_t * job_create_structure( + resource_allocation_response_msg_t *resp); +extern int build_step_ctx(srun_job_t *job); /* * Update job filenames and modes for stderr, stdout, and stdin. diff --git a/testsuite/expect/test1.47 b/testsuite/expect/test1.47 index 69933d6787d..36927c0bafb 100755 --- a/testsuite/expect/test1.47 +++ b/testsuite/expect/test1.47 @@ -79,7 +79,7 @@ if {$matches != 1} { set exit_code 1 } -exec $bin_rm -f $file_in $file_out +exec $bin_rm -f $file_in exec echo "#!$bin_bash" >$file_in exec echo "#SLURM -N650000" >>$file_in exec echo "$bin_sleep $delay" >>$file_in @@ -108,8 +108,8 @@ expect { # Post-processing # if {$exit_code == 0} { - wait_for_file $file_out - exec $bin_rm -f $file_in $file_out + exec $bin_rm -f $file_in + exec $bin_rm -f $file_out send_user "\nSUCCESS\n" } exit $exit_code diff --git a/testsuite/expect/test1.51 b/testsuite/expect/test1.51 index fbe6e45c6e2..13dd4109697 100755 --- a/testsuite/expect/test1.51 +++ b/testsuite/expect/test1.51 @@ -63,7 +63,7 @@ exec $bin_chmod 700 $file_in $file_script set matches 0 spawn $file_script expect { - -re 0123 { + -re (0123|123) { set matches 1 exp_continue } diff --git a/testsuite/expect/test1.81 b/testsuite/expect/test1.81 index 2da3e8a990e..93c91b098f6 100755 --- a/testsuite/expect/test1.81 +++ b/testsuite/expect/test1.81 @@ -45,7 +45,28 @@ set host_0 "" set task_cnt 0 set can_not_run 0 set timeout $max_job_delay -spawn $srun -N1-1 -c 1 -l -t1 $bin_hostname -s + +# +# Determine if this is AIX (for appropriate build line) +# +set aix 0 +spawn $bin_uname +expect { + -re "AIX" { + set aix 1 + exp_continue + } + eof { + wait + } +} + +if {$aix == 0} { + set run_hostname "$bin_hostname -s" +} else { + set run_hostname "$bin_hostname" +} +spawn $srun -N1-1 -c 1 -l -t1 $run_hostname expect { -re "Unable to create job step: Task count specification invalid" { send_user "\nWARNING: This is not a real error for some system configurations\n" @@ -76,7 +97,7 @@ if {[string compare $host_0 ""] == 0} { set alloc_fail 0 set task_cnt2 0 -spawn $srun -N1-1 -w $host_0 -n [expr $task_cnt + 1] -l -t1 $bin_hostname -s +spawn $srun -N1-1 -w $host_0 -n [expr $task_cnt + 1] -l -t1 $run_hostname expect { -re "($number):" { incr task_cnt2 @@ -105,7 +126,7 @@ if { $task_cnt2 != 0 } { # set host_0 "" set host_1 "" -spawn $srun -N1-1 -l -t1 $bin_hostname -s +spawn $srun -N1-1 -l -t1 $run_hostname expect { -re "($number): ($alpha_numeric)" { if {$expect_out(1,string) == 0} { @@ -150,7 +171,7 @@ set host_1 "" set host_2 "" set host_3 "" set timeout $max_job_delay -spawn $srun -N1-3 -l -t1 $bin_hostname -s +spawn $srun -N1-3 -l -t1 $run_hostname expect { -re "($number): ($alpha_numeric)" { if {$expect_out(1,string) == 0} { @@ -212,7 +233,7 @@ set host_1 "" set host_2 "" set host_3 "" set timeout $max_job_delay -spawn $srun -N2-3 -l -t1 $bin_hostname -s +spawn $srun -N2-3 -l -t1 $run_hostname expect { -re "More ($alpha) requested than permitted" { send_user "\nWARNING: can't test srun task distribution\n" diff --git 
a/testsuite/expect/test9.8 b/testsuite/expect/test9.8 index 8e45621b805..11469408497 100755 --- a/testsuite/expect/test9.8 +++ b/testsuite/expect/test9.8 @@ -113,6 +113,7 @@ if {$start_cnt < $job_cnt} { # then kill them all # set user_name "" + exec $bin_sleep [expr $delay + 5] spawn $bin_id -un expect { @@ -122,6 +123,7 @@ expect { eof { wait } + } # # There could be hundreds of job steps, we don't want to see @@ -145,12 +147,16 @@ expect { } log_user 0 set matches 0 +set timeout 60 spawn $squeue --steps --user $user_name expect { -re "sleep" { incr matches exp_continue } + timeout { + send_user "\nFAILURE: squeue not responding\n" + } eof { wait } -- GitLab
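Usage sketch (not part of the patch): the example below shows how a client of this tree's step-context API could drive the new hostfile-ordered layout added above. It assumes the slurm.h generated from the slurm.h.in change exposes slurm_step_ctx, slurm_step_ctx_create() and slurm_step_ctx_get() as used in src/api/spawn.c and srun's build_step_ctx(), plus a matching slurm_step_ctx_destroy() for cleanup (an assumption); the job id, counts, and the /tmp/my_hosts path are placeholders. With MP_HOSTFILE set, slurm_job_step_create() (called from slurm_step_ctx_create()) replaces the request's node_list, switches task_dist to SLURM_DIST_HOSTFILE, and the per-node hosts and task ids queried afterwards come back in the file's order.

/* hostfile_layout.c - illustrative only; values marked "placeholder"
 * are hypothetical and must refer to a real, existing allocation.     */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <slurm/slurm.h>

int main(void)
{
	job_step_create_request_msg_t req;
	slurm_step_ctx ctx;
	uint32_t nhosts = 0, *tasks = NULL, *tids = NULL;
	uint32_t i, t;
	char *host = NULL;

	/* Ordered host list, one hostname per line (placeholder path). */
	setenv("MP_HOSTFILE", "/tmp/my_hosts", 1);

	memset(&req, 0, sizeof(req));
	req.job_id     = 1234;		/* placeholder: existing allocation */
	req.user_id    = getuid();
	req.node_count = 2;		/* placeholder sizes */
	req.cpu_count  = 2;
	req.num_tasks  = 4;
	/* slurm_job_step_create() also forces this when MP_HOSTFILE is set */
	req.task_dist  = SLURM_DIST_HOSTFILE;

	if ((ctx = slurm_step_ctx_create(&req)) == NULL) {
		fprintf(stderr, "slurm_step_ctx_create: %s\n",
			slurm_strerror(slurm_get_errno()));
		return 1;
	}

	/* Layout is reported per node, in the order given by the hostfile. */
	slurm_step_ctx_get(ctx, SLURM_STEP_CTX_NHOSTS, &nhosts);
	slurm_step_ctx_get(ctx, SLURM_STEP_CTX_TASKS,  &tasks);
	for (i = 0; i < nhosts; i++) {
		slurm_step_ctx_get(ctx, SLURM_STEP_CTX_HOST, i, &host);
		slurm_step_ctx_get(ctx, SLURM_STEP_CTX_TID,  i, &tids);
		printf("node %u (%s): %u task(s):", i, host, tasks[i]);
		for (t = 0; t < tasks[i]; t++)
			printf(" %u", tids[t]);
		printf("\n");
	}

	slurm_step_ctx_destroy(ctx);
	return 0;
}

The same environment variable drives srun: exporting MP_HOSTFILE before launching makes srun's build_step_ctx() pick up the hostfile ordering through this same slurm_step_ctx_create() path, which is the behavior described in the NEWS entry.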