From 386397bd04b247432ac06a2e553fc03af656c8a0 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Tue, 3 Dec 2002 00:00:31 +0000 Subject: [PATCH] Make srun verbose messages more useful (task numbers by host reported along with step id) --- src/srun/io.c | 4 ++-- src/srun/launch.c | 41 ++++++++++++++++++++++++++++++++--------- src/srun/srun.c | 7 +++---- 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/src/srun/io.c b/src/srun/io.c index 3544534fe2f..5f7aa0b90b3 100644 --- a/src/srun/io.c +++ b/src/srun/io.c @@ -404,7 +404,7 @@ _accept_io_stream(job_t *job, int i) int len = size_io_stream_header(); int j; int fd = job->iofd[i]; - verbose("Activity on IO server port %d fd %d", i, fd); + debug("Activity on IO server port %d fd %d", i, fd); for (j = 0; j < 15; i++) { int sd, size_read; @@ -470,7 +470,7 @@ _accept_io_stream(job_t *job, int i) else job->err[hdr.task_id] = sd; - verbose("accepted %s connection from %s task %ld, sd=%d", + debug("accepted %s connection from %s task %ld, sd=%d", (hdr.type ? "stderr" : "stdout"), buf, hdr.task_id, sd ); } diff --git a/src/srun/launch.c b/src/srun/launch.c index 986864430e3..2bd9df7f4d0 100644 --- a/src/srun/launch.c +++ b/src/srun/launch.c @@ -66,7 +66,8 @@ static void _dist_block(job_t *job, uint32_t **task_ids); static void _dist_cyclic(job_t *job, uint32_t **task_ids); static void _p_launch(slurm_msg_t *req_array_ptr, job_t *job); static void * _p_launch_task(void *args); -static void _print_launch_msg(launch_tasks_request_msg_t *msg); +static void _print_launch_msg(launch_tasks_request_msg_t *msg, + char * hostname); static int _envcount(char **env); static void @@ -261,8 +262,8 @@ static void * _p_launch_task(void *args) int host_inx = msg_ptr->srun_node_id; int failure = 0; - debug3("launching on host %s", job_ptr->host[host_inx]); - _print_launch_msg(msg_ptr); + if (_verbose || _debug) + _print_launch_msg(msg_ptr, job_ptr->host[host_inx]); if (slurm_send_only_node_msg(req_ptr) < 0) { /* Has timeout */ error("task launch error on %s: %m", job_ptr->host[host_inx]); pthread_mutex_lock(&job_ptr->task_mutex); @@ -288,13 +289,35 @@ static void * _p_launch_task(void *args) } -static void _print_launch_msg(launch_tasks_request_msg_t *msg) +static void +_print_launch_msg(launch_tasks_request_msg_t *msg, char * hostname) { - debug3("%d.%d uid:%ld n:%ld cwd:%s %d [%d-%d]", - msg->job_id, msg->job_step_id, (long) msg->uid, - (long) msg->tasks_to_launch, msg->cwd, - msg->srun_node_id, msg->global_task_ids[0], - msg->global_task_ids[msg->tasks_to_launch-1]); + int i; + char tmp_str[10], task_list[4096]; + + if (opt.distribution == SRUN_DIST_BLOCK) { + sprintf(task_list, "%u-%u", + msg->global_task_ids[0], + msg->global_task_ids[(msg->tasks_to_launch-1)]); + } else { + for (i=0; i<msg->tasks_to_launch; i++) { + sprintf(tmp_str, ",%u", msg->global_task_ids[i]); + if (i == 0) + strcpy(task_list, &tmp_str[1]); + else if ((strlen(tmp_str) + strlen(task_list)) < + sizeof(task_list)) + strcat(task_list, tmp_str); + else + break; + } + } + + printf("launching %u.%u on host %s, %u tasks: %s\n", + msg->job_id, msg->job_step_id, hostname, + msg->tasks_to_launch, task_list); + + debug3("uid:%ld cwd:%s %d", + (long) msg->uid, msg->cwd, msg->srun_node_id); } static int diff --git a/src/srun/srun.c b/src/srun/srun.c index 226ad006ce4..c484220c8e2 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -175,18 +175,17 @@ main(int ac, char **av) if ( !(resp = _allocate_nodes()) ) exit(1); job = job_create(resp); - if (_verbose || _debug) + if (_debug) _print_job_information(resp); _run_job_script(resp->job_id); slurm_complete_job(resp->job_id, 0, 0); - if (_verbose || _debug) - info ("Spawned srun shell terminated"); + debug ("Spawned srun shell terminated"); exit (0); } else { if ( !(resp = _allocate_nodes()) ) exit(1); - if (_verbose || _debug) + if (_debug) _print_job_information(resp); job = job_create(resp); -- GitLab