diff --git a/NEWS b/NEWS
index e1bf5638181ad40d62f556091242396f2626365a..c2102e368da10a02f4580f2580372e8c2f24b425 100644
--- a/NEWS
+++ b/NEWS
@@ -12,6 +12,7 @@ documents those changes that are of interest to users and admins.
  -- Don't strip binaries, breaks parallel debuggers
  -- Fix bug in Munge authentication retry logic
  -- Change srun handling of interupts to work properly with TotalView
+ -- Added "reason" field to job info showing why a job is waiting to run
 
 * Changes in SLURM 0.3.7
 ========================
diff --git a/doc/man/man1/squeue.1 b/doc/man/man1/squeue.1
index 75d203dca93f5dfbb36e27889e98eb6d6564c17f..8a6052bfcf1b645a3a5fa558341e59391b047b37 100644
--- a/doc/man/man1/squeue.1
+++ b/doc/man/man1/squeue.1
@@ -55,10 +55,10 @@ various options are
 .RS
 .TP 15
 .I "default"
-"%.7i %.9P %.8j %.8u %.2t %.9M %.6D %N"
+"%.7i %.9P %.8j %.8u %.2t %.9M %.6D %R"
 .TP
 .I "-l, --long"
-".7i %.9P %.8j %.8u %.8T %.9M %.9l %.6D %N"
+"%.7i %.9P %.8j %.8u %.8T %.9M %.9l %.6D %R"
 .TP
 .I "-s, --steps"
 "%10i %.9P %.8u %.9M %N"
@@ -147,17 +147,30 @@ Priority of the job (converted to a floating point number between 0.0 and 1.0
 \fB%P\fR
 Partition of the job or job step
 .TP
+\fB%r\fR
+The reason a job is waiting for execution.
+See the \fBJOB REASON CODES\fR section below for more information.
+.TP
+\fB%R\fR
+For running or completed jobs: the list of allocated nodes.
+For pending jobs: the reason a job is waiting for execution
+is printed within parentheses.
+See the \fBJOB REASON CODES\fR section below for more information.
+.TP
 \fB%S\fR
 Start time of the job or job step
 .TP
 \fB%t\fR
 Job state, compact form:
-PD (pending), R (running), CG (completing), CD (completed), F (failed),
-TO (timeout), and NF (node failure).
+PD (pending), R (running), CA (cancelled), CG (completing), CD (completed),
+F (failed), TO (timeout), and NF (node failure).
+See the \fBJOB STATE CODES\fR section below for more information.
 .TP
 \fB%T\fR
 Job state, extended form:
-PENDING, RUNNING, CANCELLED, COMPLETING, COMPLETED, FAILED, TIMEOUT, and NODE_FAIL.
+PENDING, RUNNING, CANCELLED, COMPLETING, COMPLETED, FAILED, TIMEOUT,
+and NODE_FAIL.
+See the \fBJOB STATE CODES\fR section below for more information.
 .TP
 \fB%u\fR
 User name
@@ -210,6 +223,7 @@ jobs are reported. Valid states (in both extended and compact form) include:
 PENDING (PD), RUNNING (R), COMPLETING (CG), COMPLETED (CD),
 CANCELLED (CA), FAILED (F), TIMEOUT (TO), and NODE_FAIL (NF).
 Note the \fB<state_list>\fR supplied is case insensitve ("pd" and "PD" work the same).
+See the \fBJOB STATE CODES\fR section below for more information.
 .TP
 \fB\-u <user_list>\fR, \fB\-\-user=<user_list>\fR
 Specifies a comma separated list of users whose jobs or job steps are to be
@@ -221,8 +235,39 @@ Report details of squeues actions.
 \fB\-V\fR , \fB\-\-version\fR
 Print version information and exit.
 
+.SH "JOB REASON CODES"
+These codes identify the reason that a job is waiting for execution.
+A job may be waiting for more than one reason, in which case only
+one of those reasons is displayed.
+.TP 20
+Dependency
+This job is waiting for a dependent job to complete.
+.TP
+None
+No reason is set for this job.
+.TP
+PartitionDown
+The partition required by this job is in a DOWN state.
+.TP
+PartitionNodeLimit
+The number of nodes required by this job is outside of its
+partition's current limits.
+.TP
+PartitionTimeLimit
+The job's time limit exceeds its partition's current time limit.
+.TP
+Priority
+One or more higher priority jobs exist for this partition.
+.TP
+Resources
+The job is waiting for resources to become available.
+
 .SH "JOB STATE CODES"
-.TP 17
+Jobs typically pass through several states in the course of their
+execution.
+The typical states are PENDING, RUNNING, COMPLETING, and COMPLETED.
+An explanation of each state follows.
+.TP 20
 CA CANCELLED
 Job was explicitly cancelled by the user or system administrator.
 The job may or may not have been initiated.
@@ -242,6 +287,9 @@ Job terminated due to failure of one or more allocated nodes.
 PD PENDING
 Job is awaiting resource allocation.
 .TP
+R RUNNING
+Job currently has an allocation.
+.TP
 TO TIMEOUT
 Job terminated upon reaching its time limit.
 
@@ -291,7 +339,7 @@ Print the job steps in the debug partition sorted by user:
 .br
 # squeue -s -p debug -S u
 .br
- STEPID PARTITION USER TIME_USED NODELIST
+ STEPID PARTITION USER TIME_USED NODELIST(REASON)
 .br
  65552.1 debug alice 0:23 dev[1-4]
 .br
@@ -301,15 +349,17 @@
 .ec
 .eo
 
-Print information only about jobs 12345 and 12346:
+Print information only about jobs 12345, 12346, and 12348:
 .br
-# squeue --jobs 12345,12346
+# squeue --jobs 12345,12346,12348
 .br
- JOBID PARTITION NAME USER ST TIME_USED NODES NODELIST
+ JOBID PARTITION NAME USER ST TIME_USED NODES NODELIST(REASON)
 .br
  12345 debug job1 dave R 0:21 4 dev[9-12]
 .br
- 12346 debug job2 dave PD 0:00 8
+ 12346 debug job2 dave PD 0:00 8 (Resources)
+.br
+ 12348 debug job3 ed PD 0:00 4 (Priority)
 .ec
 .eo
 
@@ -317,7 +367,7 @@ Print information only about job step 65552.1:
 .br
 # squeue --steps 65552.1
 .br
- STEPID PARTITION USER TIME_USED NODELIST
+ STEPID PARTITION USER TIME_USED NODELIST(REASON)
 .br
  65552.1 debug alice 12:49 dev[1-4]
 .ec
diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in
index a1016b445814069bdd3da516f401bfb759bd8369..4a33cad7da757946364cab841498399153fe92f0 100644
--- a/slurm/slurm.h.in
+++ b/slurm/slurm.h.in
@@ -133,6 +133,18 @@ enum job_states {
 };
 #define JOB_COMPLETING (0x8000)
 
+/* Reason for job to be pending rather than executing. If multiple reasons
+ * exist, only one is given for the sake of system efficiency */
+enum job_wait_reason {
+	WAIT_NO_REASON = 0,	/* not set or job not pending */
+	WAIT_PRIORITY,		/* higher priority jobs exist */
+	WAIT_DEPENDENCY,	/* dependent job has not completed */
+	WAIT_RESOURCES,		/* required resources not available */
+	WAIT_PART_NODE_LIMIT,	/* request exceeds partition node limit */
+	WAIT_PART_TIME_LIMIT,	/* request exceeds partition time limit */
+	WAIT_PART_STATE		/* requested partition is down */
+};
+
 /* Possible task distributions across the nodes */
 enum task_dist_states {
 	SLURM_DIST_CYCLIC,	/* distribute tasks 1 per node, round robin */
@@ -279,6 +291,8 @@ typedef struct job_info {
 	char *features;		/* comma separated list of required features */
 	uint32_t dependency;	/* defer until specified job completes */
 	char *account;		/* charge to specified account */
+	uint16_t wait_reason;	/* reason job still pending, see
+				 * slurm.h:enum job_wait_reason */
 } job_info_t;
 
 typedef struct job_info_msg {
diff --git a/src/api/job_info.c b/src/api/job_info.c
index a85fba2dfcb5f6558c74b4ef11ca7a159a17ccc7..8cd716f72a61188123291572572f62c462a8b8f5 100644
--- a/src/api/job_info.c
+++ b/src/api/job_info.c
@@ -182,8 +182,9 @@ slurm_print_job_info ( FILE* out, job_info_t * job_ptr, int one_liner )
 		fprintf ( out, "\n   ");
 
 	/****** Line 9 ******/
-	fprintf ( out, "Dependency=%u Account=%s",
-		job_ptr->dependency, job_ptr->account);
+	fprintf ( out, "Dependency=%u Account=%s Reason=%s",
+		job_ptr->dependency, job_ptr->account,
+		job_reason_string(job_ptr->wait_reason));
 	if (one_liner)
 		fprintf ( out, " ");
 	else
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c
index 58b6dfb447009a2e318d6b0d5cea71766caf732b..ed35a4bb67ab503f26b5520702149338c22a76e2 100644
--- a/src/common/slurm_protocol_defs.c
+++ b/src/common/slurm_protocol_defs.c
@@ -409,6 +409,21 @@ void inline slurm_free_checkpoint_resp_msg(checkpoint_resp_msg_t *msg)
 	}
 }
 
+/* Given a job's reason for waiting, return a descriptive string */
+extern char *job_reason_string(enum job_wait_reason inx)
+{
+	static char *job_reason_string[] = {
+		"None",			/* WAIT_NO_REASON */
+		"Priority",
+		"Dependency",
+		"Resources",
+		"PartitionNodeLimit",
+		"PartitionTimeLimit",
+		"PartitionDown"
+	};
+	return job_reason_string[inx];
+}
+
 char *job_state_string(enum job_states inx)
 {
 	static char *job_state_string[] = {
diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h
index 27dfb6c4c5b0e425887ed8e26a39a3f2479ad981..ab0e6e7a122e28e0b8073a8322183b71f24ddb18 100644
--- a/src/common/slurm_protocol_defs.h
+++ b/src/common/slurm_protocol_defs.h
@@ -501,6 +501,7 @@ void slurm_free_node_info_msg(node_info_msg_t * msg);
 void slurm_free_partition_info_msg(partition_info_msg_t * msg);
 
 extern char *job_dist_string(uint16_t inx);
+extern char *job_reason_string(enum job_wait_reason inx);
 extern char *job_state_string(enum job_states inx);
 extern char *job_state_string_compact(enum job_states inx);
 extern char *node_state_string(enum node_states inx);
diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c
index 61ae7d4d4a3215544aefd9b845b0addd5b61724c..f63d670c8c611b4e3f9fe77411e28126b6d98499 100644
--- a/src/common/slurm_protocol_pack.c
+++ b/src/common/slurm_protocol_pack.c
@@ -1640,6 +1640,7 @@ _unpack_job_info_members(job_info_t * job, Buf buffer)
 	safe_unpack32(&job->min_procs, buffer);
 	safe_unpack32(&job->min_memory, buffer);
 	safe_unpack32(&job->min_tmp_disk, buffer);
+	safe_unpack16(&job->wait_reason, buffer);
 
 	safe_unpackstr_xmalloc(&job->req_nodes, &uint16_tmp, buffer);
 	safe_unpackstr_xmalloc(&node_inx_str, &uint16_tmp, buffer);
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 233844a63aff193fd76bef2228a4713423294a48..de52c7956ebe328e1e7de42578a2b73aa80ae9ba 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -1178,9 +1178,11 @@ void rehash_jobs(void)
  * OUT node_list - list of nodes allocated to the job
  * OUT node_cnt - number of allocated nodes
  * OUT node_addr - slurm_addr's for the allocated nodes
- * RET 0 or an error code
+ * RET 0 or an error code. If the job would only be able to execute with
+ *	some change in partition configuration then
+ *	ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE is returned
  * NOTE: If allocating nodes lx[0-7] to a job and those nodes have cpu counts
- *	of 4, 4, 4, 4, 8, 8, 4, 4 then num_cpu_groups=3, cpus_per_node={4,8,4} 
+ *	of 4, 4, 4, 4, 8, 8, 4, 4 then num_cpu_groups=3, cpus_per_node={4,8,4}
  *	and cpu_count_reps={4,2,2}
  * globals: job_list - pointer to global job list
  *	list_part - global list of partition info
@@ -1500,7 +1502,9 @@ job_complete(uint32_t job_id, uid_t uid, bool requeue,
  * IN will_run - job is not to be created, test of validity only
  * OUT new_job_id - the job's ID
  * OUT job_pptr - pointer to the job (NULL on error)
- * RET 0 on success, otherwise ESLURM error code
+ * RET 0 on success, otherwise ESLURM error code. If the job would only be
+ *	able to execute with some change in partition configuration then
+ *	ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE is returned
  * globals: job_list - pointer to global job list
  *	list_part - global list of partition info
  *	default_part_loc - pointer to default partition
@@ -1512,6 +1516,8 @@ static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id,
 		       struct job_record **job_pptr, uid_t submit_uid)
 {
 	int error_code = SLURM_SUCCESS, i;
+	struct job_details *detail_ptr;
+	enum job_wait_reason fail_reason;
 	struct part_record *part_ptr;
 	bitstr_t *req_bitmap = NULL, *exc_bitmap = NULL;
 	bool super_user = false;
@@ -1669,6 +1675,8 @@ static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id,
 
 	/* Insure that requested partition is valid right now,
 	 * otherwise leave job queued and provide warning code */
+	detail_ptr = (*job_pptr)->details;
+	fail_reason = WAIT_NO_REASON;
 	if ((job_desc->user_id == 0) ||
 	    (job_desc->user_id == slurmctld_conf.slurm_user_id))
 		super_user = true;
@@ -1677,21 +1685,25 @@ static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id,
 		info("Job %u requested too many nodes (%d) of "
 		     "partition %s(%d)", *new_job_id, job_desc->min_nodes,
 		     part_ptr->name, part_ptr->max_nodes);
-		error_code = ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE;
+		fail_reason = WAIT_PART_NODE_LIMIT;
 	} else if ((!super_user) &&
 	           (job_desc->max_nodes != 0) &&    /* no max_nodes for job */
 	           (job_desc->max_nodes < part_ptr->min_nodes)) {
 		info("Job %u requested too few nodes (%d) of partition %s(%d)",
 		     *new_job_id, job_desc->max_nodes,
 		     part_ptr->name, part_ptr->min_nodes);
-		error_code = ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE;
+		fail_reason = WAIT_PART_NODE_LIMIT;
 	} else if (part_ptr->state_up == 0) {
 		info("Job %u requested down partition %s",
 		     *new_job_id, part_ptr->name);
+		fail_reason = WAIT_PART_STATE;
+	}
+	if (fail_reason != WAIT_NO_REASON) {
 		error_code = ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE;
+		(*job_pptr)->priority = 1;	/* Move to end of queue */
+		if (detail_ptr)
+			detail_ptr->wait_reason = fail_reason;
 	}
-	if (error_code == ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE)
-		(*job_pptr)->priority = 1;	/* Move to end of queue */
 
 cleanup:
 	FREE_NULL_BITMAP(req_bitmap);
@@ -2494,6 +2506,7 @@ static void _pack_job_details(struct job_details *detail_ptr, Buf buffer)
 		pack32((uint32_t) detail_ptr->min_procs, buffer);
 		pack32((uint32_t) detail_ptr->min_memory, buffer);
 		pack32((uint32_t) detail_ptr->min_tmp_disk, buffer);
+		pack16((uint16_t) detail_ptr->wait_reason, buffer);
 
 		packstr(detail_ptr->req_nodes, buffer);
 		pack_bit_fmt(detail_ptr->req_node_bitmap, buffer);
@@ -2511,6 +2524,7 @@ static void _pack_job_details(struct job_details *detail_ptr, Buf buffer)
 		pack32((uint32_t) 0, buffer);
 		pack32((uint32_t) 0, buffer);
 		pack32((uint32_t) 0, buffer);
+		pack16((uint16_t) 0, buffer);
 
 		packstr(NULL, buffer);
 		packstr(NULL, buffer);
@@ -2748,30 +2762,38 @@ void reset_job_priority(void)
 */
 static bool _top_priority(struct job_record *job_ptr)
 {
-	ListIterator job_iterator;
-	struct job_record *job_ptr2;
+	struct job_details *detail_ptr = job_ptr->details;
 	bool top;
 
-	if (job_ptr->priority == 0)	/* held */
-		return false;
-
-	top = true;	/* assume top priority until found otherwise */
-	job_iterator = list_iterator_create(job_list);
-	while ((job_ptr2 = (struct job_record *) list_next(job_iterator))) {
-		if (job_ptr2 == job_ptr)
-			continue;
-		if (job_ptr2->job_state != JOB_PENDING)
-			continue;
-		if (!job_independent(job_ptr2))
-			continue;
-		if ((job_ptr2->priority > job_ptr->priority) &&
-		    (job_ptr2->part_ptr == job_ptr->part_ptr)) {
-			top = false;
-			break;
+	if (job_ptr->priority == 0)	/* user held */
+		top = false;
+	else {
+		ListIterator job_iterator;
+		struct job_record *job_ptr2;
+
+		top = true;	/* assume top priority until found otherwise */
+		job_iterator = list_iterator_create(job_list);
+		while ((job_ptr2 = (struct job_record *)
+				list_next(job_iterator))) {
+			if (job_ptr2 == job_ptr)
+				continue;
+			if (job_ptr2->job_state != JOB_PENDING)
+				continue;
+			if (!job_independent(job_ptr2))
+				continue;
+			if ((job_ptr2->priority > job_ptr->priority) &&
+			    (job_ptr2->part_ptr == job_ptr->part_ptr)) {
+				top = false;
+				break;
+			}
 		}
+		list_iterator_destroy(job_iterator);
 	}
-	list_iterator_destroy(job_iterator);
+	if ((!top) &&			/* not top prio and */
+	    (job_ptr->priority != 1) &&	/* not system hold */
+	    (detail_ptr))
+		detail_ptr->wait_reason = WAIT_PRIORITY;
 
 	return top;
 }
 
@@ -3458,6 +3480,7 @@ extern void job_completion_logger(struct job_record *job_ptr)
 extern bool job_independent(struct job_record *job_ptr)
 {
 	struct job_record *dep_ptr;
+	struct job_details *detail_ptr = job_ptr->details;
 
 	if (job_ptr->dependency == 0)
 		return true;
@@ -3469,6 +3492,9 @@ extern bool job_independent(struct job_record *job_ptr)
 	if (((dep_ptr->job_state & JOB_COMPLETING) == 0) &&
 	    (dep_ptr->job_state >= JOB_COMPLETE))
 		return true;
+
+	if (detail_ptr)
+		detail_ptr->wait_reason = WAIT_DEPENDENCY;
 	return false;	/* job exists and incomplete */
 }
 
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index 91aeaf65325c56a8dbe29be603efc328c77475e8..d2627a3eedcaad7db7405b8c9cf878fdbc515c7a 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -852,6 +852,7 @@ int select_nodes(struct job_record *job_ptr, bool test_only)
 {
 	int error_code = SLURM_SUCCESS, i, shared, node_set_size = 0;
 	bitstr_t *req_bitmap = NULL;
+	struct job_details *detail_ptr = job_ptr->details;
 	struct node_set *node_set_ptr = NULL;
 	struct part_record *part_ptr = job_ptr->part_ptr;
 	uint32_t min_nodes, max_nodes, part_node_limit;
@@ -872,16 +873,24 @@ int select_nodes(struct job_record *job_ptr, bool test_only)
 	/* Confirm that partition is up and has compatible nodes limits */
 	if ((job_ptr->user_id == 0) || (job_ptr->user_id == getuid()))
 		super_user = true;
-	else if (
-	    (part_ptr->state_up == 0) ||
-	    ((job_ptr->time_limit != NO_VAL) &&
-	     (job_ptr->time_limit > part_ptr->max_time)) ||
-	    ((job_ptr->details->max_nodes != 0) &&   /* no node limit */
-	     (job_ptr->details->max_nodes < part_ptr->min_nodes)) ||
-	    (job_ptr->details->min_nodes > part_ptr->max_nodes)) {
-		job_ptr->priority = 1;	/* move to end of queue */
-		last_job_update = time(NULL);
-		return ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE;
+	else {
+		enum job_wait_reason fail_reason = WAIT_NO_REASON;
+		if (part_ptr->state_up == 0)
+			fail_reason = WAIT_PART_STATE;
+		else if ((job_ptr->time_limit != NO_VAL) &&
+			 (job_ptr->time_limit > part_ptr->max_time))
+			fail_reason = WAIT_PART_TIME_LIMIT;
+		else if (((job_ptr->details->max_nodes != 0) &&
+			  (job_ptr->details->max_nodes < part_ptr->min_nodes)) ||
+			 (job_ptr->details->min_nodes > part_ptr->max_nodes))
+			fail_reason = WAIT_PART_NODE_LIMIT;
+		if (fail_reason != WAIT_NO_REASON) {
+			if (detail_ptr)
+				detail_ptr->wait_reason = fail_reason;
+			job_ptr->priority = 1; /* sys hold, move to end of queue */
+			last_job_update = time(NULL);
+			return ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE;
+		}
 	}
 
 	/* build sets of usable nodes based upon their configuration */
@@ -932,7 +941,11 @@ int select_nodes(struct job_record *job_ptr, bool test_only)
 			    job_ptr->details->contiguous, shared,
 			    part_node_limit);
 	if (error_code) {
+		if (detail_ptr)
+			detail_ptr->wait_reason = WAIT_RESOURCES;
 		if (error_code == ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE) {
+			/* Required nodes are down or
+			 * too many nodes requested */
 			debug3("JobId=%u not runnable with present config",
 			       job_ptr->job_id);
 			job_ptr->priority = 1;	/* Move to end of queue */
@@ -948,6 +961,8 @@ int select_nodes(struct job_record *job_ptr, bool test_only)
 	}
 
 	/* assign the nodes and stage_in the job */
+	if (detail_ptr)
+		detail_ptr->wait_reason = WAIT_NO_REASON;
 	job_ptr->nodes = bitmap2node_name(req_bitmap);
 	job_ptr->node_bitmap = req_bitmap;
 	job_ptr->details->shared = shared;
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index fc2b04282635fc357ed814b99b8ebff9dc58efef..4947687bb73ae37385f3248893f06f40d55b8470 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -244,6 +244,8 @@ struct job_details {
 	uint16_t req_tasks;		/* required number of tasks */
 	uint16_t shared;		/* set node can be shared*/
 	uint16_t contiguous;		/* set if requires contiguous nodes */
+	uint16_t wait_reason;		/* reason job still pending, see
+					 * slurm.h:enum job_wait_reason */
 	uint32_t min_procs;		/* minimum processors per node */
 	uint32_t min_memory;		/* minimum memory per node, MB */
 	uint32_t min_tmp_disk;		/* minimum tempdisk per node, MB */
@@ -610,9 +612,11 @@ extern bool is_node_resp (char *name);
  * OUT node_list - list of nodes allocated to the job
  * OUT node_cnt - number of allocated nodes
  * OUT node_addr - slurm_addr's for the allocated nodes
- * RET 0 or an error code
+ * RET 0 or an error code. If the job would only be able to execute with
+ *	some change in partition configuration then
+ *	ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE is returned
  * NOTE: If allocating nodes lx[0-7] to a job and those nodes have cpu counts
- *	of 4, 4, 4, 4, 8, 8, 4, 4 then num_cpu_groups=3, cpus_per_node={4,8,4} 
+ *	of 4, 4, 4, 4, 8, 8, 4, 4 then num_cpu_groups=3, cpus_per_node={4,8,4}
  *	and cpu_count_reps={4,2,2}
  * globals: job_list - pointer to global job list
  *	list_part - global list of partition info
diff --git a/src/squeue/opts.c b/src/squeue/opts.c
index 8459d4b101123cba4dddfb011d03774f79f1d08c..e8c0b3a7771bb9c449e32e4c21181d022ff91504 100644
--- a/src/squeue/opts.c
+++ b/src/squeue/opts.c
@@ -482,6 +482,16 @@ extern int parse_format( char* format )
 							field_size,
 							right_justify,
 							suffix );
+			else if (field[0] == 'r')
+				job_format_add_reason( params.format_list,
+						       field_size,
+						       right_justify,
+						       suffix );
+			else if (field[0] == 'R')
+				job_format_add_reason_list( params.format_list,
+							    field_size,
+							    right_justify,
+							    suffix );
 			else if (field[0] == 'S')
 				job_format_add_time_start( params.format_list,
 							   field_size,
diff --git a/src/squeue/print.c b/src/squeue/print.c
index cd139f5b104d5d92729a01d5c960b80695854c9a..382277eeccd9207c083737bb860bb9f52fdb6bc3 100644
--- a/src/squeue/print.c
+++ b/src/squeue/print.c
@@ -311,10 +311,10 @@ int _print_job_job_id(job_info_t * job, int width, bool right, char* suffix)
 
 int _print_job_partition(job_info_t * job, int width, bool right, char* suffix)
 {
-	char id[FORMAT_STRING_SIZE];
 	if (job == NULL)	/* Print the Header instead */
 		_print_str("PARTITION", width, right, true);
 	else {
+		char id[FORMAT_STRING_SIZE];
 		snprintf(id, FORMAT_STRING_SIZE, "%s", job->partition);
 		_print_str(id, width, right, true);
 	}
@@ -330,6 +330,21 @@ int _print_job_prefix(job_info_t * job, int width, bool right, char* suffix)
 	return SLURM_SUCCESS;
 }
 
+int _print_job_reason(job_info_t * job, int width, bool right, char* suffix)
+{
+	if (job == NULL)	/* Print the Header instead */
+		_print_str("REASON", width, right, true);
+	else {
+		char id[FORMAT_STRING_SIZE];
+		snprintf(id, FORMAT_STRING_SIZE, "%s",
+			 job_reason_string(job->wait_reason));
+		_print_str(id, width, right, true);
+	}
+	if (suffix)
+		printf("%s", suffix);
+	return SLURM_SUCCESS;
+}
+
 int _print_job_name(job_info_t * job, int width, bool right, char* suffix)
 {
 	if (job == NULL)	/* Print the Header instead */
@@ -510,7 +525,7 @@ int _print_job_priority(job_info_t * job, int width, bool right, char* suffix)
 
 int _print_job_nodes(job_info_t * job, int width, bool right, char* suffix)
 {
-	if (job == NULL)       /* Print the Header instead */
+	if (job == NULL)	/* Print the Header instead */
 		_print_str("NODELIST", width, right, false);
 	else
 		_print_nodes(job->nodes, width, right, false);
@@ -520,6 +535,24 @@ int _print_job_nodes(job_info_t * job, int width, bool right, char* suffix)
 	return SLURM_SUCCESS;
 }
 
+int _print_job_reason_list(job_info_t * job, int width, bool right,
+			   char* suffix)
+{
+	if (job == NULL)	/* Print the Header instead */
+		_print_str("NODELIST(REASON)", width, right, false);
+	else if (job->job_state == JOB_PENDING) {
+		char id[FORMAT_STRING_SIZE];
+		snprintf(id, FORMAT_STRING_SIZE, "(%s)",
+			 job_reason_string(job->wait_reason));
+		_print_str(id, width, right, true);
+	} else
+		_print_nodes(job->nodes, width, right, false);
+
+	if (suffix)
+		printf("%s", suffix);
+	return SLURM_SUCCESS;
+}
+
 int _print_job_node_inx(job_info_t * job, int width, bool right, char* suffix)
 {
 	if (job == NULL)	/* Print the Header instead */
diff --git a/src/squeue/print.h b/src/squeue/print.h
index 44532ae6741982c565a96fc2f7d5bc6ee285abb3..68aea253eea63726e3efbb70305377c9b8da5c50 100644
--- a/src/squeue/print.h
+++ b/src/squeue/print.h
@@ -74,6 +74,10 @@ int job_format_add_function(List list, int width, bool right_justify,
 	job_format_add_function(list,wid,right,suffix,_print_job_partition)
#define job_format_add_prefix(list,wid,right,prefix) \
 	job_format_add_function(list,0,0,prefix,_print_job_prefix)
+#define job_format_add_reason(list,wid,right,suffix) \
+	job_format_add_function(list,wid,right,suffix,_print_job_reason)
+#define job_format_add_reason_list(list,wid,right,suffix) \
+	job_format_add_function(list,wid,right,suffix,_print_job_reason_list)
 #define job_format_add_name(list,wid,right,suffix) \
 	job_format_add_function(list,wid,right,suffix,_print_job_name)
 #define job_format_add_user_name(list,wid,right,suffix) \
@@ -142,6 +146,10 @@ int _print_job_partition(job_info_t * job, int width, bool right_justify,
 			 char* suffix);
 int _print_job_prefix(job_info_t * job, int width, bool right_justify,
 		      char* suffix);
+int _print_job_reason(job_info_t * job, int width, bool right_justify,
+		      char* suffix);
+int _print_job_reason_list(job_info_t * job, int width, bool right_justify,
+			   char* suffix);
 int _print_job_name(job_info_t * job, int width, bool right_justify,
 		    char* suffix);
 int _print_job_user_id(job_info_t * job, int width, bool right_justify,
diff --git a/src/squeue/squeue.c b/src/squeue/squeue.c
index 790906f1c754a5cf1b8a6e0d073c9cb548c38ea4..e840e7a642a8cd67b2eb117bc80e2733f2c07521 100644
--- a/src/squeue/squeue.c
+++ b/src/squeue/squeue.c
@@ -145,9 +145,9 @@ _print_job ( void )
 	if (params.format == NULL) {
 		if (params.long_list)
 			params.format = "%.7i %.9P %.8j %.8u %.8T %.10M %.9l "
-				"%.6D %N";
+				"%.6D %R";
 		else
-			params.format = "%.7i %.9P %.8j %.8u %.2t %.10M %.6D %N";
+			params.format = "%.7i %.9P %.8j %.8u %.2t %.10M %.6D %R";
 	}
 	if (params.format_list == NULL)
 		parse_format(params.format);
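
Usage note, not part of the patch: the sketch below shows one way a libslurm
client could consume the new wait_reason field, along the lines of what
squeue's %r format does. It assumes the 0.4-era two-argument
slurm_load_jobs() and hand-declares job_reason_string(), which this patch
adds to the internal src/common/slurm_protocol_defs.h rather than to the
public slurm.h, so treat it as an illustrative sketch only.

	#include <stdio.h>
	#include <slurm/slurm.h>

	/* Internal libslurm helper added by this patch; declared by hand
	 * here because it is not exported through slurm.h. */
	extern char *job_reason_string(enum job_wait_reason inx);

	int main(void)
	{
		job_info_msg_t *msg = NULL;
		uint32_t i;

		/* Ask slurmctld for all job records; an update_time of 0
		 * forces a full refresh rather than an incremental one. */
		if (slurm_load_jobs((time_t) 0, &msg) != 0) {
			slurm_perror("slurm_load_jobs");
			return 1;
		}
		for (i = 0; i < msg->record_count; i++) {
			job_info_t *job = &msg->job_array[i];
			if (job->job_state != JOB_PENDING)
				continue;	/* reason is only set while pending */
			printf("job %u pending: %s\n", job->job_id,
			       job_reason_string(job->wait_reason));
		}
		slurm_free_job_info_msg(msg);
		return 0;
	}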