From 9037cdd6e164a2ac92d33cd2a8e543174527f9f0 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Thu, 2 Dec 2004 18:04:56 +0000 Subject: [PATCH] Add new job reason value "JobHeld" for jobs with priority==0 --- NEWS | 1 + slurm/slurm.h.in | 3 ++- src/common/slurm_protocol_defs.c | 31 ++++++++++++++++--------------- src/slurmctld/job_mgr.c | 13 ++++++++----- src/slurmctld/node_scheduler.c | 5 ++++- 5 files changed, 31 insertions(+), 22 deletions(-) diff --git a/NEWS b/NEWS index ae263f66b09..f7eafd0b5d2 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,7 @@ documents those changes that are of interest to users and admins. * Changes in SLURM 0.4.0-pre6 ============================= + -- Add new job reason value "JobHeld" for jobs with priority==0 * Changes in SLURM 0.4.0-pre5 ============================= diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index daedc351623..f78ceb11730 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -151,7 +151,8 @@ enum job_wait_reason { WAIT_RESOUCES, /* required resources not available */ WAIT_PART_NODE_LIMIT, /* request exceeds partition node limit */ WAIT_PART_TIME_LIMIT, /* request exceeds partition time limit */ - WAIT_PART_STATE /* requested partition is down */ + WAIT_PART_STATE, /* requested partition is down */ + WAIT_HELD /* job is held, priority==0 */ }; #ifdef HAVE_BGL diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index 2f8acf87296..64ed8d88d9d 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -422,13 +422,14 @@ void inline slurm_free_checkpoint_resp_msg(checkpoint_resp_msg_t *msg) extern char *job_reason_string(enum job_wait_reason inx) { static char *job_reason_string[] = { - "None", /* WAIT_NO_REASON */ - "Priority", - "Dependency", - "Resources", - "PartitionNodeLimit", - "PartitionTimeLimit", - "PartitionDown" + "None", /* WAIT_NO_REASON */ + "Priority", /* WAIT_PRIORITY */ + "Dependency", /* WAIT_DEPENDENCY */ + "Resources", /* WAIT_RESOUCES */ + "PartitionNodeLimit", /* WAIT_PART_NODE_LIMIT */ + "PartitionTimeLimit", /* WAIT_PART_TIME_LIMIT */ + "PartitionDown", /* WAIT_PART_STATE */ + "JobHeld" /* WAIT_HELD */ }; return job_reason_string[inx]; } @@ -436,14 +437,14 @@ extern char *job_reason_string(enum job_wait_reason inx) char *job_state_string(enum job_states inx) { static char *job_state_string[] = { - "PENDING", - "RUNNING", - "COMPLETED", - "CANCELLED", - "FAILED", - "TIMEOUT", - "NODE_FAIL", - "END" + "PENDING", /* JOB_PENDING */ + "RUNNING", /* JOB_RUNNING */ + "COMPLETED", /* JOB_COMPLETE */ + "CANCELLED", /* JOB_CANCELLED */ + "FAILED", /* JOB_FAILED */ + "TIMEOUT", /* JOB_TIMEOUT */ + "NODE_FAIL", /* JOB_NODE_FAIL */ + "END" /* JOB_END */ }; if (inx & JOB_COMPLETING) return "COMPLETING"; diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index b3a84914ad8..465b3d880cc 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -1219,7 +1219,8 @@ extern int job_allocate(job_desc_msg_t * job_specs, int immediate, int will_run, if (independent && (!too_fragmented)) top_prio = _top_priority(job_ptr); else - top_prio = true; /* don't bother testing */ + top_prio = true; /* don't bother testing, + * it is not runable anyway */ if (immediate && (too_fragmented || (!top_prio) || (!independent))) { job_ptr->job_state = JOB_FAILED; job_ptr->start_time = job_ptr->end_time = time(NULL); @@ -2827,10 +2828,12 @@ static bool _top_priority(struct job_record *job_ptr) list_iterator_destroy(job_iterator); } - if ((!top) && /* not top prio and */ - (job_ptr->priority != 1) && /* not system hold */ - (detail_ptr)) - detail_ptr->wait_reason = WAIT_PRIORITY; + if ((!top) && detail_ptr) { /* not top prio */ + if (job_ptr->priority == 0) /* user/admin hold */ + detail_ptr->wait_reason = WAIT_HELD; + else if (job_ptr->priority != 1) /* not system hold */ + detail_ptr->wait_reason = WAIT_PRIORITY; + } return top; } diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 4d594ec68a0..49924ace465 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -642,6 +642,8 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only) fail_reason = WAIT_NO_REASON; if (part_ptr->state_up == 0) fail_reason = WAIT_PART_STATE; + else if (job_ptr->priority == 0) /* user or administrator hold */ + fail_reason = WAIT_HELD; else if (super_user) ; /* ignore any time or node count limits */ else if ((job_ptr->time_limit != NO_VAL) && @@ -654,7 +656,8 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only) if (fail_reason != WAIT_NO_REASON) { if (detail_ptr) detail_ptr->wait_reason = fail_reason; - job_ptr->priority = 1; /* sys hold, move to end of queue */ + if (job_ptr->priority != 0) /* not user/admin hold */ + job_ptr->priority = 1; /* sys hold, move to end of queue */ last_job_update = time(NULL); return ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE; } -- GitLab