From d7bd148d78f477b24069731959d74a1171f337c4 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Tue, 26 Sep 2006 17:25:51 +0000 Subject: [PATCH] In sched/wiki2: add support for JOBREQUEUE command. --- NEWS | 1 + src/plugins/sched/wiki2/job_requeue.c | 19 ++++++++++++----- src/slurmctld/job_mgr.c | 30 ++++++++++++--------------- testsuite/expect/test7.7.prog.c | 16 +++++++++++++- 4 files changed, 43 insertions(+), 23 deletions(-) diff --git a/NEWS b/NEWS index caec6a36367..59e0398caef 100644 --- a/NEWS +++ b/NEWS @@ -18,6 +18,7 @@ documents those changes that are of interest to users and admins. -- Added new job field, "comment". Set by srun, salloc and sbatch. See with "scontrol show job". Used in sched/wiki2. -- Report a job's exit status in "scontrol show job". + -- In sched/wiki2: add support for JOBREQUEUE command. * Changes in SLURM 1.2.0-pre2 ============================= diff --git a/src/plugins/sched/wiki2/job_requeue.c b/src/plugins/sched/wiki2/job_requeue.c index c92f92e97ab..e6466f09672 100644 --- a/src/plugins/sched/wiki2/job_requeue.c +++ b/src/plugins/sched/wiki2/job_requeue.c @@ -37,6 +37,7 @@ #include "./msg.h" #include "src/slurmctld/slurmctld.h" +#include "src/slurmctld/locks.h" /* RET 0 on success, -1 on failure */ extern int job_requeue_wiki(char *cmd_ptr, int *err_code, char **err_msg) @@ -44,6 +45,10 @@ extern int job_requeue_wiki(char *cmd_ptr, int *err_code, char **err_msg) char *arg_ptr, *tmp_char; uint32_t jobid; static char reply_msg[128]; + int slurm_rc; + /* Write lock on job and node info */ + slurmctld_lock_t job_write_lock = { + NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK }; arg_ptr = strstr(cmd_ptr, "ARG="); if (arg_ptr == NULL) { @@ -60,11 +65,15 @@ extern int job_requeue_wiki(char *cmd_ptr, int *err_code, char **err_msg) return -1; } - /* FIXME: To be added in slurm v1.2 */ - *err_code = -300; - *err_msg = "unsupported request type"; - error("wiki: unrecognized request type: JOBREQUEUE"); - return -1; + lock_slurmctld(job_write_lock); + slurm_rc = job_requeue(0, jobid, -1); + unlock_slurmctld(job_write_lock); + if (slurm_rc != SLURM_SUCCESS) { + *err_code = -700; + *err_msg = slurm_strerror(slurm_rc); + error("wiki: Failed to requeue job %u (%m)", jobid); + return -1; + } snprintf(reply_msg, sizeof(reply_msg), "job %u requeued successfully", jobid); diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 6e612e28ede..f902b72c8ff 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -4289,18 +4289,13 @@ extern int job_requeue (uid_t uid, uint32_t job_id, slurm_fd conn_fd) goto reply; } - /* if pending, just reset the priority */ - if (job_ptr->job_state == JOB_PENDING) { - /* just reset the priority */ - if ((job_ptr->priority == 0) - && (!super_user)) { - rc = ESLURM_ACCESS_DENIED; - goto reply; - } - _set_job_prio(job_ptr); - last_job_update = now; + /* reset the priority */ + _set_job_prio(job_ptr); + last_job_update = now; + + /* nothing else to do if pending */ + if (job_ptr->job_state == JOB_PENDING) goto reply; - } if (job_ptr->batch_flag == 0) { rc = ESLURM_BATCH_ONLY; @@ -4317,7 +4312,6 @@ extern int job_requeue (uid_t uid, uint32_t job_id, slurm_fd conn_fd) if (job_ptr->job_state == JOB_SUSPENDED) suspended = true; - last_job_update = now; job_ptr->time_last_active = now; job_ptr->job_state = JOB_PENDING | JOB_COMPLETING; if (suspended) @@ -4329,11 +4323,13 @@ extern int job_requeue (uid_t uid, uint32_t job_id, slurm_fd conn_fd) //FIXME: Test accounting reply: - slurm_msg_t_init(&resp_msg); - resp_msg.msg_type = RESPONSE_SLURM_RC; - rc_msg.return_code = rc; - resp_msg.data = &rc_msg; - slurm_send_node_msg(conn_fd, &resp_msg); + if (conn_fd >= 0) { + slurm_msg_t_init(&resp_msg); + resp_msg.msg_type = RESPONSE_SLURM_RC; + rc_msg.return_code = rc; + resp_msg.data = &rc_msg; + slurm_send_node_msg(conn_fd, &resp_msg); + } return rc; } diff --git a/testsuite/expect/test7.7.prog.c b/testsuite/expect/test7.7.prog.c index b26aa994849..e324894fb40 100644 --- a/testsuite/expect/test7.7.prog.c +++ b/testsuite/expect/test7.7.prog.c @@ -307,6 +307,17 @@ static void _resume_job(long my_job_id) _xmit(out_msg); } +static void _job_requeue(long my_job_id) +{ + time_t now = time(NULL); + char out_msg[128]; + + snprintf(out_msg, sizeof(out_msg), + "TS=%u AUTH=root DT=CMD=JOBREQUEUE ARG=%ld", + (uint32_t) now, my_job_id); + _xmit(out_msg); +} + static void _job_will_run(long my_job_id) { time_t now = time(NULL); @@ -344,8 +355,11 @@ int main(int argc, char * argv[]) if (e_port) _event_mgr(); else - sleep(1); + sleep(3); _cancel_job(job_id+1); + _job_requeue(job_id); /* Put job back into HELD state */ + sleep(5); + _start_job(job_id); _get_jobs(); printf("SUCCESS\n"); -- GitLab