diff --git a/NEWS b/NEWS index 1377200afda0bbd85a8142af6cc521b528813ce4..63bf1618c999a099dfe20d779f84aefb2007d341 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,11 @@ This file describes changes in recent versions of SLURM. It primarily documents those changes that are of interest to users and admins. +* Changes in SLURM 1.2.0-pre1 +============================= + -- Fix bug that could run a job's prolog more than once + -- Permit batch jobs to be requeued, scontrol requeue <jobid> + * Changes in SLURM 1.1.1 ======================== -- Fix bug in packing job suspend/resume RPC. diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am index f44099b40fc770ead8d55a30d97655b1ce3b8995..0a99c6bc34e45dc25cfa5113402ba31aa5bce54a 100644 --- a/doc/man/Makefile.am +++ b/doc/man/Makefile.am @@ -67,6 +67,7 @@ man3_MANS = man3/slurm_hostlist_create.3 \ man3/slurm_print_partition_info_msg.3 \ man3/slurm_reconfigure.3 \ man3/slurm_resume.3 \ + man3/slurm_requeue.3 \ man3/slurm_shutdown.3 \ man3/slurm_spawn.3 \ man3/slurm_spawn_kill.3 \ diff --git a/doc/man/Makefile.in b/doc/man/Makefile.in index 249ed0c900f9a2bd62ad45b56504f46067d60223..acb813716dea4c0d17f013f449bc22ea335cf744 100644 --- a/doc/man/Makefile.in +++ b/doc/man/Makefile.in @@ -310,6 +310,7 @@ man3_MANS = man3/slurm_hostlist_create.3 \ man3/slurm_print_partition_info_msg.3 \ man3/slurm_reconfigure.3 \ man3/slurm_resume.3 \ + man3/slurm_requeue.3 \ man3/slurm_shutdown.3 \ man3/slurm_spawn.3 \ man3/slurm_spawn_kill.3 \ diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1 index c9ca248da98798695281cbd869e9dd31498d4408..ccec95e2eba6b1464d4f3d211c21bfc39cdd59db 100644 --- a/doc/man/man1/scontrol.1 +++ b/doc/man/man1/scontrol.1 @@ -1,4 +1,4 @@ -.TH SCONTROL "1" "March 2006" "scontrol 1.0" "Slurm components" +.TH SCONTROL "1" "May 2006" "scontrol 1.2" "Slurm components" .SH "NAME" scontrol \- Used view and modify Slurm configuration and state. @@ -129,6 +129,9 @@ or SlurmdPort. 
\fIresume\fP \fIjob_id\fP Resume a previously suspended job. .TP +\fIrequeue\fP \fIjob_id\fP +Requeue a running or pending SLURM batch job. +.TP \fIshow\fP \fIENTITY\fP \fIID\fP Display the state of the specified entity with the specified identification. \fIENTITY\fP may be \fIconfig\fP, \fIdaemons\fP, \fIjob\fP, \fInode\fP, @@ -433,7 +436,7 @@ details. \fBslurm_load_ctl_conf\fR(3), \fBslurm_load_jobs\fR(3), \fBslurm_load_node\fR(3), \fBslurm_load_partitions\fR(3), -\fBslurm_reconfigure\fR(3), \fBslurm_resume\fR(3), +\fBslurm_reconfigure\fR(3), \fBslurm_requeue\fR(3), \fBslurm_resume\fR(3), \fBslurm_shutdown\fR(3), \fBslurm_suspend\fR(3), \fBslurm_update_job\fR(3), \fBslurm_update_node\fR(3), \fBslurm_update_partition\fR(3), diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index cebde626b3d76cfbbb1efa9346275db31f1c1b00..afa516fb3ffb969727ab643e40d45fed030bdd82 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -1,6 +1,6 @@ \." $Id$ .\" -.TH SRUN "1" "April 2006" "srun 1.1" "slurm components" +.TH SRUN "1" "May 2006" "srun 1.2" "slurm components" .SH "NAME" srun \- run parallel jobs .SH SYNOPSIS @@ -254,6 +254,13 @@ Initiate a job step under an already allocated job with job id \fIid\fR. Using this option will cause \fBsrun\fR to behave exactly as if the SLURM_JOBID environment variable was set. .TP +\fB\-\-no\-requeue\fR +Specifies that the batch job should never be requeued. +Setting this option will prevent system administrators from being able +to restart the job (for example, after a scheduled downtime). +When a job is requeued, the batch script is initiated from its beginning. +This option is only applicable to batch job submission (see \fB\-\-batch\fR). +.TP \fB\-o\fR, \fB\-\-output\fR=\fImode\fR Specify the mode for stdout redirection. By default in interactive mode, .B srun @@ -872,6 +879,9 @@ The location of the SLURM configuration file. 
\fBSLURM_NNODES\fR \fB\-N, \-\-nodes\fR=(\fIn|min-max\fR) .TP +\fBSLURM_NO_REQUEUE\fR +\fB\-\-no\-requeue\fR +.TP \fBSLURM_NO_ROTATE\fR \fB\-\-no\-rotate\fR .TP diff --git a/doc/man/man3/slurm_requeue.3 b/doc/man/man3/slurm_requeue.3 new file mode 100644 index 0000000000000000000000000000000000000000..2de579b5d359661f98c154aac60d6361a1d87f31 --- /dev/null +++ b/doc/man/man3/slurm_requeue.3 @@ -0,0 +1 @@ +.so man3/slurm_resume.3 diff --git a/doc/man/man3/slurm_resume.3 b/doc/man/man3/slurm_resume.3 index 7af69c31a03fc426dac21851c5a5dd4a19aa60cb..17956403137105337c43f57d20003fb921f88b3f 100644 --- a/doc/man/man3/slurm_resume.3 +++ b/doc/man/man3/slurm_resume.3 @@ -1,7 +1,7 @@ -.TH "Slurm API" "3" "Decmeber 2005" "Morris Jette" "Slurm suspend and resume functions" +.TH "Slurm API" "3" "May 2006" "Morris Jette" "Slurm suspend, resume and requeue functions" .SH "NAME" -slurm_suspend, slurm_resume \- Slurm suspend and resume functions +slurm_suspend, slurm_resume, slurm_requeue \- Slurm suspend, resume and requeue functions .SH "SYNTAX" .LP @@ -19,6 +19,12 @@ int \fBslurm_resume\fR ( uint32_t \fIjob_id\fP .br ); +.LP +int \fBslurm_requeue\fR ( +.br + uint32_t \fIjob_id\fP +.br +); .SH "ARGUMENTS" .LP @@ -33,6 +39,11 @@ Suspend the specified job. .LP \fBslurm_resume\fR Resume execution of a previously suspended job. +.LP +\fBslurm_requeue\fR +Requeue a running or pending SLURM batch job. +The job script will be restarted from its beginning, +ignoring any previous checkpoint. .SH "RETURN VALUE" .LP @@ -41,8 +52,9 @@ On error, -1 is returned, and the Slurm error code is set appropriately. .SH "ERRORS" .LP \fBESLURM_DISABLED\fR the operation is currently disabled -(e.g. attempt to suspend a job that is not running or resume a -job that is not currently suspended). +(e.g. attempt to suspend a job that is not running, +resume a job that is not currently suspended, or +requeue a job on which the operation has been disabled). 
.LP \fBESLURM_INVALID_JOB_ID\fR the requested job id does not exist. .LP @@ -56,7 +68,7 @@ requested action (e.g. not user root or SlurmUser). \fBESLURM_NOT_SUPPORTED\fR the requested operation is not supported on this system. .SH "COPYING" -Copyright (C) 2005 The Regents of the University of California. +Copyright (C) 2005-2006 The Regents of the University of California. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). UCRL-CODE-217948. .LP diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index b0b98a3114d039b7299558f4db76a74f7e3bae64..facd20a25d570cbdd26984c07892ace0e14711a6 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -392,6 +392,7 @@ typedef struct job_descriptor { /* For submit, allocate, and update requests */ time_t begin_time; /* delay initiation until this time */ uint16_t mail_type; /* see MAIL_JOB_ definitions above */ char *mail_user; /* user to receive notification */ + uint16_t no_requeue; /* disable job requeue option */ /* * The following parameters are only meaningful on a Blue Gene * system at present. Some will be of value on other system. 
Don't remove these @@ -1284,6 +1285,13 @@ extern int slurm_suspend PARAMS(( uint32_t job_id )); */ extern int slurm_resume PARAMS(( uint32_t job_id )); +/* + * slurm_requeue - re-queue a batch job, if already running + * then terminate it first + * RET 0 or a slurm error code + */ +extern int slurm_requeue PARAMS(( uint32_t job_id )); + /*****************************************************************************\ * SLURM JOB CHECKPOINT FUNCTIONS \*****************************************************************************/ diff --git a/src/api/init_msg.c b/src/api/init_msg.c index 78a5e86fdcd581086d698fc3f432593638cf03f1..35b5dd4416a55ba05c0f5f22aebe166ca1564904 100644 --- a/src/api/init_msg.c +++ b/src/api/init_msg.c @@ -2,7 +2,7 @@ * init_msg.c - initialize RPC messages contents * $Id$ ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. + * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov>. * UCRL-CODE-217948. @@ -83,6 +83,7 @@ void slurm_init_job_desc_msg(job_desc_msg_t * job_desc_msg) job_desc_msg->mail_user = NULL; job_desc_msg->port = 0; job_desc_msg->begin_time = 0; + job_desc_msg->no_requeue = (uint16_t) NO_VAL; #if SYSTEM_DIMENSIONS { int i; diff --git a/src/api/suspend.c b/src/api/suspend.c index a2fcff640408cf552eaae2cdf6c882290e75da2e..763b65a50954fb1b8d03ad4c2d7176944b2ccb16 100644 --- a/src/api/suspend.c +++ b/src/api/suspend.c @@ -2,7 +2,7 @@ * suspend.c - job step suspend and resume functions. * $Id$ ***************************************************************************** - * Copyright (C) 2005 The Regents of the University of California. + * Copyright (C) 2005-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
* Written by Morris Jette <jette1@llnl.gov> et. al. * UCRL-CODE-217948. @@ -77,3 +77,26 @@ extern int slurm_resume (uint32_t job_id) { return _suspend_op (RESUME_JOB, job_id); } + +/* + * slurm_requeue - re-queue a batch job, if already running + * then terminate it first + * RET 0 or a slurm error code + */ +extern int slurm_requeue (uint32_t job_id) +{ + int rc; + job_id_msg_t requeue_req; + slurm_msg_t req_msg; + + requeue_req.job_id = job_id; + req_msg.msg_type = REQUEST_JOB_REQUEUE; + req_msg.data = &requeue_req; + + if (slurm_send_recv_controller_rc_msg(&req_msg, &rc) < 0) + return SLURM_ERROR; + + slurm_seterrno(rc); + return rc; +} + diff --git a/src/common/slurm_cred.c b/src/common/slurm_cred.c index a0c4fcd28e4b827ab14504e5108b300622aa9f85..5014f3076c02993d365f3128c98be4c46c3c4cfe 100644 --- a/src/common/slurm_cred.c +++ b/src/common/slurm_cred.c @@ -1,8 +1,8 @@ /*****************************************************************************\ - * src/common/slurm_cred.c - SLURM job credential functions - * $Id$ + * src/common/slurm_cred.c - SLURM job credential functions + * $Id$ ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. + * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Mark A. Grondona <mgrondona@llnl.gov>. * UCRL-CODE-217948. 
@@ -81,9 +81,9 @@ typedef struct { */ typedef struct { uint32_t jobid; - bool revoked; /* True if all creds for jobid are revoked */ + time_t revoked; /* Time at which credentials were revoked */ time_t ctime; /* Time that this entry was created */ - time_t expiration; /* Time at which credentials were revoked */ + time_t expiration; /* Time at which credentials can be purged */ } job_state_t; @@ -618,7 +618,7 @@ slurm_cred_rewind(slurm_cred_ctx_t ctx, slurm_cred_t cred) } int -slurm_cred_revoke(slurm_cred_ctx_t ctx, uint32_t jobid) +slurm_cred_revoke(slurm_cred_ctx_t ctx, uint32_t jobid, time_t time) { job_state_t *j = NULL; @@ -640,12 +640,12 @@ slurm_cred_revoke(slurm_cred_ctx_t ctx, uint32_t jobid) j = _insert_job_state(ctx, jobid); } - if (j->revoked == true) { + if (j->revoked) { slurm_seterrno(EEXIST); goto error; } - j->revoked = true; + j->revoked = time; slurm_mutex_unlock(&ctx->mutex); return SLURM_SUCCESS; @@ -1159,11 +1159,22 @@ static char * timestr (const time_t *tp, char *buf, size_t n) } extern bool -slurm_cred_revoked(slurm_cred_ctx_t ctx, uint32_t jobid) +slurm_cred_revoked(slurm_cred_ctx_t ctx, slurm_cred_t cred) { - job_state_t *j = _find_job_state(ctx, jobid); - if (j && j->revoked) + job_state_t *j = _find_job_state(ctx, cred->jobid); + + if ((j == NULL) || (j->revoked == (time_t)0)) + return false; + + if (cred->ctime <= j->revoked) return true; + + /* if we are re-running the job, the new job credential is newer + * than the revoke time (see "scontrol requeue"), purge the old + * job record so this looks like a new job */ + info("re-creating job credential records for job %u", j->jobid); + j->expiration = 0; + _clear_expired_job_states(ctx); return false; } @@ -1220,7 +1231,7 @@ _job_state_create(uint32_t jobid) job_state_t *j = xmalloc(sizeof(*j)); j->jobid = jobid; - j->revoked = false; + j->revoked = (time_t) 0; j->ctime = time(NULL); j->expiration = (time_t) MAX_TIME; @@ -1335,7 +1346,7 @@ static void _job_state_pack_one(job_state_t *j, 
Buf buffer) { pack32(j->jobid, buffer); - pack16((uint16_t) j->revoked, buffer); + pack_time(j->revoked, buffer); pack_time(j->ctime, buffer); pack_time(j->expiration, buffer); } @@ -1345,25 +1356,24 @@ static job_state_t * _job_state_unpack_one(Buf buffer) { char buf1[64], buf2[64]; - uint16_t revoked = 0; job_state_t *j = xmalloc(sizeof(*j)); safe_unpack32( &j->jobid, buffer); - safe_unpack16( &revoked, buffer); + safe_unpack_time( &j->revoked, buffer); safe_unpack_time( &j->ctime, buffer); safe_unpack_time( &j->expiration, buffer); debug3("cred_unpack:job %d ctime:%s%s%s", j->jobid, timestr (&j->ctime, buf1, 64), - (revoked ? " revoked:" : " expires:"), - revoked ? timestr (&j->expiration, buf2, 64) : ""); + (j->revoked ? " revoked:" : " expires:"), + j->revoked ? timestr (&j->expiration, buf2, 64) : ""); - if (revoked) { - j->revoked = true; + if (j->revoked) { if (j->expiration == (time_t) MAX_TIME) { info ("Warning: revoke on job %d has no expiration", j->jobid); + j->expiration = j->revoked + 600; } } diff --git a/src/common/slurm_cred.h b/src/common/slurm_cred.h index 0f3c0b16919cdbabd08fbd609d880e11850c96f9..f6e67a52a44b0556d9673476b0d28067be54088c 100644 --- a/src/common/slurm_cred.h +++ b/src/common/slurm_cred.h @@ -2,7 +2,7 @@ * src/common/slurm_cred.h - SLURM job credential operations * $Id$ ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. + * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Mark Grondona <grondona1@llnl.gov>. * UCRL-CODE-217948. 
@@ -166,14 +166,20 @@ int slurm_cred_rewind(slurm_cred_ctx_t ctx, slurm_cred_t cred); /* * Revoke all credentials for job id jobid + * time IN - the time the job termination was requested by slurmctld + * (local time from slurmctld server) */ -int slurm_cred_revoke(slurm_cred_ctx_t ctx, uint32_t jobid); +int slurm_cred_revoke(slurm_cred_ctx_t ctx, uint32_t jobid, time_t time); /* * Report if a all credentials for a give job id have been * revoked (i.e. has the job been killed) + * + * If we are re-running the job, the new job credential is newer + * than the revoke time, see "scontrol requeue", purge the old + * job record and treat it as if it never existed */ -bool slurm_cred_revoked(slurm_cred_ctx_t ctx, uint32_t jobid); +bool slurm_cred_revoked(slurm_cred_ctx_t ctx, slurm_cred_t cred); /* * Begin expiration period for the revocation of credentials diff --git a/src/common/slurm_errno.c b/src/common/slurm_errno.c index 2eecf26d68cc1b06c18d1e3ee2fdf634cbaea62d..64c3cc1842f4fe36e9a180a849f62db95119734c 100644 --- a/src/common/slurm_errno.c +++ b/src/common/slurm_errno.c @@ -174,7 +174,7 @@ static slurm_errtab_t slurm_errtab[] = { { ESLURM_DEPENDENCY, "Immediate execution impossible, job dependency problem"}, { ESLURM_BATCH_ONLY, - "Only batch jobs are accepted" }, + "Only batch jobs are accepted or processed" }, /* slurmd error codes */ diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index c81fb5a7c10e5a4cc67a95987033ff25fac9906c..0ad4f891774f6c83f481ce80baaa94a3b5154faa 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -178,6 +178,7 @@ void slurm_free_job_launch_msg(batch_job_launch_msg_t * msg) } select_g_free_jobinfo(&msg->select_jobinfo); + slurm_cred_destroy(msg->cred); xfree(msg); } diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index 199b5bb207c0b00808351ea17f1b75e1968b2bf4..c178cfd1c16d2e81b19c159da8c11429216e5a78 100644 --- 
a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -145,6 +145,7 @@ typedef enum { REQUEST_COMPLETE_JOB_ALLOCATION, REQUEST_COMPLETE_BATCH_SCRIPT, MESSAGE_STAT_JOBACCT, + REQUEST_JOB_REQUEUE, REQUEST_LAUNCH_TASKS = 6001, RESPONSE_LAUNCH_TASKS, @@ -452,6 +453,7 @@ typedef struct return_code_msg { typedef struct kill_job_msg { uint32_t job_id; uint32_t job_uid; + time_t time; /* slurmctld's time of request */ char *nodes; select_jobinfo_t select_jobinfo; /* opaque data type */ } kill_job_msg_t; @@ -509,6 +511,7 @@ typedef struct batch_job_launch_msg { char **environment; /* environment variables to set for job, * name=value pairs, one per line */ select_jobinfo_t select_jobinfo; /* opaque data type */ + slurm_cred_t cred; } batch_job_launch_msg_t; typedef struct job_id_request_msg { diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 2d9dc4304228d75775e6bb179cbfe7c1df9a59e6..30b64638703af3aa03d2b5f8c1c92988152b6a31 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -617,8 +617,10 @@ pack_msg(slurm_msg_t const *msg, Buf buffer) break; case REQUEST_JOB_READY: + case REQUEST_JOB_REQUEUE: _pack_job_ready_msg((job_id_msg_t *)msg->data, buffer); break; + case REQUEST_NODE_SELECT_INFO: _pack_node_select_info_req_msg( (node_info_select_request_msg_t *) msg->data, buffer); @@ -905,6 +907,7 @@ unpack_msg(slurm_msg_t * msg, Buf buffer) break; case REQUEST_JOB_READY: + case REQUEST_JOB_REQUEUE: rc = _unpack_job_ready_msg((job_id_msg_t **) & msg->data, buffer); break; @@ -1381,6 +1384,7 @@ _pack_kill_job_msg(kill_job_msg_t * msg, Buf buffer) pack32((uint32_t)msg->job_id, buffer); pack32((uint32_t)msg->job_uid, buffer); + pack_time(msg->time, buffer); packstr(msg->nodes, buffer); select_g_pack_jobinfo(msg->select_jobinfo, buffer); } @@ -1398,6 +1402,7 @@ _unpack_kill_job_msg(kill_job_msg_t ** msg, Buf buffer) safe_unpack32(&(tmp_ptr->job_id), buffer); 
safe_unpack32(&(tmp_ptr->job_uid), buffer); + safe_unpack_time(&(tmp_ptr->time), buffer); safe_unpackstr_xmalloc(&(tmp_ptr->nodes), &uint16_tmp, buffer); if (select_g_alloc_jobinfo (&tmp_ptr->select_jobinfo) || select_g_unpack_jobinfo(tmp_ptr->select_jobinfo, buffer)) @@ -2128,6 +2133,7 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer) packstr(job_desc_ptr->work_dir, buffer); pack16((uint16_t)job_desc_ptr->immediate, buffer); + pack16((uint16_t)job_desc_ptr->no_requeue, buffer); pack16((uint16_t)job_desc_ptr->shared, buffer); pack16((uint16_t)job_desc_ptr->cpus_per_task, buffer); pack32((uint32_t)job_desc_ptr->time_limit, buffer); @@ -2218,6 +2224,7 @@ _unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer) safe_unpackstr_xmalloc(&job_desc_ptr->work_dir, &uint16_tmp, buffer); safe_unpack16(&job_desc_ptr->immediate, buffer); + safe_unpack16(&job_desc_ptr->no_requeue, buffer); safe_unpack16(&job_desc_ptr->shared, buffer); safe_unpack16(&job_desc_ptr->cpus_per_task, buffer); safe_unpack32(&job_desc_ptr->time_limit, buffer); @@ -3179,6 +3186,8 @@ _pack_batch_job_launch_msg(batch_job_launch_msg_t * msg, Buf buffer) pack16((uint16_t)msg->envc, buffer); packstr_array(msg->environment, msg->envc, buffer); + slurm_cred_pack(msg->cred, buffer); + select_g_pack_jobinfo(msg->select_jobinfo, buffer); } @@ -3228,6 +3237,9 @@ _unpack_batch_job_launch_msg(batch_job_launch_msg_t ** msg, Buf buffer) safe_unpackstr_array(&launch_msg_ptr->environment, &launch_msg_ptr->envc, buffer); + if (!(launch_msg_ptr->cred = slurm_cred_unpack(buffer))) + goto unpack_error; + if (select_g_alloc_jobinfo (&launch_msg_ptr->select_jobinfo) || select_g_unpack_jobinfo(launch_msg_ptr->select_jobinfo, buffer)) goto unpack_error; @@ -3235,18 +3247,7 @@ _unpack_batch_job_launch_msg(batch_job_launch_msg_t ** msg, Buf buffer) return SLURM_SUCCESS; unpack_error: - xfree(launch_msg_ptr->nodes); - xfree(launch_msg_ptr->script); - xfree(launch_msg_ptr->work_dir); - 
xfree(launch_msg_ptr->err); - xfree(launch_msg_ptr->in); - xfree(launch_msg_ptr->out); - xfree(launch_msg_ptr->argv); - xfree(launch_msg_ptr->environment); - xfree(launch_msg_ptr->cpus_per_node); - xfree(launch_msg_ptr->cpu_count_reps); - select_g_free_jobinfo(&launch_msg_ptr->select_jobinfo); - xfree(launch_msg_ptr); + slurm_free_job_launch_msg(launch_msg_ptr); *msg = NULL; return SLURM_ERROR; } @@ -3450,6 +3451,7 @@ static int _unpack_suspend_msg(suspend_msg_t **msg_ptr, Buf buffer) return SLURM_ERROR; } + static void _pack_checkpoint_msg(checkpoint_msg_t *msg, Buf buffer) { diff --git a/src/scontrol/scontrol.c b/src/scontrol/scontrol.c index 97b3c94c605ca9090c50541788bfdfec8d9f40fd..9ba166a3d935505e3bf1af79a10811bcda60d86d 100644 --- a/src/scontrol/scontrol.c +++ b/src/scontrol/scontrol.c @@ -109,6 +109,7 @@ static void _print_ping (void); static void _print_step (char *job_step_id_str); static void _print_version( void ); static int _process_command (int argc, char *argv[]); +static int _requeue(char *job_step_id_str); static int _suspend(char *op, char *job_id_str); static void _update_it (int argc, char *argv[]); static int _update_job (int argc, char *argv[]); @@ -1176,6 +1177,29 @@ _process_command (int argc, char *argv[]) } } } + else if (strncasecmp (argv[0], "requeue", 3) == 0) { + if (argc > 2) { + exit_code = 1; + if (quiet_flag != 1) + fprintf(stderr, + "too many arguments for keyword:%s\n", + argv[0]); + } else if (argc < 2) { + exit_code = 1; + if (quiet_flag != 1) + fprintf(stderr, + "too few arguments for keyword:%s\n", + argv[0]); + } else { + error_code =_requeue(argv[1]); + if (error_code) { + exit_code = 1; + if (quiet_flag != 1) + slurm_perror ("slurm_requeue error"); + } + } + + } else if ((strncasecmp (argv[0], "suspend", 3) == 0) || (strncasecmp (argv[0], "resume", 3) == 0)) { if (argc > 2) { @@ -1983,6 +2007,7 @@ scontrol [<OPTION>] [<COMMAND>] \n\ quiet print no messages other than error messages. \n\ quit terminate this command. 
\n\ reconfigure re-read configuration files. \n\ + requeue <job_id> re-queue a batch job \n\ show <ENTITY> [<ID>] display state of identified entity, default \n\ is all records. \n\ shutdown shutdown slurm controller. \n\ @@ -2136,3 +2161,33 @@ static int _suspend(char *op, char *job_id_str) return rc; } + +/* + * _requeue - requeue a pending or running batch job + * IN job_id_str - a job id + * RET 0 if no slurm error, errno otherwise. parsing error prints + * error message and returns 0 + */ +static int _requeue(char *job_id_str) +{ + int rc = SLURM_SUCCESS; + uint32_t job_id = 0; + char *next_str; + + if (job_id_str) { + job_id = (uint32_t) strtol (job_id_str, &next_str, 10); + if (next_str[0] != '\0') { + fprintf(stderr, "Invalid job id specified\n"); + exit_code = 1; + return 0; + } + } else { + fprintf(stderr, "Invalid job id specified\n"); + exit_code = 1; + return 0; + } + + rc = slurm_requeue (job_id); + return rc; +} + diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index bf1063f555ee5596424d0b0db2afd86fedb2a23c..8c8d0a9b43cba68b08cf2f4daa58992d604e09fb 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -712,6 +712,7 @@ void _dump_job_details(struct job_details *detail_ptr, Buf buffer) pack16((uint16_t) detail_ptr->shared, buffer); pack16((uint16_t) detail_ptr->contiguous, buffer); pack16((uint16_t) detail_ptr->cpus_per_task, buffer); + pack16((uint16_t) detail_ptr->no_requeue, buffer); pack32((uint32_t) detail_ptr->min_procs, buffer); pack32((uint32_t) detail_ptr->min_memory, buffer); @@ -739,7 +740,7 @@ static int _load_job_details(struct job_record *job_ptr, Buf buffer) char **argv = (char **) NULL; uint32_t min_nodes, max_nodes, min_procs; uint16_t argc = 0, req_tasks, shared, contiguous; - uint16_t cpus_per_task, name_len; + uint16_t cpus_per_task, name_len, no_requeue; uint32_t min_memory, min_tmp_disk, total_procs; time_t begin_time, submit_time; int i; @@ -753,6 +754,7 @@ static int _load_job_details(struct 
job_record *job_ptr, Buf buffer) safe_unpack16(&shared, buffer); safe_unpack16(&contiguous, buffer); safe_unpack16(&cpus_per_task, buffer); + safe_unpack16(&no_requeue, buffer); safe_unpack32(&min_procs, buffer); safe_unpack32(&min_memory, buffer); @@ -772,9 +774,10 @@ static int _load_job_details(struct job_record *job_ptr, Buf buffer) safe_unpackstr_array(&argv, &argc, buffer); /* validity test as possible */ - if ((shared > 1) || (contiguous > 1)) { - error("Invalid data for job %u: shared=%u contiguous=%u", - job_ptr->job_id, shared, contiguous); + if ((shared > 1) || (contiguous > 1) || (no_requeue > 1)) { + error("Invalid data for job %u: " + "shared=%u contiguous=%u no_requeue=%u", + job_ptr->job_id, shared, contiguous, no_requeue); goto unpack_error; } @@ -802,6 +805,7 @@ static int _load_job_details(struct job_record *job_ptr, Buf buffer) job_ptr->details->min_procs = min_procs; job_ptr->details->min_memory = min_memory; job_ptr->details->min_tmp_disk = min_tmp_disk; + job_ptr->details->no_requeue = no_requeue; job_ptr->details->begin_time = begin_time; job_ptr->details->submit_time = submit_time; job_ptr->details->req_nodes = req_nodes; @@ -1147,7 +1151,7 @@ void dump_job_desc(job_desc_msg_t * job_specs) long job_id, min_procs, min_memory, min_tmp_disk, num_procs; long min_nodes, max_nodes, time_limit, priority, contiguous; long kill_on_node_fail, shared, immediate, dependency; - long cpus_per_task; + long cpus_per_task, no_requeue; char buf[100]; if (job_specs == NULL) @@ -1248,8 +1252,10 @@ void dump_job_desc(job_desc_msg_t * job_specs) slurm_make_time_str(&job_specs->begin_time, buf, sizeof(buf)); cpus_per_task = (job_specs->cpus_per_task != (uint16_t) NO_VAL) ? (long) job_specs->cpus_per_task : -1L; - debug3(" network=%s begin=%s cpus_per_task=%ld", - job_specs->network, buf, cpus_per_task); + no_requeue = (job_specs->no_requeue != (uint16_t) NO_VAL) ? 
+ (long) job_specs->no_requeue : -1L; + debug3(" network=%s begin=%s cpus_per_task=%ld no_requeue=%ld", + job_specs->network, buf, cpus_per_task, no_requeue); select_g_sprint_jobinfo(job_specs->select_jobinfo, buf, sizeof(buf), SELECT_PRINT_MIXED); @@ -1607,7 +1613,6 @@ extern int job_complete(uint32_t job_id, uid_t uid, bool requeue, uid); return ESLURM_USER_ID_MISSING; } - if (job_ptr->job_state & JOB_COMPLETING) return SLURM_SUCCESS; /* avoid replay */ @@ -2315,6 +2320,8 @@ _copy_job_desc_to_job_record(job_desc_msg_t * job_desc, detail_ptr->exclusive = job_desc->exclusive; if (job_desc->cpus_per_task != (uint16_t) NO_VAL) detail_ptr->cpus_per_task = job_desc->cpus_per_task; + if (job_desc->no_requeue != (uint16_t) NO_VAL) + detail_ptr->no_requeue = job_desc->no_requeue; if (job_desc->min_procs != NO_VAL) detail_ptr->min_procs = job_desc->min_procs; detail_ptr->min_procs = MAX(detail_ptr->min_procs, @@ -3440,7 +3447,6 @@ validate_jobs_on_node(char *node_name, uint32_t * job_count, else if (job_ptr->job_state == JOB_PENDING) { error("Registered PENDING job %u.%u on node %s ", job_id_ptr[i], step_id_ptr[i], node_name); - /* FIXME: Could possibly recover the job */ job_ptr->job_state = JOB_FAILED; last_job_update = now; job_ptr->start_time = job_ptr->end_time = now; @@ -3523,6 +3529,7 @@ kill_job_on_node(uint32_t job_id, struct job_record *job_ptr, kill_req = xmalloc(sizeof(kill_job_msg_t)); kill_req->job_id = job_id; + kill_req->time = time(NULL); if (job_ptr) { /* NULL if unknown */ kill_req->select_jobinfo = select_g_copy_jobinfo(job_ptr->select_jobinfo); @@ -3753,7 +3760,7 @@ extern bool job_epilog_complete(uint32_t job_id, char *node_name, * a new job arrives and the job_id is reused, we * could try to note the termination of a job that * hasn't really started. Very rare obviously. 
*/ - if ((job_ptr == JOB_PENDING) + if ((job_ptr->job_state == JOB_PENDING) || (job_ptr->node_bitmap == NULL)) { error("Epilog complete request for non-running job %u, " "slurmctld and slurmd out of sync", job_id); @@ -3789,9 +3796,20 @@ extern bool job_epilog_complete(uint32_t job_id, char *node_name, } #endif - if (!(job_ptr->job_state & JOB_COMPLETING)) /* COMPLETED */ + if (!(job_ptr->job_state & JOB_COMPLETING)) { /* COMPLETED */ + if ((job_ptr->job_state == JOB_PENDING) + && (job_ptr->batch_flag)) { + info("requeue batch job %u", job_ptr->job_id); + if (job_ptr->details) { + /* the time stamp on the new batch launch + * credential must be larger than the time + * stamp on the revoke request, so delay + * for at least two seconds. */ + job_ptr->details->begin_time = time(NULL) + 2; + } + } return true; - else + } else return false; } @@ -3895,10 +3913,7 @@ static void _signal_job(struct job_record *job_ptr, int signal) int i, buf_rec_size = 0; agent_args = xmalloc(sizeof(agent_arg_t)); - if (signal == SIGKILL) - agent_args->msg_type = REQUEST_TERMINATE_JOB; - else - agent_args->msg_type = REQUEST_SIGNAL_JOB; + agent_args->msg_type = REQUEST_SIGNAL_JOB; agent_args->retry = 1; signal_job_msg = xmalloc(sizeof(kill_tasks_msg_t)); signal_job_msg->job_id = job_ptr->job_id; @@ -4185,6 +4200,99 @@ extern int job_suspend(suspend_msg_t *sus_ptr, uid_t uid, return rc; } +/* + * job_requeue - Requeue a running or pending batch job + * IN uid - user id of user issuing the RPC + * IN job_id - id of the job to be requeued + * IN conn_fd - file descriptor on which to send reply + * RET 0 on success, otherwise ESLURM error code + */ +extern int job_requeue (uid_t uid, uint32_t job_id, slurm_fd conn_fd) +{ + int rc = SLURM_SUCCESS; + struct job_record *job_ptr = NULL; + bool super_user = false, suspended = false; + slurm_msg_t resp_msg; + return_code_msg_t rc_msg; + time_t now = time(NULL); + + /* find the job */ + job_ptr = find_job_record (job_id); + if (job_ptr == NULL) { + 
rc = ESLURM_INVALID_JOB_ID; + goto reply; + } + + /* validate the request */ + if ((uid == 0) || (uid == slurmctld_conf.slurm_user_id)) + super_user = 1; + if ((uid != job_ptr->user_id) && (!super_user)) { + rc = ESLURM_ACCESS_DENIED; + goto reply; + } + if (IS_JOB_FINISHED(job_ptr)) { + rc = ESLURM_ALREADY_DONE; + goto reply; + } + if (job_ptr->details && job_ptr->details->no_requeue) { + rc = ESLURM_DISABLED; + goto reply; + } + if (job_ptr->job_state & JOB_COMPLETING) { + rc = ESLURM_TRANSITION_STATE_NO_UPDATE; + goto reply; + } + + /* if pending, just reset the priority */ + if (job_ptr->job_state == JOB_PENDING) { + /* just reset the priority */ + if ((job_ptr->priority == 0) + && (!super_user)) { + rc = ESLURM_ACCESS_DENIED; + goto reply; + } + _set_job_prio(job_ptr); + last_job_update = now; + goto reply; + } + + if (job_ptr->batch_flag == 0) { + rc = ESLURM_BATCH_ONLY; + goto reply; + } + + if ((job_ptr->job_state != JOB_SUSPENDED) + && (job_ptr->job_state != JOB_RUNNING)) { + error("job_requeue job %u state is bad %s", job_id, + job_state_string(job_ptr->job_state)); + rc = EINVAL; + goto reply; + } + + if (job_ptr->job_state == JOB_SUSPENDED) + suspended = true; + last_job_update = now; + job_ptr->time_last_active = now; + job_ptr->job_state = JOB_PENDING | JOB_COMPLETING; + if (suspended) + job_ptr->end_time = job_ptr->suspend_time; + else + job_ptr->end_time = now; + deallocate_nodes(job_ptr, false, suspended); + job_completion_logger(job_ptr); +//FIXME: Test accounting + + reply: + rc_msg.return_code = rc; + resp_msg.msg_type = RESPONSE_SLURM_RC; + resp_msg.data = &rc_msg; + forward_init(&resp_msg.forward, NULL); + resp_msg.ret_list = NULL; + resp_msg.forward_struct_init = 0; + slurm_send_node_msg(conn_fd, &resp_msg); + return rc; +} + /* * job_end_time - Process JOB_END_TIME * IN time_req_msg - job end time request diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 
52a4758fce97003deec516dbb1a151538923b131..ec59ff4db412f68c02a6826a02d8147b5d4d3f66 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -2,7 +2,7 @@ * job_scheduler.c - manage the scheduling of pending jobs in priority order * Note there is a global job list (job_list) ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. + * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov> * UCRL-CODE-217948. @@ -313,6 +313,19 @@ static void _launch_job(struct job_record *job_ptr) launch_msg_ptr->gid = job_ptr->group_id; launch_msg_ptr->nprocs = job_ptr->details->req_tasks; launch_msg_ptr->nodes = xstrdup(job_ptr->nodes); + + if (make_batch_job_cred(launch_msg_ptr)) { + error("aborting batch job %u", job_ptr->job_id); + /* FIXME: This is a kludge, but this event indicates a serious + * problem with OpenSSH and should never happen. We are + * too deep into the job launch to gracefully clean up. 
*/ + job_ptr->end_time = time(NULL); + job_ptr->time_limit = 0; + xfree(launch_msg_ptr->nodes); + xfree(launch_msg_ptr); + return; + } + launch_msg_ptr->err = xstrdup(job_ptr->details->err); launch_msg_ptr->in = xstrdup(job_ptr->details->in); launch_msg_ptr->out = xstrdup(job_ptr->details->out); @@ -365,3 +378,29 @@ _xduparray(uint16_t size, char ** array) result[i] = xstrdup(array[i]); return result; } + +/* + * make_batch_job_cred - add a job credential to the batch_job_launch_msg + * IN/OUT launch_msg_ptr - batch_job_launch_msg in which job_id, step_id, + * uid and nodes have already been set + * RET 0 or error code + */ +extern int make_batch_job_cred(batch_job_launch_msg_t *launch_msg_ptr) +{ + slurm_cred_arg_t cred_arg; + + cred_arg.jobid = launch_msg_ptr->job_id; + cred_arg.stepid = launch_msg_ptr->step_id; + cred_arg.uid = launch_msg_ptr->uid; + cred_arg.hostlist = launch_msg_ptr->nodes; + cred_arg.ntask_cnt = 0; + cred_arg.ntask = NULL; + + launch_msg_ptr->cred = slurm_cred_create(slurmctld_config.cred_ctx, + &cred_arg); + + if (launch_msg_ptr->cred) + return SLURM_SUCCESS; + error("slurm_cred_create failure for batch job %u", cred_arg.jobid); + return SLURM_ERROR; +} diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 823acf985c3c4aa03fb5ba5dccfb8d523f47ccf8..16a989b1c9422ed0be52a7d0d8b377e16bc41f14 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -171,9 +171,10 @@ extern void deallocate_nodes(struct job_record *job_ptr, bool timeout, agent_args->retry = 1; kill_job = xmalloc(sizeof(kill_job_msg_t)); last_node_update = time(NULL); - kill_job->job_id = job_ptr->job_id; + kill_job->job_id = job_ptr->job_id; kill_job->job_uid = job_ptr->user_id; - kill_job->nodes = xstrdup(job_ptr->nodes); + kill_job->nodes = xstrdup(job_ptr->nodes); + kill_job->time = time(NULL); kill_job->select_jobinfo = select_g_copy_jobinfo( job_ptr->select_jobinfo); @@ -1399,8 +1400,9 @@ extern void re_kill_job(struct 
job_record *job_ptr) agent_args->msg_type = REQUEST_TERMINATE_JOB; agent_args->retry = 0; kill_job = xmalloc(sizeof(kill_job_msg_t)); - kill_job->job_id = job_ptr->job_id; + kill_job->job_id = job_ptr->job_id; kill_job->job_uid = job_ptr->user_id; + kill_job->time = time(NULL); kill_job->select_jobinfo = select_g_copy_jobinfo( job_ptr->select_jobinfo); diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 828ae1ab06438e72efdcedb21bbdc2d0c68d99f7..480c86bb95ee35556975f2933a3eb918624a3676 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -95,6 +95,7 @@ inline static void _slurm_rpc_node_select_info(slurm_msg_t * msg); inline static void _slurm_rpc_old_job_alloc(slurm_msg_t * msg); inline static void _slurm_rpc_ping(slurm_msg_t * msg); inline static void _slurm_rpc_reconfigure_controller(slurm_msg_t * msg); +inline static void _slurm_rpc_requeue(slurm_msg_t * msg); inline static void _slurm_rpc_shutdown_controller(slurm_msg_t * msg); inline static void _slurm_rpc_shutdown_controller_immediate(slurm_msg_t * msg); @@ -265,6 +266,10 @@ void slurmctld_req (slurm_msg_t * msg) _slurm_rpc_suspend(msg); slurm_free_suspend_msg(msg->data); break; + case REQUEST_JOB_REQUEUE: + _slurm_rpc_requeue(msg); + slurm_free_job_id_msg(msg->data); + break; case REQUEST_JOB_READY: _slurm_rpc_job_ready(msg); slurm_free_job_id_msg(msg->data); @@ -2103,6 +2108,37 @@ inline static void _slurm_rpc_suspend(slurm_msg_t * msg) } } +inline static void _slurm_rpc_requeue(slurm_msg_t * msg) +{ + int error_code = SLURM_SUCCESS; + DEF_TIMERS; + job_id_msg_t *requeue_ptr = (job_id_msg_t *) msg->data; + /* Locks: write job and node */ + slurmctld_lock_t job_write_lock = { + NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK }; + uid_t uid; + + START_TIMER; + info("Processing RPC: REQUEST_REQUEUE"); + uid = g_slurm_auth_get_uid(msg->auth_cred); + + lock_slurmctld(job_write_lock); + error_code = job_requeue(uid, requeue_ptr->job_id, + msg->conn_fd); + 
unlock_slurmctld(job_write_lock); + END_TIMER; + + if (error_code) { + info("_slurm_rpc_requeue %u: %s", requeue_ptr->job_id, + slurm_strerror(error_code)); + } else { + info("_slurm_rpc_requeue %u: %s", requeue_ptr->job_id, + TIME_STR); + /* Functions below provide their own locking */ + schedule_job_save(); + } +} + /* Assorted checkpoint operations */ inline static void _slurm_rpc_checkpoint(slurm_msg_t * msg) { @@ -2354,6 +2390,16 @@ int _launch_batch_step(job_desc_msg_t *job_desc_msg, uid_t uid, launch_msg_ptr->gid = job_ptr->group_id; launch_msg_ptr->uid = uid; launch_msg_ptr->nodes = xstrdup(job_ptr->nodes); + + if (make_batch_job_cred(launch_msg_ptr)) { + error("aborting batch step %u.%u", job_ptr->job_id, + step_rec->step_id); + xfree(launch_msg_ptr->nodes); + xfree(launch_msg_ptr); + delete_step_record(job_ptr, step_rec->step_id); + return SLURM_ERROR; + } + launch_msg_ptr->err = xstrdup(job_desc_msg->err); launch_msg_ptr->in = xstrdup(job_desc_msg->in); launch_msg_ptr->out = xstrdup(job_desc_msg->out); diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 4a09820296e010e2d74026c2d2062beb2d74e072..3d70f20d699c019af0aa2673eb4ba653c8eab843 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -3,7 +3,7 @@ * * $Id$ ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. + * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov> et. al. * UCRL-CODE-217948.
@@ -270,6 +270,7 @@ struct job_details { char *work_dir; /* pathname of working directory */ char **argv; /* arguments for a batch job script */ uint16_t argc; /* count of argv elements */ + uint16_t no_requeue; /* don't requeue job if set */ }; struct job_record { @@ -769,6 +770,15 @@ extern int job_complete (uint32_t job_id, uid_t uid, bool requeue, */ extern bool job_independent(struct job_record *job_ptr); +/* + * job_requeue - Requeue a running or pending batch job + * IN uid - user id of user issuing the RPC + * IN job_id - id of the job to be requeued + * IN conn_fd - file descriptor on which to send reply + * RET 0 on success, otherwise ESLURM error code + */ +extern int job_requeue (uid_t uid, uint32_t job_id, slurm_fd conn_fd); + /* * job_step_complete - note normal completion the specified job step * IN job_id - id of the job to be completed @@ -879,6 +889,14 @@ extern void load_part_uid_allow_list ( int force ); */ extern int load_all_part_state ( void ); +/* + * make_batch_job_cred - add a job credential to the batch_job_launch_msg + * IN/OUT launch_msg_ptr - batch_job_launch_msg in which job_id, step_id, + * uid and nodes have already been set + * RET 0 or error code + */ +extern int make_batch_job_cred(batch_job_launch_msg_t *launch_msg_ptr); + /* make_node_alloc - flag specified node as allocated to a job * IN node_ptr - pointer to node being allocated * IN job_ptr - pointer to job that is starting diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c index c76a35d4e81c3c7adcf70e5ca5637af8c7a767b2..dc7e8f0d53342463959ca78936fcdd1ad237091f 100644 --- a/src/slurmd/slurmd/req.c +++ b/src/slurmd/slurmd/req.c @@ -642,7 +642,7 @@ _rpc_launch_tasks(slurm_msg_t *msg, slurm_addr *cli) launch_tasks_request_msg_t *req = msg->data; uint32_t jobid = req->job_id; uint32_t stepid = req->job_step_id; - bool super_user = false, run_prolog = false; + bool super_user = false; slurm_addr self; socklen_t adlen; hostset_t step_hset = NULL; @@ -664,34 +664,30 @@ 
_rpc_launch_tasks(slurm_msg_t *msg, slurm_addr *cli) info("launch task %u.%u request from %u.%u@%s", req->job_id, req->job_step_id, req->uid, req->gid, host); -#ifndef HAVE_FRONT_END - if (!slurm_cred_jobid_cached(conf->vctx, req->job_id)) - run_prolog = true; -#endif - if (_check_job_credential(req->cred, jobid, stepid, req_uid, req->tasks_to_launch[req->srun_node_id], - &step_hset) - < 0) { + &step_hset) < 0) { errnum = ESLURMD_INVALID_JOB_CREDENTIAL; error("Invalid job credential from %ld@%s: %m", (long) req_uid, host); goto done; } - if (slurm_cred_revoked(conf->vctx, jobid)) { + if (slurm_cred_revoked(conf->vctx, req->cred)) { info("Job credential revoked for %u", jobid); errnum = ESLURMD_CREDENTIAL_REVOKED; goto done; } - /* xassert(slurm_cred_jobid_cached(conf->vctx, req->job_id));*/ - - /* Run job prolog if necessary */ - if (run_prolog && (_run_prolog(req->job_id, req->uid, NULL) != 0)) { - error("[job %u] prolog failed", req->job_id); - errnum = ESLURMD_PROLOG_FAILED; - goto done; +#ifndef HAVE_FRONT_END + if (!slurm_cred_jobid_cached(conf->vctx, req->job_id)) { + slurm_cred_insert_jobid(conf->vctx, req->job_id); + if (_run_prolog(req->job_id, req->uid, NULL) != 0) { + error("[job %u] prolog failed", req->job_id); + errnum = ESLURMD_PROLOG_FAILED; + goto done; + } } +#endif adlen = sizeof(self); _slurm_getsockname(msg->conn_fd, (struct sockaddr *)&self, &adlen); @@ -732,7 +728,7 @@ _rpc_spawn_task(slurm_msg_t *msg, slurm_addr *cli) spawn_task_request_msg_t *req = msg->data; uint32_t jobid = req->job_id; uint32_t stepid = req->job_step_id; - bool super_user = false, run_prolog = false; + bool super_user = false; slurm_addr self; socklen_t adlen; int spawn_tasks_to_launch = -1; @@ -753,11 +749,6 @@ _rpc_spawn_task(slurm_msg_t *msg, slurm_addr *cli) info("spawn task %u.%u request from %u@%s", req->job_id, req->job_step_id, req->uid, host); -#ifndef HAVE_FRONT_END - if (!slurm_cred_jobid_cached(conf->vctx, req->job_id)) - run_prolog = true; -#endif - if 
(_check_job_credential(req->cred, jobid, stepid, req_uid, spawn_tasks_to_launch, &step_hset) < 0) { errnum = ESLURMD_INVALID_JOB_CREDENTIAL; @@ -765,20 +756,22 @@ _rpc_spawn_task(slurm_msg_t *msg, slurm_addr *cli) (long) req_uid, host); goto done; } - if (slurm_cred_revoked(conf->vctx, jobid)) { + if (slurm_cred_revoked(conf->vctx, req->cred)) { info("Job credential revoked for %u", jobid); errnum = ESLURMD_CREDENTIAL_REVOKED; goto done; } - /* xassert(slurm_cred_jobid_cached(conf->vctx, req->job_id));*/ - - /* Run job prolog if necessary */ - if (run_prolog && (_run_prolog(req->job_id, req->uid, NULL) != 0)) { - error("[job %u] prolog failed", req->job_id); - errnum = ESLURMD_PROLOG_FAILED; - goto done; +#ifndef HAVE_FRONT_END + if (!slurm_cred_jobid_cached(conf->vctx, req->job_id)) { + slurm_cred_insert_jobid(conf->vctx, req->job_id); + if (_run_prolog(req->job_id, req->uid, NULL) != 0) { + error("[job %u] prolog failed", req->job_id); + errnum = ESLURMD_PROLOG_FAILED; + goto done; + } } +#endif adlen = sizeof(self); _slurm_getsockname(msg->conn_fd, (struct sockaddr *)&self, &adlen); @@ -852,7 +845,13 @@ _rpc_batch_job(slurm_msg_t *msg, slurm_addr *cli) (unsigned int) req_uid); rc = ESLURM_USER_ID_MISSING; /* or bad in this case */ goto done; - } + } + if (slurm_cred_revoked(conf->vctx, req->cred)) { + error("Job %u already killed, do not launch batch job", + req->job_id); + rc = ESLURMD_CREDENTIAL_REVOKED; /* job already ran */ + goto done; + } if (req->step_id != SLURM_BATCH_SCRIPT && req->step_id != 0) first_job_run = false; @@ -890,11 +888,11 @@ _rpc_batch_job(slurm_msg_t *msg, slurm_addr *cli) } /* Since job could have been killed while the prolog was - * running (especially on BlueGene, which can wait minutes + * running (especially on BlueGene, which can take minutes * for partition booting). Test if the credential has since * been revoked and exit as needed.
*/ - if (slurm_cred_revoked(conf->vctx, req->job_id)) { - info("Job %u already killed, do not launch tasks", + if (slurm_cred_revoked(conf->vctx, req->cred)) { + info("Job %u already killed, do not launch batch job", req->job_id); rc = ESLURMD_CREDENTIAL_REVOKED; /* job already ran */ goto done; @@ -1983,7 +1981,7 @@ _rpc_terminate_job(slurm_msg_t *msg, slurm_addr *cli) /* * "revoke" all future credentials for this jobid */ - if (slurm_cred_revoke(conf->vctx, req->job_id) < 0) { + if (slurm_cred_revoke(conf->vctx, req->job_id, req->time) < 0) { debug("revoking cred for job %u: %m", req->job_id); } else { save_cred_state(conf->vctx); diff --git a/src/srun/allocate.c b/src/srun/allocate.c index cfb4e8e456389dd3ca2a9fc4db6ef774046cb1a1..eedcdbc894f08b656d3db96380d2f85a38191081 100644 --- a/src/srun/allocate.c +++ b/src/srun/allocate.c @@ -515,6 +515,7 @@ job_desc_msg_create_from_opts (char *script) j->in = opt.ifname; j->out = opt.ofname; j->work_dir = opt.cwd; + j->no_requeue = opt.no_requeue; } return (j); diff --git a/src/srun/opt.c b/src/srun/opt.c index 4833db18c0c1ae0fca6f07ec704f1983541932bd..365e2084969a8fdf976f2591de53530b0c1fb685 100644 --- a/src/srun/opt.c +++ b/src/srun/opt.c @@ -122,6 +122,7 @@ #define LONG_OPT_MEM_BIND 0x120 #define LONG_OPT_CTRL_COMM_IFHN 0x121 #define LONG_OPT_MULTI 0x122 +#define LONG_OPT_NO_REQUEUE 0x123 /*---- forward declarations of static functions ----*/ @@ -710,6 +711,7 @@ static void _opt_default() opt.kill_bad_exit = false; opt.immediate = false; + opt.no_requeue = false; opt.allocate = false; opt.noshell = false; @@ -806,6 +808,7 @@ env_vars_t env_vars[] = { {"SLURM_KILL_BAD_EXIT", OPT_INT, &opt.kill_bad_exit, NULL }, {"SLURM_LABELIO", OPT_INT, &opt.labelio, NULL }, {"SLURM_NNODES", OPT_NODES, NULL, NULL }, + {"SLURM_NO_REQUEUE", OPT_INT, &opt.no_requeue, NULL }, {"SLURM_NO_ROTATE", OPT_NO_ROTATE, NULL, NULL }, {"SLURM_NPROCS", OPT_INT, &opt.nprocs, &opt.nprocs_set}, {"SLURM_OVERCOMMIT", OPT_OVERCOMMIT, NULL, NULL }, @@ 
-1046,6 +1049,7 @@ void set_options(const int argc, char **argv, int first) {"nice", optional_argument, 0, LONG_OPT_NICE}, {"ctrl-comm-ifhn", required_argument, 0, LONG_OPT_CTRL_COMM_IFHN}, {"multi-prog", no_argument, 0, LONG_OPT_MULTI}, + {"no-requeue", no_argument, 0, LONG_OPT_NO_REQUEUE}, {NULL, 0, 0, 0} }; char *opt_string = "+a:Abc:C:d:D:e:g:Hi:IjJ:kKlm:n:N:" @@ -1489,6 +1493,9 @@ void set_options(const int argc, char **argv, int first) case LONG_OPT_MULTI: opt.multi_prog = true; break; + case LONG_OPT_NO_REQUEUE: + opt.no_requeue = true; + break; } } @@ -1970,6 +1977,7 @@ static void _opt_list() info("verbose : %d", _verbose); info("slurmd_debug : %d", opt.slurmd_debug); info("immediate : %s", tf_(opt.immediate)); + info("no-requeue : %s", tf_(opt.no_requeue)); info("label output : %s", tf_(opt.labelio)); info("unbuffered IO : %s", tf_(opt.unbuffered)); info("allocate : %s", tf_(opt.allocate)); @@ -2045,7 +2053,7 @@ static void _usage(void) " [--mail-type=type] [--mail-user=user][--nice[=value]]\n" " [--prolog=fname] [--epilog=fname]\n" " [--task-prolog=fname] [--task-epilog=fname]\n" -" [--ctrl-comm-ifhn=addr] [--multi-prog]" +" [--ctrl-comm-ifhn=addr] [--multi-prog] [--no-requeue]" " [-w hosts...] [-x hosts...] 
executable [args...]\n"); } @@ -2103,8 +2111,9 @@ static void _help(void) " --mail-type=type notify on state change: BEGIN, END, FAIL or ALL\n" " --mail-user=user who to send email notification for job state changes\n" " --ctrl-comm-ifhn=addr interface hostname for PMI commaunications from srun" -" --multi-prog if set the program name specified is the\n" +" --multi-prog if set the program name specified is the\n" " configuration specificaiton for multiple programs\n" +" --no-requeue if set, do not permit the job to be requeued\n" "\n" "Allocate only:\n" " -A, --allocate allocate resources and spawn a shell\n" diff --git a/src/srun/opt.h b/src/srun/opt.h index 90bf4c3a982821a4d42fe84c685cfebcd3187e13..0c9760ff88f11b59628cdbff16d552d0fa2f15a1 100644 --- a/src/srun/opt.h +++ b/src/srun/opt.h @@ -138,6 +138,7 @@ typedef struct srun_options { bool batch; /* --batch, -b */ bool no_kill; /* --no-kill, -k */ bool kill_bad_exit; /* --kill-on-bad-exit, -K */ + bool no_requeue; /* --no-requeue */ bool share; /* --share, -s */ int max_wait; /* --wait, -W */ bool quit_on_intr; /* --quit-on-interrupt, -q */ diff --git a/testsuite/expect/Makefile.am b/testsuite/expect/Makefile.am index 423760f0d17739075c901c0d3285133fc55aff32..f19bcd009b83024bf8a2dc164aae360260977edd 100644 --- a/testsuite/expect/Makefile.am +++ b/testsuite/expect/Makefile.am @@ -106,6 +106,7 @@ EXTRA_DIST = \ test3.6 \ test3.7 \ test3.7.prog.c \ + test3.8 \ test4.1 \ test4.2 \ test4.3 \ diff --git a/testsuite/expect/Makefile.in b/testsuite/expect/Makefile.in index f784b082c4b6aa5fe0f7012b52cf44b1431c8c18..ef35c9fab061e08c34bce85f8f4eeabae76530e5 100644 --- a/testsuite/expect/Makefile.in +++ b/testsuite/expect/Makefile.in @@ -341,6 +341,7 @@ EXTRA_DIST = \ test3.6 \ test3.7 \ test3.7.prog.c \ + test3.8 \ test4.1 \ test4.2 \ test4.3 \ diff --git a/testsuite/expect/README b/testsuite/expect/README index 0a48f220e2db62b04a7e39c3f503b962938d31a8..0b8e8c8f41c2e8beeafeae85e261ea6c82648b2a 100644 --- 
a/testsuite/expect/README +++ b/testsuite/expect/README @@ -181,6 +181,7 @@ test3.4 Validate scontrol update command for jobs. test3.5 Validate scontrol create, delete, and update of partition. test3.6 Testing of hidden partitions. test3.7 Test of job suspend/resume. +test3.8 Test of batch job requeue. UNTESTED "scontrol abort" would stop slurm UNTESTED "scontrol shutdown" would stop slurm diff --git a/testsuite/expect/test3.8 b/testsuite/expect/test3.8 new file mode 100755 index 0000000000000000000000000000000000000000..5bd5f5bca80c644ffb62228fb2498998f6e2c9eb --- /dev/null +++ b/testsuite/expect/test3.8 @@ -0,0 +1,226 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of batch job requeue. +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +# +# Note: This script generates and then deletes files in the working directory +# named test3.8.input, test3.8.output, test3.8.error, and +# test3.8.run.* +############################################################################ +# Copyright (C) 2006 The Regents of the University of California. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Morris Jette <jette1@llnl.gov> +# UCRL-CODE-217948. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version.
+# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +############################################################################ +source ./globals + +set test_id "3.8" +set exit_code 0 +set file_in "test$test_id.input" +set file_out "test$test_id.output" +set file_err "test$test_id.error" +set file_flag_1 "test$test_id.run.1" +set file_flag_2 "test$test_id.run.2" +set file_flag_3 "test$test_id.run.3" +set job_id 0 + +print_header $test_id + +if {[is_super_user] == 0} { + send_user "\nWARNING: This test can't be run except as SlurmUser\n" + exit 0 +} + +# +# Delete left-over input script plus stdout/err files +# Build input script file that runs two job steps +# +exec $bin_rm -f $file_in $file_out $file_err +exec $bin_rm -f $file_flag_1 $file_flag_2 $file_flag_3 +make_bash_script $file_in " + if \[ -f $file_flag_2 \] + then + $bin_touch $file_flag_3 + elif \[ -f $file_flag_1 \] + then + $bin_touch $file_flag_2 + else + $bin_touch $file_flag_1 + fi + $srun $bin_sleep 20 +" + +# +# Spawn a srun batch job that uses stdout/err and confirm their contents +# +set timeout $max_job_delay +spawn $srun --batch --output=$file_out --error=$file_err -t1 $file_in +expect { + -re "jobid ($number) submitted" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\nFAILURE: srun not responding\n" + kill_srun + set exit_code 1 + exp_continue + } + eof { + wait + } +} +if {$job_id == 0} { + send_user "\nFAILURE: batch submit failure\n" + exit 1 +} + +# +# Wait for job to begin, then requeue it +# +if {[wait_for_job $job_id "RUNNING"] != 0} { + send_user "\nFAILURE: waiting 
for job to begin\n" + set exit_code 1 +} +exec $bin_sleep 2 +spawn $scontrol requeue $job_id +expect { + -re "error" { + send_user "\nFAILURE: some scontrol error happened\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +# +# Wait for job to complete and check for files +# +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: waiting for job to complete\n" + set exit_code 1 +} +if {[wait_for_file $file_flag_1] != 0} { + send_user "\nFAILURE: file $file_flag_1 is missing\n" + set exit_code 1 +} +if {[wait_for_file $file_flag_2] != 0} { + send_user "\nFAILURE: file $file_flag_2 is missing\n" + set exit_code 1 +} +if {[file exists $file_flag_3]} { + send_user "\nFAILURE: file $file_flag_3 is found\n" + set exit_code 1 +} + +# +# Now run the same test, but with job requeue disabled via the +# srun --no-requeue option +# +set job_id 0 + +exec $bin_rm -f $file_flag_1 $file_flag_2 $file_flag_3 + +spawn $srun --no-requeue --batch --output=$file_out --error=$file_err -t1 $file_in +expect { + -re "jobid ($number) submitted" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\nFAILURE: srun not responding\n" + kill_srun + set exit_code 1 + exp_continue + } + eof { + wait + } +} +if {$job_id == 0} { + send_user "\nFAILURE: batch submit failure\n" + exit 1 +} + +# +# Wait for job to begin, then requeue it +# +if {[wait_for_job $job_id "RUNNING"] != 0} { + send_user "\nFAILURE: waiting for job to begin\n" + set exit_code 1 +} +set disabled 0 +exec $bin_sleep 2 +spawn $scontrol requeue $job_id +expect { + -re "error.*disabled" { + set disabled 1 + send_user "This error was expected, no worries\n" + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$disabled == 0} { + send_user "\nFAILURE: srun's --no-requeue option ignored\n" + set exit_code 1 +} + +# +# Wait 
for job to complete and check for files +# +if {[wait_for_job $job_id "DONE"] != 0} { + send_user "\nFAILURE: waiting for job to complete\n" + set exit_code 1 +} +if {[wait_for_file $file_flag_1] != 0} { + send_user "\nFAILURE: file $file_flag_1 is missing\n" + set exit_code 1 +} +if {[file exists $file_flag_2]} { + send_user "\nFAILURE: file $file_flag_2 is found\n" + set exit_code 1 +} +if {[file exists $file_flag_3]} { + send_user "\nFAILURE: file $file_flag_3 is found\n" + set exit_code 1 +} + +if {$exit_code == 0} { + exec $bin_rm -f $file_in $file_out $file_err + exec $bin_rm -f $file_flag_1 $file_flag_2 $file_flag_3 + send_user "\nSUCCESS\n" +} +exit $exit_code