diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c
index f712a9d0cd1c81109894ad4fa8b37a7949458340..fee072888c3c20b2fdca9ff7033538a677b473fd 100644
--- a/src/slurmd/slurmd/req.c
+++ b/src/slurmd/slurmd/req.c
@@ -4050,14 +4050,14 @@ _rpc_complete_batch(slurm_msg_t *msg)
 static void
 _rpc_terminate_job(slurm_msg_t *msg)
 {
+#ifndef HAVE_AIX
+	bool have_spank = false;
+#endif
 	int rc = SLURM_SUCCESS;
 	kill_job_msg_t *req = msg->data;
 	uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, NULL);
 	int nsteps = 0;
 	int delay;
-	slurm_ctl_conf_t *cf;
-	bool have_spank = false;
-	struct stat stat_buf;
 	job_env_t job_env;
 
 	debug("_rpc_terminate_job, uid = %d", uid);
@@ -4155,12 +4155,12 @@
 		_kill_all_active_steps(req->job_id, SIGTERM, true);
 	}
 
-	cf = slurm_conf_lock();
-	delay = MAX(cf->kill_wait, 5);
-	if (cf->plugstack && (stat(cf->plugstack, &stat_buf) == 0))
-		have_spank = true;
-	slurm_conf_unlock();
-
+#ifndef HAVE_AIX
+	if ((nsteps == 0) && !conf->epilog) {
+		struct stat stat_buf;
+		if (conf->plugstack && (stat(conf->plugstack, &stat_buf) == 0))
+			have_spank = true;
+	}
 	/*
 	 * If there are currently no active job steps and no
 	 * configured epilog to run, bypass asynchronous reply and
@@ -4168,13 +4168,12 @@
 	 * request. We need to send current switch state on AIX
 	 * systems, so this bypass can not be used.
 	 */
-
-#ifndef HAVE_AIX
 	if ((nsteps == 0) && !conf->epilog && !have_spank) {
 		debug4("sent ALREADY_COMPLETE");
-		if (msg->conn_fd >= 0)
+		if (msg->conn_fd >= 0) {
 			slurm_send_rc_msg(msg,
 					  ESLURMD_KILL_JOB_ALREADY_COMPLETE);
+		}
 		slurm_cred_begin_expiration(conf->vctx, req->job_id);
 		save_cred_state(conf->vctx);
 		_waiter_complete(req->job_id);
@@ -4213,6 +4212,7 @@
 	/*
 	 * Check for corpses
 	 */
+	delay = MAX(conf->kill_wait, 5);
 	if ( !_pause_for_job_completion (req->job_id, req->nodes, delay)
 	     && (xcpu_signal(SIGKILL, req->nodes) +
 		 _terminate_all_steps(req->job_id, true)) ) {
@@ -4624,14 +4624,20 @@ _run_spank_job_script (const char *mode, char **env, uint32_t job_id, uid_t uid)
 static int
 _run_job_script(const char *name, const char *path, uint32_t jobid, int timeout, char **env, uid_t uid)
 {
-	int status, rc;
+	bool have_spank = false;
+	struct stat stat_buf;
+	int status = 0, rc;
+
	/*
	 * Always run both spank prolog/epilog and real prolog/epilog script,
	 * even if spank plugins fail. (May want to alter this in the future)
	 * If both "script" mechanisms fail, prefer to return the "real"
	 * prolog/epilog status.
	 */
-	status = _run_spank_job_script(name, env, jobid, uid);
+	if (conf->plugstack && (stat(conf->plugstack, &stat_buf) == 0))
+		have_spank = true;
+	if (have_spank)
+		status = _run_spank_job_script(name, env, jobid, uid);
 	if ((rc = run_script(name, path, jobid, timeout, env, uid)))
 		status = rc;
 	return (status);
diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c
index 449276ca751391154725f9167aeb17bf6ab50bc6..57d0bde1f337bec8c67f74f5efdfa3915e27d5b5 100644
--- a/src/slurmd/slurmd/slurmd.c
+++ b/src/slurmd/slurmd/slurmd.c
@@ -903,6 +903,7 @@
 	_massage_pathname(&conf->spooldir);
 	_free_and_set(&conf->pidfile, xstrdup(cf->slurmd_pidfile));
 	_massage_pathname(&conf->pidfile);
+	_free_and_set(&conf->plugstack, xstrdup(cf->plugstack));
 	_free_and_set(&conf->select_type, xstrdup(cf->select_type));
 	_free_and_set(&conf->task_prolog, xstrdup(cf->task_prolog));
 	_free_and_set(&conf->task_epilog, xstrdup(cf->task_epilog));
@@ -940,6 +941,7 @@
 	if (cf->slurmctld_port == 0)
 		fatal("Unable to establish controller port");
 	conf->slurmd_timeout = cf->slurmd_timeout;
+	conf->kill_wait = cf->kill_wait;
 	conf->use_pam = cf->use_pam;
 	conf->task_plugin_param = cf->task_plugin_param;
 
@@ -1163,6 +1165,7 @@
 	xfree(conf->node_topo_addr);
 	xfree(conf->node_topo_pattern);
 	xfree(conf->pidfile);
+	xfree(conf->plugstack);
 	xfree(conf->prolog);
 	xfree(conf->pubkey);
 	xfree(conf->select_type);
diff --git a/src/slurmd/slurmd/slurmd.h b/src/slurmd/slurmd/slurmd.h
index 4c3ef6964d5d2198712b570978be4eff1b8bc01a..6c648108bd2a4f5867ad03e18ef2d5d4bce7db62 100644
--- a/src/slurmd/slurmd/slurmd.h
+++ b/src/slurmd/slurmd/slurmd.h
@@ -156,6 +156,9 @@ typedef struct slurmd_config {
 	List prolog_running_jobs;
 	pthread_mutex_t prolog_running_lock;
 	pthread_cond_t prolog_running_cond;
+	char *plugstack;	/* path to SPANK config file */
+	uint16_t kill_wait;	/* seconds between SIGXCPU to SIGKILL
+				 * on job termination */
 } slurmd_conf_t;
 
 extern slurmd_conf_t * conf;