Skip to content
Snippets Groups Projects
Commit f41aea51 authored by Moe Jette's avatar Moe Jette
Browse files

Added job completion script plugin with appropriate documentation updates.

parent ffcff69c
No related branches found
No related tags found
No related merge requests found
......@@ -7,6 +7,8 @@ documents those changes that are of interest to users and admins.
- slurm/459: Properly support partition's "Shared=force" configuration.
-- Resync node state to DRAINED or DRAINING on restart in case job
and node state recovered are out of sync.
-- Added jobcomp/script plugin (execute script on job completion,
from Nathan Huff, North Dakota State University).
* Changes in SLURM 0.3.5
========================
......
......@@ -195,6 +195,7 @@ AC_CONFIG_FILES([Makefile
src/plugins/jobcomp/Makefile
src/plugins/jobcomp/filetxt/Makefile
src/plugins/jobcomp/none/Makefile
src/plugins/jobcomp/script/Makefile
src/plugins/sched/Makefile
src/plugins/sched/backfill/Makefile
src/plugins/sched/builtin/Makefile
......
......@@ -9,7 +9,7 @@
<meta http-equiv="keywords" content="Simple Linux Utility for Resource Management, SLURM, resource management,
Linux clusters, high-performance computing, Livermore Computing">
<meta name="LLNLRandR" content="UCRL-WEB-204324">
<meta name="LLNLRandRdate" content="18 February 2004">
<meta name="LLNLRandRdate" content="8 July 2004">
<meta name="distribution" content="global">
<meta name="description" content="Simple Linux Utility for Resource Management">
<meta name="copyright"
......@@ -67,6 +67,7 @@ abbreviation for the type of scheduler. We recommend, for example:</p>
<ul>
<li><b>none</b>&#151;No job logging.</li>
<li><b>filetxt</b>&#151;Log job information to a text file.</li>
<li><b>script</b>&#151;Execute a script passing in job information in environment variables.</li>
</ul>
<p>The <span class="commandline">plugin_name</span> and
<span class="commandline">plugin_version</span>
......@@ -162,7 +163,7 @@ to implement a particular API version using the mechanism outlined for SLURM plu
<a href="mailto:slurm-dev@lists.llnl.gov">slurm-dev@lists.llnl.gov</a>.</p>
<p><a href="http://www.llnl.gov/"><img align=middle src="lll.gif" width="32" height="32" border="0"></a></p>
<p class="footer">UCRL-WEB-204324<br>
Last modified February 18, 2004</p></td>
Last modified July 8, 2004</p></td>
</tr>
</table>
</td>
......
.TH "slurm.conf" "5" "June 2004" "Morris Jette" "Slurm configuration file"
.TH "slurm.conf" "5" "July 2004" "Morris Jette" "Slurm configuration file"
.SH "NAME"
slurm.conf \- Slurm configuration file
.SH "DESCRIPTION"
......@@ -117,17 +117,20 @@ stopped by a debugger or considerable time could be required for batch
job pre- and post-processing. The default value is unlimited (zero).
.TP
\fBJobCompLoc\fR
Define the location where job completion records are to be logged.
The interpretation of this value depends upon the logging mechanism
specified by the \fBJobCompType\fR parameter.
.TP
\fBJobCompType\fR
Define the job completion logging mechanism type.
Acceptable values at present include "jobcomp/none" and "jobcomp/filetxt".
Acceptable values at present include "jobcomp/none", "jobcomp/filetxt",
and "jobcomp/script".
The default value is "jobcomp/none", which means that upon job completion
the record of the job is purged from the system.
The value "jobcomp/filetxt" indicates that a record of the job should be
written to a text file specified by the \fBJobCompLoc\fR parameter.
The value "jobcomp/script" indicates that a script specified by the
\fBJobCompLoc\fR parameter is to be executed with environment variables
indicating the job information.
.TP
\fBJobCredentialPrivateKey\fR
Fully qualified pathname of a file containing a private key used for
......
# $Id$
# Makefile for jobcomp plugins
SUBDIRS = filetxt none
SUBDIRS = filetxt none script
# $Id$
# Makefile for jobcomp/script plugin
AUTOMAKE_OPTIONS = foreign
PLUGIN_FLAGS = -module -avoid-version --export-dynamic
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
pkglib_LTLIBRARIES = jobcomp_script.la
# Script job completion logging plugin.
jobcomp_script_la_SOURCES = jobcomp_script.c
jobcomp_script_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
if HAVE_AIX
jobcomp_script_la_LIBADD = $(top_builddir)/src/common/libcommon.la \
$(top_builddir)/src/api/libslurm.la
endif
This job completion plugin runs a program after the job is completed.
The JobCompType is jobcomp/script and the JobCompLoc is the pathname of the script to run.
The plugin sets up the environment for the program to contain the following variables.
JOBID: The job id of the slurm job
UID: The uid of the user the job was run for
JOBNAME: The name of the job
JOBSTATE: The state of the job when it ended
PARTITION: The partition the job ran on
LIMIT: The time limit of the job
START: The start time of the job
END: The end time of the job
NODES: The nodes the job ran on or an empty string if it wasn't assigned
PATH: Set to the standard path
/*****************************************************************************\
* jobcomp_script.c - Script running slurm job completion logging plugin.
*****************************************************************************
* Produced at Center for High Performance Computing, North Dakota State
* University
* Written by Nathan Huff <nhuff@geekshanty.com>
* UCRL-CODE-2002-040.
*
* This file is part of SLURM, a resource management program.
* For details, see <http://www.llnl.gov/linux/slurm/>.
*
* SLURM is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with SLURM; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
\*****************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <paths.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <slurm/slurm.h>
#include <slurm/slurm_errno.h>
#include "src/common/slurm_jobcomp.h"
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"
/*
* These variables are required by the generic plugin interface. If they
* are not found in the plugin, the plugin loader will ignore it.
*
* plugin_name - a string giving a human-readable description of the
* plugin. There is no maximum length, but the symbol must refer to
* a valid string.
*
* plugin_type - a string suggesting the type of the plugin or its
* applicability to a particular form of data or method of data handling.
* If the low-level plugin API is used, the contents of this string are
* unimportant and may be anything. SLURM uses the higher-level plugin
* interface which requires this string to be of the form
*
* <application>/<method>
*
* where <application> is a description of the intended application of
* the plugin (e.g., "jobcomp" for SLURM job completion logging) and <method>
* is a description of how this plugin satisfies that application. SLURM will
* only load job completion logging plugins if the plugin_type string has a
* prefix of "jobcomp/".
*
* plugin_version - an unsigned 32-bit integer giving the version number
* of the plugin. If major and minor revisions are desired, the major
* version number may be multiplied by a suitable magnitude constant such
* as 100 or 1000. Various SLURM versions will likely require a certain
* minimum versions for their plugins as the job completion logging API
* matures.
*/
const char plugin_name[] = "Job completion logging script plugin";
const char plugin_type[] = "jobcomp/script";
const uint32_t plugin_version = 90;
/* Most recent error code recorded by this plugin; reported by
 * slurm_jobcomp_get_errno() */
static int plugin_errno = SLURM_SUCCESS;
/* Pathname of the program to execute on job completion; assigned by
 * slurm_jobcomp_set_location() and released by fini() */
static char * script = NULL;
/* Shared buffer holding the message returned by slurm_jobcomp_strerror() */
static char error_str[256];
/*
 * init() - plugin entry point invoked when the plugin is loaded, ahead of
 * every other function in this file. This plugin needs no global setup,
 * so it simply reports success.
 */
int init ( void )
{
	int rc = SLURM_SUCCESS;

	return rc;
}
/*
 * Record the pathname of the script to run on job completion.
 *
 * location - pathname of the script; a private copy is stored, replacing
 *            any previously stored pathname
 * RET SLURM_SUCCESS, or SLURM_ERROR with plugin_errno set to EACCES when
 *     location is NULL (the previously stored pathname is left untouched)
 */
int slurm_jobcomp_set_location ( char * location )
{
	int rc = SLURM_ERROR;

	if (location != NULL) {
		xfree(script);
		script = xstrdup(location);
		rc = SLURM_SUCCESS;
	} else {
		plugin_errno = EACCES;
	}

	return rc;
}
/*
 * Write "name=value" followed by a terminating NUL at dst.
 * RET the address immediately after the terminating NUL.
 */
static char *_env_append(char *dst, const char *name, const char *value)
{
	size_t name_len = strlen(name);
	size_t value_len = strlen(value);

	memcpy(dst, name, name_len);
	dst += name_len;
	*dst++ = '=';
	memcpy(dst, value, value_len + 1);	/* +1 copies the NUL too */
	return dst + value_len + 1;
}

/* Create a new environment pointer containing information from
 * slurm_jobcomp_log_record so that the script can access it.
 *
 * The result is ONE allocation: a NULL-terminated array of pointers
 * immediately followed by the "NAME=value" strings those pointers refer
 * to, so the caller releases everything with a single xfree().
 *
 * Variables set, in order: JOBID, UID, JOBNAME, JOBSTATE, PARTITION,
 * LIMIT, START, END, NODES and PATH (always _PATH_STDPATH).
 *
 * Any argument that is NULL is exported as an empty string. The caller
 * only substitutes an empty string for node_list; job_name, job_state and
 * partition are forwarded as received, so they are guarded here.
 *
 * RET the environment array, or NULL if the allocation failed
 */
static char ** _create_environment(char *job, char *user, char *job_name,
	char *job_state, char *partition, char *limit, char* start, char * end,
	char *node_list)
{
	enum { ENV_ENTRIES = 10 };
	static const char * const names[ENV_ENTRIES] = {
		"JOBID", "UID", "JOBNAME", "JOBSTATE", "PARTITION",
		"LIMIT", "START", "END", "NODES", "PATH"
	};
	const char *values[ENV_ENTRIES];
	/* pointer table, including the terminating NULL entry */
	const size_t table_size = (ENV_ENTRIES + 1) * sizeof(char *);
	size_t len = table_size;
	size_t i;
	char ** envptr;
	char *ptr;

	values[0] = job;
	values[1] = user;
	values[2] = job_name;
	values[3] = job_state;
	values[4] = partition;
	values[5] = limit;
	values[6] = start;
	values[7] = end;
	values[8] = node_list;
	values[9] = _PATH_STDPATH;

	for (i = 0; i < ENV_ENTRIES; i++) {
		if (values[i] == NULL)
			values[i] = "";
		/* name + '=' + value + NUL */
		len += strlen(names[i]) + 1 + strlen(values[i]) + 1;
	}

	envptr = (char **)try_xmalloc(len);
	if (envptr == NULL)
		return NULL;

	/* string storage starts right after the pointer table */
	ptr = (char *)envptr + table_size;
	for (i = 0; i < ENV_ENTRIES; i++) {
		envptr[i] = ptr;
		ptr = _env_append(ptr, names[i], values[i]);
	}
	envptr[ENV_ENTRIES] = NULL;

	return envptr;
}
/*
 * Run the configured job completion script for one finished job and wait
 * for it to terminate.
 *
 * The job information is converted to strings and handed to the script
 * through its environment (see _create_environment()). The script runs
 * with _PATH_TMP as working directory and with stdin, stdout and stderr
 * redirected to _PATH_DEVNULL; its only argument is its own pathname.
 *
 * job_id, user_id       - exported as decimal numbers
 * job_name, job_state,
 * partition             - exported as received
 * time_limit            - exported as a decimal number, or "UNLIMITED"
 *                         when equal to INFINITE
 * start, end_time       - exported as decimal numbers
 * node_list             - exported as received, or as an empty string
 *                         when NULL
 *
 * RET SLURM_SUCCESS if the script exited with status 0, else SLURM_ERROR
 *     with plugin_errno set to:
 *       EACCES - no script location has been set
 *       ENOMEM - the environment could not be allocated
 *       errno  - fork() or waitpid() failed
 *       EINTR  - the script did not exit normally (e.g. killed by signal)
 *       else   - the script's non-zero exit status; the child itself
 *                exits with errno if its setup or execve() fails
 */
int slurm_jobcomp_log_record ( uint32_t job_id, uint32_t user_id, char *job_name,
	char *job_state, char *partition, uint32_t time_limit,
	time_t start, time_t end_time, char *node_list)
{
	pid_t pid = -1;
	pid_t waited;
	char user_id_str[32],job_id_str[32], nodes_cache[1];
	char start_str[32], end_str[32], lim_str[32];
	char * argvp[] = {script,NULL};
	int ret_value = SLURM_SUCCESS;
	int saved_errno;
	char ** envp, * nodes;

	debug3("Entering slurm_jobcomp_log_record");

	/* Nothing to execute; same error code slurm_jobcomp_set_location()
	 * uses for a missing location */
	if (script == NULL) {
		plugin_errno = EACCES;
		return SLURM_ERROR;
	}

	/* Explicit casts keep the arguments matched to the conversion
	 * specifiers regardless of how uint32_t and time_t are defined */
	snprintf(user_id_str,sizeof(user_id_str),"%u",(unsigned int) user_id);
	snprintf(job_id_str,sizeof(job_id_str),"%u",(unsigned int) job_id);
	snprintf(start_str, sizeof(start_str),"%lu",(unsigned long) start);
	snprintf(end_str, sizeof(end_str),"%lu",(unsigned long) end_time);
	nodes_cache[0] = '\0';

	if (time_limit == INFINITE) {
		strcpy(lim_str, "UNLIMITED");
	} else {
		snprintf(lim_str, sizeof(lim_str), "%lu", (unsigned long) time_limit);
	}

	if (node_list == NULL) {
		nodes = nodes_cache;
	} else {
		nodes = node_list;
	}

	/* Setup environment */
	envp = _create_environment(job_id_str,user_id_str,job_name,job_state,
		partition,lim_str,start_str,end_str,nodes);
	if (envp == NULL) {
		plugin_errno = ENOMEM;
		return SLURM_ERROR;
	}

	pid = fork();
	if (pid < 0) {
		/* Something bad happened; capture errno before any other
		 * call has a chance to overwrite it */
		saved_errno = errno;
		error("fork: %m");
		xfree(envp);
		plugin_errno = saved_errno;
		return SLURM_ERROR;
	}

	if (pid == 0) {
		/* Child process. Every failure path below must terminate
		 * the child: returning would let a second copy of the
		 * caller continue running. _exit() is used so that the
		 * child neither flushes stdio buffers inherited from the
		 * parent nor runs the parent's exit handlers. */

		/* Change directory to tmp */
		if (chdir(_PATH_TMP) != 0) {
			_exit(errno);
		}
		/* Redirect stdin, stderr, and stdout to /dev/null */
		if (freopen(_PATH_DEVNULL, "rb", stdin) == NULL) {
			_exit(errno);
		}
		if (freopen(_PATH_DEVNULL, "wb", stdout) == NULL) {
			_exit(errno);
		}
		if (freopen(_PATH_DEVNULL, "wb", stderr) == NULL) {
			_exit(errno);
		}
		/* Exec Script */
		execve(script,argvp,envp);
		/* execve() only returns on failure */
		_exit(errno);
	}

	/* Parent process: block until the script terminates and collect
	 * its status, restarting the wait if a signal interrupts it */
	info ("waiting");
	do {
		waited = waitpid(pid, &ret_value, 0);
	} while ((waited < 0) && (errno == EINTR));
	saved_errno = errno;
	info("done wait");
	xfree(envp);
	debug3("Exiting slurm_jobcomp_log_record");

	if (waited < 0) {
		/* No status was collected; do not interpret ret_value */
		plugin_errno = saved_errno;
		return SLURM_ERROR;
	}

	if (WIFEXITED(ret_value)) {
		if (WEXITSTATUS(ret_value) == 0)
			return SLURM_SUCCESS;
		plugin_errno = WEXITSTATUS(ret_value);
	} else {
		/* WEXITSTATUS() is only meaningful after a normal exit */
		plugin_errno = EINTR;
	}
	return SLURM_ERROR;
}
/*
 * Report the error code most recently recorded by this plugin.
 * RET the current value of plugin_errno
 */
int slurm_jobcomp_get_errno( void )
{
	int last_error = plugin_errno;

	return last_error;
}
/*
 * Return a string representation of the error.
 *
 * errnum - error code, e.g. a value from slurm_jobcomp_get_errno()
 * RET pointer to the file-level error_str buffer holding the message;
 *     the contents are overwritten by the next call
 *
 * strerror_r() is deliberately not used: it exists in two incompatible
 * variants (XSI returns an int status, GNU returns a char * and may leave
 * the supplied buffer untouched), selected by feature-test macros.
 * Ignoring its return value can therefore yield an empty or stale buffer.
 * Copying the strerror() text with a bounded snprintf() behaves the same
 * everywhere, and the result already lives in a shared static buffer.
 */
char *slurm_jobcomp_strerror( int errnum )
{
	snprintf(error_str, sizeof(error_str), "%s", strerror(errnum));
	return error_str;
}
/*
 * fini() - called when the plugin unloads. Releases the stored script
 * pathname via xfree() and reports success.
 */
int fini ( void )
{
	int rc = SLURM_SUCCESS;

	xfree(script);
	return rc;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment