From 7da16150df4194a83f0eb3e3905112d9aee287fc Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Wed, 18 Feb 2004 21:54:03 +0000
Subject: [PATCH] Added documentation for job completion, scheduler, and switch
 plugins. Minor changes are needed to the plugin code to make them consistent.

---
 doc/Makefile.am              |   3 +
 doc/html/documentation.html  |   5 +-
 doc/html/jobcompplugins.html | 177 ++++++++++++++++
 doc/html/schedplugins.html   | 175 +++++++++++++++
 doc/html/switchplugins.html  | 399 +++++++++++++++++++++++++++++++++++
 5 files changed, 758 insertions(+), 1 deletion(-)
 create mode 100644 doc/html/jobcompplugins.html
 create mode 100644 doc/html/schedplugins.html
 create mode 100644 doc/html/switchplugins.html

diff --git a/doc/Makefile.am b/doc/Makefile.am
index 1c854f9305f..ab4fbd66fa5 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -13,6 +13,7 @@ html_DATA = \
 	html/entities.gif \
 	html/faq.html \
 	html/help.html \
+	html/jobcompplugins.html \
 	html/lll.gif \
 	html/news.html \
 	html/overview.html \
@@ -22,11 +23,13 @@ html_DATA = \
 	html/publications.html \
 	html/quickstart_admin.html \
 	html/quickstart.html \
+	html/schedplugins.html \
 	html/slurm_banner.jpg \
 	html/slurm_design.pdf \
 	html/slurm.html \
 	html/slurmstyles.css \
 	html/spacer.gif \
+	html/switchplugins.html \
 	html/team.html
 
 EXTRA_DIST = \
diff --git a/doc/html/documentation.html b/doc/html/documentation.html
index f2c4763fb01..86ea1f9ef45 100644
--- a/doc/html/documentation.html
+++ b/doc/html/documentation.html
@@ -61,13 +61,16 @@ for SLURM administrators and developers.</p>
 <li><a href="programmer_guide.html">Programmer Guide</a></li>
 <li><a href="plugins.html">Plugin Programmer Guide</a></li>
 <li><a href="authplugins.html">Authentication Plugin Programmer Guide</a></li>
+<li><a href="jobcompplugins.html">Job Completion Logging Plugin Programmer Guide</a></li>
+<li><a href="schedplugins.html">Scheduler Plugin Programmer Guide</a></li>
+<li><a href="switchplugins.html">Switch (Interconnect) Plugin Programmer Guide</a></li>
 </ul></td>
 </tr>
 <tr> 
 <td colspan="3"><hr> <p>For information about this page, contact <a href="mailto:slurm-dev@lists.llnl.gov">slurm-dev@lists.llnl.gov</a>.</p>
 <p><a href="http://www.llnl.gov/"><img align=middle src="lll.gif" width="32" height="32" border="0"></a></p>
 <p class="footer">UCRL-WEB-201790<br>
-Last modified January 15, 2004</p></td>
+Last modified February 17, 2004</p></td>
 </tr>
 </table>
 </td>
diff --git a/doc/html/jobcompplugins.html b/doc/html/jobcompplugins.html
new file mode 100644
index 00000000000..8b62c7f4f18
--- /dev/null
+++ b/doc/html/jobcompplugins.html
@@ -0,0 +1,177 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
+                        "http://www.w3.org/TR/REC-html40/loose.dtd">
+
+<html>
+
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta http-equiv="Pragma" content="no-cache">
+<meta http-equiv="keywords" content="Simple Linux Utility for Resource Management, SLURM, resource management, 
+Linux clusters, high-performance computing, Livermore Computing">
+<meta name="LLNLRandR" content="UCRL-WEB-201790">
+<meta name="LLNLRandRdate" content="18 February 2004">
+<meta name="distribution" content="global">
+<meta name="description" content="Simple Linux Utility for Resource Management">
+<meta name="copyright"
+content="This document is copyrighted U.S.
+Department of Energy under Contract W-7405-Eng-48">
+<meta name="Author" content="Moe Jette">
+<meta name="email" content="jette@llnl.gov">
+<meta name="Classification"
+content="DOE:DOE Web sites via organizational
+structure:Laboratories and Other Field Facilities">
+<title>Simple Linux Utility for Resource Management:Job Completion Logging Plugins</title>
+<link href="slurmstyles.css" rel="stylesheet" type="text/css">
+</head>
+
+<body bgcolor="#000000" text="#000000" leftmargin="0" topmargin="0">
+<table width="770" border="0" cellspacing="0" cellpadding="0">
+<tr> 
+<td><img src="slurm_banner.jpg" width="770" height="145" usemap="#Map" border="0" alt="Simple Linux Utility for Resource Management"></td>
+</tr>
+</table>
+<table width="770" border="0" cellspacing="0" cellpadding="3" bgcolor="#FFFFFF">
+<tr> 
+<td width="100%"> 
+<table width="760" border="0" cellspacing="0" cellpadding="4" align="right">
+<tr>
+<td valign="top" bgcolor="#000000"><p><img src="spacer.gif" width="110" height="1" alt=""></p>
+<p><a href="slurm.html" class="nav" align="center">Home</a></p>
+<p><span class="whitetext">About</span><br>
+<a href="overview.html" class="nav">Overview</a><br>
+<a href="news.html" class="nav">What's New</a><br>
+<a href="publications.html" class="nav">Publications</a><br>
+<a href="team.html" class="nav">SLURM Team</a></p>
+<p><span class="whitetext">Using</span><br>
+<a href="documentation.html" class="nav">Documentation</a><br>
+<a href="faq.html" class="nav">FAQ</a><br>
+<a href="help.html" class="nav">Getting Help</a></p>
+<p><span class="whitetext">Installing</span><br>
+<a href="platforms.html" class="nav">Platforms</a><br>
+<a href="download.html" class="nav">Download</a><br>
+<a href="quickstart_admin.html" class="nav">Guide</a></p></td>
+<td><img src="spacer.gif" width="10" height="1" alt=""></td>
+<td valign="top"><h2><a name="top">SLURM Job Completion Logging Plugin API</a></h2>
+
+<h3> Overview</h3>
+<p> This document describes SLURM job completion logging plugins and the API that defines 
+them. It is intended as a resource to programmers wishing to write their own SLURM 
+job completion logging plugins. This is version 0 of the API.</p>
+<p>SLURM job completion logging plugins are SLURM plugins that implement the SLURM 
+API for logging information about jobs upon their completion. This may be used to log job information 
+to a text file, database, etc. The plugins must conform to the SLURM Plugin API with the following 
+specifications:</p>
+<p><span class="commandline">const char plugin_type[]</span><br>
+The major type must be &quot;jobcomp.&quot; The minor type can be any recognizable 
+abbreviation for the type of job completion logging. We recommend, for example:</p>
+<ul>
+<li><b>none</b>&#151;No job logging.</li>
+<li><b>filetxt</b>&#151;Log job information to a text file.</li>
+</ul>
+<p>The <span class="commandline">plugin_name</span> and 
+<span class="commandline">plugin_version</span> 
+symbols required by the SLURM Plugin API require no specialization for 
+job completion logging support. 
+Note carefully, however, the versioning discussion below.</p>
+<p>The programmer is urged to study 
+<span class="commandline">src/plugins/jobcomp/jobcomp_filetxt.c</span> and
+<span class="commandline">src/plugins/jobcomp/jobcomp_none.c</span> 
+for sample implementations of a SLURM job completion logging plugin.</p>
+<p class="footer"><a href="#top">top</a></p>
+
+<h3>Data Objects</h3>
+<p>The implementation must maintain (though not necessarily directly export) an 
+enumerated <span class="commandline">errno</span>  to allow SLURM to discover 
+as practically as possible the reason for any failed API call. Plugin-specific enumerated 
+integer values should be used when appropriate. It is desirable that these values 
+be mapped into the range ESLURM_JOBCOMP_MIN and ESLURM_JOBCOMP_MAX 
+as defined in <span class="commandline">slurm/slurm_errno.h</span>.
+The error number should be returned by the function
+<a href="#get_errno"><span class="commandline">slurm_jobcomp_get_errno()</span></a> 
+and this error number can be converted to an appropriate string description using the 
+<a href="#strerror"><span class="commandline">slurm_jobcomp_strerror()</span></a> 
+function described below.</p>
+
+<p>These values must not be used as return values in integer-valued functions 
+in the API. The proper error return value from integer-valued functions is SLURM_ERROR. 
+The implementation should endeavor to provide useful and pertinent information by 
+whatever means is practical. 
+Successful API calls are not required to reset any errno to a known value. However, 
+the initial value of any errno, prior to any error condition arising, should be 
+SLURM_SUCCESS. </p>
+<p class="footer"><a href="#top">top</a></p>
+
+<h3>API Functions</h3>
+<p>The following functions must appear. Functions which are not implemented should 
+be stubbed.</p>
+
+<p class="commandline">int slurm_jobcomp_set_location (char * location);</p>
+<p style="margin-left:.2in"><b>Description</b>: Specify the location to be used for job logging.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<span class="commandline"> location</span>&nbsp; 
+&nbsp;&nbsp;(input) specification of where logging should be done. The interpretation of 
+this string is at the discretion of the plugin implementation.</p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, 
+the plugin should return SLURM_ERROR and set the errno to an appropriate value 
+to indicate the reason for failure.</p>
+
+<p class="commandline">int slurm_jobcomp_log_record ( uint32_t job_id, uint32_t user_id, 
+char *job_name, char *job_state, char *partition, uint32_t time_limit, time_t start_time, 
+time_t end_time, char *node_list);</p>
+<p style="margin-left:.2in"><b>Description</b>: Note termination of a job with the specified 
+characteristics.</p>
+<p style="margin-left:.2in"><b>Arguments</b>: <br>
+<span class="commandline"> job_id</span>&nbsp;&nbsp;&nbsp;(input) SLURM id of the job.<br>
+<span class="commandline"> user_id</span>&nbsp;&nbsp;&nbsp;(input) User id of the job's owner.<br>
+<span class="commandline"> job_name</span>&nbsp;&nbsp;&nbsp;(input) Name of the job.<br><span class="commandline"> job_state</span>&nbsp;&nbsp;&nbsp;(input) State of the job.<br>
+<span class="commandline"> partition</span>&nbsp;&nbsp;&nbsp;(input) SLURM partition where the job ran.<br>
+<span class="commandline"> time_limit</span>&nbsp;&nbsp;&nbsp;(input) Time limit of the job in minutes (or INFINITE).<br>
+<span class="commandline"> start_time</span>&nbsp;&nbsp;&nbsp;(input) Time the job began execution.<br>
+<span class="commandline"> end_time</span>&nbsp;&nbsp;&nbsp;(input) Time the job ended.<br>
+<span class="commandline"> node_list</span>&nbsp;&nbsp;&nbsp;(input) List of nodes allocated to the job.
+May use SLURM node list expression. May be NULL if job was never allocated resources.</p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, 
+the plugin should return SLURM_ERROR and set the errno to an appropriate value 
+to indicate the reason for failure.</p>
+<p class="footer"><a href="#top">top</a></p>
+
+<p class="commandline"><a name="get_errno">int slurm_jobcomp_get_errno (void);</a></p>
+<p style="margin-left:.2in"><b>Description</b>: Return the number of a job completion
+logger specific error.</p>
+<p style="margin-left:.2in"><b>Arguments</b>: None</p>
+<p style="margin-left:.2in"><b>Returns</b>: Error number for the last failure encountered by 
+the job completion logging plugin.</p>
+
+<p class="commandline"><a name="strerror">const char *slurm_jobcomp_strerror(int errnum);</a></p>
+<p style="margin-left:.2in"><b>Description</b>: Return a string description of a job completion 
+logger specific error code.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:
+<span class="commandline"> errnum</span>&nbsp; &nbsp;&nbsp;(input) a job completion logger
+specific error code.</p>
+<p style="margin-left:.2in"><b>Returns</b>: Pointer to string describing the error 
+or NULL if no description found in this plugin.</p>
+<p class="footer"><a href="#top">top</a></p>
+
+<h3>Versioning</h3>
+<p> This document describes version 0 of the SLURM Job Completion Logging API. Future 
+releases of SLURM may revise this API. A job completion logging plugin conveys its ability 
+to implement a particular API version using the mechanism outlined for SLURM plugins.</p>
+<p class="footer"><a href="#top">top</a></p></td>
+</tr>
+<tr> 
+<td colspan="3"><hr> <p>For information about this page, contact 
+<a href="mailto:slurm-dev@lists.llnl.gov">slurm-dev@lists.llnl.gov</a>.</p>
+<p><a href="http://www.llnl.gov/"><img align=middle src="lll.gif" width="32" height="32" border="0"></a></p>
+<p class="footer">UCRL-WEB-201790<br>
+Last modified February 18, 2004</p></td>
+</tr>
+</table>
+</td>
+ </tr>
+</table>
+<map name="Map">
+<area shape="rect" coords="616,4,762,97" href="../">
+<area shape="rect" coords="330,1,468,11" href="http://www.llnl.gov/disclaimer.html">
+<area shape="rect" coords="11,23,213,115" href="slurm.html">
+</map>
+</body>
+</html>
diff --git a/doc/html/schedplugins.html b/doc/html/schedplugins.html
new file mode 100644
index 00000000000..02a2fee7992
--- /dev/null
+++ b/doc/html/schedplugins.html
@@ -0,0 +1,175 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
+                        "http://www.w3.org/TR/REC-html40/loose.dtd">
+
+<html>
+
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta http-equiv="Pragma" content="no-cache">
+<meta http-equiv="keywords" content="Simple Linux Utility for Resource Management, SLURM, resource management, 
+Linux clusters, high-performance computing, Livermore Computing">
+<meta name="LLNLRandR" content="UCRL-WEB-201790">
+<meta name="LLNLRandRdate" content="18 February 2004">
+<meta name="distribution" content="global">
+<meta name="description" content="Simple Linux Utility for Resource Management">
+<meta name="copyright"
+content="This document is copyrighted U.S.
+Department of Energy under Contract W-7405-Eng-48">
+<meta name="Author" content="Moe Jette">
+<meta name="email" content="jette@llnl.gov">
+<meta name="Classification"
+content="DOE:DOE Web sites via organizational
+structure:Laboratories and Other Field Facilities">
+<title>Simple Linux Utility for Resource Management:Scheduler Plugins</title>
+<link href="slurmstyles.css" rel="stylesheet" type="text/css">
+</head>
+
+<body bgcolor="#000000" text="#000000" leftmargin="0" topmargin="0">
+<table width="770" border="0" cellspacing="0" cellpadding="0">
+<tr> 
+<td><img src="slurm_banner.jpg" width="770" height="145" usemap="#Map" border="0" alt="Simple Linux Utility for Resource Management"></td>
+</tr>
+</table>
+<table width="770" border="0" cellspacing="0" cellpadding="3" bgcolor="#FFFFFF">
+<tr> 
+<td width="100%"> 
+<table width="760" border="0" cellspacing="0" cellpadding="4" align="right">
+<tr>
+<td valign="top" bgcolor="#000000"><p><img src="spacer.gif" width="110" height="1" alt=""></p>
+<p><a href="slurm.html" class="nav" align="center">Home</a></p>
+<p><span class="whitetext">About</span><br>
+<a href="overview.html" class="nav">Overview</a><br>
+<a href="news.html" class="nav">What's New</a><br>
+<a href="publications.html" class="nav">Publications</a><br>
+<a href="team.html" class="nav">SLURM Team</a></p>
+<p><span class="whitetext">Using</span><br>
+<a href="documentation.html" class="nav">Documentation</a><br>
+<a href="faq.html" class="nav">FAQ</a><br>
+<a href="help.html" class="nav">Getting Help</a></p>
+<p><span class="whitetext">Installing</span><br>
+<a href="platforms.html" class="nav">Platforms</a><br>
+<a href="download.html" class="nav">Download</a><br>
+<a href="quickstart_admin.html" class="nav">Guide</a></p></td>
+<td><img src="spacer.gif" width="10" height="1" alt=""></td>
+<td valign="top"><h2><a name="top">SLURM Scheduler Plugin API</a></h2>
+
+<h3> Overview</h3>
+<p> This document describes SLURM scheduler plugins and the API that defines 
+them. It is intended as a resource to programmers wishing to write their own SLURM 
+scheduler plugins. This is version 0 of the API.</p>
+<p>SLURM scheduler plugins are SLURM plugins that implement the SLURM scheduler
+API described herein. They must conform to the SLURM Plugin API with the following 
+specifications:</p>
+<p><span class="commandline">const char plugin_type[]</span><br>
+The major type must be &quot;sched.&quot; The minor type can be any recognizable 
+abbreviation for the type of scheduler. We recommend, for example:</p>
+<ul>
+<li><b>builtin</b>&#151;A plugin that implements the API without providing any actual 
+scheduling services. This is the default behavior and implements first-in-first-out scheduling.</li>
+<li><b>backfill</b>&#151;Raise the priority of jobs if doing so results in their starting earlier 
+without any delay in the expected initiation time of any higher priority job.</li>
+<li><b>wiki</b>&#151;Use <a href="http://supercluster.org/maui">The Maui Scheduler</a>
+(Wiki version) as an external entity to control SLURM job scheduling.</li>
+</ul>
+<p>The <span class="commandline">plugin_name</span> and 
+<span class="commandline">plugin_version</span> 
+symbols required by the SLURM Plugin API require no specialization for scheduler support. 
+Note carefully, however, the versioning discussion below.</p>
+<p>The programmer is urged to study 
+<span class="commandline">src/plugins/sched/backfill</span> and
+<span class="commandline">src/plugins/sched/builtin</span> 
+for sample implementations of a SLURM scheduler plugin.</p>
+<p class="footer"><a href="#top">top</a></p>
+
+<h3>Data Objects</h3>
+<p>The implementation must maintain (though not necessarily directly export) an
+enumerated <span class="commandline">errno</span>  to allow SLURM to discover
+as practically as possible the reason for any failed API call. Plugin-specific enumerated
+integer values should be used when appropriate. It is desirable that these values 
+be mapped into the range ESLURM_SCHED_MIN and ESLURM_SCHED_MAX
+as defined in <span class="commandline">slurm/slurm_errno.h</span>.
+The error number should be returned by the function
+<a href="#get_errno"><span class="commandline">slurm_sched_get_errno()</span></a>
+and a string describing the error's meaning should be returned by the function
+<a href="#strerror"><span class="commandline">slurm_sched_strerror()</span></a> 
+described below.</p>
+
+<p>These values must not be used as return values in integer-valued functions
+in the API. The proper error return value from integer-valued functions is SLURM_ERROR.
+The implementation should endeavor to provide useful and pertinent information by 
+whatever means is practical. In some cases this means an errno for each credential,
+since plugins must be re-entrant. If a plugin maintains a global errno in place of or in
+addition to a per-credential errno, it is not required to enforce mutual exclusion on it.
+Successful API calls are not required to reset any errno to a known value. However,
+the initial value of any errno, prior to any error condition arising, should be
+SLURM_SUCCESS. </p>
+<p class="footer"><a href="#top">top</a></p>
+
+<h3>API Functions</h3>
+<p>The following functions must appear. Functions which are not implemented should 
+be stubbed.</p>
+
+<p class="commandline">int slurm_sched_plugin_schedule (void);</p>
+<p style="margin-left:.2in"><b>Description</b>: For passive schedulers, invoke a scheduling pass.</p>
+<p style="margin-left:.2in"><b>Arguments</b>: None</p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, 
+the plugin should return SLURM_ERROR and set the errno to an appropriate value 
+to indicate the reason for failure.</p>
+
+<p class="commandline">uint32_t slurm_sched_plugin_initial_priority (uint32_t max_prio);</p>
+<p style="margin-left:.2in"><b>Description</b>: Establish the initial priority of a new job.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<span class="commandline"> max_prio</span>&nbsp; 
+&nbsp;&nbsp;(input) maximum priority of any previously submitted job. This can be used to 
+provide First-In-First-Out scheduling by assigning the new job a priority lower than this value.
+This could also be used to establish an initial priority of zero for all jobs, representing a 
+"held" state. The scheduler plugin can then decide where and when to initiate pending jobs 
+by altering their priority and (optionally) list of required nodes.</p>
+<p style="margin-left:.2in"><b>Returns</b>: The priority to be assigned to this job.</p>
+
+<p class="commandline">void slurm_sched_plugin_job_is_pending (void);</p>
+<p style="margin-left:.2in"><b>Description</b>: Note that some job is pending execution.</p>
+<p style="margin-left:.2in"><b>Arguments</b>: None</p>
+<p style="margin-left:.2in"><b>Returns</b>: Nothing.</p>
+<p class="footer"><a href="#top">top</a></p>
+
+<p class="commandline"><a name="get_errno">int slurm_sched_get_errno (void);</a></p>
+<p style="margin-left:.2in"><b>Description</b>: Return the number of a scheduler
+specific error.</p>
+<p style="margin-left:.2in"><b>Arguments</b>: None</p>
+<p style="margin-left:.2in"><b>Returns</b>: Error number for the last failure encountered by
+the scheduler plugin.</p>
+
+<p class="commandline"><a name="strerror">const char *slurm_sched_strerror(int errnum);</a></p>
+<p style="margin-left:.2in"><b>Description</b>: Return a string description of a scheduler
+specific error code.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:
+<span class="commandline"> errnum</span>&nbsp; &nbsp;&nbsp;(input) a scheduler
+specific error code.</p>
+<p style="margin-left:.2in"><b>Returns</b>: Pointer to string describing the error
+or NULL if no description found in this plugin.</p>
+<p class="footer"><a href="#top">top</a></p>
+
+<h3>Versioning</h3>
+<p> This document describes version 0 of the SLURM Scheduler API. Future 
+releases of SLURM may revise this API. A scheduler plugin conveys its ability 
+to implement a particular API version using the mechanism outlined for SLURM plugins.</p>
+<p class="footer"><a href="#top">top</a></p></td>
+</tr>
+<tr> 
+<td colspan="3"><hr> <p>For information about this page, contact 
+<a href="mailto:slurm-dev@lists.llnl.gov">slurm-dev@lists.llnl.gov</a>.</p>
+<p><a href="http://www.llnl.gov/"><img align=middle src="lll.gif" width="32" height="32" border="0"></a></p>
+<p class="footer">UCRL-WEB-201790<br>
+Last modified February 18, 2004</p></td>
+</tr>
+</table>
+</td>
+ </tr>
+</table>
+<map name="Map">
+<area shape="rect" coords="616,4,762,97" href="../">
+<area shape="rect" coords="330,1,468,11" href="http://www.llnl.gov/disclaimer.html">
+<area shape="rect" coords="11,23,213,115" href="slurm.html">
+</map>
+</body>
+</html>
diff --git a/doc/html/switchplugins.html b/doc/html/switchplugins.html
new file mode 100644
index 00000000000..e67db30e809
--- /dev/null
+++ b/doc/html/switchplugins.html
@@ -0,0 +1,399 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
+                        "http://www.w3.org/TR/REC-html40/loose.dtd">
+
+<html>
+
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta http-equiv="Pragma" content="no-cache">
+<meta http-equiv="keywords" content="Simple Linux Utility for Resource Management, SLURM, resource management, 
+Linux clusters, high-performance computing, Livermore Computing">
+<meta name="LLNLRandR" content="UCRL-WEB-201790">
+<meta name="LLNLRandRdate" content="18 February 2004">
+<meta name="distribution" content="global">
+<meta name="description" content="Simple Linux Utility for Resource Management">
+<meta name="copyright"
+content="This document is copyrighted U.S.
+Department of Energy under Contract W-7405-Eng-48">
+<meta name="Author" content="Moe Jette">
+<meta name="email" content="jette@llnl.gov">
+<meta name="Classification"
+content="DOE:DOE Web sites via organizational
+structure:Laboratories and Other Field Facilities">
+<title>Simple Linux Utility for Resource Management:Switch Plugins</title>
+<link href="slurmstyles.css" rel="stylesheet" type="text/css">
+</head>
+
+<body bgcolor="#000000" text="#000000" leftmargin="0" topmargin="0">
+<table width="770" border="0" cellspacing="0" cellpadding="0">
+<tr> 
+<td><img src="slurm_banner.jpg" width="770" height="145" usemap="#Map" border="0" alt="Simple Linux Utility for Resource Management"></td>
+</tr>
+</table>
+<table width="770" border="0" cellspacing="0" cellpadding="3" bgcolor="#FFFFFF">
+<tr> 
+<td width="100%"> 
+<table width="760" border="0" cellspacing="0" cellpadding="4" align="right">
+<tr>
+<td valign="top" bgcolor="#000000"><p><img src="spacer.gif" width="110" height="1" alt=""></p>
+<p><a href="slurm.html" class="nav" align="center">Home</a></p>
+<p><span class="whitetext">About</span><br>
+<a href="overview.html" class="nav">Overview</a><br>
+<a href="news.html" class="nav">What's New</a><br>
+<a href="publications.html" class="nav">Publications</a><br>
+<a href="team.html" class="nav">SLURM Team</a></p>
+<p><span class="whitetext">Using</span><br>
+<a href="documentation.html" class="nav">Documentation</a><br>
+<a href="faq.html" class="nav">FAQ</a><br>
+<a href="help.html" class="nav">Getting Help</a></p>
+<p><span class="whitetext">Installing</span><br>
+<a href="platforms.html" class="nav">Platforms</a><br>
+<a href="download.html" class="nav">Download</a><br>
+<a href="quickstart_admin.html" class="nav">Guide</a></p></td>
+<td><img src="spacer.gif" width="10" height="1" alt=""></td>
+<td valign="top"><h2><a name="top">SLURM Switch Plugin API</a></h2>
+
+<h3> Overview</h3>
+<p> This document describes SLURM switch (interconnect) plugins and the API that defines 
+them. It is intended as a resource to programmers wishing to write their own SLURM 
+switch plugins. This is version 0 of the API.</p>
+<p>SLURM switch plugins are SLURM plugins that implement the SLURM switch or interconnect
+API described herein. They must conform to the SLURM Plugin API with the following 
+specifications:</p>
+<p><span class="commandline">const char plugin_type[]</span><br>
+The major type must be &quot;switch.&quot; The minor type can be any recognizable 
+abbreviation for the type of switch. We recommend, for example:</p>
+<ul>
+<li><b>none</b>&#151;A plugin that implements the API without providing any actual 
+switch service. This is the case for Ethernet and Myrinet interconnects.</li>
+<li><b>elan</b>&#151;<a href="http://www.quadrics.com/">Quadrics</a>  Elan3 or Elan4 
+interconnect.</li>
+<li><b>federation</b>&#151;IBM Federation interconnects (presently under development).</li>
+</ul>
+<p>The <span class="commandline">plugin_name</span> and 
+<span class="commandline">plugin_version</span> 
+symbols required by the SLURM Plugin API require no specialization for switch support. 
+Note carefully, however, the versioning discussion below.</p>
+<p>The programmer is urged to study 
+<span class="commandline">src/plugins/switch/switch_elan.c</span> and
+<span class="commandline">src/plugins/switch/switch_none.c</span> 
+for sample implementations of a SLURM switch plugin.</p>
+<p class="footer"><a href="#top">top</a></p>
+
+<h3>Data Objects</h3>
+<p> The implementation must support an opaque class, which it defines, to be used 
+as a job's switch &quot;credential.&quot; This class must encapsulate all job-specific 
+information necessary for the operation of the API specification below. The credential 
+is referred to in SLURM code by an anonymous pointer (void *).</p>
+
+<p>The implementation must maintain (though not necessarily directly export) an 
+enumerated <span class="commandline">errno</span>  to allow SLURM to discover 
+as practically as possible the reason for any failed API call. Plugin-specific enumerated 
+integer values should be used when appropriate. It is desirable that these values 
+be mapped into the range ESLURM_SWITCH_MIN and ESLURM_SWITCH_MAX 
+as defined in <span class="commandline">slurm/slurm_errno.h</span>.
+The error number should be returned by the function
+<a href="#get_errno"><span class="commandline">switch_get_errno()</span></a>
+and this error number can be converted to an appropriate string description using the
+<a href="#strerror"><span class="commandline">switch_strerror()</span></a>
+function described below.</p>
+
+<p>These values must not be used as return values in integer-valued functions 
+in the API. The proper error return value from integer-valued functions is SLURM_ERROR. 
+The implementation should endeavor to provide useful and pertinent information by 
+whatever means is practical. In some cases this means an errno for each credential, 
+since plugins must be re-entrant. If a plugin maintains a global errno in place of or in 
+addition to a per-credential errno, it is not required to enforce mutual exclusion on it. 
+Successful API calls are not required to reset any errno to a known value. However, 
+the initial value of any errno, prior to any error condition arising, should be 
+SLURM_SUCCESS. </p>
+<p class="footer"><a href="#top">top</a></p>
+
+<h3>API Functions</h3>
+<p>The following functions must appear. Functions which are not implemented should 
+be stubbed.</p>
+
+<h4>Global Switch State Functions</h4>
+<p class="commandline">int switch_p_libstate_save (char *dir_name);</p>
+<p style="margin-left:.2in"><b>Description</b>: Save any global switch state to a file 
+within the specified directory. The actual file name used is plugin specific. It is recommended 
+that the global switch state contain a magic number for validation purposes. This function 
+is called by the slurmctld daemon on shutdown.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<span class="commandline"> dir_name</span>&nbsp; 
+&nbsp;&nbsp;(input) fully-qualified pathname of a directory into which user SlurmUser (as defined
+in slurm.conf) can create a file and write state information into that file. Cannot be NULL.</p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, 
+the plugin should return SLURM_ERROR and set the errno to an appropriate value 
+to indicate the reason for failure.</p>
+
+<p class="commandline">int switch_p_libstate_restore(char *dir_name);</p>
+<p style="margin-left:.2in"><b>Description</b>: Restore any global switch state from a file 
+within the specified directory. The actual file name used is plugin specific. It is recommended 
+that any magic number associated with the global switch state be verified. This function 
+is called by the slurmctld daemon on startup.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<span class="commandline"> dir_name</span>&nbsp; 
+&nbsp;&nbsp;(input) fully-qualified pathname of a directory containing a state information file 
+from which user SlurmUser (as defined in slurm.conf) can read. Cannot be NULL.</p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, 
+the plugin should return SLURM_ERROR and set the errno to an appropriate value 
+to indicate the reason for failure.</p>
+
+<p class="commandline">bool switch_p_no_frag(void);</p>
+<p style="margin-left:.2in"><b>Description</b>: Report if resource fragmentation is important. 
+If so, delay scheduling a new job while another is in the process of terminating.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<span class="commandline"> None</span></p>
+<p style="margin-left:.2in"><b>Returns</b>: TRUE if job scheduling should be delayed while 
+any other job is in the process of terminating.</p>
+<p class="footer"><a href="#top">top</a></p>
+
+<h4>Job's Switch Credential Management Functions</h4>
+<p class="commandline">int switch_p_alloc_jobinfo(switch_jobinfo_t *switch_job);</p>
+<p style="margin-left:.2in"><b>Description</b>: Allocate storage for a job's switch credential.
+It is recommended that the credential contain a magic number for validation purposes.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<span class="commandline"> switch_job</span>&nbsp; 
+&nbsp;&nbsp;(output) location for writing location of job's switch credential.</p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, 
+the plugin should return SLURM_ERROR and set the errno to an appropriate value 
+to indicate the reason for failure.</p>
+
+<p class="commandline">int switch_p_build_jobinfo (switch_jobinfo_t switch_job, 
+char *nodelist, int nprocs, int cyclic_alloc);</p>
+<p style="margin-left:.2in"><b>Description</b>: Build a job's switch credential.
+It is recommended that the credential's magic number be validated.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<br>
+<span class="commandline">switch_job</span>&nbsp; &nbsp;&nbsp;(input/output) Job's 
+switch credential to be updated<br>
+<span class="commandline">nodelist</span>&nbsp;&nbsp;&nbsp; (input) List of nodes 
+allocated to the job. This may contain expressions to specify node ranges (e.g. 
+"linux[1-20]" or "linux[2,4,6,8]").<br>
+<span class="commandline">nprocs</span>&nbsp;&nbsp;&nbsp; (input) Number of 
+processes to be initiated as part of the job.<br>
+<span class="commandline">cyclic_alloc</span>&nbsp;&nbsp;&nbsp; (input) Non-zero 
+if job's processes are to be allocated across nodes in a cyclic fashion (task 0 on node 0, 
+task 1 on node 1, etc.). If zero, processes are allocated sequentially on a node before 
+moving to the next node (tasks 0 and 1 on node 0, tasks 2 and 3 on node 1, etc.).</p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, 
+the plugin should return SLURM_ERROR and set the errno to an appropriate value 
+to indicate the reason for failure.</p>
+
+<p class="commandline">switch_jobinfo_t switch_p_copy_jobinfo  (switch_jobinfo_t switch_job);</p>
+<p style="margin-left:.2in"><b>Description</b>: Allocate storage for a job's switch credential 
+and copy an existing credential to that location.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<span class="commandline"> switch_job</span>&nbsp; 
+&nbsp;&nbsp;(input) an existing job switch credential.</p>
+<p style="margin-left:.2in"><b>Returns</b>: A newly allocated job switch credential containing a
+copy of the function argument.</p>
+
+<p class="commandline">void switch_p_free_jobinfo (switch_jobinfo_t switch_job);</p>
+<p style="margin-left:.2in"><b>Description</b>: Release the storage associated with a job's
+ switch credential.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<span class="commandline"> switch_job</span>&nbsp; 
+&nbsp;&nbsp;(input) an existing job switch credential.</p>
+<p style="margin-left:.2in"><b>Returns</b>: None</p>
+
+<p class="commandline">int switch_p_pack_jobinfo (switch_jobinfo_t switch_job, Buf buffer);</p>
+<p style="margin-left:.2in"><b>Description</b>: Pack the data associated with a job's 
+switch credential into a buffer for network transmission.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<br>
+<span class="commandline"> switch_job</span>&nbsp; &nbsp;&nbsp;(input) an existing job 
+switch credential.<br>
+<span class="commandline"> buffer</span>&nbsp; &nbsp;&nbsp;(input/output) buffer onto 
+which the credential's contents are appended.</p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, 
+the plugin should return SLURM_ERROR and set the errno to an appropriate value 
+to indicate the reason for failure.</p>
+
+<p class="commandline">int switch_p_unpack_jobinfo (switch_jobinfo_t switch_job, Buf buffer);</p>
+<p style="margin-left:.2in"><b>Description</b>: Unpack the data associated with a job's 
+switch credential from a buffer.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<br>
+<span class="commandline"> switch_job</span>&nbsp; &nbsp;&nbsp;(input/output) a previously
+allocated job switch credential to be filled in with data read from the buffer.<br>
+<span class="commandline"> buffer</span>&nbsp; &nbsp;&nbsp;(input/output) buffer from 
+which the credential's contents are read.</p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, 
+the plugin should return SLURM_ERROR and set the errno to an appropriate value 
+to indicate the reason for failure.</p>
+
+<p class="commandline">void switch_p_print_jobinfo(FILE *fp, switch_jobinfo_t switch_job);</p>
+<p style="margin-left:.2in"><b>Description</b>: Print the contents of a job's 
+switch credential to a file.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<br>
+<span class="commandline"> fp</span>&nbsp; &nbsp;&nbsp;(input) pointer to an open file.<br>
+<span class="commandline"> switch_job</span>&nbsp; &nbsp;&nbsp;(input) a job's 
+switch credential.</p>
+<p style="margin-left:.2in"><b>Returns</b>: None.</p>
+
+<p class="commandline">char *switch_p_sprint_jobinfo(switch_jobinfo_t switch_job, 
+char *buf, size_t size);</p>
+<p style="margin-left:.2in"><b>Description</b>: Print the contents of a job's 
+switch credential to a buffer.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<br>
+<span class="commandline"> switch_job</span>&nbsp; &nbsp;&nbsp;(input) a job's 
+switch credential.<br>
+<span class="commandline"> buf</span>&nbsp; &nbsp;&nbsp;(input/output) pointer to 
+buffer into which the job credential information is to be written.<br>
+<span class="commandline"> size</span>&nbsp; &nbsp;&nbsp;(input) size of buf in 
+bytes</p>
+<p style="margin-left:.2in"><b>Returns</b>: location of buffer, same as <i>buf</i>.</p>
+<p class="footer"><a href="#top">top</a></p>
+
+<h4>Node Specific Switch Management Functions</h4>
+<p class="commandline">int switch_p_node_init (void);</p>
+<p style="margin-left:.2in"><b>Description</b>: This function is run from the top level slurmd 
+only once per slurmd run. It may be used, for instance, to perform some one-time
+interconnect setup or spawn an error handling thread.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<span class="commandline"> None</span></p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, 
+the plugin should return SLURM_ERROR and set the errno to an appropriate value 
+to indicate the reason for failure.</p>
+
+<p class="commandline">int switch_p_node_fini (void);</p>
+<p style="margin-left:.2in"><b>Description</b>: This function is called once as slurmd exits 
+(slurmd will wait for this function to return before continuing the exit process).</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<span class="commandline"> None</span></p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, 
+the plugin should return SLURM_ERROR and set the errno to an appropriate value 
+to indicate the reason for failure.</p>
+<p class="footer"><a href="#top">top</a></p>
+
+<h4>Job Management Functions</h4>
+<blockquote><pre>
+=========================================================================
+Process 1 (root)        Process 2 (root, user)  |  Process 3 (user task) 
+                                                |                        
+switch_p_job_preinit                            |                        
+fork ------------------ switch_p_job_init       |                        
+waitpid                 setuid, chdir, etc.     |                        
+                        fork N procs -----------+--- switch_p_job_attach 
+                        wait all                |    exec mpi process    
+                        switch_p_job_fini*      |                        
+switch_p_job_postfini                           |                        
+=========================================================================
+</pre></blockquote>
+
+<p class="commandline">int switch_p_job_preinit (switch_jobinfo_t switch_job);</p>
+<p style="margin-left:.2in"><b>Description</b>: Preinit is run as root in the first slurmd process, 
+the so called job manager. This function can be used to perform any initialization
+that needs to be performed in the same process as switch_p_job_fini().</p>
+<p style="margin-left:.2in"><b>Arguments</b>:
+<span class="commandline"> switch_job</span>&nbsp; &nbsp;&nbsp;(input) a job's 
+switch credential.</p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, 
+the plugin should return SLURM_ERROR and set the errno to an appropriate value 
+to indicate the reason for failure.</p>
+
+<p class="commandline">int switch_p_job_init (switch_jobinfo_t switch_job, uid_t uid);</p>
+<p style="margin-left:.2in"><b>Description</b>: Initialize interconnect on node for a job. 
+This function is run from the second slurmd process (some interconnect implementations 
+may require the switch_p_job_init function to be executed from a different process
+than the process executing switch_p_job_fini() [e.g. Quadrics Elan]).</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<br>
+<span class="commandline"> switch_job</span>&nbsp; &nbsp;&nbsp;(input) a job's 
+switch credential.<br>
+<span class="commandline"> uid</span>&nbsp; &nbsp;&nbsp;(input) the user id 
+to execute a job.</p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, 
+the plugin should return SLURM_ERROR and set the errno to an appropriate value 
+to indicate the reason for failure.</p>
+
+<p class="commandline">int switch_p_job_attach ( switch_jobinfo_t switch_job, char ***env, 
+uint32_t nodeid, uint32_t procid, uint32_t nnodes, uint32_t nprocs, uint32_t rank );</p>
+<p style="margin-left:.2in"><b>Description</b>: Attach process to interconnect
+(Called from within the process, so it is appropriate to set interconnect specific 
+environment variables here).</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<br>
+<span class="commandline"> switch_job</span>&nbsp; &nbsp;&nbsp;(input) a job's 
+switch credential.<br>
+<span class="commandline"> env</span>&nbsp; &nbsp;&nbsp;(input/output) the 
+environment variables to be set upon job initiation. Switch specific environment 
+variables are added as needed.<br>
+<span class="commandline"> nodeid</span>&nbsp; &nbsp;&nbsp;(input) zero-origin
+id of this node.<br>
+<span class="commandline"> procid</span>&nbsp; &nbsp;&nbsp;(input) zero-origin
+process id local to slurmd and <b>not</b> equivalent to the global task id or MPI rank.<br>
+<span class="commandline"> nnodes</span>&nbsp; &nbsp;&nbsp;(input) count of 
+nodes allocated to this job.<br>
+<span class="commandline"> nprocs</span>&nbsp; &nbsp;&nbsp;(input) total count of 
+processes or tasks to be initiated for this job.<br>
+<span class="commandline"> rank</span>&nbsp; &nbsp;&nbsp;(input) zero-origin
+id of this task.</p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, 
+the plugin should return SLURM_ERROR and set the errno to an appropriate value 
+to indicate the reason for failure.</p>
+
+<p class="commandline">int switch_p_job_fini (switch_jobinfo_t switch_job);</p>
+<p style="margin-left:.2in"><b>Description</b>: This function is run from the same process 
+as switch_p_job_init() after all job tasks have exited. It is *not* run as root, because
+the process in question has already setuid to the job owner.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:
+<span class="commandline"> switch_job</span>&nbsp; &nbsp;&nbsp;(input) a job's 
+switch credential.</p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, 
+the plugin should return SLURM_ERROR and set the errno to an appropriate value 
+to indicate the reason for failure.</p>
+
+<p class="commandline">int switch_p_job_postfini ( switch_jobinfo_t switch_job, uid_t pgid, 
+uint32_t job_id, uint32_t step_id );</p>
+<p style="margin-left:.2in"><b>Description</b>: This function is run from the initial slurmd 
+process (same process as switch_p_job_preinit()), and is run as root. Any cleanup routines
+that need to be run with root privileges should be run from this function.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:<br>
+<span class="commandline"> switch_job</span>&nbsp; &nbsp;&nbsp;(input) a job's 
+switch credential.<br>
+<span class="commandline"> pgid</span>&nbsp; &nbsp;&nbsp;(input) The process 
+group id associated with this task.<br>
+<span class="commandline"> job_id</span>&nbsp; &nbsp;&nbsp;(input) the 
+associated SLURM job id.<br>
+<span class="commandline"> step_id</span>&nbsp; &nbsp;&nbsp;(input) the 
+associated SLURM job step id.</p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, 
+the plugin should return SLURM_ERROR and set the errno to an appropriate value 
+to indicate the reason for failure.</p>
+<p class="footer"><a href="#top">top</a></p>
+
+<h4>Error Handling Functions</h4>
+<p class="commandline"><a name="get_errno">int switch_get_errno (void);</a></p>
+<p style="margin-left:.2in"><b>Description</b>: Return the number of a switch
+specific error.</p>
+<p style="margin-left:.2in"><b>Arguments</b>: None</p>
+<p style="margin-left:.2in"><b>Returns</b>: Error number for the last failure encountered by
+the switch plugin.</p>
+
+<p class="commandline"><a name="strerror">char *switch_strerror(int errnum);</a></p>
+<p style="margin-left:.2in"><b>Description</b>: Return a string description of a switch 
+specific error code.</p>
+<p style="margin-left:.2in"><b>Arguments</b>:
+<span class="commandline"> errnum</span>&nbsp; &nbsp;&nbsp;(input) a switch 
+specific error code.</p>
+<p style="margin-left:.2in"><b>Returns</b>: Pointer to string describing the error 
+or NULL if no description found in this plugin.</p>
+<p class="footer"><a href="#top">top</a></p>
+
+<h3>Versioning</h3>
+<p> This document describes version 0 of the SLURM Switch API. Future 
+releases of SLURM may revise this API. A switch plugin conveys its ability 
+to implement a particular API version using the mechanism outlined for SLURM plugins. 
+In addition, the credential is transmitted along with the version number of the 
+plugin that transmitted it. It is at the discretion of the plugin author whether 
+to maintain data format compatibility across different versions of the plugin.</p>
+<p class="footer"><a href="#top">top</a></p></td>
+</tr>
+<tr> 
+<td colspan="3"><hr> <p>For information about this page, contact 
+<a href="mailto:slurm-dev@lists.llnl.gov">slurm-dev@lists.llnl.gov</a>.</p>
+<p><a href="http://www.llnl.gov/"><img align="middle" src="lll.gif" alt="Lawrence Livermore National Laboratory" width="32" height="32" border="0"></a></p>
+<p class="footer">UCRL-WEB-201790<br>
+Last modified February 18, 2004</p></td>
+</tr>
+</table>
+</td>
+ </tr>
+</table>
+<map name="Map">
+<area shape="rect" coords="616,4,762,97" href="../">
+<area shape="rect" coords="330,1,468,11" href="http://www.llnl.gov/disclaimer.html">
+<area shape="rect" coords="11,23,213,115" href="slurm.html">
+</map>
+</body>
+</html>
-- 
GitLab