From 4d4db57dd444ed542691d2df7e10c9c43e3686b3 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Tue, 12 Oct 2004 17:34:25 +0000
Subject: [PATCH] Define new switch function to re-initialize the state of a
 node.

---
 doc/html/switchplugins.html           | 13 +++++++++++--
 src/common/switch.c                   | 10 ++++++++++
 src/common/switch.h                   |  5 +++++
 src/plugins/switch/elan/switch_elan.c |  5 +++++
 src/plugins/switch/none/switch_none.c |  5 +++++
 src/slurmd/slurmd.c                   |  2 ++
 6 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/doc/html/switchplugins.html b/doc/html/switchplugins.html
index e55334e83d4..e228e483de8 100644
--- a/doc/html/switchplugins.html
+++ b/doc/html/switchplugins.html
@@ -9,7 +9,7 @@
 <meta http-equiv="keywords" content="Simple Linux Utility for Resource Management, SLURM, resource management, 
 Linux clusters, high-performance computing, Livermore Computing">
 <meta name="LLNLRandR" content="UCRL-WEB-204324"
-<meta name="LLNLRandRdate" content="29 September 2004">
+<meta name="LLNLRandRdate" content="12 October 2004">
 <meta name="distribution" content="global">
 <meta name="description" content="Simple Linux Utility for Resource Management">
 <meta name="copyright"
@@ -162,6 +162,15 @@ any other job is in the process of terminating.</p>
 is initiated. The slurmctld daemon will also request that slurmd supply current 
 switch state information on a periodic basis.</p>
 
+<p class="commandline">int switch_p_clear_node_state(void);</p>
+<p style="margin-left:.2in"><b>Description</b>: Initialize node state. 
+If any switch state has previously been established for a job, it will be cleared. 
+This will be used to establish a "clean" state for the switch on the node upon 
+which it is executed.</p>
+<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure,
+the plugin should return SLURM_ERROR and set the errno to an appropriate value
+to indicate the reason for failure.</p>
+
 <p class="commandline">int switch_p_alloc_node_info(switch_node_info_t *switch_node);</p>
 <p style="margin-left:.2in"><b>Description</b>: Allocate storage for a node's switch 
 state record. It is recommended that the record contain a magic number for validation 
@@ -511,7 +520,7 @@ to maintain data format compatibility across different versions of the plugin.</
 <a href="mailto:slurm-dev@lists.llnl.gov">slurm-dev@lists.llnl.gov</a>.</p>
 <p><a href="http://www.llnl.gov/"><img align=middle src="lll.gif" width="32" height="32" border="0"></a></p>
 <p class="footer">UCRL-WEB-204324<br>
-Last modified 29 September 2004</p></td>
+Last modified 12 October 2004</p></td>
 </tr>
 </table>
 </td>
diff --git a/src/common/switch.c b/src/common/switch.c
index 5754b3863b9..bd22a51441f 100644
--- a/src/common/switch.c
+++ b/src/common/switch.c
@@ -79,6 +79,7 @@ typedef struct slurm_switch_ops {
 						uint32_t nprocs, uint32_t rank);
 	char *	     (*switch_strerror)   ( int errnum );
 	int          (*switch_errno)      ( void );
+	int          (*clear_node)        ( void );
 	int          (*alloc_nodeinfo)    ( switch_node_info_t *nodeinfo );
 	int          (*build_nodeinfo)    ( switch_node_info_t nodeinfo );
 	int          (*pack_nodeinfo)     ( switch_node_info_t nodeinfo,
@@ -184,6 +185,7 @@ _slurm_switch_get_ops( slurm_switch_context_t c )
 		"switch_p_job_attach",
 		"switch_p_strerror",
 		"switch_p_get_errno",
+		"switch_p_clear_node_state",
 		"switch_p_alloc_node_info",
 		"switch_p_build_node_info",
 		"switch_p_pack_node_info",
@@ -456,6 +458,14 @@ extern char *switch_strerror(int errnum)
  * node switch state monitoring functions
  * required for IBM Federation switch 
  */
+extern int switch_g_clear_node_state(void) /* clear switch state on this node */
+{
+	if ( switch_init() < 0 ) /* negative return is treated as failure */
+		return SLURM_ERROR;
+
+	return (*(g_context->ops.clear_node))(); /* result of the ops-table clear_node entry is returned as-is */
+}
+
 extern int switch_g_alloc_node_info(switch_node_info_t *switch_node)
 {
 	if ( switch_init() < 0 )
diff --git a/src/common/switch.h b/src/common/switch.h
index 71ffee7cd59..a03cfd1852d 100644
--- a/src/common/switch.h
+++ b/src/common/switch.h
@@ -258,6 +258,11 @@ extern int interconnect_attach(switch_jobinfo_t jobinfo, char ***env,
 		uint32_t nodeid, uint32_t procid, uint32_t nnodes, 
 		uint32_t nprocs, uint32_t rank);
 
+/*
+ * Clear switch state on this node
+ */
+extern int switch_g_clear_node_state(void);
+
 /*
  * Allocate storage for a node's switch state record
  */
diff --git a/src/plugins/switch/elan/switch_elan.c b/src/plugins/switch/elan/switch_elan.c
index 0e7ee998331..89d50564278 100644
--- a/src/plugins/switch/elan/switch_elan.c
+++ b/src/plugins/switch/elan/switch_elan.c
@@ -724,6 +724,11 @@ extern char *switch_p_strerror(int errnum)
  * node switch state monitoring functions
  * required for IBM Federation switch
  */
+extern int switch_p_clear_node_state(void) /* no-op in this plugin: nothing is cleared */
+{
+	return SLURM_SUCCESS; /* unconditionally reports success */
+}
+
 extern int switch_p_alloc_node_info(switch_node_info_t *switch_node)
 {
 	return SLURM_SUCCESS;
diff --git a/src/plugins/switch/none/switch_none.c b/src/plugins/switch/none/switch_none.c
index ec0f47db0d1..9e9787308c4 100644
--- a/src/plugins/switch/none/switch_none.c
+++ b/src/plugins/switch/none/switch_none.c
@@ -224,6 +224,11 @@ extern char *switch_p_strerror(int errnum)
  * node switch state monitoring functions
  * required for IBM Federation switch 
  */
+extern int switch_p_clear_node_state(void) /* no-op in this plugin: nothing is cleared */
+{
+	return SLURM_SUCCESS; /* unconditionally reports success */
+}
+
 extern int switch_p_alloc_node_info(switch_node_info_t *switch_node)
 {
 	return SLURM_SUCCESS;
diff --git a/src/slurmd/slurmd.c b/src/slurmd/slurmd.c
index 374262c627d..fb87a59b2c8 100644
--- a/src/slurmd/slurmd.c
+++ b/src/slurmd/slurmd.c
@@ -189,6 +189,8 @@ main (int argc, char *argv[])
 	
 	if (interconnect_node_init() < 0)
 		fatal("Unable to initialize interconnect.");
+	if (conf->cleanstart && switch_g_clear_node_state())
+		fatal("Unable to clear interconnect state.");
 
 	_create_msg_socket();
 
-- 
GitLab