From 4d4db57dd444ed542691d2df7e10c9c43e3686b3 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Tue, 12 Oct 2004 17:34:25 +0000 Subject: [PATCH] Define new switch function to re-initialize the state of a node. --- doc/html/switchplugins.html | 13 +++++++++++-- src/common/switch.c | 10 ++++++++++ src/common/switch.h | 5 +++++ src/plugins/switch/elan/switch_elan.c | 5 +++++ src/plugins/switch/none/switch_none.c | 5 +++++ src/slurmd/slurmd.c | 2 ++ 6 files changed, 38 insertions(+), 2 deletions(-) diff --git a/doc/html/switchplugins.html b/doc/html/switchplugins.html index e55334e83d4..e228e483de8 100644 --- a/doc/html/switchplugins.html +++ b/doc/html/switchplugins.html @@ -9,7 +9,7 @@ <meta http-equiv="keywords" content="Simple Linux Utility for Resource Management, SLURM, resource management, Linux clusters, high-performance computing, Livermore Computing"> <meta name="LLNLRandR" content="UCRL-WEB-204324" -<meta name="LLNLRandRdate" content="29 September 2004"> +<meta name="LLNLRandRdate" content="12 October 2004"> <meta name="distribution" content="global"> <meta name="description" content="Simple Linux Utility for Resource Management"> <meta name="copyright" @@ -162,6 +162,15 @@ any other job is in the process of terminating.</p> is initiated. The slurmctld daemon will also request that slurmd supply current switch state information on a periodic basis.</p> +<p class="commandline">int switch_p_clear_node_state(void);</p> +<p style="margin-left:.2in"><b>Description</b>: Initialize node state. +If any switch state has previously been established for a job, it will be cleared. +This will be used to establish a "clean" state for the switch on the node upon +which it is executed.</p> +<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, +the plugin should return SLURM_ERROR and set the errno to an appropriate value +to indicate the reason for failure.</p> + <p class="commandline">int switch_p_alloc_node_info(switch_node_info_t *switch_node);</p> <p style="margin-left:.2in"><b>Description</b>: Allocate storage for a node's switch state record. It is recommended that the record contain a magic number for validation @@ -511,7 +520,7 @@ to maintain data format compatibility across different versions of the plugin.</ <a href="mailto:slurm-dev@lists.llnl.gov">slurm-dev@lists.llnl.gov</a>.</p> <p><a href="http://www.llnl.gov/"><img align=middle src="lll.gif" width="32" height="32" border="0"></a></p> <p class="footer">UCRL-WEB-204324<br> -Last modified 29 September 2004</p></td> +Last modified 12 October 2004</p></td> </tr> </table> </td> diff --git a/src/common/switch.c b/src/common/switch.c index 5754b3863b9..bd22a51441f 100644 --- a/src/common/switch.c +++ b/src/common/switch.c @@ -79,6 +79,7 @@ typedef struct slurm_switch_ops { uint32_t nprocs, uint32_t rank); char * (*switch_strerror) ( int errnum ); int (*switch_errno) ( void ); + int (*clear_node) ( void ); int (*alloc_nodeinfo) ( switch_node_info_t *nodeinfo ); int (*build_nodeinfo) ( switch_node_info_t nodeinfo ); int (*pack_nodeinfo) ( switch_node_info_t nodeinfo, @@ -184,6 +185,7 @@ _slurm_switch_get_ops( slurm_switch_context_t c ) "switch_p_job_attach", "switch_p_strerror", "switch_p_get_errno", + "switch_p_clear_node_state", "switch_p_alloc_node_info", "switch_p_build_node_info", "switch_p_pack_node_info", @@ -456,6 +458,14 @@ extern char *switch_strerror(int errnum) * node switch state monitoring functions * required for IBM Federation switch */ +extern int switch_g_clear_node_state(void) +{ + if ( switch_init() < 0 ) + return SLURM_ERROR; + + return (*(g_context->ops.clear_node))(); +} + extern int switch_g_alloc_node_info(switch_node_info_t *switch_node) { if ( switch_init() < 0 ) diff --git a/src/common/switch.h b/src/common/switch.h index 71ffee7cd59..a03cfd1852d 100644 --- a/src/common/switch.h +++ b/src/common/switch.h @@ -258,6 +258,11 @@ extern int interconnect_attach(switch_jobinfo_t jobinfo, char ***env, uint32_t nodeid, uint32_t procid, uint32_t nnodes, uint32_t nprocs, uint32_t rank); +/* + * Clear switch state on this node + */ +extern int switch_g_clear_node_state(void); + /* * Allocate storage for a node's switch state record */ diff --git a/src/plugins/switch/elan/switch_elan.c b/src/plugins/switch/elan/switch_elan.c index 0e7ee998331..89d50564278 100644 --- a/src/plugins/switch/elan/switch_elan.c +++ b/src/plugins/switch/elan/switch_elan.c @@ -724,6 +724,11 @@ extern char *switch_p_strerror(int errnum) * node switch state monitoring functions * required for IBM Federation switch */ +extern int switch_p_clear_node_state(void) +{ + return SLURM_SUCCESS; +} + extern int switch_p_alloc_node_info(switch_node_info_t *switch_node) { return SLURM_SUCCESS; diff --git a/src/plugins/switch/none/switch_none.c b/src/plugins/switch/none/switch_none.c index ec0f47db0d1..9e9787308c4 100644 --- a/src/plugins/switch/none/switch_none.c +++ b/src/plugins/switch/none/switch_none.c @@ -224,6 +224,11 @@ extern char *switch_p_strerror(int errnum) * node switch state monitoring functions * required for IBM Federation switch */ +extern int switch_p_clear_node_state(void) +{ + return SLURM_SUCCESS; +} + extern int switch_p_alloc_node_info(switch_node_info_t *switch_node) { return SLURM_SUCCESS; diff --git a/src/slurmd/slurmd.c b/src/slurmd/slurmd.c index 374262c627d..fb87a59b2c8 100644 --- a/src/slurmd/slurmd.c +++ b/src/slurmd/slurmd.c @@ -189,6 +189,8 @@ main (int argc, char *argv[]) if (interconnect_node_init() < 0) fatal("Unable to initialize interconnect."); + if (conf->cleanstart && switch_g_clear_node_state()) + fatal("Unable to clear interconnect state."); _create_msg_socket(); -- GitLab