From 214fff55c5788162900cc38e14f58eacd8ae5ae0 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Tue, 20 Mar 2007 18:42:31 +0000 Subject: [PATCH] svn merge -r11200:11204 https://eris.llnl.gov/svn/slurm/branches/slurm-1.1 (Moab event notification for partition state changes). --- doc/html/schedplugins.shtml | 18 ++++++---- src/plugins/sched/backfill/backfill_wrapper.c | 8 +++++ src/plugins/sched/builtin/builtin_wrapper.c | 8 +++++ src/plugins/sched/gang/sched_gang.c | 8 +++++ src/plugins/sched/hold/hold_wrapper.c | 8 +++++ src/plugins/sched/wiki/sched_wiki.c | 8 +++++ src/plugins/sched/wiki2/event.c | 34 ++++++++++++------- src/plugins/sched/wiki2/msg.c | 4 +-- src/plugins/sched/wiki2/msg.h | 4 +-- src/plugins/sched/wiki2/sched_wiki.c | 14 ++++++-- src/slurmctld/controller.c | 1 + src/slurmctld/partition_mgr.c | 4 ++- src/slurmctld/proc_req.c | 2 ++ src/slurmctld/sched_plugin.c | 15 ++++++++ src/slurmctld/sched_plugin.h | 5 +++ 15 files changed, 114 insertions(+), 27 deletions(-) diff --git a/doc/html/schedplugins.shtml b/doc/html/schedplugins.shtml index 0b2a122ef46..915ab86a886 100644 --- a/doc/html/schedplugins.shtml +++ b/doc/html/schedplugins.shtml @@ -5,7 +5,7 @@ <h2> Overview</h2> <p> This document describes SLURM scheduler plugins and the API that defines them. It is intended as a resource to programmers wishing to write their own SLURM -scheduler plugins. This is version 0 of the API.</p> +scheduler plugins. This is version 1 of the API.</p> <p>It is noteworthy that two different models are used for job scheduling. The <b>backfill</b> scheduler lets SLURM establish the initial job priority @@ -28,11 +28,11 @@ abbreviation for the type of scheduler. We recommend, for example:</p> scheduling services. 
This is the default behavior and implements first-in-first-out scheduling.</li> <li><b>backfill</b>—Raise the priority of jobs if doing so results in their starting earlier without any delay in the expected initiation time of any higher priority job.</li> -<li><b>wiki</b>—Use +<li><b>wiki</b>—Uses <a href="http://www.clusterresources.com/pages/products/maui-cluster-scheduler.php"> The Maui Scheduler</a> (Wiki version) as an external entity to control SLURM job scheduling.</li> -<li><b>wiki2</b>—Use +<li><b>wiki2</b>—Uses <a href="http://www.clusterresources.com/pages/products/moab-cluster-suite.php"> Moab Cluster Suite</a> as an external entity to control SLURM job scheduling. Note that wiki2 is an expanded version of the wiki plugin with additional @@ -103,7 +103,13 @@ by altering their priority and (optionally) list of required nodes.</p> <p style="margin-left:.2in"><b>Returns</b>: The priority to be assigned to this job.</p> <p class="commandline">void slurm_sched_plugin_job_is_pending (void);</p> -<p style="margin-left:.2in"><b>Description</b>: Note that some job is pending execution..</p> +<p style="margin-left:.2in"><b>Description</b>: Note that some job is pending execution.</p> +<p style="margin-left:.2in"><b>Arguments</b>: None</p> +<p style="margin-left:.2in"><b>Returns</b>: Nothing.</p> + +<p class="commandline">void slurm_sched_plugin_partition_change (void);</p> +<p style="margin-left:.2in"><b>Description</b>: Note that some partition state change +happened such as time or size limits.</p> <p style="margin-left:.2in"><b>Arguments</b>: None</p> <p style="margin-left:.2in"><b>Returns</b>: Nothing.</p> <p class="footer"><a href="#top">top</a></p> @@ -126,11 +132,11 @@ or NULL if no description found in this plugin.</p> <p class="footer"><a href="#top">top</a></p> <h2>Versioning</h2> -<p> This document describes version 0 of the SLURM Scheduler API. Future +<p> This document describes version 1 of the SLURM Scheduler API. 
Future releases of SLURM may revise this API. A scheduler plugin conveys its ability to implement a particular API version using the mechanism outlined for SLURM plugins.</p> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 18 December 2006</p> +<p style="text-align:center;">Last modified 20 March 2007</p> <!--#include virtual="footer.txt"--> diff --git a/src/plugins/sched/backfill/backfill_wrapper.c b/src/plugins/sched/backfill/backfill_wrapper.c index ff60533507b..47a176c6673 100644 --- a/src/plugins/sched/backfill/backfill_wrapper.c +++ b/src/plugins/sched/backfill/backfill_wrapper.c @@ -157,6 +157,14 @@ void slurm_sched_plugin_job_is_pending( void ) run_backfill(); } +/**************************************************************************/ +/* TAG( slurm_sched_plugin_partition_change ) */ +/**************************************************************************/ +void slurm_sched_plugin_partition_change( void ) +{ + /* Empty. */ +} + /**************************************************************************/ /* TAG( slurm_sched_get_errno ) */ /**************************************************************************/ diff --git a/src/plugins/sched/builtin/builtin_wrapper.c b/src/plugins/sched/builtin/builtin_wrapper.c index 95290aec238..422787b361b 100644 --- a/src/plugins/sched/builtin/builtin_wrapper.c +++ b/src/plugins/sched/builtin/builtin_wrapper.c @@ -103,6 +103,14 @@ void slurm_sched_plugin_job_is_pending( void ) /* Empty. */ } +/**************************************************************************/ +/* TAG( slurm_sched_plugin_partition_change ) */ +/**************************************************************************/ +void slurm_sched_plugin_partition_change( void ) +{ + /* Empty. 
*/ +} + /**************************************************************************/ /* TAG( slurm_sched_get_errno ) */ /**************************************************************************/ diff --git a/src/plugins/sched/gang/sched_gang.c b/src/plugins/sched/gang/sched_gang.c index ee0474387d8..483ed6e6e28 100644 --- a/src/plugins/sched/gang/sched_gang.c +++ b/src/plugins/sched/gang/sched_gang.c @@ -97,6 +97,14 @@ void slurm_sched_plugin_job_is_pending( void ) /* No action required */ } +/**************************************************************************/ +/* TAG( slurm_sched_plugin_partition_change ) */ +/**************************************************************************/ +void slurm_sched_plugin_partition_change( void ) +{ + /* No action required */ +} + /**************************************************************************/ /* TAG( slurm_sched_get_errno ) */ /**************************************************************************/ diff --git a/src/plugins/sched/hold/hold_wrapper.c b/src/plugins/sched/hold/hold_wrapper.c index 5a4d952644e..bf1169162cc 100644 --- a/src/plugins/sched/hold/hold_wrapper.c +++ b/src/plugins/sched/hold/hold_wrapper.c @@ -113,6 +113,14 @@ void slurm_sched_plugin_job_is_pending( void ) /* Empty. */ } +/**************************************************************************/ +/* TAG( slurm_sched_plugin_partition_change ) */ +/**************************************************************************/ +void slurm_sched_plugin_partition_change( void ) +{ + /* Empty. 
*/ +} + /**************************************************************************/ /* TAG( slurm_sched_get_errno ) */ /**************************************************************************/ diff --git a/src/plugins/sched/wiki/sched_wiki.c b/src/plugins/sched/wiki/sched_wiki.c index 491775a93d4..1df597425d2 100644 --- a/src/plugins/sched/wiki/sched_wiki.c +++ b/src/plugins/sched/wiki/sched_wiki.c @@ -98,6 +98,14 @@ void slurm_sched_plugin_job_is_pending( void ) /* No action required */ } +/**************************************************************************/ +/* TAG( slurm_sched_plugin_partition_change ) */ +/**************************************************************************/ +void slurm_sched_plugin_partition_change( void ) +{ + /* Empty. */ +} + /**************************************************************************/ /* TAG( slurm_sched_plugin_reconfig ) */ /**************************************************************************/ diff --git a/src/plugins/sched/wiki2/event.c b/src/plugins/sched/wiki2/event.c index 9999442dc80..bab1e8bba32 100644 --- a/src/plugins/sched/wiki2/event.c +++ b/src/plugins/sched/wiki2/event.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * event.c - Moab event notification ***************************************************************************** - * Copyright (C) 2006 The Regents of the University of California. + * Copyright (C) 2006-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov> * UCRL-CODE-226842. 
@@ -99,23 +99,35 @@ static void _close_fd(void) /* * event_notify - Notify Moab of some event - * msg IN - event type, NULL to close connection + * event_code IN - message code to send Moab + * 1234 - job state change + * TBD - partition state change + * desc IN - event description * RET 0 on success, -1 on failure */ -extern int event_notify(char *msg) +extern int event_notify(int event_code, char *desc) { time_t now = time(NULL); int rc = 0, retry = 2; + char *event_msg; if (e_port == 0) { /* Event notification disabled */ return 0; } - if (job_aggregation_time - && (difftime(now, last_notify_time) < job_aggregation_time)) { - debug("wiki event notification already sent recently"); - return 0; + if (event_code == 1234) { + if (job_aggregation_time + && (difftime(now, last_notify_time) < job_aggregation_time)) { + debug("wiki event notification already sent recently"); + return 0; + } + event_msg = "1234\0"; + } else if (event_code == 5) { /* actual value TBD */ + event_msg = "5\0"; /* actual value TBD */ + } else { + error("event_notify: invalid event code: %d", event_code); + return -1; } pthread_mutex_lock(&event_mutex); @@ -128,12 +140,8 @@ extern int event_notify(char *msg) break; } - /* Always send "1234\0" as the message - * (we do not care if all of the message is sent, - * just that some of it went through to wake up Moab) - */ - if (write(event_fd, "1234", 5) > 0) { - verbose("wiki event_notification sent: %s", msg); + if (write(event_fd, event_msg, (strlen(event_msg) + 1)) > 0) { + verbose("wiki event_notification sent: %s", desc); last_notify_time = now; rc = 0; /* Dave Jackson says to leave the connection diff --git a/src/plugins/sched/wiki2/msg.c b/src/plugins/sched/wiki2/msg.c index 44fee14ea31..ca47133a787 100644 --- a/src/plugins/sched/wiki2/msg.c +++ b/src/plugins/sched/wiki2/msg.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * msg.c - Message/communcation manager for Wiki plugin 
***************************************************************************** - * Copyright (C) 2006 The Regents of the University of California. + * Copyright (C) 2006-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov> * UCRL-CODE-226842. @@ -89,7 +89,7 @@ extern int spawn_msg_thread(void) _msg_thread, NULL)) fatal("pthread_create %m"); - (void) event_notify("slurm startup"); + (void) event_notify(1234, "Slurm startup"); slurm_attr_destroy(&thread_attr_msg); thread_running = true; pthread_mutex_unlock(&thread_flag_mutex); diff --git a/src/plugins/sched/wiki2/msg.h b/src/plugins/sched/wiki2/msg.h index 12fdec816b4..ac4e096b5f8 100644 --- a/src/plugins/sched/wiki2/msg.h +++ b/src/plugins/sched/wiki2/msg.h @@ -1,7 +1,7 @@ /*****************************************************************************\ * msg.h - Message/communcation manager for Wiki plugin ***************************************************************************** - * Copyright (C) 2006 The Regents of the University of California. + * Copyright (C) 2006-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov> * UCRL-CODE-226842. 
@@ -94,7 +94,7 @@ extern uint16_t job_aggregation_time; extern uint16_t kill_wait; extern uint16_t use_host_exp; -extern int event_notify(char *msg); +extern int event_notify(int event_code, char *desc); extern int spawn_msg_thread(void); extern void term_msg_thread(void); extern char * bitmap2wiki_node_name(bitstr_t *bitmap); diff --git a/src/plugins/sched/wiki2/sched_wiki.c b/src/plugins/sched/wiki2/sched_wiki.c index 231be3e996c..b7304c56873 100644 --- a/src/plugins/sched/wiki2/sched_wiki.c +++ b/src/plugins/sched/wiki2/sched_wiki.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * sched_wiki.c - Wiki plugin for Moab and Maui schedulers. ***************************************************************************** - * Copyright (C) 2006 The Regents of the University of California. + * Copyright (C) 2006-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov> * UCRL-CODE-226842. 
@@ -72,7 +72,7 @@ extern void fini( void ) /***************************************************************************/ extern int slurm_sched_plugin_schedule( void ) { - (void) event_notify("Requested by slurm"); + (void) event_notify(1234, "Requested by Slurm"); return SLURM_SUCCESS; } @@ -82,7 +82,7 @@ extern int slurm_sched_plugin_schedule( void ) /**************************************************************************/ extern uint32_t slurm_sched_plugin_initial_priority( uint32_t last_prio ) { - (void) event_notify("Job submit"); + (void) event_notify(1234, "Job submit"); if (init_prio_mode == PRIO_DECREMENT) { if (last_prio >= 2) return (last_prio - 1); @@ -108,6 +108,14 @@ int slurm_sched_plugin_reconfig( void ) return parse_wiki_config(); } +/**************************************************************************/ +/* TAG( slurm_sched_plugin_partition_change ) */ +/**************************************************************************/ +void slurm_sched_plugin_partition_change( void ) +{ + (void) event_notify(1234, "Partition change"); +} + /**************************************************************************/ /* TAG( slurm_sched_get_errno ) */ /**************************************************************************/ diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index 0028b840d9c..2cc71cd2543 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -521,6 +521,7 @@ static void *_slurmctld_signal_hand(void *no_data) } unlock_slurmctld(config_write_lock); trigger_reconfig(); + slurm_sched_partition_change(); break; case SIGABRT: /* abort */ info("SIGABRT received"); diff --git a/src/slurmctld/partition_mgr.c b/src/slurmctld/partition_mgr.c index 57210dc31b8..4023cf79717 100644 --- a/src/slurmctld/partition_mgr.c +++ b/src/slurmctld/partition_mgr.c @@ -4,7 +4,7 @@ * time stamp (last_part_update) * $Id$ ***************************************************************************** - * Copyright (C) 
2002 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette@llnl.gov> et. al. * UCRL-CODE-226842. @@ -62,6 +62,7 @@ #include "src/slurmctld/locks.h" #include "src/slurmctld/proc_req.h" +#include "src/slurmctld/sched_plugin.h" #include "src/slurmctld/slurmctld.h" /* Global variables */ @@ -867,6 +868,7 @@ int update_part(update_part_msg_t * part_desc) } if (error_code == SLURM_SUCCESS) { + slurm_sched_partition_change(); /* notify sched plugin */ reset_job_priority(); /* free jobs */ if (select_g_block_init(part_list) != SLURM_SUCCESS ) error("failed to update node selection plugin state"); diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 65ae13b3660..0dd79befd2e 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -74,6 +74,7 @@ #include "src/slurmctld/locks.h" #include "src/slurmctld/proc_req.h" #include "src/slurmctld/read_config.h" +#include "src/slurmctld/sched_plugin.h" #include "src/slurmctld/slurmctld.h" #include "src/slurmctld/state_save.h" #include "src/slurmctld/trigger_mgr.h" @@ -1429,6 +1430,7 @@ static void _slurm_rpc_reconfigure_controller(slurm_msg_t * msg) info("_slurm_rpc_reconfigure_controller: completed %s", TIME_STR); slurm_send_rc_msg(msg, SLURM_SUCCESS); + slurm_sched_partition_change(); schedule(); /* has its own locks */ save_all_state(); } diff --git a/src/slurmctld/sched_plugin.c b/src/slurmctld/sched_plugin.c index cfc3c9d235c..44c00f15bf2 100644 --- a/src/slurmctld/sched_plugin.c +++ b/src/slurmctld/sched_plugin.c @@ -54,6 +54,7 @@ typedef struct slurm_sched_ops { uint32_t (*initial_priority) ( uint32_t ); void (*job_is_pending) ( void ); int (*reconfig) ( void ); + void (*partition_change) ( void ); int (*get_errno) ( void ); char * (*strerror) ( int ); } slurm_sched_ops_t; @@ -88,6 +89,7 @@ slurm_sched_get_ops( 
slurm_sched_context_t *c ) "slurm_sched_plugin_initial_priority", "slurm_sched_plugin_job_is_pending", "slurm_sched_plugin_reconfig", + "slurm_sched_plugin_partition_change", "slurm_sched_get_errno", "slurm_sched_strerror" }; @@ -268,6 +270,7 @@ slurm_sched_initial_priority( u_int32_t last_prio ) return (*(g_sched_context->ops.initial_priority))( last_prio ); } + /* *********************************************************************** */ /* TAG( slurm_sched_job_is_pending ) */ /* *********************************************************************** */ @@ -280,6 +283,18 @@ slurm_sched_job_is_pending( void ) (*(g_sched_context->ops.job_is_pending))(); } +/* *********************************************************************** */ +/* TAG( slurm_sched_partition_change ) */ +/* *********************************************************************** */ +void +slurm_sched_partition_change( void ) +{ + if ( slurm_sched_init() < 0 ) + return; + + (*(g_sched_context->ops.partition_change))(); +} + /* *********************************************************************** */ /* TAG( slurm_sched_p_get_errno ) */ /* *********************************************************************** */ diff --git a/src/slurmctld/sched_plugin.h b/src/slurmctld/sched_plugin.h index 6d24ac006bb..4dbb99ff9db 100644 --- a/src/slurmctld/sched_plugin.h +++ b/src/slurmctld/sched_plugin.h @@ -80,6 +80,11 @@ uint32_t slurm_sched_initial_priority( uint32_t max_prio ); */ void slurm_sched_job_is_pending( void ); +/* + * Note that some partition state change happened. + */ +void slurm_sched_partition_change( void ); + /* * Return any plugin-specific error number */ -- GitLab