From 874f797fafbc10165e5a08b208f91f0e5315792d Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Wed, 3 Nov 2010 23:38:52 +0000 Subject: [PATCH] Move the start time calculation of pending jobs into a separate pthread for sched/builtin for better efficiency --- src/plugins/sched/builtin/Makefile.am | 5 +- src/plugins/sched/builtin/Makefile.in | 9 +- src/plugins/sched/builtin/builtin.c | 238 ++++++++++++++++++++ src/plugins/sched/builtin/builtin.h | 53 +++++ src/plugins/sched/builtin/builtin_wrapper.c | 103 +++------ 5 files changed, 336 insertions(+), 72 deletions(-) create mode 100644 src/plugins/sched/builtin/builtin.c create mode 100644 src/plugins/sched/builtin/builtin.h diff --git a/src/plugins/sched/builtin/Makefile.am b/src/plugins/sched/builtin/Makefile.am index 56cc1a4bbf0..ff36ef1e592 100644 --- a/src/plugins/sched/builtin/Makefile.am +++ b/src/plugins/sched/builtin/Makefile.am @@ -10,5 +10,8 @@ INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common pkglib_LTLIBRARIES = sched_builtin.la -sched_builtin_la_SOURCES = builtin_wrapper.c +sched_builtin_la_SOURCES = \ + builtin_wrapper.c \ + builtin.c \ + builtin.h sched_builtin_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) diff --git a/src/plugins/sched/builtin/Makefile.in b/src/plugins/sched/builtin/Makefile.in index ef76d176493..a790fbc0afb 100644 --- a/src/plugins/sched/builtin/Makefile.in +++ b/src/plugins/sched/builtin/Makefile.in @@ -105,7 +105,7 @@ am__base_list = \ am__installdirs = "$(DESTDIR)$(pkglibdir)" LTLIBRARIES = $(pkglib_LTLIBRARIES) sched_builtin_la_LIBADD = -am_sched_builtin_la_OBJECTS = builtin_wrapper.lo +am_sched_builtin_la_OBJECTS = builtin_wrapper.lo builtin.lo sched_builtin_la_OBJECTS = $(am_sched_builtin_la_OBJECTS) sched_builtin_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ @@ -323,7 +323,11 @@ AUTOMAKE_OPTIONS = foreign PLUGIN_FLAGS = -module -avoid-version --export-dynamic INCLUDES = -I$(top_srcdir) 
-I$(top_srcdir)/src/common pkglib_LTLIBRARIES = sched_builtin.la -sched_builtin_la_SOURCES = builtin_wrapper.c +sched_builtin_la_SOURCES = \ + builtin_wrapper.c \ + builtin.c \ + builtin.h + sched_builtin_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) all: all-am @@ -399,6 +403,7 @@ mostlyclean-compile: distclean-compile: -rm -f *.tab.c +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/builtin.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/builtin_wrapper.Plo@am__quote@ .c.o: diff --git a/src/plugins/sched/builtin/builtin.c b/src/plugins/sched/builtin/builtin.c new file mode 100644 index 00000000000..530abda1a7d --- /dev/null +++ b/src/plugins/sched/builtin/builtin.c @@ -0,0 +1,238 @@ +/*****************************************************************************\ + * builtin.c - simple builtin scheduler plugin. + * Periodically computes when pending jobs can start. + * This is a minimal implementation of the logic found in + * src/plugins/sched/backfill/backfill.c and disregards + * how jobs are scheduled sequentially. + ***************************************************************************** + * Copyright (C) 2003-2007 The Regents of the University of California. + * Copyright (C) 2008-2010 Lawrence Livermore National Security. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Morris Jette <jette1@llnl.gov> + * CODE-OCEC-09-009. All rights reserved. + * + * This file is part of SLURM, a resource management program. + * For details, see <https://computing.llnl.gov/linux/slurm/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+\*****************************************************************************/ + +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#include "slurm/slurm.h" +#include "slurm/slurm_errno.h" + +#include "src/common/list.h" +#include "src/common/macros.h" +#include "src/common/node_select.h" +#include "src/common/parse_time.h" +#include "src/common/slurm_protocol_api.h" +#include "src/common/xmalloc.h" +#include "src/common/xstring.h" + +#include "src/slurmctld/locks.h" +#include "src/slurmctld/preempt.h" +#include "src/slurmctld/reservation.h" +#include "src/slurmctld/slurmctld.h" +#include "src/plugins/sched/builtin/builtin.h" + +#ifndef BACKFILL_INTERVAL +# define BACKFILL_INTERVAL 30 +#endif + +/*********************** local variables *********************/ +static bool stop_builtin = false; +static pthread_mutex_t thread_flag_mutex = PTHREAD_MUTEX_INITIALIZER; /* NOTE(review): not referenced anywhere else in builtin.c */ +static pthread_mutex_t term_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t term_cond = PTHREAD_COND_INITIALIZER; +static bool config_flag = false; +static int backfill_interval = BACKFILL_INTERVAL; +static int max_backfill_job_cnt = 50; +static int sched_timeout = 0; + +/*********************** local functions *********************/ +static void _compute_start_times(void); +static void _load_config(void); +static void _my_sleep(int secs); + +/* Terminate builtin_agent: set the stop flag and signal term_cond so that an agent sleeping in _my_sleep() wakes up */ +extern void stop_builtin_agent(void) +{ + pthread_mutex_lock(&term_lock); + stop_builtin = true; + pthread_cond_signal(&term_cond); + pthread_mutex_unlock(&term_lock); +} + +static void _my_sleep(int secs) /* Sleep for up to secs seconds; a signal on term_cond ends the wait early, and no wait happens if stop_builtin is already set */ +{ + struct timespec ts = {0, 0}; + + ts.tv_sec = time(NULL) + secs; + pthread_mutex_lock(&term_lock); + if (!stop_builtin) + pthread_cond_timedwait(&term_cond, &term_lock, &ts); + pthread_mutex_unlock(&term_lock); +} + +static void _load_config(void) /* Set sched_timeout to half the message timeout clamped to 1..10, then read interval= and max_job_bf= from the string returned by slurm_get_sched_params(); calls fatal() if either value is below 1 */ +{ + char *sched_params, *tmp_ptr; + + sched_timeout = slurm_get_msg_timeout() / 2; + sched_timeout 
= MAX(sched_timeout, 1); + sched_timeout = MIN(sched_timeout, 10); + + sched_params = slurm_get_sched_params(); + + if (sched_params && (tmp_ptr=strstr(sched_params, "interval="))) + backfill_interval = atoi(tmp_ptr + 9); + if (backfill_interval < 1) { + fatal("Invalid backfill scheduler interval: %d", + backfill_interval); + } + + if (sched_params && (tmp_ptr=strstr(sched_params, "max_job_bf="))) + max_backfill_job_cnt = atoi(tmp_ptr + 11); + if (max_backfill_job_cnt < 1) { + fatal("Invalid backfill scheduler max_job_bf: %d", + max_backfill_job_cnt); + } + xfree(sched_params); +} + +static void _compute_start_times(void) /* Pop each record from the queue returned by build_job_queue() and run a SELECT_MODE_WILL_RUN test on the job; stops on the job-count limit or once sched_timeout seconds have elapsed */ +{ + int j, rc = SLURM_SUCCESS, job_cnt = 0; + List job_queue; + job_queue_rec_t *job_queue_rec; + List preemptee_candidates = NULL; + struct job_record *job_ptr; + struct part_record *part_ptr; + bitstr_t *avail_bitmap = NULL; + uint32_t max_nodes, min_nodes, req_nodes; + time_t now = time(NULL), sched_start; + + sched_start = now; + job_queue = build_job_queue(); + while ((job_queue_rec = (job_queue_rec_t *) + list_pop_bottom(job_queue, sort_job_queue2))) { + job_ptr = job_queue_rec->job_ptr; + part_ptr = job_queue_rec->part_ptr; + xfree(job_queue_rec); + if (part_ptr != job_ptr->part_ptr) + continue; /* Only test one partition */ + + if (job_cnt++ > max_backfill_job_cnt) { /* NOTE(review): post-increment compared with > lets one more than max_backfill_job_cnt jobs through before breaking; the message below mentions time although this is the count limit */ + debug("backfill: loop taking to long, breaking out"); + break; + } + + /* Determine minimum and maximum node counts */ + min_nodes = MAX(job_ptr->details->min_nodes, + part_ptr->min_nodes); + + if (job_ptr->details->max_nodes == 0) + max_nodes = part_ptr->max_nodes; + else + max_nodes = MIN(job_ptr->details->max_nodes, + part_ptr->max_nodes); + + max_nodes = MIN(max_nodes, 500000); /* prevent overflows */ + + if (job_ptr->details->max_nodes) + req_nodes = max_nodes; + else + req_nodes = min_nodes; + + if (min_nodes > max_nodes) { + /* job's min_nodes exceeds partition's max_nodes */ + continue; + } + + j = job_test_resv(job_ptr, &now, true, &avail_bitmap); + if (j != 
SLURM_SUCCESS) + continue; + + rc = select_g_job_test(job_ptr, avail_bitmap, + min_nodes, max_nodes, req_nodes, + SELECT_MODE_WILL_RUN, + preemptee_candidates, NULL); /* NOTE(review): rc is assigned here but never examined in this function */ + last_job_update = now; + FREE_NULL_BITMAP(avail_bitmap); + + if ((time(NULL) - sched_start) >= sched_timeout) { + debug("backfill: loop taking to long, breaking out"); + break; + } + } + list_destroy(job_queue); +} + +/* Note that slurm.conf has changed; builtin_agent calls _load_config() again after its next sleep */ +extern void builtin_reconfig(void) +{ + config_flag = true; +} + +/* builtin_agent - thread that periodically computes when pending jobs can start; loops until stop_builtin is set by stop_builtin_agent() */ +extern void *builtin_agent(void *args) +{ + time_t now; + double wait_time; + static time_t last_backfill_time = 0; + /* Read config and partitions; Write jobs and nodes */ + slurmctld_lock_t all_locks = { + READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK }; + + _load_config(); + last_backfill_time = time(NULL); + while (!stop_builtin) { + _my_sleep(backfill_interval); + if (stop_builtin) + break; + if (config_flag) { + config_flag = false; + _load_config(); + } + now = time(NULL); + wait_time = difftime(now, last_backfill_time); + if ((wait_time < backfill_interval)) + continue; + + lock_slurmctld(all_locks); + _compute_start_times(); + last_backfill_time = time(NULL); + unlock_slurmctld(all_locks); + } + return NULL; +} diff --git a/src/plugins/sched/builtin/builtin.h b/src/plugins/sched/builtin/builtin.h new file mode 100644 index 00000000000..8682356ad60 --- /dev/null +++ b/src/plugins/sched/builtin/builtin.h @@ -0,0 +1,53 @@ +/*****************************************************************************\ + * builtin.h - header for simple builtin scheduler plugin. + ***************************************************************************** + * Copyright (C) 2003-2007 The Regents of the University of California. + * Copyright (C) 2008-2010 Lawrence Livermore National Security. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
+ * Written by Morris Jette <jette1@llnl.gov> + * CODE-OCEC-09-009. All rights reserved. + * + * This file is part of SLURM, a resource management program. + * For details, see <https://computing.llnl.gov/linux/slurm/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+\*****************************************************************************/ + + +#ifndef _SLURM_BUILTIN_H +#define _SLURM_BUILTIN_H + +/* builtin_agent - thread that periodically computes when pending jobs can start */ +extern void *builtin_agent(void *args); + +/* Terminate builtin_agent */ +extern void stop_builtin_agent(void); + +/* Note that slurm.conf has changed */ +extern void builtin_reconfig(void); + +#endif /* _SLURM_BUILTIN_H */ diff --git a/src/plugins/sched/builtin/builtin_wrapper.c b/src/plugins/sched/builtin/builtin_wrapper.c index e6ee3c93550..2ea60856ec8 100644 --- a/src/plugins/sched/builtin/builtin_wrapper.c +++ b/src/plugins/sched/builtin/builtin_wrapper.c @@ -42,11 +42,12 @@ #include "src/common/plugin.h" #include "src/common/log.h" -#include "src/slurmctld/slurmctld.h" +#include "src/common/node_select.h" #include "src/common/slurm_priority.h" #include "src/slurmctld/job_scheduler.h" #include "src/slurmctld/reservation.h" -#include "src/common/node_select.h" +#include "src/slurmctld/slurmctld.h" +#include "src/plugins/sched/builtin/builtin.h" const char plugin_name[] = "SLURM Built-in Scheduler plugin"; const char plugin_type[] = "sched/builtin"; @@ -55,12 +56,33 @@ const uint32_t plugin_version = 100; /* A plugin-global errno. 
*/ static int plugin_errno = SLURM_SUCCESS; +static pthread_t builtin_thread = 0; +static pthread_mutex_t thread_flag_mutex = PTHREAD_MUTEX_INITIALIZER; + /**************************************************************************/ /* TAG( init ) */ /**************************************************************************/ int init( void ) { + pthread_attr_t attr; + verbose( "sched: Built-in scheduler plugin loaded" ); + + pthread_mutex_lock( &thread_flag_mutex ); + if ( builtin_thread ) { + debug2( "Built-in scheduler thread already running, " + "not starting another" ); + pthread_mutex_unlock( &thread_flag_mutex ); + return SLURM_ERROR; + } + + slurm_attr_init( &attr ); + /* The thread is joined later in fini(), so it is not made detached */ + if (pthread_create( &builtin_thread, &attr, builtin_agent, NULL)) + error("Unable to start built-in scheduler thread: %m"); /* NOTE(review): the failure is only logged; init() still returns SLURM_SUCCESS below */ + pthread_mutex_unlock( &thread_flag_mutex ); + slurm_attr_destroy( &attr ); + return SLURM_SUCCESS; } @@ -69,7 +91,14 @@ int init( void ) /**************************************************************************/ void fini( void ) { - /* Empty. 
*/ + pthread_mutex_lock( &thread_flag_mutex ); + if ( builtin_thread ) { + verbose( "Built-in scheduler plugin shutting down" ); + stop_builtin_agent(); /* sets the stop flag and signals the agent so the join below can complete */ + pthread_join(builtin_thread, NULL); + builtin_thread = 0; + } + pthread_mutex_unlock( &thread_flag_mutex ); } /**************************************************************************/ @@ -77,6 +106,7 @@ void fini( void ) /**************************************************************************/ int slurm_sched_plugin_reconfig( void ) { + builtin_reconfig(); /* flag the agent thread to reload its configuration */ return SLURM_SUCCESS; } @@ -126,72 +156,7 @@ slurm_sched_plugin_initial_priority( uint32_t last_prio, /**************************************************************************/ void slurm_sched_plugin_job_is_pending( void ) { - int j, rc = SLURM_SUCCESS; - List job_queue; - job_queue_rec_t *job_queue_rec; - List preemptee_candidates = NULL; - struct job_record *job_ptr; - struct part_record *part_ptr; - bitstr_t *avail_bitmap = NULL; - uint32_t max_nodes, min_nodes, req_nodes; - time_t now = time(NULL), sched_start; - static int sched_timeout = 0; - - sched_start = now; - if (sched_timeout == 0) { - sched_timeout = slurm_get_msg_timeout() / 2; - sched_timeout = MAX(sched_timeout, 1); - sched_timeout = MIN(sched_timeout, 10); - } - - job_queue = build_job_queue(); - while ((job_queue_rec = (job_queue_rec_t *) - list_pop_bottom(job_queue, sort_job_queue2))) { - job_ptr = job_queue_rec->job_ptr; - part_ptr = job_queue_rec->part_ptr; - xfree(job_queue_rec); - if (part_ptr != job_ptr->part_ptr) - continue; /* Only test one partition */ - - /* Determine minimum and maximum node counts */ - min_nodes = MAX(job_ptr->details->min_nodes, - part_ptr->min_nodes); - - if (job_ptr->details->max_nodes == 0) - max_nodes = part_ptr->max_nodes; - else - max_nodes = MIN(job_ptr->details->max_nodes, - part_ptr->max_nodes); - - max_nodes = MIN(max_nodes, 500000); /* prevent overflows */ - - if (job_ptr->details->max_nodes) - req_nodes = max_nodes; - else - req_nodes = 
min_nodes; - - if (min_nodes > max_nodes) { - /* job's min_nodes exceeds partition's max_nodes */ - continue; - } - - j = job_test_resv(job_ptr, &now, true, &avail_bitmap); - if (j != SLURM_SUCCESS) - continue; - - rc = select_g_job_test(job_ptr, avail_bitmap, - min_nodes, max_nodes, req_nodes, - SELECT_MODE_WILL_RUN, - preemptee_candidates, NULL); - - FREE_NULL_BITMAP(avail_bitmap); - - if ((time(NULL) - sched_start) >= sched_timeout) { - debug("backfill: loop taking to long, breaking out"); - break; - } - } - list_destroy(job_queue); + /* Empty: pending-job start times are now computed by the builtin_agent thread. */ } /**************************************************************************/ -- GitLab