From 874f797fafbc10165e5a08b208f91f0e5315792d Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Wed, 3 Nov 2010 23:38:52 +0000
Subject: [PATCH] Move the start time calculation of pending jobs into a
 separate pthread for sched/builtin for better efficiency

---
 src/plugins/sched/builtin/Makefile.am       |   5 +-
 src/plugins/sched/builtin/Makefile.in       |   9 +-
 src/plugins/sched/builtin/builtin.c         | 238 ++++++++++++++++++++
 src/plugins/sched/builtin/builtin.h         |  53 +++++
 src/plugins/sched/builtin/builtin_wrapper.c | 103 +++------
 5 files changed, 336 insertions(+), 72 deletions(-)
 create mode 100644 src/plugins/sched/builtin/builtin.c
 create mode 100644 src/plugins/sched/builtin/builtin.h

diff --git a/src/plugins/sched/builtin/Makefile.am b/src/plugins/sched/builtin/Makefile.am
index 56cc1a4bbf0..ff36ef1e592 100644
--- a/src/plugins/sched/builtin/Makefile.am
+++ b/src/plugins/sched/builtin/Makefile.am
@@ -10,5 +10,8 @@ INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
 
 pkglib_LTLIBRARIES = sched_builtin.la
 
-sched_builtin_la_SOURCES = builtin_wrapper.c
+sched_builtin_la_SOURCES = \
+			builtin_wrapper.c \
+			builtin.c	\
+			builtin.h
 sched_builtin_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
diff --git a/src/plugins/sched/builtin/Makefile.in b/src/plugins/sched/builtin/Makefile.in
index ef76d176493..a790fbc0afb 100644
--- a/src/plugins/sched/builtin/Makefile.in
+++ b/src/plugins/sched/builtin/Makefile.in
@@ -105,7 +105,7 @@ am__base_list = \
 am__installdirs = "$(DESTDIR)$(pkglibdir)"
 LTLIBRARIES = $(pkglib_LTLIBRARIES)
 sched_builtin_la_LIBADD =
-am_sched_builtin_la_OBJECTS = builtin_wrapper.lo
+am_sched_builtin_la_OBJECTS = builtin_wrapper.lo builtin.lo
 sched_builtin_la_OBJECTS = $(am_sched_builtin_la_OBJECTS)
 sched_builtin_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
@@ -323,7 +323,11 @@ AUTOMAKE_OPTIONS = foreign
 PLUGIN_FLAGS = -module -avoid-version --export-dynamic
 INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
 pkglib_LTLIBRARIES = sched_builtin.la
-sched_builtin_la_SOURCES = builtin_wrapper.c
+sched_builtin_la_SOURCES = \
+			builtin_wrapper.c \
+			builtin.c	\
+			builtin.h
+
 sched_builtin_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS)
 all: all-am
 
@@ -399,6 +403,7 @@ mostlyclean-compile:
 distclean-compile:
 	-rm -f *.tab.c
 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/builtin.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/builtin_wrapper.Plo@am__quote@
 
 .c.o:
diff --git a/src/plugins/sched/builtin/builtin.c b/src/plugins/sched/builtin/builtin.c
new file mode 100644
index 00000000000..530abda1a7d
--- /dev/null
+++ b/src/plugins/sched/builtin/builtin.c
@@ -0,0 +1,238 @@
+/*****************************************************************************\
+ *  builtin.c - simple builtin scheduler plugin.
+ *		Periodically compute when pending jobs can start.
+ *		This is a minimal implementation of the logic found in
+ *		src/plugins/sched/backfill/backfill.c and disregards
+ *		how jobs are scheduled sequentially.
+ *****************************************************************************
+ *  Copyright (C) 2003-2007 The Regents of the University of California.
+ *  Copyright (C) 2008-2010 Lawrence Livermore National Security.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Morris Jette <jette1@llnl.gov>
+ *  CODE-OCEC-09-009. All rights reserved.
+ *
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <https://computing.llnl.gov/linux/slurm/>.
+ *  Please also read the included file: DISCLAIMER.
+ *
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *
+ *  In addition, as a special exception, the copyright holders give permission
+ *  to link the code of portions of this program with the OpenSSL library under
+ *  certain conditions as described in each individual source file, and
+ *  distribute linked combinations including the two. You must obey the GNU
+ *  General Public License in all respects for all of the code used other than
+ *  OpenSSL. If you modify file(s) with this exception, you may extend this
+ *  exception to your version of the file(s), but you are not obligated to do
+ *  so. If you do not wish to do so, delete this exception statement from your
+ *  version.  If you delete this exception statement from all source files in
+ *  the program, then also delete it here.
+ *
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
+\*****************************************************************************/
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "slurm/slurm.h"
+#include "slurm/slurm_errno.h"
+
+#include "src/common/list.h"
+#include "src/common/macros.h"
+#include "src/common/node_select.h"
+#include "src/common/parse_time.h"
+#include "src/common/slurm_protocol_api.h"
+#include "src/common/xmalloc.h"
+#include "src/common/xstring.h"
+
+#include "src/slurmctld/locks.h"
+#include "src/slurmctld/preempt.h"
+#include "src/slurmctld/reservation.h"
+#include "src/slurmctld/slurmctld.h"
+#include "src/plugins/sched/builtin/builtin.h"
+
+#ifndef BACKFILL_INTERVAL
+#  define BACKFILL_INTERVAL	30
+#endif
+
+/*********************** local variables *********************/
+static bool stop_builtin = false;	/* set by stop_builtin_agent() */
+/* term_lock guards the agent's timed wait on term_cond in _my_sleep() */
+static pthread_mutex_t term_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t  term_cond = PTHREAD_COND_INITIALIZER;
+static bool config_flag = false;	/* set by builtin_reconfig() */
+static int backfill_interval = BACKFILL_INTERVAL; /* "interval=" option */
+static int max_backfill_job_cnt = 50;	/* "max_job_bf=" option */
+static int sched_timeout = 0;		/* seconds; set by _load_config() */
+
+/*********************** local functions *********************/
+static void _compute_start_times(void);
+static void _load_config(void);
+static void _my_sleep(int secs);
+
+/* Terminate builtin_agent: set stop_builtin and signal term_cond */
+extern void stop_builtin_agent(void)
+{
+	pthread_mutex_lock(&term_lock);
+	stop_builtin = true;
+	pthread_cond_signal(&term_cond);
+	pthread_mutex_unlock(&term_lock);
+}
+
+static void _my_sleep(int secs)	/* wait secs or until term_cond signal */
+{
+	struct timespec ts = {0, 0};	/* absolute wake-up time */
+
+	ts.tv_sec = time(NULL) + secs;
+	pthread_mutex_lock(&term_lock);
+	if (!stop_builtin)		/* skip the wait if already stopping */
+		pthread_cond_timedwait(&term_cond, &term_lock, &ts);
+	pthread_mutex_unlock(&term_lock);
+}
+
+/* Set sched_timeout and parse "interval=" / "max_job_bf=" scheduler params */
+static void _load_config(void)
+{
+	char *sched_params, *tmp_ptr;
+
+	sched_timeout = slurm_get_msg_timeout() / 2;
+	sched_timeout = MAX(sched_timeout, 1);
+	sched_timeout = MIN(sched_timeout, 10);
+
+	/* Start from defaults so options removed on reconfigure don't linger */
+	backfill_interval = BACKFILL_INTERVAL;
+	max_backfill_job_cnt = 50;	/* same default as its initializer */
+	sched_params = slurm_get_sched_params();
+	if (sched_params && (tmp_ptr=strstr(sched_params, "interval=")))
+		backfill_interval = atoi(tmp_ptr + 9);
+	if (backfill_interval < 1)
+		fatal("Invalid backfill scheduler interval: %d",
+		      backfill_interval);
+	if (sched_params && (tmp_ptr=strstr(sched_params, "max_job_bf=")))
+		max_backfill_job_cnt = atoi(tmp_ptr + 11);
+	if (max_backfill_job_cnt < 1)
+		fatal("Invalid backfill scheduler max_job_bf: %d",
+		      max_backfill_job_cnt);
+	xfree(sched_params);
+}
+
+/* Run select_g_job_test() in SELECT_MODE_WILL_RUN for queued jobs, stopping
+ * after max_backfill_job_cnt jobs or sched_timeout seconds of work */
+static void _compute_start_times(void)
+{
+	int j, job_cnt = 0;
+	List job_queue, preemptee_candidates = NULL;
+	job_queue_rec_t *job_queue_rec;
+	struct job_record *job_ptr;
+	struct part_record *part_ptr;
+	bitstr_t *avail_bitmap = NULL;
+	uint32_t max_nodes, min_nodes, req_nodes;
+	time_t now = time(NULL), sched_start = now;
+
+	job_queue = build_job_queue();
+	while ((job_queue_rec = (job_queue_rec_t *)
+				list_pop_bottom(job_queue, sort_job_queue2))) {
+		job_ptr  = job_queue_rec->job_ptr;
+		part_ptr = job_queue_rec->part_ptr;
+		xfree(job_queue_rec);
+		if (part_ptr != job_ptr->part_ptr)
+			continue;	/* Only test one partition */
+		/* Test at most max_backfill_job_cnt jobs per pass */
+		if (job_cnt++ >= max_backfill_job_cnt) {
+			debug("backfill: max_job_bf limit reached");
+			break;
+		}
+
+		/* Determine minimum and maximum node counts */
+		min_nodes = MAX(job_ptr->details->min_nodes,
+				part_ptr->min_nodes);
+
+		if (job_ptr->details->max_nodes == 0)
+			max_nodes = part_ptr->max_nodes;
+		else
+			max_nodes = MIN(job_ptr->details->max_nodes,
+					part_ptr->max_nodes);
+
+		max_nodes = MIN(max_nodes, 500000);     /* prevent overflows */
+
+		if (job_ptr->details->max_nodes)
+			req_nodes = max_nodes;
+		else
+			req_nodes = min_nodes;
+
+		if (min_nodes > max_nodes) {
+			/* job's min_nodes exceeds partition's max_nodes */
+			continue;
+		}
+
+		j = job_test_resv(job_ptr, &now, true, &avail_bitmap);
+		if (j != SLURM_SUCCESS)
+			continue;
+		/* Result code is not examined */
+		(void) select_g_job_test(job_ptr, avail_bitmap,
+					 min_nodes, max_nodes, req_nodes,
+					 SELECT_MODE_WILL_RUN,
+					 preemptee_candidates, NULL);
+		last_job_update = now;
+		FREE_NULL_BITMAP(avail_bitmap);
+
+		if ((time(NULL) - sched_start) >= sched_timeout) {
+			debug("backfill: loop taking to long, breaking out");
+			break;
+		}
+	}
+	list_destroy(job_queue);
+}
+
+/* Note that slurm.conf has changed; builtin_agent re-runs _load_config() */
+extern void builtin_reconfig(void)
+{
+	config_flag = true;
+}
+
+/* builtin_agent - thread periodically computing pending job start times */
+extern void *builtin_agent(void *args)
+{
+	time_t now;
+	double wait_time;
+	static time_t last_backfill_time = 0;
+	/* Read config and partitions; Write jobs and nodes */
+	slurmctld_lock_t all_locks = {
+		READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };
+
+	_load_config();
+	last_backfill_time = time(NULL);
+	while (!stop_builtin) {
+		_my_sleep(backfill_interval);
+		if (stop_builtin)
+			break;
+		if (config_flag) {
+			config_flag = false;
+			_load_config();
+		}
+		now = time(NULL);
+		wait_time = difftime(now, last_backfill_time);
+		if ((wait_time < backfill_interval))
+			continue;	/* woke early; interval not yet elapsed */
+
+		lock_slurmctld(all_locks);
+		_compute_start_times();
+		last_backfill_time = time(NULL);
+		unlock_slurmctld(all_locks);
+	}
+	return NULL;
+}
diff --git a/src/plugins/sched/builtin/builtin.h b/src/plugins/sched/builtin/builtin.h
new file mode 100644
index 00000000000..8682356ad60
--- /dev/null
+++ b/src/plugins/sched/builtin/builtin.h
@@ -0,0 +1,53 @@
+/*****************************************************************************\
+ *  builtin.h - header for simple builtin scheduler plugin.
+ *****************************************************************************
+ *  Copyright (C) 2003-2007 The Regents of the University of California.
+ *  Copyright (C) 2008-2010 Lawrence Livermore National Security.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Morris Jette <jette1@llnl.gov>
+ *  CODE-OCEC-09-009. All rights reserved.
+ *
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <https://computing.llnl.gov/linux/slurm/>.
+ *  Please also read the included file: DISCLAIMER.
+ *
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *
+ *  In addition, as a special exception, the copyright holders give permission
+ *  to link the code of portions of this program with the OpenSSL library under
+ *  certain conditions as described in each individual source file, and
+ *  distribute linked combinations including the two. You must obey the GNU
+ *  General Public License in all respects for all of the code used other than
+ *  OpenSSL. If you modify file(s) with this exception, you may extend this
+ *  exception to your version of the file(s), but you are not obligated to do
+ *  so. If you do not wish to do so, delete this exception statement from your
+ *  version.  If you delete this exception statement from all source files in
+ *  the program, then also delete it here.
+ *
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
+\*****************************************************************************/
+
+
+#ifndef _SLURM_BUILTIN_H
+#define _SLURM_BUILTIN_H
+
+/* builtin_agent - thread periodically computing pending job start times */
+extern void *builtin_agent(void *args);
+
+/* Terminate builtin_agent */
+extern void stop_builtin_agent(void);
+
+/* Note that slurm.conf has changed */
+extern void builtin_reconfig(void);
+
+#endif	/* _SLURM_BUILTIN_H */
diff --git a/src/plugins/sched/builtin/builtin_wrapper.c b/src/plugins/sched/builtin/builtin_wrapper.c
index e6ee3c93550..2ea60856ec8 100644
--- a/src/plugins/sched/builtin/builtin_wrapper.c
+++ b/src/plugins/sched/builtin/builtin_wrapper.c
@@ -42,11 +42,12 @@
 
 #include "src/common/plugin.h"
 #include "src/common/log.h"
-#include "src/slurmctld/slurmctld.h"
+#include "src/common/node_select.h"
 #include "src/common/slurm_priority.h"
 #include "src/slurmctld/job_scheduler.h"
 #include "src/slurmctld/reservation.h"
-#include "src/common/node_select.h"
+#include "src/slurmctld/slurmctld.h"
+#include "src/plugins/sched/builtin/builtin.h"
 
 const char		plugin_name[]	= "SLURM Built-in Scheduler plugin";
 const char		plugin_type[]	= "sched/builtin";
@@ -55,12 +56,33 @@ const uint32_t		plugin_version	= 100;
 /* A plugin-global errno. */
 static int plugin_errno = SLURM_SUCCESS;
 
+static pthread_t builtin_thread = 0;
+static pthread_mutex_t thread_flag_mutex = PTHREAD_MUTEX_INITIALIZER;
+
 /**************************************************************************/
 /*  TAG(                              init                              ) */
 /**************************************************************************/
 int init( void )
 {
+	pthread_attr_t attr;
+
 	verbose( "sched: Built-in scheduler plugin loaded" );
+	pthread_mutex_lock( &thread_flag_mutex );
+	if ( builtin_thread ) {
+		debug2( "Built-in scheduler thread already running, "
+			"not starting another" );
+		pthread_mutex_unlock( &thread_flag_mutex );
+		return SLURM_ERROR;
+	}
+
+	slurm_attr_init( &attr );
+	/* since we do a join on this later we don't make it detached */
+	if (pthread_create( &builtin_thread, &attr, builtin_agent, NULL)) {
+		error("Unable to start built-in scheduler thread: %m");
+		builtin_thread = 0;	/* undefined after failure; skip join */
+	}
+	pthread_mutex_unlock( &thread_flag_mutex );
+	slurm_attr_destroy( &attr );
 	return SLURM_SUCCESS;
 }
 
@@ -69,7 +91,14 @@ int init( void )
 /**************************************************************************/
 void fini( void )
 {
-	/* Empty. */
+	pthread_mutex_lock( &thread_flag_mutex );
+	if ( builtin_thread ) {
+		verbose( "Built-in scheduler plugin shutting down" );
+		stop_builtin_agent();	/* ask the agent thread to exit */
+		pthread_join(builtin_thread, NULL);	/* wait for it */
+		builtin_thread = 0;
+	}
+	pthread_mutex_unlock( &thread_flag_mutex );
 }
 
 /**************************************************************************/
@@ -77,6 +106,7 @@ void fini( void )
 /**************************************************************************/
 int slurm_sched_plugin_reconfig( void )
 {
+	builtin_reconfig();	/* flag the agent thread to reload its config */
 	return SLURM_SUCCESS;
 }
 
@@ -126,72 +156,7 @@ slurm_sched_plugin_initial_priority( uint32_t last_prio,
 /**************************************************************************/
 void slurm_sched_plugin_job_is_pending( void )
 {
-	int j, rc = SLURM_SUCCESS;
-	List job_queue;
-	job_queue_rec_t *job_queue_rec;
-	List preemptee_candidates = NULL;
-	struct job_record *job_ptr;
-	struct part_record *part_ptr;
-	bitstr_t *avail_bitmap = NULL;
-	uint32_t max_nodes, min_nodes, req_nodes;
-	time_t now = time(NULL), sched_start;
-	static int sched_timeout = 0;
-
-	sched_start = now;
-	if (sched_timeout == 0) {
-		sched_timeout = slurm_get_msg_timeout() / 2;
-		sched_timeout = MAX(sched_timeout, 1);
-		sched_timeout = MIN(sched_timeout, 10);
-	}
-
-	job_queue = build_job_queue();
-	while ((job_queue_rec = (job_queue_rec_t *) 
-				list_pop_bottom(job_queue, sort_job_queue2))) {
-		job_ptr  = job_queue_rec->job_ptr;
-		part_ptr = job_queue_rec->part_ptr;
-		xfree(job_queue_rec);
-		if (part_ptr != job_ptr->part_ptr)
-			continue;	/* Only test one partition */
-
-		/* Determine minimum and maximum node counts */
-		min_nodes = MAX(job_ptr->details->min_nodes,
-				part_ptr->min_nodes);
-
-		if (job_ptr->details->max_nodes == 0)
-			max_nodes = part_ptr->max_nodes;
-		else
-			max_nodes = MIN(job_ptr->details->max_nodes,
-					part_ptr->max_nodes);
-
-		max_nodes = MIN(max_nodes, 500000);     /* prevent overflows */
-
-		if (job_ptr->details->max_nodes)
-			req_nodes = max_nodes;
-		else
-			req_nodes = min_nodes;
-
-		if (min_nodes > max_nodes) {
-			/* job's min_nodes exceeds partition's max_nodes */
-			continue;
-		}
-
-		j = job_test_resv(job_ptr, &now, true, &avail_bitmap);
-		if (j != SLURM_SUCCESS)
-			continue;
-
-		rc = select_g_job_test(job_ptr, avail_bitmap,
-				       min_nodes, max_nodes, req_nodes,
-				       SELECT_MODE_WILL_RUN,
-				       preemptee_candidates, NULL);
-
-		FREE_NULL_BITMAP(avail_bitmap);
-
-		if ((time(NULL) - sched_start) >= sched_timeout) {
-			debug("backfill: loop taking to long, breaking out");
-			break;
-		}
-	}
-	list_destroy(job_queue);
+	/* Empty: pending job start times are computed by builtin_agent */
 }
 
 /**************************************************************************/
-- 
GitLab