From 0dea4c1c75d1682afd8e16c400a764bf4a1a7480 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Mon, 23 Feb 2009 23:24:58 +0000 Subject: [PATCH] Add support for SchedulerParameters=interval=<sec> to control the time interval between executions of the backfill scheduler logic. --- NEWS | 2 ++ doc/man/man5/slurm.conf.5 | 9 +++++++++ src/common/slurm_protocol_api.c | 17 +++++++++++++++++ src/common/slurm_protocol_api.h | 6 +++++- src/plugins/sched/backfill/backfill.c | 20 +++++++++++++------- 5 files changed, 46 insertions(+), 8 deletions(-) diff --git a/NEWS b/NEWS index 5a5811e0bc0..31d7dfc4507 100644 --- a/NEWS +++ b/NEWS @@ -32,6 +32,8 @@ documents those changes that are of interest to users and admins. management. Add resv_port_cnt and resv_ports fields to the job step data structures. Add environment variable SLURM_STEP_RESV_PORTS to show what ports are reserved for a job step. + -- Add support for SchedulerParameters=interval:<sec> to control the time + interval between executions of the backfill scheduler logic. -- NOTE: Cold-start (without preserving state) required for upgrade from version 1.4.0-pre7. diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 5608d04e0b7..a3edff41a01 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -1025,6 +1025,15 @@ and would run \fBxterm\fR with the title set to the SLURM jobid. +.TP +\fBSchedulerParameters\fR +The interpretation of this parameter varies by \fBSchedulerType\fR. +In the case of \fBSchedulerType=sched/backfill\fR, there is one +optional argument of the form "interval:#", where "#" is the number of +seconds between iterations. Higher values result in less overhead +and less responsiveness. The default value is 5 seconds on BlueGene systems +and 10 seconds otherwise. + .TP \fBSchedulerPort\fR The port number on which slurmctld should listen for connection requests. 
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 633fa0f3bb8..c645de8b359 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -1275,6 +1275,23 @@ extern uint16_t slurm_get_root_filter(void)
 	}
 	return root_filter;
 }
+
+/* slurm_get_sched_params
+ * RET char * - Copy of SchedulerParameters or NULL, MUST be xfreed by caller */
+extern char *slurm_get_sched_params(void)
+{
+	char *params = NULL;
+	slurm_ctl_conf_t *conf;
+
+	if (!slurmdbd_conf) {
+		conf = slurm_conf_lock();
+		/* Copy under the lock; the caller owns and xfrees it */
+		params = xstrdup(conf->sched_params);
+		slurm_conf_unlock();
+	}
+	return params;
+}
+
 /* slurm_get_sched_port
  * RET uint16_t - Value of SchedulerPort */
 extern uint16_t slurm_get_sched_port(void)
diff --git a/src/common/slurm_protocol_api.h b/src/common/slurm_protocol_api.h
index 13df3a70137..d9c7830ba18 100644
--- a/src/common/slurm_protocol_api.h
+++ b/src/common/slurm_protocol_api.h
@@ -3,7 +3,7 @@
  * definitions
  *****************************************************************************
  * Copyright (C) 2002-2006 The Regents of the University of California.
- * Copyright (C) 2008 Lawrence Livermore National Security.
+ * Copyright (C) 2008-2009 Lawrence Livermore National Security.
  * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  * Written by Kevin Tew <tew1@llnl.gov>, et. al.
  * CODE-OCEC-09-009. All rights reserved.
@@ -424,6 +424,10 @@ char *slurm_get_proctrack_type(void); * RET uint16_t - Value of SchedulerRootFilter */ extern uint16_t slurm_get_root_filter(void); +/* slurm_get_sched_params + * RET char * - Value of SchedulerParameters, MUST be xfreed by caller */ +extern char *slurm_get_sched_params(void); + /* slurm_get_sched_port * RET uint16_t - Value of SchedulerPort */ extern uint16_t slurm_get_sched_port(void); diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c index 09414d02c4e..191daa95c73 100644 --- a/src/plugins/sched/backfill/backfill.c +++ b/src/plugins/sched/backfill/backfill.c @@ -91,10 +91,6 @@ static bool new_work = false; static bool stop_backfill = false; static pthread_mutex_t thread_flag_mutex = PTHREAD_MUTEX_INITIALIZER; -/* Backfill scheduling has considerable overhead, - * so only attempt it every BACKFILL_INTERVAL seconds. - * Much of the scheduling for BlueGene happens through backfill, - * so we run it more frequently. */ #ifndef BACKFILL_INTERVAL # ifdef HAVE_BG # define BACKFILL_INTERVAL 5 @@ -176,14 +172,24 @@ extern void stop_backfill_agent(void) extern void *backfill_agent(void *args) { struct timeval tv1, tv2; - char tv_str[20]; + char tv_str[20], *sched_params, *tmp_ptr; time_t now; - int i, iter; + int backfill_interval = 0, i, iter; static time_t last_backfill_time = 0; /* Read config, and partitions; Write jobs and nodes */ slurmctld_lock_t all_locks = { READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK }; + sched_params = slurm_get_sched_params(); + if (sched_params && (tmp_ptr=strstr(sched_params, "interval:"))) + backfill_interval = atoi(tmp_ptr+9); + else + backfill_interval = BACKFILL_INTERVAL; + if (backfill_interval < 1) { + fatal("Invalid backfill scheduler interval: %d", + backfill_interval); + } + while (!stop_backfill) { iter = (BACKFILL_CHECK_SEC * 1000000) / STOP_CHECK_USEC; @@ -197,7 +203,7 @@ extern void *backfill_agent(void *args) /* Avoid resource fragmentation if important */ if 
(job_is_completing()) continue; - if ((difftime(now, last_backfill_time) < BACKFILL_INTERVAL) || + if ((difftime(now, last_backfill_time) < backfill_interval) || stop_backfill || (!_more_work())) continue; last_backfill_time = now; -- GitLab