From 97aedaf890f603275b560180f4e3c60f2f787c85 Mon Sep 17 00:00:00 2001 From: Morris Jette <jette@schedmd.com> Date: Tue, 28 Apr 2015 08:07:38 -0700 Subject: [PATCH] minor updates to sched_min_interval logic Minor revisions to the logic and documentation of commit 266246025044bdf4ed7c470f320b6c610f24d70e --- doc/man/man5/slurm.conf.5 | 4 +++- src/slurmctld/job_scheduler.c | 28 ++++++++++++++++------------ 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 2db1dfca395..8021179996d 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -2278,8 +2278,10 @@ The scheduler runs in a limited fashion every time that any event happens which could enable a job to start (e.g. job submit, job terminate, etc.). If these events happen at a high frequency, the scheduler can run very frequently and consume significant resources if not throttled by this option. +This option specifies the minimum time between the start of a scheduling +cycle, without considering how long the scheduling logic runs. A value of zero will disable throttling of the scheduling logic interval. -The default value is 10,000 microseconds. +The default value is zero microseconds (throttling is disabled). 
.RE .TP diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index b816d6b4fcf..c33e163e533 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -770,7 +770,7 @@ static bool _all_partition_priorities_same(void) */ extern int schedule(uint32_t job_limit) { - static int sched_job_limit = 1; + static int sched_job_limit = -1; int job_count = 0; struct timeval now; long delta_t; @@ -781,20 +781,23 @@ extern int schedule(uint32_t job_limit) } else { delta_t = (now.tv_sec - sched_last.tv_sec) * 1000000; delta_t += now.tv_usec - sched_last.tv_usec; - } slurm_mutex_lock(&sched_mutex); - if (job_limit == 0) - sched_job_limit = 0; /* unlimited */ - else if (job_limit > sched_job_limit) - sched_job_limit = job_limit; + if (sched_job_limit == 0) + ; /* leave unlimited */ + else if (job_limit == 0) + sched_job_limit = 0; /* set unlimited */ + else if (sched_job_limit == -1) + sched_job_limit = job_limit; /* set initial value */ + else + sched_job_limit += job_limit; /* test more jobs */ if (delta_t >= sched_min_interval) { sched_last.tv_sec = now.tv_sec; sched_last.tv_usec = now.tv_usec; job_limit = sched_job_limit; - sched_job_limit = 1; + sched_job_limit = -1; slurm_mutex_unlock(&sched_mutex); job_count = _schedule(job_limit); @@ -813,7 +816,7 @@ extern int schedule(uint32_t job_limit) error("pthread_create error %m"); } else sched_pend_thread = 1; - slurm_attr_destroy(&attr_agent); + slurm_attr_destroy(&attr_agent); slurm_mutex_unlock(&sched_mutex); } else { /* Nothing to do, agent already pending */ @@ -851,8 +854,8 @@ static void *_sched_agent(void *args) slurm_mutex_unlock(&sched_mutex); if (job_cnt) { /* jobs were started, save state */ - schedule_node_save(); /* Has own locking */ - schedule_job_save(); /* Has own locking */ + schedule_node_save(); /* Has own locking */ + schedule_job_save(); /* Has own locking */ } return NULL; @@ -1051,9 +1054,10 @@ static int _schedule(uint32_t job_limit) sched_update = 
slurmctld_conf.last_update; info("SchedulerParameters=default_queue_depth=%d," "max_rpc_cnt=%d,max_sched_time=%d,partition_job_depth=%d," - "sched_max_job_start=%d", + "sched_max_job_start=%d,sched_min_interval=%d", def_job_limit, defer_rpc_cnt, sched_timeout, - max_jobs_per_part, sched_max_job_start); + max_jobs_per_part, sched_max_job_start, + sched_min_interval); } if ((defer_rpc_cnt > 0) && -- GitLab