From 9beeb3a6d3a3dec164f8567151bc3dd9894665a2 Mon Sep 17 00:00:00 2001 From: Morris Jette <jette@schedmd.com> Date: Fri, 4 Mar 2016 10:21:28 -0800 Subject: [PATCH] parsing of scheduling parameters These changes apply to both the main scheduling logic and backfill scheduler. If some SchedulerParameters value was configured, slurmctld started, the value then completely removed, and slurmctld reconfigured, the value would not be reset to its default value; instead the originally configured value would persist until slurmctld was restarted. --- src/plugins/sched/backfill/backfill.c | 136 ++++++++++++++++---------- src/slurmctld/job_scheduler.c | 84 +++++++++------- 2 files changed, 138 insertions(+), 82 deletions(-) diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c index b467c9d09f6..e6edcfb7b40 100644 --- a/src/plugins/sched/backfill/backfill.c +++ b/src/plugins/sched/backfill/backfill.c @@ -501,113 +501,151 @@ static void _load_config(void) sched_params = slurm_get_sched_params(); debug_flags = slurm_get_debug_flags(); - if (sched_params && (tmp_ptr=strstr(sched_params, "bf_interval="))) + if (sched_params && (tmp_ptr = strstr(sched_params, "bf_interval="))) { backfill_interval = atoi(tmp_ptr + 12); - if (backfill_interval < 1) { - error("Invalid SchedulerParameters bf_interval: %d", - backfill_interval); + if (backfill_interval < 1) { + error("Invalid SchedulerParameters bf_interval: %d", + backfill_interval); + backfill_interval = BACKFILL_INTERVAL; + } + } else { backfill_interval = BACKFILL_INTERVAL; } - if (sched_params && (tmp_ptr=strstr(sched_params, "bf_window="))) + if (sched_params && (tmp_ptr = strstr(sched_params, "bf_window="))) { backfill_window = atoi(tmp_ptr + 10) * 60; /* mins to secs */ - if (backfill_window < 1) { - error("Invalid SchedulerParameters bf_window: %d", - backfill_window); + if (backfill_window < 1) { + error("Invalid SchedulerParameters bf_window: %d", + backfill_window); + backfill_window = BACKFILL_WINDOW; 
+ } + } else { backfill_window = BACKFILL_WINDOW; } /* "max_job_bf" replaced by "bf_max_job_test" in version 14.03 and * can be removed later. Only "bf_max_job_test" is documented. */ - if (sched_params && (tmp_ptr=strstr(sched_params, "max_job_bf="))) - max_backfill_job_cnt = atoi(tmp_ptr + 11); if (sched_params && (tmp_ptr=strstr(sched_params, "bf_max_job_test="))) max_backfill_job_cnt = atoi(tmp_ptr + 16); + else if (sched_params && (tmp_ptr=strstr(sched_params, "max_job_bf="))) + max_backfill_job_cnt = atoi(tmp_ptr + 11); + else + max_backfill_job_cnt = 100; if (max_backfill_job_cnt < 1) { error("Invalid SchedulerParameters bf_max_job_test: %d", max_backfill_job_cnt); - max_backfill_job_cnt = 50; + max_backfill_job_cnt = 100; } - if (sched_params && (tmp_ptr=strstr(sched_params, "bf_resolution="))) + if (sched_params && (tmp_ptr=strstr(sched_params, "bf_resolution="))) { backfill_resolution = atoi(tmp_ptr + 14); - if (backfill_resolution < 1) { - error("Invalid SchedulerParameters bf_resolution: %d", - backfill_resolution); + if (backfill_resolution < 1) { + error("Invalid SchedulerParameters bf_resolution: %d", + backfill_resolution); + backfill_resolution = BACKFILL_RESOLUTION; + } + } else { backfill_resolution = BACKFILL_RESOLUTION; } if (sched_params && - (tmp_ptr=strstr(sched_params, "bf_max_job_array_resv="))) + (tmp_ptr = strstr(sched_params, "bf_max_job_array_resv="))) { bf_max_job_array_resv = atoi(tmp_ptr + 22); - if (bf_max_job_array_resv < 0) { - error("Invalid SchedulerParameters bf_max_job_array_resv: %d", - bf_max_job_array_resv); + if (bf_max_job_array_resv < 0) { + error("Invalid SchedulerParameters bf_max_job_array_resv: %d", + bf_max_job_array_resv); + bf_max_job_array_resv = BF_MAX_JOB_ARRAY_RESV; + } + } else { bf_max_job_array_resv = BF_MAX_JOB_ARRAY_RESV; } - if (sched_params && (tmp_ptr=strstr(sched_params, "bf_max_job_part="))) + if (sched_params && + (tmp_ptr = strstr(sched_params, "bf_max_job_part="))) { max_backfill_job_per_part = 
atoi(tmp_ptr + 16); - if (max_backfill_job_per_part < 0) { - error("Invalid SchedulerParameters bf_max_job_part: %d", - max_backfill_job_per_part); + if (max_backfill_job_per_part < 0) { + error("Invalid SchedulerParameters bf_max_job_part: %d", + max_backfill_job_per_part); + max_backfill_job_per_part = 0; + } + } else { max_backfill_job_per_part = 0; } - if (sched_params && (tmp_ptr=strstr(sched_params, "bf_max_job_start="))) + if (sched_params && + (tmp_ptr = strstr(sched_params, "bf_max_job_start="))) { max_backfill_jobs_start = atoi(tmp_ptr + 17); - if (max_backfill_jobs_start < 0) { - error("Invalid SchedulerParameters bf_max_job_start: %d", - max_backfill_jobs_start); + if (max_backfill_jobs_start < 0) { + error("Invalid SchedulerParameters bf_max_job_start: %d", + max_backfill_jobs_start); + max_backfill_jobs_start = 0; + } + } else { max_backfill_jobs_start = 0; } - if (sched_params && (tmp_ptr=strstr(sched_params, "bf_max_job_user="))) + if (sched_params && + (tmp_ptr = strstr(sched_params, "bf_max_job_user="))) { max_backfill_job_per_user = atoi(tmp_ptr + 16); - if (max_backfill_job_per_user < 0) { - error("Invalid SchedulerParameters bf_max_job_user: %d", - max_backfill_job_per_user); + if (max_backfill_job_per_user < 0) { + error("Invalid SchedulerParameters bf_max_job_user: %d", + max_backfill_job_per_user); + max_backfill_job_per_user = 0; + } + } else { max_backfill_job_per_user = 0; } if (sched_params && - (tmp_ptr=strstr(sched_params, "bf_min_age_reserve="))) + (tmp_ptr = strstr(sched_params, "bf_min_age_reserve="))) { bf_min_age_reserve = atoi(tmp_ptr + 19); - else - bf_min_age_reserve = 0; - if (bf_min_age_reserve < 0) { - error("Invalid SchedulerParameters bf_min_age_reserve: %d", - bf_min_age_reserve); + if (bf_min_age_reserve < 0) { + error("Invalid SchedulerParameters bf_min_age_reserve: %d", + bf_min_age_reserve); + bf_min_age_reserve = 0; + } + } else { bf_min_age_reserve = 0; } - /* bf_continue makes backfill continue where it was if 
interrupted - */ + /* bf_continue makes backfill continue where it was if interrupted */ if (sched_params && (strstr(sched_params, "bf_continue"))) { backfill_continue = true; + } else { + backfill_continue = false; } - if (sched_params && (tmp_ptr=strstr(sched_params, "bf_yield_interval="))) + if (sched_params && + (tmp_ptr = strstr(sched_params, "bf_yield_interval="))) { sched_timeout = atoi(tmp_ptr + 18); - if (sched_timeout <= 0) { - error("Invalid backfill scheduler bf_sched_timeout: %d", - sched_timeout); + if (sched_timeout <= 0) { + error("Invalid backfill scheduler bf_yield_interval: %d", + sched_timeout); + sched_timeout = SCHED_TIMEOUT; + } + } else { sched_timeout = SCHED_TIMEOUT; } - if (sched_params && (tmp_ptr=strstr(sched_params, "bf_yield_sleep="))) + if (sched_params && + (tmp_ptr = strstr(sched_params, "bf_yield_sleep="))) { yield_sleep = atoi(tmp_ptr + 15); - if (yield_sleep <= 0) { - error("Invalid backfill scheduler bf_yield_sleep: %d", - yield_sleep); + if (yield_sleep <= 0) { + error("Invalid backfill scheduler bf_yield_sleep: %d", + yield_sleep); + yield_sleep = YIELD_SLEEP; + } + } else { yield_sleep = YIELD_SLEEP; } - if (sched_params && (tmp_ptr=strstr(sched_params, "max_rpc_cnt="))) + if (sched_params && (tmp_ptr = strstr(sched_params, "max_rpc_cnt="))) defer_rpc_cnt = atoi(tmp_ptr + 12); else if (sched_params && - (tmp_ptr=strstr(sched_params, "max_rpc_count="))) + (tmp_ptr = strstr(sched_params, "max_rpc_count="))) defer_rpc_cnt = atoi(tmp_ptr + 14); + else + defer_rpc_cnt = 0; if (defer_rpc_cnt < 0) { error("Invalid SchedulerParameters max_rpc_cnt: %d", defer_rpc_cnt); diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 7bb5e50af70..134214df9e0 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -1124,12 +1124,14 @@ static int _schedule(uint32_t job_limit) assoc_limit_continue = false; if (sched_params && - (tmp_ptr=strstr(sched_params, "batch_sched_delay="))) - /* 
012345678901234567 */ + (tmp_ptr=strstr(sched_params, "batch_sched_delay="))) { batch_sched_delay = atoi(tmp_ptr + 18); - if (batch_sched_delay < 0) { - error("Invalid batch_sched_delay: %d", - batch_sched_delay); + if (batch_sched_delay < 0) { + error("Invalid batch_sched_delay: %d", + batch_sched_delay); + batch_sched_delay = 3; + } + } else { batch_sched_delay = 3; } @@ -1143,44 +1145,51 @@ static int _schedule(uint32_t job_limit) } if (sched_params && - (tmp_ptr=strstr(sched_params, "build_queue_timeout="))) - /* 01234567890123456789 */ + (tmp_ptr = strstr(sched_params, "build_queue_timeout="))) { build_queue_timeout = atoi(tmp_ptr + 20); - if (build_queue_timeout < 100) { - error("Invalid build_queue_time: %d", - build_queue_timeout); + if (build_queue_timeout < 100) { + error("Invalid build_queue_time: %d", + build_queue_timeout); + build_queue_timeout = BUILD_TIMEOUT; + } + } else { build_queue_timeout = BUILD_TIMEOUT; } if (sched_params && (tmp_ptr = strstr(sched_params, "default_queue_depth="))) { - /* 01234567890123456789 */ - i = atoi(tmp_ptr + 20); - if (i < 0) { + def_job_limit = atoi(tmp_ptr + 20); + if (def_job_limit < 0) { error("ignoring SchedulerParameters: " - "default_queue_depth value of %d", i); - } else { - def_job_limit = i; + "default_queue_depth value of %d", + def_job_limit); + def_job_limit = 100; } + } else { + def_job_limit = 100; } if (sched_params && - (tmp_ptr=strstr(sched_params, "partition_job_depth="))) { - /* 01234567890123456789 */ - i = atoi(tmp_ptr + 20); - if (i < 0) { + (tmp_ptr = strstr(sched_params, "partition_job_depth="))) { + max_jobs_per_part = atoi(tmp_ptr + 20); + if (max_jobs_per_part < 0) { error("ignoring SchedulerParameters: " - "partition_job_depth value of %d", i); - } else { - max_jobs_per_part = i; + "partition_job_depth value of %d", + max_jobs_per_part); + max_jobs_per_part = 0; } + } else { + max_jobs_per_part = 0; } + if (sched_params && - (tmp_ptr=strstr(sched_params, "max_rpc_cnt="))) + (tmp_ptr = 
strstr(sched_params, "max_rpc_cnt="))) defer_rpc_cnt = atoi(tmp_ptr + 12); else if (sched_params && - (tmp_ptr=strstr(sched_params, "max_rpc_count="))) + (tmp_ptr = strstr(sched_params, "max_rpc_count="))) defer_rpc_cnt = atoi(tmp_ptr + 14); + else + defer_rpc_cnt = 0; if (defer_rpc_cnt < 0) { error("Invalid max_rpc_cnt: %d", defer_rpc_cnt); defer_rpc_cnt = 0; @@ -1188,7 +1197,7 @@ static int _schedule(uint32_t job_limit) time_limit = slurm_get_msg_timeout() / 2; if (sched_params && - (tmp_ptr=strstr(sched_params, "max_sched_time="))) { + (tmp_ptr = strstr(sched_params, "max_sched_time="))) { sched_timeout = atoi(tmp_ptr + 15); if ((sched_timeout <= 0) || (sched_timeout > time_limit)) { @@ -1196,6 +1205,8 @@ static int _schedule(uint32_t job_limit) sched_timeout); sched_timeout = 0; } + } else { + sched_timeout = 0; } if (sched_timeout == 0) { sched_timeout = MAX(time_limit, 1); @@ -1203,10 +1214,14 @@ static int _schedule(uint32_t job_limit) } if (sched_params && - (tmp_ptr=strstr(sched_params, "sched_interval="))) + (tmp_ptr = strstr(sched_params, "sched_interval="))) { sched_interval = atoi(tmp_ptr + 15); - if (sched_interval < 0) { - error("Invalid sched_interval: %d", sched_interval); + if (sched_interval < 0) { + error("Invalid sched_interval: %d", + sched_interval); + sched_interval = 60; + } + } else { sched_interval = 60; } @@ -1220,11 +1235,14 @@ static int _schedule(uint32_t job_limit) } if (sched_params && - (tmp_ptr=strstr(sched_params, "sched_max_job_start="))) + (tmp_ptr = strstr(sched_params, "sched_max_job_start="))) { sched_max_job_start = atoi(tmp_ptr + 20); - if (sched_max_job_start < 0) { - error("Invalid sched_max_job_start: %d", - sched_max_job_start); + if (sched_max_job_start < 0) { + error("Invalid sched_max_job_start: %d", + sched_max_job_start); + sched_max_job_start = 0; + } + } else { sched_max_job_start = 0; } -- GitLab