diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c index b467c9d09f6f22bbb3f4bee1ca49f7a6ac706bed..e6edcfb7b40a6c323d813c3c5bcb2a593fd32774 100644 --- a/src/plugins/sched/backfill/backfill.c +++ b/src/plugins/sched/backfill/backfill.c @@ -501,113 +501,151 @@ static void _load_config(void) sched_params = slurm_get_sched_params(); debug_flags = slurm_get_debug_flags(); - if (sched_params && (tmp_ptr=strstr(sched_params, "bf_interval="))) + if (sched_params && (tmp_ptr = strstr(sched_params, "bf_interval="))) { backfill_interval = atoi(tmp_ptr + 12); - if (backfill_interval < 1) { - error("Invalid SchedulerParameters bf_interval: %d", - backfill_interval); + if (backfill_interval < 1) { + error("Invalid SchedulerParameters bf_interval: %d", + backfill_interval); + backfill_interval = BACKFILL_INTERVAL; + } + } else { backfill_interval = BACKFILL_INTERVAL; } - if (sched_params && (tmp_ptr=strstr(sched_params, "bf_window="))) + if (sched_params && (tmp_ptr = strstr(sched_params, "bf_window="))) { backfill_window = atoi(tmp_ptr + 10) * 60; /* mins to secs */ - if (backfill_window < 1) { - error("Invalid SchedulerParameters bf_window: %d", - backfill_window); + if (backfill_window < 1) { + error("Invalid SchedulerParameters bf_window: %d", + backfill_window); + backfill_window = BACKFILL_WINDOW; + } + } else { backfill_window = BACKFILL_WINDOW; } /* "max_job_bf" replaced by "bf_max_job_test" in version 14.03 and * can be removed later. Only "bf_max_job_test" is documented. */ - if (sched_params && (tmp_ptr=strstr(sched_params, "max_job_bf="))) - max_backfill_job_cnt = atoi(tmp_ptr + 11); if (sched_params && (tmp_ptr=strstr(sched_params, "bf_max_job_test="))) max_backfill_job_cnt = atoi(tmp_ptr + 16); + else if (sched_params && (tmp_ptr=strstr(sched_params, "max_job_bf="))) + max_backfill_job_cnt = atoi(tmp_ptr + 11); + else + max_backfill_job_cnt = 100; if (max_backfill_job_cnt < 1) { error("Invalid SchedulerParameters bf_max_job_test: %d", max_backfill_job_cnt); - max_backfill_job_cnt = 50; + max_backfill_job_cnt = 100; } - if (sched_params && (tmp_ptr=strstr(sched_params, "bf_resolution="))) + if (sched_params && (tmp_ptr=strstr(sched_params, "bf_resolution="))) { backfill_resolution = atoi(tmp_ptr + 14); - if (backfill_resolution < 1) { - error("Invalid SchedulerParameters bf_resolution: %d", - backfill_resolution); + if (backfill_resolution < 1) { + error("Invalid SchedulerParameters bf_resolution: %d", + backfill_resolution); + backfill_resolution = BACKFILL_RESOLUTION; + } + } else { backfill_resolution = BACKFILL_RESOLUTION; } if (sched_params && - (tmp_ptr=strstr(sched_params, "bf_max_job_array_resv="))) + (tmp_ptr = strstr(sched_params, "bf_max_job_array_resv="))) { bf_max_job_array_resv = atoi(tmp_ptr + 22); - if (bf_max_job_array_resv < 0) { - error("Invalid SchedulerParameters bf_max_job_array_resv: %d", - bf_max_job_array_resv); + if (bf_max_job_array_resv < 0) { + error("Invalid SchedulerParameters bf_max_job_array_resv: %d", + bf_max_job_array_resv); + bf_max_job_array_resv = BF_MAX_JOB_ARRAY_RESV; + } + } else { bf_max_job_array_resv = BF_MAX_JOB_ARRAY_RESV; } - if (sched_params && (tmp_ptr=strstr(sched_params, "bf_max_job_part="))) + if (sched_params && + (tmp_ptr = strstr(sched_params, "bf_max_job_part="))) { max_backfill_job_per_part = atoi(tmp_ptr + 16); - if (max_backfill_job_per_part < 0) { - error("Invalid SchedulerParameters bf_max_job_part: %d", - max_backfill_job_per_part); + if (max_backfill_job_per_part < 0) { + error("Invalid SchedulerParameters bf_max_job_part: %d", + max_backfill_job_per_part); + max_backfill_job_per_part = 0; + } + } else { max_backfill_job_per_part = 0; } - if (sched_params && (tmp_ptr=strstr(sched_params, "bf_max_job_start="))) + if (sched_params && + (tmp_ptr = strstr(sched_params, "bf_max_job_start="))) { max_backfill_jobs_start = atoi(tmp_ptr + 17); - if (max_backfill_jobs_start < 0) { - error("Invalid SchedulerParameters bf_max_job_start: %d", - max_backfill_jobs_start); + if (max_backfill_jobs_start < 0) { + error("Invalid SchedulerParameters bf_max_job_start: %d", + max_backfill_jobs_start); + max_backfill_jobs_start = 0; + } + } else { max_backfill_jobs_start = 0; } - if (sched_params && (tmp_ptr=strstr(sched_params, "bf_max_job_user="))) + if (sched_params && + (tmp_ptr = strstr(sched_params, "bf_max_job_user="))) { max_backfill_job_per_user = atoi(tmp_ptr + 16); - if (max_backfill_job_per_user < 0) { - error("Invalid SchedulerParameters bf_max_job_user: %d", - max_backfill_job_per_user); + if (max_backfill_job_per_user < 0) { + error("Invalid SchedulerParameters bf_max_job_user: %d", + max_backfill_job_per_user); + max_backfill_job_per_user = 0; + } + } else { max_backfill_job_per_user = 0; } if (sched_params && - (tmp_ptr=strstr(sched_params, "bf_min_age_reserve="))) + (tmp_ptr = strstr(sched_params, "bf_min_age_reserve="))) { bf_min_age_reserve = atoi(tmp_ptr + 19); - else - bf_min_age_reserve = 0; - if (bf_min_age_reserve < 0) { - error("Invalid SchedulerParameters bf_min_age_reserve: %d", - bf_min_age_reserve); + if (bf_min_age_reserve < 0) { + error("Invalid SchedulerParameters bf_min_age_reserve: %d", + bf_min_age_reserve); + bf_min_age_reserve = 0; + } + } else { bf_min_age_reserve = 0; } - /* bf_continue makes backfill continue where it was if interrupted - */ + /* bf_continue makes backfill continue where it was if interrupted */ if (sched_params && (strstr(sched_params, "bf_continue"))) { backfill_continue = true; + } else { + backfill_continue = false; } - if (sched_params && (tmp_ptr=strstr(sched_params, "bf_yield_interval="))) + if (sched_params && + (tmp_ptr = strstr(sched_params, "bf_yield_interval="))) { sched_timeout = atoi(tmp_ptr + 18); - if (sched_timeout <= 0) { - error("Invalid backfill scheduler bf_sched_timeout: %d", - sched_timeout); + if (sched_timeout <= 0) { + error("Invalid backfill scheduler bf_yield_interval: %d", + sched_timeout); + sched_timeout = SCHED_TIMEOUT; + } + } else { sched_timeout = SCHED_TIMEOUT; } - if (sched_params && (tmp_ptr=strstr(sched_params, "bf_yield_sleep="))) + if (sched_params && + (tmp_ptr = strstr(sched_params, "bf_yield_sleep="))) { yield_sleep = atoi(tmp_ptr + 15); - if (yield_sleep <= 0) { - error("Invalid backfill scheduler bf_yield_sleep: %d", - yield_sleep); + if (yield_sleep <= 0) { + error("Invalid backfill scheduler bf_yield_sleep: %d", + yield_sleep); + yield_sleep = YIELD_SLEEP; + } + } else { yield_sleep = YIELD_SLEEP; } - if (sched_params && (tmp_ptr=strstr(sched_params, "max_rpc_cnt="))) + if (sched_params && (tmp_ptr = strstr(sched_params, "max_rpc_cnt="))) defer_rpc_cnt = atoi(tmp_ptr + 12); else if (sched_params && - (tmp_ptr=strstr(sched_params, "max_rpc_count="))) + (tmp_ptr = strstr(sched_params, "max_rpc_count="))) defer_rpc_cnt = atoi(tmp_ptr + 14); + else + defer_rpc_cnt = 0; if (defer_rpc_cnt < 0) { error("Invalid SchedulerParameters max_rpc_cnt: %d", defer_rpc_cnt); diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 7bb5e50af70716055d331e0ec11743e9aaf11099..134214df9e08d60e8198c3dbb1df21e8e273b10f 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -1124,12 +1124,14 @@ static int _schedule(uint32_t job_limit) assoc_limit_continue = false; if (sched_params && - (tmp_ptr=strstr(sched_params, "batch_sched_delay="))) - /* 012345678901234567 */ + (tmp_ptr=strstr(sched_params, "batch_sched_delay="))) { batch_sched_delay = atoi(tmp_ptr + 18); - if (batch_sched_delay < 0) { - error("Invalid batch_sched_delay: %d", - batch_sched_delay); + if (batch_sched_delay < 0) { + error("Invalid batch_sched_delay: %d", + batch_sched_delay); + batch_sched_delay = 3; + } + } else { batch_sched_delay = 3; } @@ -1143,44 +1145,51 @@ static int _schedule(uint32_t job_limit) } if (sched_params && - (tmp_ptr=strstr(sched_params, "build_queue_timeout="))) - /* 01234567890123456789 */ + (tmp_ptr = strstr(sched_params, "build_queue_timeout="))) { build_queue_timeout = atoi(tmp_ptr + 20); - if (build_queue_timeout < 100) { - error("Invalid build_queue_time: %d", - build_queue_timeout); + if (build_queue_timeout < 100) { + error("Invalid build_queue_time: %d", + build_queue_timeout); + build_queue_timeout = BUILD_TIMEOUT; + } + } else { build_queue_timeout = BUILD_TIMEOUT; } if (sched_params && (tmp_ptr = strstr(sched_params, "default_queue_depth="))) { - /* 01234567890123456789 */ - i = atoi(tmp_ptr + 20); - if (i < 0) { + def_job_limit = atoi(tmp_ptr + 20); + if (def_job_limit < 0) { error("ignoring SchedulerParameters: " - "default_queue_depth value of %d", i); - } else { - def_job_limit = i; + "default_queue_depth value of %d", + def_job_limit); + def_job_limit = 100; } + } else { + def_job_limit = 100; } if (sched_params && - (tmp_ptr=strstr(sched_params, "partition_job_depth="))) { - /* 01234567890123456789 */ - i = atoi(tmp_ptr + 20); - if (i < 0) { + (tmp_ptr = strstr(sched_params, "partition_job_depth="))) { + max_jobs_per_part = atoi(tmp_ptr + 20); + if (max_jobs_per_part < 0) { error("ignoring SchedulerParameters: " - "partition_job_depth value of %d", i); - } else { - max_jobs_per_part = i; + "partition_job_depth value of %d", + max_jobs_per_part); + max_jobs_per_part = 0; } + } else { + max_jobs_per_part = 0; } + if (sched_params && - (tmp_ptr=strstr(sched_params, "max_rpc_cnt="))) + (tmp_ptr = strstr(sched_params, "max_rpc_cnt="))) defer_rpc_cnt = atoi(tmp_ptr + 12); else if (sched_params && - (tmp_ptr=strstr(sched_params, "max_rpc_count="))) + (tmp_ptr = strstr(sched_params, "max_rpc_count="))) defer_rpc_cnt = atoi(tmp_ptr + 14); + else + defer_rpc_cnt = 0; if (defer_rpc_cnt < 0) { error("Invalid max_rpc_cnt: %d", defer_rpc_cnt); defer_rpc_cnt = 0; @@ -1188,7 +1197,7 @@ static int _schedule(uint32_t job_limit) time_limit = slurm_get_msg_timeout() / 2; if (sched_params && - (tmp_ptr=strstr(sched_params, "max_sched_time="))) { + (tmp_ptr = strstr(sched_params, "max_sched_time="))) { sched_timeout = atoi(tmp_ptr + 15); if ((sched_timeout <= 0) || (sched_timeout > time_limit)) { @@ -1196,6 +1205,8 @@ static int _schedule(uint32_t job_limit) sched_timeout); sched_timeout = 0; } + } else { + sched_timeout = 0; } if (sched_timeout == 0) { sched_timeout = MAX(time_limit, 1); @@ -1203,10 +1214,14 @@ static int _schedule(uint32_t job_limit) } if (sched_params && - (tmp_ptr=strstr(sched_params, "sched_interval="))) + (tmp_ptr = strstr(sched_params, "sched_interval="))) { sched_interval = atoi(tmp_ptr + 15); - if (sched_interval < 0) { - error("Invalid sched_interval: %d", sched_interval); + if (sched_interval < 0) { + error("Invalid sched_interval: %d", + sched_interval); + sched_interval = 60; + } + } else { sched_interval = 60; } @@ -1220,11 +1235,14 @@ static int _schedule(uint32_t job_limit) } if (sched_params && - (tmp_ptr=strstr(sched_params, "sched_max_job_start="))) + (tmp_ptr = strstr(sched_params, "sched_max_job_start="))) { sched_max_job_start = atoi(tmp_ptr + 20); - if (sched_max_job_start < 0) { - error("Invalid sched_max_job_start: %d", - sched_max_job_start); + if (sched_max_job_start < 0) { + error("Invalid sched_max_job_start: %d", + sched_max_job_start); + sched_max_job_start = 0; + } + } else { sched_max_job_start = 0; }