Skip to content
Snippets Groups Projects
Commit 4824daf0 authored by Morris Jette
Browse files

Backfill logging

Log not only the count of jobs tested since the last time locks
were released, but also the total job count since the backfill
scheduler started. The two counts are printed as "total(since last
lock release)", e.g. "after testing 250(50) jobs".
parent 863ecabe
No related branches found
No related tags found
No related merge requests found
...@@ -660,7 +660,10 @@ static int _attempt_backfill(void) ...@@ -660,7 +660,10 @@ static int _attempt_backfill(void)
job_queue = build_job_queue(true, true); job_queue = build_job_queue(true, true);
if (list_count(job_queue) == 0) { if (list_count(job_queue) == 0) {
debug("backfill: no jobs to backfill"); if (debug_flags & DEBUG_FLAG_BACKFILL)
info("backfill: no jobs to backfill");
else
debug("backfill: no jobs to backfill");
list_destroy(job_queue); list_destroy(job_queue);
return 0; return 0;
} }
...@@ -716,7 +719,8 @@ static int _attempt_backfill(void) ...@@ -716,7 +719,8 @@ static int _attempt_backfill(void)
if (debug_flags & DEBUG_FLAG_BACKFILL) { if (debug_flags & DEBUG_FLAG_BACKFILL) {
END_TIMER; END_TIMER;
info("backfill: completed yielding locks " info("backfill: completed yielding locks "
"after testing %d jobs, %s", "after testing %u(%d) jobs, %s",
slurmctld_diag_stats.bf_last_depth,
job_test_count, TIME_STR); job_test_count, TIME_STR);
} }
if ((_yield_locks(yield_sleep) && !backfill_continue) || if ((_yield_locks(yield_sleep) && !backfill_continue) ||
...@@ -724,8 +728,10 @@ static int _attempt_backfill(void) ...@@ -724,8 +728,10 @@ static int _attempt_backfill(void)
(last_part_update != part_update)) { (last_part_update != part_update)) {
if (debug_flags & DEBUG_FLAG_BACKFILL) { if (debug_flags & DEBUG_FLAG_BACKFILL) {
info("backfill: system state changed, " info("backfill: system state changed, "
"breaking out after testing %d " "breaking out after testing "
"jobs", job_test_count); "%u(%d) jobs",
slurmctld_diag_stats.bf_last_depth,
job_test_count);
} }
rc = 1; rc = 1;
xfree(job_queue_rec); xfree(job_queue_rec);
...@@ -750,7 +756,10 @@ static int _attempt_backfill(void) ...@@ -750,7 +756,10 @@ static int _attempt_backfill(void)
} }
orig_time_limit = job_ptr->time_limit; orig_time_limit = job_ptr->time_limit;
part_ptr = job_queue_rec->part_ptr; part_ptr = job_queue_rec->part_ptr;
job_test_count++; job_test_count++;
slurmctld_diag_stats.bf_last_depth++;
already_counted = false;
xfree(job_queue_rec); xfree(job_queue_rec);
if (!IS_JOB_PENDING(job_ptr)) if (!IS_JOB_PENDING(job_ptr))
...@@ -770,9 +779,6 @@ static int _attempt_backfill(void) ...@@ -770,9 +779,6 @@ static int _attempt_backfill(void)
if (debug_flags & DEBUG_FLAG_BACKFILL) if (debug_flags & DEBUG_FLAG_BACKFILL)
info("backfill test for job %u", job_ptr->job_id); info("backfill test for job %u", job_ptr->job_id);
slurmctld_diag_stats.bf_last_depth++;
already_counted = false;
if (max_backfill_job_per_part) { if (max_backfill_job_per_part) {
bool skip_job = false; bool skip_job = false;
for (j = 0; j < bf_parts; j++) { for (j = 0; j < bf_parts; j++) {
...@@ -903,7 +909,8 @@ static int _attempt_backfill(void) ...@@ -903,7 +909,8 @@ static int _attempt_backfill(void)
if (debug_flags & DEBUG_FLAG_BACKFILL) { if (debug_flags & DEBUG_FLAG_BACKFILL) {
END_TIMER; END_TIMER;
info("backfill: completed yielding locks " info("backfill: completed yielding locks "
"after testing %d jobs, %s", "after testing %u(%d) jobs, %s",
slurmctld_diag_stats.bf_last_depth,
job_test_count, TIME_STR); job_test_count, TIME_STR);
} }
if ((_yield_locks(yield_sleep) && !backfill_continue) || if ((_yield_locks(yield_sleep) && !backfill_continue) ||
...@@ -911,8 +918,10 @@ static int _attempt_backfill(void) ...@@ -911,8 +918,10 @@ static int _attempt_backfill(void)
(last_part_update != part_update)) { (last_part_update != part_update)) {
if (debug_flags & DEBUG_FLAG_BACKFILL) { if (debug_flags & DEBUG_FLAG_BACKFILL) {
info("backfill: system state changed, " info("backfill: system state changed, "
"breaking out after testing %d " "breaking out after testing "
"jobs", job_test_count); "%u(%d) jobs",
slurmctld_diag_stats.bf_last_depth,
job_test_count);
} }
rc = 1; rc = 1;
break; break;
...@@ -1165,7 +1174,8 @@ static int _attempt_backfill(void) ...@@ -1165,7 +1174,8 @@ static int _attempt_backfill(void)
_do_diag_stats(&bf_time1, &bf_time2, yield_sleep); _do_diag_stats(&bf_time1, &bf_time2, yield_sleep);
if (debug_flags & DEBUG_FLAG_BACKFILL) { if (debug_flags & DEBUG_FLAG_BACKFILL) {
END_TIMER; END_TIMER;
info("backfill: completed testing %d jobs, %s", info("backfill: completed testing %u(%d) jobs, %s",
slurmctld_diag_stats.bf_last_depth,
job_test_count, TIME_STR); job_test_count, TIME_STR);
} }
return rc; return rc;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment