From 08b5678380abee81a57f6b4e5ee6e83ec7861c39 Mon Sep 17 00:00:00 2001 From: alejluther <alucero@os3sl.com> Date: Tue, 15 May 2012 16:20:39 +0300 Subject: [PATCH] Avoid a slurmctld crash on scheduling problems due to resources. Set an ADMIN hold instead. --- src/plugins/select/cons_res/dist_tasks.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/plugins/select/cons_res/dist_tasks.c b/src/plugins/select/cons_res/dist_tasks.c index f8b23adc2c6..d56af783f0d 100644 --- a/src/plugins/select/cons_res/dist_tasks.c +++ b/src/plugins/select/cons_res/dist_tasks.c @@ -499,7 +499,11 @@ static void _cyclic_sync_core_bitmap(struct job_record *job_ptr, } if (prev_cpus == cpus) { /* we're stuck! */ - fatal("cons_res: sync loop not progressing"); + job_ptr->priority = 0; + job_ptr->state_reason = WAIT_HELD; + error("cons_res: sync loop not progressing, " + "job %u", job_ptr->job_id); + return -1; } } /* clear the rest of the cores in each socket -- GitLab