From 04fbf26a2254c8505a7111a66867acdef984ca6c Mon Sep 17 00:00:00 2001 From: Morris Jette <jette@schedmd.com> Date: Wed, 16 Jan 2013 11:53:18 -0800 Subject: [PATCH] Fix for scheduling batch jobs in multiple partitions Without this change a high priority batch job may not start at submit time. In addition, a pending job with multiple partitions may be cancelled when the scheduler runs if any of its partitions cannot be used by the job. --- src/slurmctld/job_scheduler.c | 6 ++++++ src/slurmctld/proc_req.c | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 03fd0d7cc15..cf740c0422a 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -1016,6 +1016,12 @@ next_part: part_ptr = (struct part_record *) launch_job(job_ptr); rebuild_job_part_list(job_ptr); job_cnt++; + } else if ((error_code == + ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE) && + job_ptr->part_ptr_list) { + debug("JobId=%u non-runnable in partition %s: %s", + job_ptr->job_id, job_ptr->part_ptr->name, + slurm_strerror(error_code)); } else if ((error_code != ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE) && (error_code != ESLURM_NODE_NOT_AVAIL) && diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 1d3b711177a..410d88299ca 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -2695,6 +2695,9 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) slurm_strerror(error_code)); slurm_send_rc_msg(msg, error_code); } else { + int sched_count = 1; /* Job count to attempt to schedule */ + if (job_ptr->part_ptr_list) + sched_count = list_count(job_ptr->part_ptr_list); info("_slurm_rpc_submit_batch_job JobId=%u %s", job_ptr->job_id, TIME_STR); /* send job_ID */ @@ -2709,7 +2712,7 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) * We also run schedule() even if this job could not start, * say due to a higher priority job, since the locks are * released above
and we might start some other job here. */ - schedule(1); /* has own locks */ + schedule(sched_count); /* has own locks */ schedule_job_save(); /* has own locks */ schedule_node_save(); /* has own locks */ } -- GitLab