diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index fab8975554a7927291da0e5de58950be63ee0071..50f5ea8835403065436f9654d4f046951b910f07 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -1443,6 +1443,8 @@ static void _slurm_rpc_dump_partitions(slurm_msg_t * msg)
  * the epilog denoting the completion of a job it its entirety */
 static void _slurm_rpc_epilog_complete(slurm_msg_t * msg)
 {
+	static time_t config_update = 0;
+	static bool defer_sched = false;
 	DEF_TIMERS;
 	/* Locks: Read configuration, write job, write node */
 	slurmctld_lock_t job_write_lock = {
@@ -1460,6 +1462,13 @@ static void _slurm_rpc_epilog_complete(slurm_msg_t * msg)
 		return;
 	}
 
+	if (config_update != slurmctld_conf.last_update) {
+		char *sched_params = slurm_get_sched_params();
+		defer_sched = (sched_params && strstr(sched_params, "defer"));
+		xfree(sched_params);
+		config_update = slurmctld_conf.last_update;
+	}
+
 	lock_slurmctld(job_write_lock);
 	if (job_epilog_complete(epilog_msg->job_id, epilog_msg->node_name,
 				epilog_msg->return_code))
@@ -1478,7 +1487,16 @@
 
 	/* Functions below provide their own locking */
 	if (run_scheduler) {
-		(void) schedule(0);
+		/*
+		 * In defer mode, avoid triggering the scheduler logic
+		 * for every epilog complete message.
+		 * As one epilog message is sent from every node of each
+		 * job at termination, the number of simultaneous schedule
+		 * calls can be very high for a large machine or a large
+		 * number of managed jobs.
+		 */
+		if (!defer_sched)
+			(void) schedule(0);
 		schedule_node_save();
 		schedule_job_save();
 	}
@@ -2739,6 +2757,8 @@ static void _slurm_rpc_step_update(slurm_msg_t *msg)
 /* _slurm_rpc_submit_batch_job - process RPC to submit a batch job */
 static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg)
 {
+	static time_t config_update = 0;
+	static bool defer_sched = false;
 	static int active_rpc_cnt = 0;
 	int error_code = SLURM_SUCCESS;
 	DEF_TIMERS;
@@ -2756,6 +2776,13 @@
 	START_TIMER;
 	debug2("Processing RPC: REQUEST_SUBMIT_BATCH_JOB from uid=%d", uid);
 
+	if (config_update != slurmctld_conf.last_update) {
+		char *sched_params = slurm_get_sched_params();
+		defer_sched = (sched_params && strstr(sched_params, "defer"));
+		xfree(sched_params);
+		config_update = slurmctld_conf.last_update;
+	}
+
 	slurm_msg_t_init(&response_msg);
 	response_msg.flags = msg->flags;
 	response_msg.protocol_version = msg->protocol_version;
@@ -2898,8 +2925,13 @@
 	 * to run the various prologs, boot the node, etc.
 	 * We also run schedule() even if this job could not start,
 	 * say due to a higher priority job, since the locks are
-	 * released above and we might start some other job here. */
-	schedule(schedule_cnt);	/* has own locks */
+	 * released above and we might start some other job here.
+	 *
+	 * In defer mode, avoid triggering the scheduler logic
+	 * for every batch job submission request.
+	 */
+	if (!defer_sched)
+		(void) schedule(schedule_cnt);	/* has own locks */
 	schedule_job_save();		/* has own locks */
 	schedule_node_save();		/* has own locks */
 }
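
For context, the `defer` flag tested above comes from the SchedulerParameters option in slurm.conf: `SchedulerParameters=defer` tells slurmctld to rely on the periodic scheduling passes rather than attempting to schedule on every event. The patch caches the parsed flag in function-local statics and only re-reads SchedulerParameters when slurmctld_conf.last_update changes, so the string lookup is not repeated for every RPC. The standalone sketch below illustrates that config-versioned caching pattern; `conf_last_update` and `get_sched_params_copy()` are hypothetical stand-ins for slurmctld_conf.last_update and slurm_get_sched_params(), and free() stands in for xfree() — this is not Slurm API.

```c
/*
 * Minimal standalone sketch of the config-versioned cache used in the
 * patch above.  conf_last_update and get_sched_params_copy() are
 * hypothetical stand-ins, not Slurm API.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* Stand-in for slurmctld_conf.last_update (bumped on reconfigure). */
static time_t conf_last_update = 1;

/* Stand-in for slurm_get_sched_params(): heap copy of SchedulerParameters. */
static char *get_sched_params_copy(void)
{
	const char *src = "defer";	/* e.g. SchedulerParameters=defer */
	char *copy = malloc(strlen(src) + 1);

	if (copy)
		strcpy(copy, src);
	return copy;
}

/* Re-parse the "defer" flag only when the configuration has changed. */
static bool sched_is_deferred(void)
{
	static time_t config_update = 0;
	static bool defer_sched = false;

	if (config_update != conf_last_update) {
		char *sched_params = get_sched_params_copy();

		defer_sched = (sched_params &&
			       strstr(sched_params, "defer"));
		free(sched_params);
		config_update = conf_last_update;
	}
	return defer_sched;
}

int main(void)
{
	/* The first call parses the string; later calls reuse the cached
	 * flag until conf_last_update changes. */
	printf("defer: %s\n", sched_is_deferred() ? "yes" : "no");
	printf("defer: %s\n", sched_is_deferred() ? "yes" : "no");
	return 0;
}
```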