From a3311b1711fff20e1905a1a2ce77fb4ff03bd1dc Mon Sep 17 00:00:00 2001 From: Morris Jette <jette@schedmd.com> Date: Thu, 27 Jun 2013 07:10:37 -0700 Subject: [PATCH] Serialze some more slurmctld calls for better overall performance This is extends the logic of commit ba58d59c2d091af37df38990b8ca32e2e6190807 to the following RPC types: job complete batch script complete and job step complete --- src/slurmctld/proc_req.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index df724410134..fab8975554a 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -1582,6 +1582,7 @@ static void _slurm_rpc_job_step_kill(slurm_msg_t * msg) * completion of a job allocation */ static void _slurm_rpc_complete_job_allocation(slurm_msg_t * msg) { + static int active_rpc_cnt = 0; int error_code = SLURM_SUCCESS; DEF_TIMERS; complete_job_allocation_msg_t *comp_msg = @@ -1599,6 +1600,7 @@ static void _slurm_rpc_complete_job_allocation(slurm_msg_t * msg) "uid=%u, JobId=%u rc=%d", uid, comp_msg->job_id, comp_msg->job_rc); + _throttle_start(&active_rpc_cnt); lock_slurmctld(job_write_lock); /* do RPC call */ @@ -1606,6 +1608,7 @@ static void _slurm_rpc_complete_job_allocation(slurm_msg_t * msg) error_code = job_complete(comp_msg->job_id, uid, job_requeue, false, comp_msg->job_rc); unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); END_TIMER2("_slurm_rpc_complete_job_allocation"); /* return result */ @@ -1626,6 +1629,7 @@ static void _slurm_rpc_complete_job_allocation(slurm_msg_t * msg) * completion of a batch script */ static void _slurm_rpc_complete_batch_script(slurm_msg_t * msg) { + static int active_rpc_cnt = 0; int error_code = SLURM_SUCCESS, i; DEF_TIMERS; complete_batch_script_msg_t *comp_msg = @@ -1658,7 +1662,7 @@ static void _slurm_rpc_complete_batch_script(slurm_msg_t * msg) return; } - + _throttle_start(&active_rpc_cnt); lock_slurmctld(job_write_lock); job_ptr = find_job_record(comp_msg->job_id); @@ -1773,6 +1777,7 @@ static void _slurm_rpc_complete_batch_script(slurm_msg_t * msg) comp_msg->job_rc); error_code = MAX(error_code, i); unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); #ifdef HAVE_BG if (block_desc.bg_block_id) { @@ -2564,6 +2569,7 @@ static void _slurm_rpc_shutdown_controller_immediate(slurm_msg_t * msg) * represent the termination of an entire job */ static void _slurm_rpc_step_complete(slurm_msg_t *msg) { + static int active_rpc_cnt = 0; int error_code = SLURM_SUCCESS, rc, rem; uint32_t step_rc; DEF_TIMERS; @@ -2583,12 +2589,14 @@ static void _slurm_rpc_step_complete(slurm_msg_t *msg) req->range_first, req->range_last, req->step_rc, uid); + _throttle_start(&active_rpc_cnt); lock_slurmctld(job_write_lock); rc = step_partial_comp(req, uid, &rem, &step_rc); if (rc || rem) { /* some error or not totally done */ /* Note: Error printed within step_partial_comp */ unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); slurm_send_rc_msg(msg, rc); if (!rc) /* partition completion */ schedule_job_save(); /* Has own locking */ @@ -2600,6 +2608,7 @@ static void _slurm_rpc_step_complete(slurm_msg_t *msg) error_code = job_complete(req->job_id, uid, job_requeue, false, step_rc); unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); END_TIMER2("_slurm_rpc_step_complete"); /* return result */ @@ -2617,6 +2626,7 @@ static void _slurm_rpc_step_complete(slurm_msg_t *msg) error_code = job_step_complete(req->job_id, req->job_step_id, uid, job_requeue, step_rc); unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); END_TIMER2("_slurm_rpc_step_complete"); /* return result */ -- GitLab