diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 76b68003318f780f701a096f817ef914aae27c75..a3a6db3ee623118092c0eb2a5f80c0152dd81967 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -346,12 +346,11 @@ slurmctld_background ( void * no_data )
 		if ((now - last_sched_time) > PERIODIC_SCHEDULE) {
 			last_sched_time = now;
-			/* locking is done outside of schedule() because it is called
-			 * from other many functions that already have their locks set */
 			lock_slurmctld (job_write_lock);
 			purge_old_job ();	/* remove defunct job records */
-			schedule ();
 			unlock_slurmctld (job_write_lock);
+			if (schedule ())
+				last_checkpoint_time = 0;	/* force state save */
 		}
 
 		if (shutdown_time ||
 		    (now - last_checkpoint_time) > PERIODIC_CHECKPOINT) {
@@ -387,9 +386,9 @@ save_all_state ( void )
 	clock_t start_time;
 
 	start_time = clock ();
-	dump_all_node_state ( );
-	dump_all_part_state ( );
-	dump_all_job_state ( );
+	(void) dump_all_node_state ( );
+	(void) dump_all_part_state ( );
+	(void) dump_all_job_state ( );
 	info ("save_all_state complete, time=%ld",
 		(long) (clock () - start_time));
 }
@@ -708,6 +707,9 @@ slurm_rpc_job_step_cancel ( slurm_msg_t * msg )
 			info ("slurm_rpc_job_step_cancel success for JobId=%u, time=%ld",
 				job_step_id_msg->job_id, (long) (clock () - start_time));
 			slurm_send_rc_msg ( msg , SLURM_SUCCESS );
+			schedule ();
+			(void) dump_all_job_state ( );
+
 		}
 	}
 	else {
@@ -727,10 +729,9 @@ slurm_rpc_job_step_cancel ( slurm_msg_t * msg )
 				job_step_id_msg->job_id, job_step_id_msg->job_step_id,
 				(long) (clock () - start_time));
 			slurm_send_rc_msg ( msg , SLURM_SUCCESS );
+			(void) dump_all_job_state ( );
 		}
 	}
-
-	schedule();
 }
 
 /* slurm_rpc_job_step_complete - process RPC to note the completion an entire job or
@@ -761,6 +762,8 @@ slurm_rpc_job_step_complete ( slurm_msg_t * msg )
 			info ("slurm_rpc_job_step_complete success for JobId=%u, time=%ld",
 				job_step_id_msg->job_id, (long) (clock () - start_time));
 			slurm_send_rc_msg ( msg , SLURM_SUCCESS );
+			schedule ();
+			(void) dump_all_job_state ();
 		}
 	}
 	else {
@@ -780,10 +783,9 @@ slurm_rpc_job_step_complete ( slurm_msg_t * msg )
 				job_step_id_msg->job_id, job_step_id_msg->job_step_id,
 				(long) (clock () - start_time));
 			slurm_send_rc_msg ( msg , SLURM_SUCCESS );
+			(void) dump_all_job_state ( );
 		}
 	}
-
-	schedule();
 }
 
 void
@@ -858,9 +860,9 @@ slurm_rpc_update_job ( slurm_msg_t * msg )
 			job_desc_msg->job_id,
 			(long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , SLURM_SUCCESS );
+		schedule ();
+		(void) dump_all_job_state ();
 	}
-
-	schedule();
 }
 
 /* slurm_rpc_update_node - process RPC to update the configuration of a node (e.g. UP/DOWN) */
@@ -893,7 +895,9 @@ slurm_rpc_update_node ( slurm_msg_t * msg )
 			(long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , SLURM_SUCCESS );
 	}
-	schedule();
+	if (schedule ())
+		(void) dump_all_job_state ();
+	(void) dump_all_node_state ();
 }
 
 /* slurm_rpc_update_partition - process RPC to update the configuration of a partition (e.g. UP/DOWN) */
@@ -921,8 +925,10 @@ slurm_rpc_update_partition ( slurm_msg_t * msg )
 		info ("slurm_rpc_update_partition complete for partition %s, time=%ld",
 			part_desc_ptr->name, (long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , SLURM_SUCCESS );
+		(void) dump_all_part_state ();
+		if (schedule ())
+			(void) dump_all_job_state ();
 	}
-	schedule();
 }
 
 /* slurm_rpc_submit_batch_job - process RPC to submit a batch job */
@@ -961,7 +967,8 @@ slurm_rpc_submit_batch_job ( slurm_msg_t * msg )
 		response_msg . msg_type = RESPONSE_SUBMIT_BATCH_JOB ;
 		response_msg . data = & submit_msg ;
 		slurm_send_node_msg ( msg->conn_fd , & response_msg ) ;
-		schedule();
+		schedule ();
+		(void) dump_all_job_state ();
 	}
 }
 
@@ -1013,6 +1020,7 @@ slurm_rpc_allocate_resources ( slurm_msg_t * msg , uint8_t immediate )
 		response_msg . data = & alloc_msg ;
 		slurm_send_node_msg ( msg->conn_fd , & response_msg ) ;
+		(void) dump_all_job_state ( );
 	}
 }
 
@@ -1080,8 +1088,7 @@ slurm_rpc_allocate_and_run ( slurm_msg_t * msg )
 		response_msg . data = & alloc_msg ;
 		slurm_send_node_msg ( msg->conn_fd , & response_msg ) ;
-
-		schedule ();
+		(void) dump_all_job_state ( );
 	}
 }
 
@@ -1148,9 +1155,9 @@ slurm_rpc_reconfigure_controller ( slurm_msg_t * msg )
 		info ("slurm_rpc_reconfigure_controller completed successfully, time=%ld",
 			(long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , SLURM_SUCCESS );
+		schedule ();
+		save_all_state ();
 	}
-
-	schedule();
 }
 
 
@@ -1218,10 +1225,8 @@ slurm_rpc_job_step_create( slurm_msg_t* msg )
 		resp. data = &job_step_resp  ;
 		slurm_send_node_msg ( msg->conn_fd , &resp);
+		(void) dump_all_job_state ( );
 	}
-
-	schedule();
-
 }
 
 /* slurm_rpc_node_registration - process RPC to determine if a node's actual configuration satisfies the
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index 5f297a04cf50520db9a8b97188506eab8adc2fce..b3ccf3005622115ba64c8cf53e8b7bfded34aafb 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -35,6 +35,7 @@
 #include <string.h>
 
 #include <src/common/list.h>
+#include <src/slurmctld/locks.h>
 #include <src/slurmctld/slurmctld.h>
 
 struct job_queue {
@@ -92,6 +93,7 @@ build_job_queue (struct job_queue **job_queue)
  * schedule - attempt to schedule all pending jobs
  *	pending jobs for each partition will be scheduled in priority
  *	order until a request fails
+ * output: returns count of jobs scheduled
  * global: job_list - global list of job records
  *	last_job_update - time of last update to job table
  * Note: We re-build the queue every time. Jobs can not only be added
@@ -99,17 +101,22 @@ build_job_queue (struct job_queue **job_queue)
  *	changed with the update_job RPC. In general nodes will be in priority
  *	order (by submit time), so the sorting should be pretty fast.
  */
-void
-schedule()
+int
+schedule (void)
 {
 	struct job_queue *job_queue;
-	int i, j, error_code, failed_part_cnt, job_queue_size;
+	int i, j, error_code, failed_part_cnt, job_queue_size, job_cnt = 0;
 	struct job_record *job_ptr;
 	struct part_record **failed_parts;
+	/* Locks: Write job, write node, read partition */
+	slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };
 
+	lock_slurmctld (job_write_lock);
 	job_queue_size = build_job_queue (&job_queue);
-	if (job_queue_size == 0)
-		return;
+	if (job_queue_size == 0) {
+		unlock_slurmctld (job_write_lock);
+		return 0;
+	}
 	sort_job_queue (job_queue, job_queue_size);
 
 	failed_part_cnt = 0;
@@ -131,6 +138,7 @@ schedule()
 			last_job_update = time (NULL);
 			info ("schedule: job_id %u on nodes %s",
 				job_ptr->job_id, job_ptr->nodes);
+			job_cnt++;
 		}
 		else {
 			info ("schedule: job_id %u non-runnable, errno %d",
@@ -146,6 +154,8 @@ schedule()
 	xfree(failed_parts);
 	if (job_queue)
 		xfree(job_queue);
+	unlock_slurmctld (job_write_lock);
+	return job_cnt;
 }
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index eab44e808c1332cd11c7a6ad6a46a7ed44cb09f9..22943d82a302dc24283f2ed3d175584e4e6fd4e4 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -46,7 +46,7 @@
 #include <src/common/xmalloc.h>
 
 /* Perform full slurmctld's state every PERIODIC_CHECKPOINT seconds */
-#define	PERIODIC_CHECKPOINT	60
+#define	PERIODIC_CHECKPOINT	300
 
 /* Attempt to schedule jobs every PERIODIC_SCHEDULE seconds despite any RPC activity
  * This will catch any state transisions that may have otherwise been missed */
@@ -457,7 +457,7 @@ extern void reset_job_bitmaps ();
 extern int rmdir2 (char * path);
 
 /* schedule - attempt to schedule all pending jobs */
-void schedule();
+extern int schedule (void);
 
 /* select_nodes - select and allocate nodes to a specific job */
 extern int select_nodes (struct job_record *job_ptr, int test_only);
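Note: a minimal sketch, not part of the patch, of the calling convention it establishes. schedule() now acquires the job/node/partition locks itself and returns the number of jobs it started, so callers such as slurm_rpc_update_node, slurm_rpc_update_partition, and the slurmctld_background loop save state only when that count is non-zero. The handler name example_rpc_handler below is hypothetical; slurm_msg_t, slurm_send_rc_msg(), SLURM_SUCCESS, schedule(), and dump_all_job_state() are the identifiers used in the patch.

#include <src/slurmctld/slurmctld.h>	/* declares schedule(); assumed to also
					 * declare dump_all_job_state() */

/* Hypothetical RPC handler illustrating the pattern introduced above:
 * no external locking around schedule(), and the job state file is
 * rewritten only when at least one job was actually started. */
static void example_rpc_handler (slurm_msg_t *msg)
{
	/* ... decode the request and update the job table here ... */

	slurm_send_rc_msg (msg, SLURM_SUCCESS);	/* reply to the client */

	if (schedule ())			/* count of jobs scheduled */
		(void) dump_all_job_state ();	/* checkpoint only on change */
}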