diff --git a/NEWS b/NEWS index 42258c6ca67b8536d801d04d4bbb79f69a054c9b..de614dc0ba210674612e0e1c8459459d87265273 100644 --- a/NEWS +++ b/NEWS @@ -106,6 +106,9 @@ documents those changes that are of interest to users and admins. -- Fix Lua link order, patch from Pär Andersson, NSC. -- Set SLURM_CPUS_PER_TASK=1 when user specifies --cpus-per-task=1. -- Fix for fatal error managing GRES. Patch by Carles Fenoy, BSC. + -- Fixed race condition when using the DBD in accounting where if a job + wasn't started at the time the eligible message was sent but started + before the db_index was returned information like start time would be lost. * Changes in SLURM 2.3.1 ======================== diff --git a/src/plugins/accounting_storage/mysql/as_mysql_job.c b/src/plugins/accounting_storage/mysql/as_mysql_job.c index 5c3df1d1d5596394915c532ca5fbd977b6eebffd..c5a37deaa485498245f460cbcca194477364c0d9 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_job.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_job.c @@ -236,33 +236,40 @@ extern int as_mysql_job_start(mysql_conn_t *mysql_conn, job_state = job_ptr->job_state; + if (job_ptr->resize_time) { + begin_time = job_ptr->resize_time; + submit_time = job_ptr->resize_time; + start_time = job_ptr->resize_time; + } else { + begin_time = job_ptr->details->begin_time; + submit_time = job_ptr->details->submit_time; + start_time = job_ptr->start_time; + } + /* Since we need a new db_inx make sure the old db_inx * removed. This is most likely the only time we are going to * be notified of the change also so make the state without * the resize. */ if (IS_JOB_RESIZING(job_ptr)) { /* If we have a db_index lets end the previous record. */ - if (job_ptr->db_index) - as_mysql_job_complete(mysql_conn, job_ptr); - else + if (!job_ptr->db_index) { error("We don't have a db_index for job %u, " "this should never happen.", job_ptr->job_id); + job_ptr->db_index = _get_db_index(mysql_conn, + submit_time, + job_ptr->job_id, + job_ptr->assoc_id); + } + + if (job_ptr->db_index) + as_mysql_job_complete(mysql_conn, job_ptr); + job_state &= (~JOB_RESIZING); job_ptr->db_index = 0; } job_state &= JOB_STATE_BASE; - if (job_ptr->resize_time) { - begin_time = job_ptr->resize_time; - submit_time = job_ptr->resize_time; - start_time = job_ptr->resize_time; - } else { - begin_time = job_ptr->details->begin_time; - submit_time = job_ptr->details->submit_time; - start_time = job_ptr->start_time; - } - /* See what we are hearing about here if no start time. If * this job latest time is before the last roll up we will * need to reset it to look at this job. */ diff --git a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c index e77bb0f11a7dd2311420fce8148828cd4572f0df..0505dc5811563e2119d0b06bfb7e4b11e31c2e46 100644 --- a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c +++ b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c @@ -202,9 +202,6 @@ static void *_set_db_inx_thread(void *no_data) { struct job_record *job_ptr = NULL; ListIterator itr; - /* Read lock on jobs */ - slurmctld_lock_t job_read_lock = - { NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK }; /* Write lock on jobs */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK }; @@ -231,7 +228,7 @@ static void *_set_db_inx_thread(void *no_data) * faster and not lock up the * controller waiting for the db inx * back from the database. */ - lock_slurmctld(job_read_lock); + lock_slurmctld(job_write_lock); itr = list_iterator_create(job_list); while ((job_ptr = list_next(itr))) { if (!job_ptr->db_index) { @@ -242,6 +239,23 @@ static void *_set_db_inx_thread(void *no_data) _partial_destroy_dbd_job_start(req); continue; } + + /* We set the db_index to NO_VAL here + * to avoid a potential race condition + * where at this moment in time the + * job is only eligible to run and + * before this call to the DBD returns, + * the job starts and needs to send + * the start message as well, but + * won't if the db_index is 0 + * resulting in lost information about + * the allocation. Setting + * it to NO_VAL will inform the DBD of + * this situation and it will handle + * it accordingly. + */ + job_ptr->db_index = NO_VAL; + /* we only want to destory the pointer here not the contents (except block_id) so call special function @@ -254,7 +268,7 @@ static void *_set_db_inx_thread(void *no_data) } } list_iterator_destroy(itr); - unlock_slurmctld(job_read_lock); + unlock_slurmctld(job_write_lock); if (local_job_list) { slurmdbd_msg_t req, resp; diff --git a/src/slurmdbd/proc_req.c b/src/slurmdbd/proc_req.c index dc3f934afdbe0cea44c9f22dc250faa7254c7780..db797e0aafb58458e8b070832ef07a7ebc74a071 100644 --- a/src/slurmdbd/proc_req.c +++ b/src/slurmdbd/proc_req.c @@ -1810,7 +1810,8 @@ static int _job_complete(slurmdbd_conn_t *slurmdbd_conn, job.assoc_id = job_comp_msg->assoc_id; job.comment = job_comp_msg->comment; - job.db_index = job_comp_msg->db_index; + if (job_comp_msg->db_index != NO_VAL) + job.db_index = job_comp_msg->db_index; job.derived_ec = job_comp_msg->derived_ec; job.end_time = job_comp_msg->end_time; job.exit_code = job_comp_msg->exit_code; @@ -1922,7 +1923,8 @@ static int _job_suspend(slurmdbd_conn_t *slurmdbd_conn, memset(&details, 0, sizeof(struct job_details)); job.assoc_id = job_suspend_msg->assoc_id; - job.db_index = job_suspend_msg->db_index; + if (job_suspend_msg->db_index != NO_VAL) + job.db_index = job_suspend_msg->db_index; job.job_id = job_suspend_msg->job_id; job.job_state = job_suspend_msg->job_state; details.submit_time = job_suspend_msg->submit_time; @@ -2615,7 +2617,8 @@ static void _process_job_start(slurmdbd_conn_t *slurmdbd_conn, job.account = _replace_double_quotes(job_start_msg->account); job.assoc_id = job_start_msg->assoc_id; job.comment = job_start_msg->block_id; - job.db_index = job_start_msg->db_index; + if (job_start_msg->db_index != NO_VAL) + job.db_index = job_start_msg->db_index; details.begin_time = job_start_msg->eligible_time; job.user_id = job_start_msg->uid; job.group_id = job_start_msg->gid; @@ -3474,7 +3477,8 @@ static int _step_complete(slurmdbd_conn_t *slurmdbd_conn, memset(&details, 0, sizeof(struct job_details)); job.assoc_id = step_comp_msg->assoc_id; - job.db_index = step_comp_msg->db_index; + if (step_comp_msg->db_index != NO_VAL) + job.db_index = step_comp_msg->db_index; job.end_time = step_comp_msg->end_time; step.exit_code = step_comp_msg->exit_code; step.jobacct = step_comp_msg->jobacct; @@ -3547,7 +3551,8 @@ static int _step_start(slurmdbd_conn_t *slurmdbd_conn, memset(&layout, 0, sizeof(slurm_step_layout_t)); job.assoc_id = step_start_msg->assoc_id; - job.db_index = step_start_msg->db_index; + if (step_start_msg->db_index != NO_VAL) + job.db_index = step_start_msg->db_index; job.job_id = step_start_msg->job_id; step.name = step_start_msg->name; job.nodes = step_start_msg->nodes; diff --git a/testsuite/expect/test1.27 b/testsuite/expect/test1.27 index b6f845a5a196e87f505b651c9f6e14f18674fe9a..6479df7ab315e2009ad33c46e0eaa0adb27ea456 100755 --- a/testsuite/expect/test1.27 +++ b/testsuite/expect/test1.27 @@ -66,7 +66,7 @@ array set good_vars { # Spawn a job via srun to print environment variables # set timeout $max_job_delay -set srun_pid [spawn $srun -N1 -n1 -t1 $bin_env] +set srun_pid [spawn $srun -N1 -n1 --cpus-per-task=1 -t1 $bin_env] expect { -re "(SLURM_$alpha_under)=($alpha_numeric_under)" { set found_vars($expect_out(1,string)) "$expect_out(2,string)"