Record the job-wide TRES allocation string (including consumed energy) in
accounting storage.

What the patch does:
  * step_mgr.c: on step completion, adds the step's consumed energy to the
    job's tres_alloc_cnt energy slot.  When the job is finished and this is
    the only step left in step_list, it rebuilds the job TRES string and sets
    TRES_STR_CALC in job_ptr->bit_flags.
  * job_mgr.c: before sending job-complete, rebuilds the job TRES string
    unless TRES_STR_CALC is already set.
  * accounting_storage_slurmdbd.c: sends tres_alloc_str with the job-complete
    message when TRES_STR_CALC is clear, and with the step-complete message
    (as job_tres_alloc_str) when it is set.
  * proc_req.c: copies the connection protocol version and the TRES strings
    from the DBD messages into the job record.
  * as_mysql_job.c: on step completion, updates tres_alloc in the job table
    when the job carries a TRES string.

Review fixes folded in:
  * TRES_STR_CALC is a bit in bit_flags (it is set with |=), so it is tested
    with bitwise '&'.  Logical '&&' would be true whenever any flag is set.
  * _internal_step_complete() checks tres_alloc_cnt for NULL before indexing
    it in the job-finished branch, as the energy-accumulation branch does.

NOTE(review): the energy slot is written through TRES_ARRAY_ENERGY but read
through TRES_ENERGY; confirm both name the same tres_alloc_cnt index.
NOTE(review): _load_job_state() now calls jobacct_storage_g_job_complete()
even when slurmctld_init_db is false; confirm this is intended.

diff --git a/src/plugins/accounting_storage/mysql/as_mysql_job.c b/src/plugins/accounting_storage/mysql/as_mysql_job.c
index c09a778b8474c105454cf80b5ce61ba13c1d637a..daa2431efa1ebd89bafd6881214da49057f4e3ac 100644
--- a/src/plugins/accounting_storage/mysql/as_mysql_job.c
+++ b/src/plugins/accounting_storage/mysql/as_mysql_job.c
@@ -1296,6 +1296,20 @@ extern int as_mysql_step_complete(mysql_conn_t *mysql_conn,
 	rc = mysql_db_query(mysql_conn, query);
 	xfree(query);
 
+	/* set the energy for the entire job. */
+	if (step_ptr->job_ptr->tres_alloc_str) {
+		query = xstrdup_printf(
+			"update \"%s_%s\" set tres_alloc='%s' where "
+			"job_db_inx=%"PRIu64,
+			mysql_conn->cluster_name, job_table,
+			step_ptr->job_ptr->tres_alloc_str,
+			step_ptr->job_ptr->db_index);
+		if (debug_flags & DEBUG_FLAG_DB_STEP)
+			DB_DEBUG(mysql_conn->conn, "query\n%s", query);
+		rc = mysql_db_query(mysql_conn, query);
+		xfree(query);
+	}
+
 	return rc;
 }
 
diff --git a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c
index 572c485e5c2d3119921f25eed30dd2b6b00fe9ed..edb8cfe4a862dc099cc4683a9c54fd95dd701270 100644
--- a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c
+++ b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c
@@ -2645,6 +2645,9 @@ extern int jobacct_storage_p_job_complete(void *db_conn,
 		req.submit_time = job_ptr->details->submit_time;
 	}
 
+	if (!(job_ptr->bit_flags & TRES_STR_CALC))
+		req.tres_alloc_str = job_ptr->tres_alloc_str;
+
 	msg.msg_type = DBD_JOB_COMPLETE;
 	msg.data = &req;
 
@@ -2738,6 +2741,7 @@ extern int jobacct_storage_p_step_start(void *db_conn,
 
 	req.total_tasks = tasks;
 	req.tres_alloc_str = step_ptr->tres_alloc_str;
+
 	req.req_cpufreq_min = step_ptr->cpu_freq_min;
 	req.req_cpufreq_max = step_ptr->cpu_freq_max;
 	req.req_cpufreq_gov = step_ptr->cpu_freq_gov;
@@ -2807,6 +2811,10 @@ extern int jobacct_storage_p_step_complete(void *db_conn,
 		req.job_submit_time = step_ptr->job_ptr->resize_time;
 	else if (step_ptr->job_ptr->details)
 		req.job_submit_time = step_ptr->job_ptr->details->submit_time;
+
+	if (step_ptr->job_ptr->bit_flags & TRES_STR_CALC)
+		req.job_tres_alloc_str = step_ptr->job_ptr->tres_alloc_str;
+
 	req.state = step_ptr->state;
 	req.step_id = step_ptr->step_id;
 	req.total_tasks = tasks;
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 62164459274d2521481a4fe34e6b64e5ca87b4c7..2e2fae18f3d05da955386ff8a4b77cf3ec6a0bc8 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -2202,9 +2202,13 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version)
 		/* make sure we have this job completed in the
 		 * database */
 		if (IS_JOB_FINISHED(job_ptr)) {
-			if (slurmctld_init_db)
-				jobacct_storage_g_job_complete(
-					acct_db_conn, job_ptr);
+			if (slurmctld_init_db &&
+			    !(job_ptr->bit_flags & TRES_STR_CALC) &&
+			    job_ptr->tres_alloc_cnt &&
+			    (job_ptr->tres_alloc_cnt[TRES_ENERGY] != NO_VAL64))
+				set_job_tres_alloc_str(job_ptr, false);
+			jobacct_storage_g_job_complete(
+				acct_db_conn, job_ptr);
 			job_finished = 1;
 		}
 	}
@@ -13325,6 +13329,11 @@ extern void job_completion_logger(struct job_record *job_ptr, bool requeue)
 	if (!with_slurmdbd && !job_ptr->db_index)
 		jobacct_storage_g_job_start(acct_db_conn, job_ptr);
 
+	if (!(job_ptr->bit_flags & TRES_STR_CALC) &&
+	    job_ptr->tres_alloc_cnt &&
+	    (job_ptr->tres_alloc_cnt[TRES_ENERGY] != NO_VAL64))
+		set_job_tres_alloc_str(job_ptr, false);
+
 	jobacct_storage_g_job_complete(acct_db_conn, job_ptr);
 }
 
@@ -15655,6 +15664,10 @@ extern bool job_hold_requeue(struct job_record *job_ptr)
 		return false;
 	/* Sent event requeue to the database.
 	 */
+	if (!(job_ptr->bit_flags & TRES_STR_CALC) &&
+	    job_ptr->tres_alloc_cnt &&
+	    (job_ptr->tres_alloc_cnt[TRES_ENERGY] != NO_VAL64))
+		set_job_tres_alloc_str(job_ptr, false);
 	jobacct_storage_g_job_complete(acct_db_conn, job_ptr);
 
 	debug("%s: job %u state 0x%x", __func__, job_ptr->job_id, state);
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index c4e44ce4c0103e540fb83d94ae5d0af3618e6fb2..7bf2c19a130f48729366df31989e861c79380aa0 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -245,6 +245,27 @@ static void _build_pending_step(struct job_record *job_ptr,
 static void _internal_step_complete(struct job_record *job_ptr,
 				    struct step_record *step_ptr)
 {
+	struct jobacctinfo *jobacct = (struct jobacctinfo *)step_ptr->jobacct;
+	if (jobacct && job_ptr->tres_alloc_cnt &&
+	    (jobacct->energy.consumed_energy != NO_VAL64)) {
+		if (job_ptr->tres_alloc_cnt[TRES_ARRAY_ENERGY] == NO_VAL64)
+			job_ptr->tres_alloc_cnt[TRES_ARRAY_ENERGY] = 0;
+		job_ptr->tres_alloc_cnt[TRES_ARRAY_ENERGY] +=
+			jobacct->energy.consumed_energy;
+	}
+
+	if (IS_JOB_FINISHED(job_ptr) && job_ptr->tres_alloc_cnt &&
+	    (job_ptr->tres_alloc_cnt[TRES_ENERGY] != NO_VAL64) &&
+	    (list_count(job_ptr->step_list) == 1)) {
+		set_job_tres_alloc_str(job_ptr, false);
+		/* This flag says we have processed the tres alloc including
+		 * energy from all steps, so don't process or handle it again
+		 * with the job.  It also tells the slurmdbd plugin to send it
+		 * to the DBD.
+		 */
+		job_ptr->bit_flags |= TRES_STR_CALC;
+	}
+
 	jobacct_storage_g_step_complete(acct_db_conn, step_ptr);
 
 	if (step_ptr->step_id == SLURM_PENDING_STEP)
diff --git a/src/slurmdbd/proc_req.c b/src/slurmdbd/proc_req.c
index 698050d56b24d930d1897e75d2ac363070a771a9..318fabd3107835c8bf804d6e158119708236e3e9 100644
--- a/src/slurmdbd/proc_req.c
+++ b/src/slurmdbd/proc_req.c
@@ -1814,6 +1814,8 @@ static int _job_complete(slurmdbd_conn_t *slurmdbd_conn,
 	job.nodes = job_comp_msg->nodes;
 	job.start_time = job_comp_msg->start_time;
 	details.submit_time = job_comp_msg->submit_time;
+	job.start_protocol_ver = slurmdbd_conn->conn->version;
+	job.tres_alloc_str = job_comp_msg->tres_alloc_str;
 
 	job.details = &details;
 
@@ -1904,6 +1906,7 @@ static int _job_suspend(slurmdbd_conn_t *slurmdbd_conn,
 	job.job_id = job_suspend_msg->job_id;
 	job.job_state = job_suspend_msg->job_state;
 	details.submit_time = job_suspend_msg->submit_time;
+	job.start_protocol_ver = slurmdbd_conn->conn->version;
 	job.suspend_time = job_suspend_msg->suspend_time;
 
 	job.details = &details;
@@ -2552,6 +2555,7 @@ static void _process_job_start(slurmdbd_conn_t *slurmdbd_conn,
 	job.qos_id = job_start_msg->qos_id;
 	job.resv_id = job_start_msg->resv_id;
 	job.priority = job_start_msg->priority;
+	job.start_protocol_ver = slurmdbd_conn->conn->version;
 	job.start_time = job_start_msg->start_time;
 	job.time_limit = job_start_msg->timelimit;
 	job.tres_alloc_str = job_start_msg->tres_alloc_str;
@@ -3359,7 +3363,9 @@ static int _step_complete(slurmdbd_conn_t *slurmdbd_conn,
 	step.jobacct = step_comp_msg->jobacct;
 	job.job_id = step_comp_msg->job_id;
 	step.requid = step_comp_msg->req_uid;
+	job.start_protocol_ver = slurmdbd_conn->conn->version;
 	job.start_time = step_comp_msg->start_time;
+	job.tres_alloc_str = step_comp_msg->job_tres_alloc_str;
 	step.state = step_comp_msg->state;
 	step.step_id = step_comp_msg->step_id;
 	details.submit_time = step_comp_msg->job_submit_time;
@@ -3426,6 +3432,7 @@ static int _step_start(slurmdbd_conn_t *slurmdbd_conn,
 	step.name = step_start_msg->name;
 	job.nodes = step_start_msg->nodes;
 	step.network = step_start_msg->node_inx;
+	job.start_protocol_ver = slurmdbd_conn->conn->version;
 	step.start_time = step_start_msg->start_time;
 	details.submit_time = step_start_msg->job_submit_time;
 	step.step_id = step_start_msg->step_id;