diff --git a/src/plugins/accounting_storage/mysql/as_mysql_job.c b/src/plugins/accounting_storage/mysql/as_mysql_job.c index daa2431efa1ebd89bafd6881214da49057f4e3ac..cc761c98124baaddec2e070cd917d8ae22d4955c 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_job.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_job.c @@ -234,6 +234,70 @@ no_wckeyid: return wckeyid; } +static char *_set_energy_tres(mysql_conn_t *mysql_conn, + struct job_record *job_ptr) +{ + int row_cnt = 0; + uint64_t *tres_alloc_cnt = NULL; + char *tres_alloc_str = NULL; + MYSQL_RES *result = NULL; + MYSQL_ROW row; + char * query = xstrdup_printf( + "select job.tres_alloc, SUM(consumed_energy) from " + "\"%s_%s\" as job left outer join \"%s_%s\" " + "as step on job.job_db_inx=step.job_db_inx " + "and (step.id_step>=0) and step.consumed_energy != %"PRIu64 + " where job.job_db_inx=%"PRIu64";", + mysql_conn->cluster_name, job_table, + mysql_conn->cluster_name, step_table, + NO_VAL64, job_ptr->db_index); + if (debug_flags & DEBUG_FLAG_DB_USAGE) + DB_DEBUG(mysql_conn->conn, "query\n%s", query); + if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) { + return NULL; + } + xfree(query); + + row_cnt = mysql_num_rows(result); + + if (!row_cnt) { + DB_DEBUG(mysql_conn->conn, + "Nothing for job inx %"PRIu64" from cluster %s, this should never happen", + job_ptr->db_index, + mysql_conn->cluster_name); + } else { + if (row_cnt > 1) + error("Some how with job inx %"PRIu64" from cluster %s we got %d rows for consumed energy. This should never happen, only taking the first one.", + job_ptr->db_index, + mysql_conn->cluster_name, + row_cnt); + row = mysql_fetch_row(result); + + if (job_ptr->tres_alloc_str) + tres_alloc_str = xstrdup(job_ptr->tres_alloc_str); + else if (row[0] && row[0][0]) + tres_alloc_str = xstrdup(row[0]); + + assoc_mgr_set_tres_cnt_array( + &tres_alloc_cnt, tres_alloc_str, 0, false); + + if (row[1] && row[1][0]) { + if (tres_alloc_cnt[TRES_ARRAY_ENERGY] && + tres_alloc_cnt[TRES_ARRAY_ENERGY] != NO_VAL64) + debug("we had %"PRIu64" for energy, but we are over writing it with %s", + tres_alloc_cnt[TRES_ARRAY_ENERGY], + row[1]); + tres_alloc_cnt[TRES_ARRAY_ENERGY] = + slurm_atoull(row[1]); + } else if (!tres_alloc_cnt[TRES_ARRAY_ENERGY] || + tres_alloc_cnt[TRES_ARRAY_ENERGY] != NO_VAL64) + tres_alloc_cnt[TRES_ARRAY_ENERGY] = NO_VAL64; + } + mysql_free_result(result); + + return assoc_mgr_make_tres_str_from_array( + tres_alloc_cnt, TRES_STR_FLAG_SIMPLE, true); +} /* extern functions */ extern int as_mysql_job_start(mysql_conn_t *mysql_conn, @@ -255,6 +319,7 @@ extern int as_mysql_job_start(mysql_conn_t *mysql_conn, (job_ptr->array_job_id) ? job_ptr->array_task_id : NO_VAL; uint64_t job_db_inx = job_ptr->db_index; job_array_struct_t *array_recs = job_ptr->array_recs; + char *tres_alloc_str = NULL; if ((!job_ptr->details || !job_ptr->details->submit_time) && !job_ptr->resize_time) { @@ -455,6 +520,24 @@ no_rollup_change: gres_alloc = slurm_add_slash_to_quotes(job_ptr->gres_alloc); if (!job_ptr->db_index) { + if (start_time && (job_state >= JOB_COMPLETE) && + (!job_ptr->tres_alloc_str && + (slurmdbd_conf && + (job_ptr->start_protocol_ver <= + SLURM_17_02_PROTOCOL_VERSION)))) { + /* Since we now rollup from only the job table we need + * to grab all the consumed_energy from the steps + * running while we were on the old version of Slurm. + * This only has to happen here if we have already + * started the job otherwise we will just have to do it + * again. This can be removed when 17.02 is 2 versions + * old. + */ + if (!(tres_alloc_str = _set_energy_tres( + mysql_conn, job_ptr))) + goto end_it; + } + query = xstrdup_printf( "insert into \"%s_%s\" " "(id_job, mod_time, id_array_job, id_array_task, " @@ -487,7 +570,7 @@ no_rollup_change: else xstrcat(query, ", array_task_str, array_task_pending"); - if (job_ptr->tres_alloc_str) + if (job_ptr->tres_alloc_str || tres_alloc_str) xstrcat(query, ", tres_alloc"); if (job_ptr->tres_req_str) xstrcat(query, ", tres_req"); @@ -532,7 +615,9 @@ no_rollup_change: else xstrcat(query, ", NULL, 0"); - if (job_ptr->tres_alloc_str) + if (tres_alloc_str) + xstrfmtcat(query, ", '%s'", tres_alloc_str); + else if (job_ptr->tres_alloc_str) xstrfmtcat(query, ", '%s'", job_ptr->tres_alloc_str); if (job_ptr->tres_req_str) xstrfmtcat(query, ", '%s'", job_ptr->tres_req_str); @@ -585,7 +670,9 @@ no_rollup_change: xstrfmtcat(query, ", array_task_str=NULL, " "array_task_pending=0"); - if (job_ptr->tres_alloc_str) + if (tres_alloc_str) + xstrfmtcat(query, ", tres_alloc='%s'", tres_alloc_str); + else if (job_ptr->tres_alloc_str) xstrfmtcat(query, ", tres_alloc='%s'", job_ptr->tres_alloc_str); if (job_ptr->tres_req_str) @@ -639,7 +726,9 @@ no_rollup_change: xstrfmtcat(query, "array_task_str=NULL, " "array_task_pending=0, "); - if (job_ptr->tres_alloc_str) + if (tres_alloc_str) + xstrfmtcat(query, ", tres_alloc='%s'", tres_alloc_str); + else if (job_ptr->tres_alloc_str) xstrfmtcat(query, "tres_alloc='%s', ", job_ptr->tres_alloc_str); if (job_ptr->tres_req_str) @@ -680,6 +769,8 @@ no_rollup_change: if (IS_JOB_SUSPENDED(job_ptr)) as_mysql_suspend(mysql_conn, job_db_inx, job_ptr); } +end_it: + xfree(tres_alloc_str); return rc; } @@ -799,6 +890,7 @@ extern int as_mysql_job_complete(mysql_conn_t *mysql_conn, int rc = SLURM_SUCCESS, job_state; time_t submit_time, end_time; uint32_t exit_code = 0; + char *tres_alloc_str = NULL; if (!job_ptr->db_index && ((!job_ptr->details || !job_ptr->details->submit_time) @@ -888,6 +980,19 @@ extern int as_mysql_job_complete(mysql_conn_t *mysql_conn, } job_ptr->comment = comment; } + } else if (!job_ptr->tres_alloc_str && + (slurmdbd_conf && + (job_ptr->start_protocol_ver <= + SLURM_17_02_PROTOCOL_VERSION))) { + /* Since we now rollup from only the job table we need to grab + * all the consumed_energy from the steps running while we were + * on the old version of Slurm. + * This only has to happen here if we have already started the + * job otherwise we will just have to do it again. + * This can be removed when 17.02 is 2 versions old. + */ + if (!(tres_alloc_str = _set_energy_tres(mysql_conn, job_ptr))) + goto end_it; } /* @@ -903,6 +1008,11 @@ extern int as_mysql_job_complete(mysql_conn_t *mysql_conn, if (job_ptr->derived_ec != NO_VAL) xstrfmtcat(query, ", derived_ec=%u", job_ptr->derived_ec); + if (tres_alloc_str) + xstrfmtcat(query, ", tres_alloc='%s'", tres_alloc_str); + else if (job_ptr->tres_alloc_str) + xstrfmtcat(query, ", tres_alloc='%s'", job_ptr->tres_alloc_str); + if (job_ptr->comment) { char *comment = slurm_add_slash_to_quotes(job_ptr->comment); xstrfmtcat(query, ", derived_es='%s'", comment); @@ -933,7 +1043,8 @@ extern int as_mysql_job_complete(mysql_conn_t *mysql_conn, DB_DEBUG(mysql_conn->conn, "query\n%s", query); rc = mysql_db_query(mysql_conn, query); xfree(query); - +end_it: + xfree(tres_alloc_str); return rc; }