diff --git a/src/common/slurm_accounting_storage.c b/src/common/slurm_accounting_storage.c index 59879e61592c5cc4fcd341632d95c9f391de6193..31d94d6277282d9f514827ac56e4b77fe4ce61c5 100644 --- a/src/common/slurm_accounting_storage.c +++ b/src/common/slurm_accounting_storage.c @@ -107,7 +107,8 @@ typedef struct slurm_acct_storage_ops { void *acct_assoc, time_t start, time_t end); - int (*roll_usage) (void *db_conn); + int (*roll_usage) (void *db_conn, + time_t sent_start); int (*node_down) (void *db_conn, char *cluster, struct node_record *node_ptr, @@ -1850,11 +1851,12 @@ extern int acct_storage_g_get_usage(void *db_conn, (db_conn, acct_assoc, start, end); } -extern int acct_storage_g_roll_usage(void *db_conn) +extern int acct_storage_g_roll_usage(void *db_conn, + time_t sent_start) { if (slurm_acct_storage_init(NULL) < 0) return SLURM_ERROR; - return (*(g_acct_storage_context->ops.roll_usage))(db_conn); + return (*(g_acct_storage_context->ops.roll_usage))(db_conn, sent_start); } extern int clusteracct_storage_g_node_down(void *db_conn, diff --git a/src/common/slurm_accounting_storage.h b/src/common/slurm_accounting_storage.h index 1aec1b9f61913c650aa3f0365846d17675ed07b7..710d0457a50cc10387de02ea407d8400a8cb55dc 100644 --- a/src/common/slurm_accounting_storage.h +++ b/src/common/slurm_accounting_storage.h @@ -458,9 +458,11 @@ extern int acct_storage_g_get_usage( void *db_conn, void *assoc, time_t start, time_t end); /* * roll up data in the storage + * IN: sent_start (option time to do a re-roll or start from this point) * RET: SLURM_SUCCESS on success SLURM_ERROR else */ -extern int acct_storage_g_roll_usage(void *db_conn); +extern int acct_storage_g_roll_usage(void *db_conn, + time_t sent_start); /* * record shares used information for backup in case slurmctld restarts * IN: account_list List of shares_used_object_t * diff --git a/src/common/slurmdbd_defs.c b/src/common/slurmdbd_defs.c index 0b71e38c2bc81f798a16d99fae4465eccde6f47f..7cec1b4967ea658bf67d026cc4f17743812363e9 100644 --- a/src/common/slurmdbd_defs.c +++ b/src/common/slurmdbd_defs.c @@ -1288,6 +1288,7 @@ void inline slurmdbd_free_job_complete_msg(dbd_job_comp_msg_t *msg) void inline slurmdbd_free_job_start_msg(dbd_job_start_msg_t *msg) { if (msg) { + xfree(msg->account); xfree(msg->block_id); xfree(msg->name); xfree(msg->nodes); @@ -1752,6 +1753,7 @@ unpack_error: void inline slurmdbd_pack_job_start_msg(dbd_job_start_msg_t *msg, Buf buffer) { + packstr(msg->account, buffer); pack32(msg->alloc_cpus, buffer); pack32(msg->assoc_id, buffer); packstr(msg->block_id, buffer); @@ -1776,6 +1778,7 @@ slurmdbd_unpack_job_start_msg(dbd_job_start_msg_t **msg, Buf buffer) uint32_t uint32_tmp; dbd_job_start_msg_t *msg_ptr = xmalloc(sizeof(dbd_job_start_msg_t)); *msg = msg_ptr; + safe_unpackstr_xmalloc(&msg_ptr->account, &uint32_tmp, buffer); safe_unpack32(&msg_ptr->alloc_cpus, buffer); safe_unpack32(&msg_ptr->assoc_id, buffer); safe_unpackstr_xmalloc(&msg_ptr->block_id, &uint32_tmp, buffer); @@ -2147,6 +2150,7 @@ slurmdbd_unpack_roll_usage_msg(dbd_roll_usage_msg_t **msg, Buf buffer) { dbd_roll_usage_msg_t *msg_ptr = xmalloc(sizeof(dbd_roll_usage_msg_t)); + *msg = msg_ptr; safe_unpack_time(&msg_ptr->start, buffer); return SLURM_SUCCESS; diff --git a/src/common/slurmdbd_defs.h b/src/common/slurmdbd_defs.h index f11647fd2a26943bbb82b2b70e908e7853825f5f..9e3ff3f7aad3cba3ffed536bfa8e80a941cacff0 100644 --- a/src/common/slurmdbd_defs.h +++ b/src/common/slurmdbd_defs.h @@ -185,6 +185,8 @@ typedef struct dbd_job_comp_msg { } dbd_job_comp_msg_t; typedef struct dbd_job_start_msg { + char * account; /* Account name for those not running + * with associations */ uint32_t alloc_cpus; /* count of allocated processors */ uint32_t assoc_id; /* accounting association id */ char * block_id; /* Bluegene block id */ diff --git a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c index 458305e457d552f3f03e8db0e6b78cf4d13c09f1..1e95e1baea91e962b6b6e912bafa40f76fe1f0ae 100644 --- a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c +++ b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c @@ -365,7 +365,8 @@ extern int acct_storage_p_get_usage(void *db_conn, return rc; } -extern int acct_storage_p_roll_usage(void *db_conn) +extern int acct_storage_p_roll_usage(void *db_conn, + time_t sent_start) { int rc = SLURM_SUCCESS; diff --git a/src/plugins/accounting_storage/gold/accounting_storage_gold.c b/src/plugins/accounting_storage/gold/accounting_storage_gold.c index df6fe00da8411fecdb0ea6fc7663725878d50cd0..1697520cdedaa08710828d1737ec7664b98a9372 100644 --- a/src/plugins/accounting_storage/gold/accounting_storage_gold.c +++ b/src/plugins/accounting_storage/gold/accounting_storage_gold.c @@ -2511,7 +2511,8 @@ extern int acct_storage_p_get_usage(void *db_conn, return rc; } -extern int acct_storage_p_roll_usage(void *db_conn) +extern int acct_storage_p_roll_usage(void *db_conn, + time_t sent_start) { int rc = SLURM_ERROR; /* FIX ME: This doesn't do anything now */ diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c index 52bed03a963c03945577c18c87a2dcae6c0851ca..0b309827bf0f590ebd2ff225d85dba9856dbedf3 100644 --- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c +++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c @@ -901,6 +901,7 @@ static int _mysql_acct_check_tables(MYSQL *acct_mysql_db) { "down_cpu_secs", "bigint default 0" }, { "idle_cpu_secs", "bigint default 0" }, { "resv_cpu_secs", "bigint default 0" }, + { "over_cpu_secs", "bigint default 0" }, { NULL, NULL} }; @@ -4440,7 +4441,8 @@ extern int acct_storage_p_get_usage(mysql_conn_t *mysql_conn, #endif } -extern int acct_storage_p_roll_usage(mysql_conn_t *mysql_conn) +extern int acct_storage_p_roll_usage(mysql_conn_t *mysql_conn, + time_t sent_start) { #ifdef HAVE_MYSQL int rc = SLURM_SUCCESS; @@ -4452,11 +4454,12 @@ extern int acct_storage_p_roll_usage(mysql_conn_t *mysql_conn) MYSQL_ROW row; char *query = NULL; char *tmp = NULL; - time_t last_hour = 0; - time_t last_day = 0; - time_t last_month = 0; + time_t last_hour = sent_start; + time_t last_day = sent_start; + time_t last_month = sent_start; time_t start_time = 0; time_t end_time = 0; + DEF_TIMERS; char *update_req_inx[] = { "hourly_rollup", @@ -4484,46 +4487,60 @@ extern int acct_storage_p_roll_usage(mysql_conn_t *mysql_conn) } } - i=0; - xstrfmtcat(tmp, "%s", update_req_inx[i]); - for(i=1; i<UPDATE_COUNT; i++) { - xstrfmtcat(tmp, ", %s", update_req_inx[i]); - } - - query = xstrdup_printf("select %s from %s", tmp, last_ran_table); - xfree(tmp); - - debug3("%d query\n%s", mysql_conn->conn, query); - if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { - xfree(query); - return SLURM_ERROR; - } - - xfree(query); - row = mysql_fetch_row(result); - if(row) { - /* the last times were one second before the next - * period so increment here 1 - */ - last_hour = atoi(row[UPDATE_HOUR]); - last_day = atoi(row[UPDATE_DAY]); - last_month = atoi(row[UPDATE_MONTH]); - } else { - query = xstrdup_printf( - "insert into %s " - "(hourly_rollup, daily_rollup, monthly_rollup) " - "values (0, 0, 0)", - last_ran_table); + if(!sent_start) { + i=0; + xstrfmtcat(tmp, "%s", update_req_inx[i]); + for(i=1; i<UPDATE_COUNT; i++) { + xstrfmtcat(tmp, ", %s", update_req_inx[i]); + } + query = xstrdup_printf("select %s from %s", + tmp, last_ran_table); + xfree(tmp); + + if(!(result = mysql_db_query_ret( + mysql_conn->acct_mysql_db, query, 0))) { + xfree(query); + return SLURM_ERROR; + } - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); xfree(query); - if(rc == SLURM_ERROR) - return rc; + row = mysql_fetch_row(result); + if(row) { + last_hour = atoi(row[UPDATE_HOUR]); + last_day = atoi(row[UPDATE_DAY]); + last_month = atoi(row[UPDATE_MONTH]); + mysql_free_result(result); + } else { + query = xstrdup_printf( + "select @PS := period_start from %s limit 1;" + "insert into %s " + "(hourly_rollup, daily_rollup, monthly_rollup) " + "values (@PS, @PS, @PS);", + event_table, last_ran_table); + + mysql_free_result(result); + if(!(result = mysql_db_query_ret( + mysql_conn->acct_mysql_db, query, 0))) { + xfree(query); + return SLURM_ERROR; + } + xfree(query); + row = mysql_fetch_row(result); + if(!row) { + debug("No clusters have been added " + "not doing rollup"); + mysql_free_result(result); + return SLURM_SUCCESS; + } + + last_hour = last_day = last_month = atoi(row[0]); + mysql_free_result(result); + } } - last_hour = 1211475599; - last_day = 1211475599; - last_month = 1211475599; +/* last_hour = 1211475599; */ +/* last_day = 1211475599; */ +/* last_month = 1211475599; */ + // last_hour = 1211403599; // last_hour = 1206946800; // last_day = 1207033199; @@ -4558,10 +4575,12 @@ extern int acct_storage_p_roll_usage(mysql_conn_t *mysql_conn) end_tm.tm_isdst = -1; end_time = mktime(&end_tm); if(end_time-start_time > 0) { + START_TIMER; if((rc = mysql_hourly_rollup(mysql_conn, start_time, end_time)) != SLURM_SUCCESS) return rc; - query = xstrdup_printf("update %s set hour_rollup=%d", + END_TIMER2("hourly_rollup"); + query = xstrdup_printf("update %s set hourly_rollup=%d", last_ran_table, end_time); } else { debug2("no need to run this hour %d < %d", @@ -4583,9 +4602,11 @@ extern int acct_storage_p_roll_usage(mysql_conn_t *mysql_conn) end_tm.tm_isdst = -1; end_time = mktime(&end_tm); if(end_time-start_time > 0) { + START_TIMER; if((rc = mysql_daily_rollup(mysql_conn, start_time, end_time)) != SLURM_SUCCESS) return rc; + END_TIMER2("daily_rollup"); if(query) xstrfmtcat(query, ", daily_rollup=%d", end_time); else @@ -4614,9 +4635,12 @@ extern int acct_storage_p_roll_usage(mysql_conn_t *mysql_conn) end_tm.tm_isdst = -1; end_time = mktime(&end_tm); if(end_time-start_time > 0) { + START_TIMER; if((rc = mysql_monthly_rollup( mysql_conn, start_time, end_time)) != SLURM_SUCCESS) return rc; + END_TIMER2("monthly_rollup"); + if(query) xstrfmtcat(query, ", montly_rollup=%d", end_time); else @@ -4629,8 +4653,8 @@ extern int acct_storage_p_roll_usage(mysql_conn_t *mysql_conn) } if(query) { - info("%s", query); - //rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%s", query); + rc = mysql_db_query(mysql_conn->acct_mysql_db, query); xfree(query); } return rc; diff --git a/src/plugins/accounting_storage/mysql/mysql_rollup.c b/src/plugins/accounting_storage/mysql/mysql_rollup.c index 58ee6a5afa9d3aac6380ddf4dcdc3db6c1c0f7da..4fe1d41c120c6a67709742dde47fc5e385d65144 100644 --- a/src/plugins/accounting_storage/mysql/mysql_rollup.c +++ b/src/plugins/accounting_storage/mysql/mysql_rollup.c @@ -54,6 +54,7 @@ typedef struct { int cpu_count; int d_cpu; int i_cpu; + int o_cpu; int r_cpu; time_t start; time_t end; @@ -166,13 +167,16 @@ extern int mysql_hourly_rollup(mysql_conn_t *mysql_conn, /* info("begin start %s", ctime(&curr_start)); */ /* info("begin end %s", ctime(&curr_end)); */ - a_itr = list_iterator_create(cluster_usage_list); + a_itr = list_iterator_create(assoc_usage_list); c_itr = list_iterator_create(cluster_usage_list); while(curr_start < end) { int last_id = 0; int seconds = 0; local_cluster_usage_t *c_usage = NULL; local_assoc_usage_t *a_usage = NULL; + debug3("curr hour is now %d-%d", curr_start, curr_end); +/* info("start %s", ctime(&curr_start)); */ +/* info("end %s", ctime(&curr_end)); */ // first get the events during this time query = xstrdup_printf("select %s from %s where " @@ -259,14 +263,15 @@ extern int mysql_hourly_rollup(mysql_conn_t *mysql_conn, seconds = (local_end - local_start); - info("node %s adds (%d)(%d-%d) * %d = %d " - "to %d", - row[EVENT_REQ_NAME], - seconds, - local_end, local_start, - row_cpu, - seconds * row_cpu, - row_cpu); +/* info("node %s adds " */ +/* "(%d)(%d-%d) * %d = %d " */ +/* "to %d", */ +/* row[EVENT_REQ_NAME], */ +/* seconds, */ +/* local_end, local_start, */ +/* row_cpu, */ +/* seconds * row_cpu, */ +/* row_cpu); */ c_usage->d_cpu += seconds * row_cpu; break; @@ -368,7 +373,7 @@ extern int mysql_hourly_rollup(mysql_conn_t *mysql_conn, } if(seconds < 1) { - debug("This job (%u) was suspended " + debug4("This job (%u) was suspended " "the entire hour", job_id); continue; } @@ -387,43 +392,47 @@ extern int mysql_hourly_rollup(mysql_conn_t *mysql_conn, if(!row_start || seconds < 1) goto calc_resv; - info("%d assoc %d adds (%d)(%d-%d) * %d = %d " - "to %d", - job_id, - assoc_id, - seconds, - row_end, row_start, - row_acpu, - seconds * row_acpu, - row_acpu); +/* info("%d assoc %d adds " */ +/* "(%d)(%d-%d) * %d = %d " */ +/* "to %d", */ +/* job_id, */ +/* a_usage->assoc_id, */ +/* seconds, */ +/* row_end, row_start, */ +/* row_acpu, */ +/* seconds * row_acpu, */ +/* row_acpu); */ c_usage->a_cpu += seconds * row_acpu; calc_resv: /* now reserved time */ - if(row_start < c_usage->start) + if(row_start && + row_start < c_usage->start) continue; - row_start = row_eligible; + row_end = row_start; + row_start = row_eligible; if(c_usage->start > row_start) row_start = c_usage->start; if(c_usage->end < row_end) row_end = c_usage->end; - + if((row_end - row_start) < 1) continue; seconds = (row_end - row_start); - info("%d assoc %d reserved (%d)(%d-%d) * %d = %d " - "to %d", - job_id, - assoc_id, - seconds, - row_end, row_start, - row_rcpu, - seconds * row_rcpu, - row_rcpu); +/* info("%d assoc %d reserved " */ +/* "(%d)(%d-%d) * %d = %d " */ +/* "to %d", */ +/* job_id, */ +/* assoc_id, */ +/* seconds, */ +/* row_end, row_start, */ +/* row_rcpu, */ +/* seconds * row_rcpu, */ +/* row_rcpu); */ c_usage->r_cpu += seconds * row_rcpu; break; @@ -433,55 +442,76 @@ extern int mysql_hourly_rollup(mysql_conn_t *mysql_conn, mysql_free_result(result); list_iterator_reset(c_itr); - while((c_usage = list_next(c_itr))) { c_usage->i_cpu = c_usage->total_time - c_usage->a_cpu - c_usage->d_cpu - c_usage->r_cpu; /* sanity check just to make sure we have a * legitimate time after we calulated - * idle/reserved time + * idle/reserved time put extra in the over + * commit field */ if(c_usage->i_cpu < 0) { c_usage->r_cpu += c_usage->i_cpu; + c_usage->o_cpu -= c_usage->i_cpu; c_usage->i_cpu = 0; - if(c_usage->r_cpu < 0) - c_usage->r_cpu = 0; } - info("cluster %s(%u) down %u alloc %u " - "resv %u idle %u total= %u = %u from %s", - c_usage->name, - c_usage->cpu_count, c_usage->d_cpu, c_usage->a_cpu, - c_usage->r_cpu, c_usage->i_cpu, - c_usage->d_cpu + c_usage->a_cpu + - c_usage->r_cpu + c_usage->i_cpu, - c_usage->total_time, - ctime(&c_usage->start)); - info("to %s", ctime(&c_usage->end)); +/* info("cluster %s(%d) down %d alloc %d " */ +/* "resv %d idle %d over %d " */ +/* "total= %d = %d from %s", */ +/* c_usage->name, */ +/* c_usage->cpu_count, c_usage->d_cpu, */ +/* c_usage->a_cpu, */ +/* c_usage->r_cpu, c_usage->i_cpu, c_usage->o_cpu, */ +/* c_usage->d_cpu + c_usage->a_cpu + */ +/* c_usage->r_cpu + c_usage->i_cpu, */ +/* c_usage->total_time, */ +/* ctime(&c_usage->start)); */ +/* info("to %s", ctime(&c_usage->end)); */ if(query) { xstrfmtcat(query, - ", (%d, %d, '%s', %d, %u, " - "%u, %u, %u, %u)", + ", (%d, %d, '%s', %d, %d, " + "%d, %d, %d, %d, %d) " + "on duplicate key update " + "mod_time=%d, cpu_count=%d, " + "alloc_cpu_secs=%d, " + "down_cpu_secs=%d, " + "idle_cpu_secs=%d, " + "over_cpu_secs=%d, resv_cpu_secs=%d", now, now, c_usage->name, c_usage->start, c_usage->cpu_count, c_usage->a_cpu, c_usage->d_cpu, c_usage->i_cpu, - c_usage->r_cpu); + c_usage->o_cpu, c_usage->r_cpu, + now, + c_usage->cpu_count, c_usage->a_cpu, + c_usage->d_cpu, c_usage->i_cpu, + c_usage->o_cpu, c_usage->r_cpu); } else { xstrfmtcat(query, "insert into %s (creation_time, " "mod_time, cluster, period_start, " "cpu_count, alloc_cpu_secs, " "down_cpu_secs, idle_cpu_secs, " - "resv_cpu_secs) values " - "(%d, %d, '%s', %d, %u, " - "%u, %u, %u, %u)", + "over_cpu_secs, resv_cpu_secs) " + "values (%d, %d, '%s', %d, %d, " + "%d, %d, %d, %d, %d) " + "on duplicate key update " + "mod_time=%d, cpu_count=%d, " + "alloc_cpu_secs=%d, " + "down_cpu_secs=%d, " + "idle_cpu_secs=%d, " + "over_cpu_secs=%d, resv_cpu_secs=%d", cluster_hour_table, now, now, c_usage->name, c_usage->start, c_usage->cpu_count, c_usage->a_cpu, c_usage->d_cpu, c_usage->i_cpu, - c_usage->r_cpu); + c_usage->o_cpu, c_usage->r_cpu, + now, + c_usage->cpu_count, c_usage->a_cpu, + c_usage->d_cpu, c_usage->i_cpu, + c_usage->o_cpu, c_usage->r_cpu); } } if(query) { @@ -495,28 +525,36 @@ extern int mysql_hourly_rollup(mysql_conn_t *mysql_conn, list_iterator_reset(a_itr); while((a_usage = list_next(a_itr))) { - info("association (%u) alloc %u", - a_usage->assoc_id, - a_usage->a_cpu); +/* info("association (%d) %d alloc %d", */ +/* a_usage->assoc_id, last_id, */ +/* a_usage->a_cpu); */ if(query) { xstrfmtcat(query, - ", (%d, %d, '%u', %d, %u, " - "%u, %u, %u, %u)", + ", (%d, %d, %d, %d, %d, " + "%d, %d, %d, %d) " + "on duplicate key update " + "mod_time=%d, alloc_cpu_secs=%d", now, now, a_usage->assoc_id, curr_start, - a_usage->a_cpu); + a_usage->a_cpu, + now, a_usage->a_cpu); } else { xstrfmtcat(query, "insert into %s (creation_time, " "mod_time, id, period_start, " "alloc_cpu_secs) values " - "(%d, %d, %u, %d, %u)", + "(%d, %d, %d, %d, %d) " + "on duplicate key update " + "mod_time=%d, alloc_cpu_secs=%d", assoc_hour_table, now, now, a_usage->assoc_id, curr_start, - a_usage->a_cpu); + a_usage->a_cpu, + now, a_usage->a_cpu); } } + if(query) { + debug3("%d query\n%s", mysql_conn->conn, query); rc = mysql_db_query(mysql_conn->acct_mysql_db, query); xfree(query); if(rc != SLURM_SUCCESS) { @@ -528,7 +566,6 @@ extern int mysql_hourly_rollup(mysql_conn_t *mysql_conn, list_flush(cluster_usage_list); curr_start = curr_end; curr_end = curr_start + add_sec; - debug3("curr hour is now %d-%d", curr_start, curr_end); } end_it: xfree(suspend_str); @@ -567,32 +604,43 @@ extern int mysql_daily_rollup(mysql_conn_t *mysql_conn, start_tm.tm_isdst = -1; curr_end = mktime(&start_tm); - info("begin start %s", ctime(&curr_start)); - info("begin end %s", ctime(&curr_end)); while(curr_start < end) { + debug3("curr day is now %d-%d", curr_start, curr_end); +/* info("start %s", ctime(&curr_start)); */ +/* info("end %s", ctime(&curr_end)); */ query = xstrdup_printf( "insert into %s (creation_time, mod_time, id, " "period_start, alloc_cpu_secs) select %d, %d, id, " - "%d, SUM(alloc_cpu_secs) from %s where " + "%d, @ASUM:=SUM(alloc_cpu_secs) from %s where " "(period_start < %d && period_start >= %d) " - "group by id;", + "group by id on duplicate key update mod_time=%d, " + "alloc_cpu_secs=@ASUM;", assoc_day_table, now, now, curr_start, assoc_hour_table, - curr_end, curr_start); + curr_end, curr_start, now); xstrfmtcat(query, "insert into %s (creation_time, " "mod_time, cluster, period_start, cpu_count, " "alloc_cpu_secs, down_cpu_secs, idle_cpu_secs, " - "resv_cpu_secs) select %d, %d, cluster, " - "%d, MAX(cpu_count), SUM(alloc_cpu_secs), " - "SUM(down_cpu_secs), SUM(idle_cpu_secs), " - "SUM(resv_cpu_secs) from %s where " + "over_cpu_secs, resv_cpu_secs) " + "select %d, %d, cluster, " + "%d, @CPU:=MAX(cpu_count), " + "@ASUM:=SUM(alloc_cpu_secs), " + "@DSUM:=SUM(down_cpu_secs), " + "@ISUM:=SUM(idle_cpu_secs), " + "@OSUM:=SUM(over_cpu_secs), " + "@RSUM:=SUM(resv_cpu_secs) from %s where " "(period_start < %d && period_start >= %d) " - "group by cluster;", + "group by cluster on duplicate key update " + "mod_time=%d, cpu_count=@CPU, " + "alloc_cpu_secs=@ASUM, down_cpu_secs=@DSUM, " + "idle_cpu_secs=@ISUM, over_cpu_secs=@OSUM, " + "resv_cpu_secs=@RSUM;", cluster_day_table, now, now, curr_start, cluster_hour_table, - curr_end, curr_start); - //rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + curr_end, curr_start, now); + debug3("%d query\n%s", mysql_conn->conn, query); + rc = mysql_db_query(mysql_conn->acct_mysql_db, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't add day rollup"); @@ -611,10 +659,20 @@ extern int mysql_daily_rollup(mysql_conn_t *mysql_conn, start_tm.tm_mday++; start_tm.tm_isdst = -1; curr_end = mktime(&start_tm); - debug3("curr day is now %d-%d", curr_start, curr_end); - info("stop start %s", ctime(&curr_start)); - info("stop end %s", ctime(&curr_end)); } + /* remove all data from suspend table that was older than + * start. + */ + query = xstrdup_printf("delete from %s where end < %d && end != 0", + suspend_table, start); + rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + xfree(query); + if(rc != SLURM_SUCCESS) { + error("Couldn't remove old suspend data"); + return SLURM_ERROR; + } + + /* info("stop start %s", ctime(&curr_start)); */ /* info("stop end %s", ctime(&curr_end)); */ @@ -623,12 +681,13 @@ extern int mysql_daily_rollup(mysql_conn_t *mysql_conn, extern int mysql_monthly_rollup(mysql_conn_t *mysql_conn, time_t start, time_t end) { + int rc = SLURM_SUCCESS; struct tm start_tm; time_t curr_start = start; time_t curr_end; + time_t now = time(NULL); + char *query = NULL; -/* info("begin month start %s", ctime(&start)); */ -/* info("begin month end %s", ctime(&end)); */ if(!localtime_r(&curr_start, &start_tm)) { error("Couldn't get localtime from month start %d", curr_start); return SLURM_ERROR; @@ -640,11 +699,49 @@ extern int mysql_monthly_rollup(mysql_conn_t *mysql_conn, start_tm.tm_mon++; start_tm.tm_isdst = -1; curr_end = mktime(&start_tm); -/* info("begin start %s", ctime(&curr_start)); */ -/* info("begin end %s", ctime(&curr_end)); */ - while(curr_start < end) { - /* FIX ME: DO CALCULATIONS HERE */ + while(curr_start < end) { + debug3("curr month is now %d-%d", curr_start, curr_end); +/* info("start %s", ctime(&curr_start)); */ +/* info("end %s", ctime(&curr_end)); */ + query = xstrdup_printf( + "insert into %s (creation_time, mod_time, id, " + "period_start, alloc_cpu_secs) select %d, %d, id, " + "%d, @ASUM:=SUM(alloc_cpu_secs) from %s where " + "(period_start < %d && period_start >= %d) " + "group by id on duplicate key update mod_time=%d, " + "alloc_cpu_secs=@ASUM;", + assoc_month_table, now, now, curr_start, + assoc_day_table, + curr_end, curr_start, now); + xstrfmtcat(query, + "insert into %s (creation_time, " + "mod_time, cluster, period_start, cpu_count, " + "alloc_cpu_secs, down_cpu_secs, idle_cpu_secs, " + "over_cpu_secs, resv_cpu_secs) " + "select %d, %d, cluster, " + "%d, @CPU:=MAX(cpu_count), " + "@ASUM:=SUM(alloc_cpu_secs), " + "@DSUM:=SUM(down_cpu_secs), " + "@ISUM:=SUM(idle_cpu_secs), " + "@OSUM:=SUM(over_cpu_secs), " + "@RSUM:=SUM(resv_cpu_secs) from %s where " + "(period_start < %d && period_start >= %d) " + "group by cluster on duplicate key update " + "mod_time=%d, cpu_count=@CPU, " + "alloc_cpu_secs=@ASUM, down_cpu_secs=@DSUM, " + "idle_cpu_secs=@ISUM, over_cpu_secs=@OSUM, " + "resv_cpu_secs=@RSUM;", + cluster_month_table, now, now, curr_start, + cluster_day_table, + curr_end, curr_start, now); + debug3("%d query\n%s", mysql_conn->conn, query); + rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + xfree(query); + if(rc != SLURM_SUCCESS) { + error("Couldn't add day rollup"); + return SLURM_ERROR; + } curr_start = curr_end; if(!localtime_r(&curr_start, &start_tm)) { @@ -658,10 +755,7 @@ extern int mysql_monthly_rollup(mysql_conn_t *mysql_conn, start_tm.tm_mon++; start_tm.tm_isdst = -1; curr_end = mktime(&start_tm); - debug3("curr month is now %d-%d", curr_start, curr_end); } -/* info("stop start %s", ctime(&curr_start)); */ -/* info("stop end %s", ctime(&curr_end)); */ return SLURM_SUCCESS; } diff --git a/src/plugins/accounting_storage/none/accounting_storage_none.c b/src/plugins/accounting_storage/none/accounting_storage_none.c index 24de0dd251dc37bb7ca0a09883afa4d65e30caac..0701440e26b77f8f7577cd08981bfe852ce8e520 100644 --- a/src/plugins/accounting_storage/none/accounting_storage_none.c +++ b/src/plugins/accounting_storage/none/accounting_storage_none.c @@ -223,7 +223,8 @@ extern int acct_storage_p_get_usage(void *db_conn, return rc; } -extern int acct_storage_p_roll_usage(void *db_conn) +extern int acct_storage_p_roll_usage(void *db_conn, + time_t sent_start) { int rc = SLURM_SUCCESS; diff --git a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c index 0c7c390ab1652f992222bd96a9588afca8e6a6ed..9fdbc22682082bf2a6c9a4f7d6e537f8e010f24f 100644 --- a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c +++ b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c @@ -827,7 +827,8 @@ extern int acct_storage_p_get_usage(PGconn *acct_pgsql_db, return rc; } -extern int acct_storage_p_roll_usage(PGconn *acct_pgsql_db) +extern int acct_storage_p_roll_usage(PGconn *acct_pgsql_db, + time_t sent_start) { int rc = SLURM_SUCCESS; diff --git a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c index c75a2b8077784ee53a2a7cef2958934546c372b2..3dd41ad223dd35fdee5027640a23c45642f576b5 100644 --- a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c +++ b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c @@ -710,13 +710,14 @@ extern int acct_storage_p_get_usage(void *db_conn, return rc; } -extern int acct_storage_p_roll_usage(void *db_conn) +extern int acct_storage_p_roll_usage(void *db_conn, + time_t sent_start) { slurmdbd_msg_t req; dbd_roll_usage_msg_t get_msg; int rc, resp_code; - get_msg.start = time(NULL); + get_msg.start = sent_start; req.msg_type = DBD_ROLL_USAGE; @@ -879,6 +880,7 @@ extern int jobacct_storage_p_job_start(void *db_conn, } req.alloc_cpus = job_ptr->total_procs; + req.account = job_ptr->account; req.assoc_id = job_ptr->assoc_id; #ifdef HAVE_BG select_g_get_jobinfo(job_ptr->select_jobinfo, @@ -886,8 +888,7 @@ extern int jobacct_storage_p_job_start(void *db_conn, &block_id); #endif req.block_id = block_id; - xfree(block_id); - req.db_index = job_ptr->db_index; + req.db_index = job_ptr->db_index; if (job_ptr->details) req.eligible_time = job_ptr->details->begin_time; req.gid = job_ptr->group_id; @@ -910,8 +911,11 @@ extern int jobacct_storage_p_job_start(void *db_conn, * again just send the message */ if(req.db_index) { - if (slurm_send_slurmdbd_msg(&msg) < 0) + if (slurm_send_slurmdbd_msg(&msg) < 0) { + xfree(block_id); return SLURM_ERROR; + } + xfree(block_id); return SLURM_SUCCESS; } @@ -920,8 +924,10 @@ extern int jobacct_storage_p_job_start(void *db_conn, */ rc = slurm_send_recv_slurmdbd_msg(&msg, &msg_rc); if (rc != SLURM_SUCCESS) { - if (slurm_send_slurmdbd_msg(&msg) < 0) + if (slurm_send_slurmdbd_msg(&msg) < 0) { + xfree(block_id); return SLURM_ERROR; + } } else if (msg_rc.msg_type != DBD_JOB_START_RC) { error("slurmdbd: response type not DBD_GOT_JOBS: %u", msg_rc.msg_type); @@ -930,6 +936,7 @@ extern int jobacct_storage_p_job_start(void *db_conn, job_ptr->db_index = resp->db_index; slurmdbd_free_job_start_rc_msg(resp); } + xfree(block_id); return rc; } diff --git a/src/sacctmgr/sacctmgr.c b/src/sacctmgr/sacctmgr.c index 536081bbec291c6b18343dd473acea983211cc0e..e451f74d4d1d8f2d5422bf38134112a92056ff8c 100644 --- a/src/sacctmgr/sacctmgr.c +++ b/src/sacctmgr/sacctmgr.c @@ -427,15 +427,17 @@ _process_command (int argc, char *argv[]) } quiet_flag = -1; } else if (strncasecmp (argv[0], "rollup", 2) == 0) { - if (argc > 1) { + time_t my_time = 0; + if (argc > 2) { exit_code = 1; fprintf (stderr, "too many arguments for %s keyword\n", argv[0]); } - printf("ROLLUP DOESN'T WORK YET, " - "this is for test purposes only.\n"); - if(acct_storage_g_roll_usage(db_conn) == SLURM_SUCCESS) { + if(argc > 1) + my_time = parse_time(argv[1]); + if(acct_storage_g_roll_usage(db_conn, my_time) + == SLURM_SUCCESS) { if(commit_check("Would you like to commit rollup?")) { acct_storage_g_commit(db_conn, 1); } else { diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 4174b65678729ca20a7d87f3a21f7637457c92d6..6e3b19b11caf49225dd8d0de7fd1b4fd0dad7168 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -1011,8 +1011,8 @@ extern int kill_job_by_part_name(char *part_name) difftime(now, job_ptr->suspend_time); } else job_ptr->end_time = now; - job_completion_logger(job_ptr); deallocate_nodes(job_ptr, false, suspended); + job_completion_logger(job_ptr); } else if (job_ptr->job_state == JOB_PENDING) { job_count++; info("Killing job_id %u on defunct partition %s", @@ -4769,6 +4769,7 @@ extern void job_completion_logger(struct job_record *job_ptr) } g_slurm_jobcomp_write(job_ptr); + jobacct_storage_g_job_complete(acct_db_conn, job_ptr); } /* diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index d6ea15e892d16efd6a398af51ff3744d0c19a887..2798ba45ecfed288d005fe274f874c05c87dfa97 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -210,10 +210,6 @@ extern void deallocate_nodes(struct job_record *job_ptr, bool timeout, return; } - /* log this in the accounting plugin since it was allocated - * something */ - jobacct_storage_g_job_complete(acct_db_conn, job_ptr); - agent_args->msg_args = kill_job; agent_queue_request(agent_args); return; diff --git a/src/slurmdbd/proc_req.c b/src/slurmdbd/proc_req.c index 945732abb7e5a956defeef24afc23f726917f09a..a3065e982a1ee0c46fbb3eee92c2cc7c07de28d8 100644 --- a/src/slurmdbd/proc_req.c +++ b/src/slurmdbd/proc_req.c @@ -962,6 +962,7 @@ static int _job_start(void *db_conn, memset(&job_start_rc_msg, 0, sizeof(dbd_job_start_rc_msg_t)); job.total_procs = job_start_msg->alloc_cpus; + job.account = job_start_msg->account; job.assoc_id = job_start_msg->assoc_id; job.comment = job_start_msg->block_id; job.db_index = job_start_msg->db_index; @@ -1043,7 +1044,7 @@ static int _job_suspend(void *db_conn, rc = SLURM_SUCCESS; end_it: slurmdbd_free_job_suspend_msg(job_suspend_msg); - *out_buffer = make_dbd_rc_msg(rc, comment, DBD_JOB_START); + *out_buffer = make_dbd_rc_msg(rc, comment, DBD_JOB_SUSPEND); return SLURM_SUCCESS; } @@ -1634,8 +1635,7 @@ static int _roll_usage(void *db_conn, goto end_it; } - rc = acct_storage_g_roll_usage(db_conn); - slurmdbd_free_roll_usage_msg(get_msg); + rc = acct_storage_g_roll_usage(db_conn, get_msg->start); end_it: slurmdbd_free_roll_usage_msg(get_msg);