From f8a29b99fb55896c3f2106bdcd348332b4d1b844 Mon Sep 17 00:00:00 2001 From: Danny Auble <da@schedmd.com> Date: Thu, 28 Aug 2014 15:56:18 -0700 Subject: [PATCH] Get array_task_str and array_max_tasks to the database and out of it. --- slurm/slurmdb.h | 6 ++ src/common/slurmdb_defs.c | 1 + src/common/slurmdb_pack.c | 5 ++ src/common/slurmdbd_defs.c | 8 ++ src/common/slurmdbd_defs.h | 3 + .../mysql/accounting_storage_mysql.c | 3 + .../mysql/as_mysql_archive.c | 7 ++ .../accounting_storage/mysql/as_mysql_job.c | 32 ++++++++ .../mysql/as_mysql_jobacct_process.c | 11 +++ src/sacct/print.c | 82 ++++++++++++++++++- src/slurmdbd/proc_req.c | 6 ++ 11 files changed, 163 insertions(+), 1 deletion(-) diff --git a/slurm/slurmdb.h b/slurm/slurmdb.h index c5123e9e80d..f5b8ed7f1a0 100644 --- a/slurm/slurmdb.h +++ b/slurm/slurmdb.h @@ -590,8 +590,14 @@ typedef struct { char *alloc_gres; uint32_t alloc_nodes; uint32_t array_job_id; /* job_id of a job array or 0 if N/A */ + uint32_t array_max_tasks; /* How many tasks of the array can be + running at one time. + */ uint32_t array_task_id; /* task_id of a job array of NO_VAL * if N/A */ + char *array_task_str; /* If pending these are the array + tasks this record represents. + */ uint32_t associd; char *blockid; char *cluster; diff --git a/src/common/slurmdb_defs.c b/src/common/slurmdb_defs.c index 2d4afb9e283..cc8563a3370 100644 --- a/src/common/slurmdb_defs.c +++ b/src/common/slurmdb_defs.c @@ -533,6 +533,7 @@ extern void slurmdb_destroy_job_rec(void *object) if (job) { xfree(job->account); xfree(job->alloc_gres); + xfree(job->array_task_str); xfree(job->blockid); xfree(job->cluster); xfree(job->derived_es); diff --git a/src/common/slurmdb_pack.c b/src/common/slurmdb_pack.c index 4fd2d2c4f4e..3ac2e897b83 100644 --- a/src/common/slurmdb_pack.c +++ b/src/common/slurmdb_pack.c @@ -3562,7 +3562,9 @@ extern void slurmdb_pack_job_rec(void *object, uint16_t rpc_version, Buf buffer) packstr(job->alloc_gres, buffer); pack32(job->alloc_nodes, buffer); pack32(job->array_job_id, buffer); + pack32(job->array_max_tasks, buffer); pack32(job->array_task_id, buffer); + packstr(job->array_task_str, buffer); pack32(job->associd, buffer); packstr(job->blockid, buffer); packstr(job->cluster, buffer); @@ -3698,7 +3700,10 @@ extern int slurmdb_unpack_job_rec(void **job, uint16_t rpc_version, Buf buffer) buffer); safe_unpack32(&job_ptr->alloc_nodes, buffer); safe_unpack32(&job_ptr->array_job_id, buffer); + safe_unpack32(&job_ptr->array_max_tasks, buffer); safe_unpack32(&job_ptr->array_task_id, buffer); + safe_unpackstr_xmalloc(&job_ptr->array_task_str, + &uint32_tmp, buffer); safe_unpack32(&job_ptr->associd, buffer); safe_unpackstr_xmalloc(&job_ptr->blockid, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job_ptr->cluster, &uint32_tmp, buffer); diff --git a/src/common/slurmdbd_defs.c b/src/common/slurmdbd_defs.c index c15e2eb3d06..f0330a36dfb 100644 --- a/src/common/slurmdbd_defs.c +++ b/src/common/slurmdbd_defs.c @@ -2639,6 +2639,7 @@ extern void slurmdbd_free_job_start_msg(void *in) dbd_job_start_msg_t *msg = (dbd_job_start_msg_t *)in; if (msg) { xfree(msg->account); + xfree(msg->array_task_str); xfree(msg->block_id); xfree(msg->gres_alloc); xfree(msg->gres_req); @@ -3232,7 +3233,10 @@ slurmdbd_pack_job_start_msg(void *in, pack32(msg->alloc_cpus, buffer); pack32(msg->alloc_nodes, buffer); pack32(msg->array_job_id, buffer); + pack32(msg->array_max_tasks, buffer); pack32(msg->array_task_id, buffer); + packstr(msg->array_task_str, buffer); + pack32(msg->array_task_pending, buffer); pack32(msg->assoc_id, buffer); packstr(msg->block_id, buffer); pack32(msg->db_index, buffer); @@ -3304,7 +3308,11 @@ slurmdbd_unpack_job_start_msg(void **msg, safe_unpack32(&msg_ptr->alloc_cpus, buffer); safe_unpack32(&msg_ptr->alloc_nodes, buffer); safe_unpack32(&msg_ptr->array_job_id, buffer); + safe_unpack32(&msg_ptr->array_max_tasks, buffer); safe_unpack32(&msg_ptr->array_task_id, buffer); + safe_unpackstr_xmalloc(&msg_ptr->array_task_str, + &uint32_tmp, buffer); + safe_unpack32(&msg_ptr->array_task_pending, buffer); safe_unpack32(&msg_ptr->assoc_id, buffer); safe_unpackstr_xmalloc(&msg_ptr->block_id, &uint32_tmp, buffer); safe_unpack32(&msg_ptr->db_index, buffer); diff --git a/src/common/slurmdbd_defs.h b/src/common/slurmdbd_defs.h index 494c731d8bd..3532d9e3fb7 100644 --- a/src/common/slurmdbd_defs.h +++ b/src/common/slurmdbd_defs.h @@ -272,8 +272,11 @@ typedef struct dbd_job_start_msg { uint32_t alloc_cpus; /* count of allocated processors */ uint32_t alloc_nodes; /* how many nodes used in job */ uint32_t array_job_id; /* job_id of a job array or 0 if N/A */ + uint32_t array_max_tasks;/* max number of tasks able to run at once */ uint32_t array_task_id; /* task_id of a job array of NO_VAL * if N/A */ + char * array_task_str;/* hex string of unstarted tasks */ + uint32_t array_task_pending;/* number of tasks still pending */ uint32_t assoc_id; /* accounting association id */ char * block_id; /* Bluegene block id */ uint32_t db_index; /* index into the db for this job */ diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c index a7559589f56..63bdc33ab7a 100644 --- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c +++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c @@ -973,6 +973,9 @@ extern int create_cluster_tables(mysql_conn_t *mysql_conn, char *cluster_name) { "mod_time", "int unsigned default 0 not null" }, { "deleted", "tinyint default 0 not null" }, { "account", "tinytext" }, + { "array_task_str", "text" }, + { "array_max_tasks", "int unsigned default 0 not null" }, + { "array_task_pending", "int unsigned default 0 not null" }, { "cpus_req", "int unsigned not null" }, { "cpus_alloc", "int unsigned not null" }, { "derived_ec", "int unsigned default 0 not null" }, diff --git a/src/plugins/accounting_storage/mysql/as_mysql_archive.c b/src/plugins/accounting_storage/mysql/as_mysql_archive.c index 2d0141fb264..6ad92f171f1 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_archive.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_archive.c @@ -65,6 +65,7 @@ typedef struct { char *alloc_nodes; char *associd; char *array_jobid; + char *array_max_tasks; char *array_taskid; char *blockid; char *derived_ec; @@ -193,6 +194,7 @@ enum { * enum below */ static char *job_req_inx[] = { "account", + "array_max_tasks", "cpus_alloc", "nodes_alloc", "id_assoc", @@ -230,6 +232,7 @@ static char *job_req_inx[] = { enum { JOB_REQ_ACCOUNT, + JOB_REQ_ARRAY_MAX, JOB_REQ_ALLOC_CPUS, JOB_REQ_ALLOC_NODES, JOB_REQ_ASSOCID, @@ -453,6 +456,7 @@ static void _pack_local_job(local_job_t *object, packstr(object->alloc_nodes, buffer); packstr(object->associd, buffer); packstr(object->array_jobid, buffer); + packstr(object->array_max_tasks, buffer); packstr(object->array_taskid, buffer); packstr(object->blockid, buffer); packstr(object->derived_ec, buffer); @@ -497,6 +501,7 @@ static int _unpack_local_job(local_job_t *object, unpackstr_ptr(&object->alloc_nodes, &tmp32, buffer); unpackstr_ptr(&object->associd, &tmp32, buffer); unpackstr_ptr(&object->array_jobid, &tmp32, buffer); + unpackstr_ptr(&object->array_max_tasks, &tmp32, buffer); unpackstr_ptr(&object->array_taskid, &tmp32, buffer); unpackstr_ptr(&object->blockid, &tmp32, buffer); unpackstr_ptr(&object->derived_ec, &tmp32, buffer); @@ -1612,6 +1617,7 @@ static uint32_t _archive_jobs(mysql_conn_t *mysql_conn, char *cluster_name, job.alloc_nodes = row[JOB_REQ_ALLOC_NODES]; job.associd = row[JOB_REQ_ASSOCID]; job.array_jobid = row[JOB_REQ_ARRAYJOBID]; + job.array_max_tasks = row[JOB_REQ_ARRAY_MAX]; job.array_taskid = row[JOB_REQ_ARRAYTASKID]; job.blockid = row[JOB_REQ_BLOCKID]; job.derived_ec = row[JOB_REQ_DERIVED_EC]; @@ -1695,6 +1701,7 @@ static char *_load_jobs(uint16_t rpc_version, Buf buffer, object.alloc_nodes, object.associd, object.array_jobid, + object.array_max_tasks, object.array_taskid, object.blockid, object.derived_ec, diff --git a/src/plugins/accounting_storage/mysql/as_mysql_job.c b/src/plugins/accounting_storage/mysql/as_mysql_job.c index a3bf070f0e9..e352a3c0189 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_job.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_job.c @@ -250,6 +250,7 @@ extern int as_mysql_job_start(mysql_conn_t *mysql_conn, uint32_t wckeyid = 0; int job_state, node_cnt = 0; uint32_t job_db_inx = job_ptr->db_index; + job_array_struct_t *array_recs = job_ptr->array_recs; if ((!job_ptr->details || !job_ptr->details->submit_time) && !job_ptr->resize_time) { @@ -471,6 +472,11 @@ no_rollup_change: xstrcat(query, ", gres_req"); if (gres_alloc) xstrcat(query, ", gres_alloc"); + if (array_recs && array_recs->task_id_str) + xstrcat(query, ", array_task_str, array_max_tasks, " + "array_task_pending"); + else + xstrcat(query, ", array_task_str, array_task_pending"); xstrfmtcat(query, ") values (%u, %u, %u, %u, %u, %u, %u, %u, " @@ -501,6 +507,13 @@ no_rollup_change: xstrfmtcat(query, ", '%s'", gres_req); if (gres_alloc) xstrfmtcat(query, ", '%s'", gres_alloc); + if (array_recs && array_recs->task_id_str) + xstrfmtcat(query, ", '%s', %u, %u", + array_recs->task_id_str, + array_recs->max_run_tasks, + array_recs->task_cnt); + else + xstrcat(query, ", NULL, 0"); xstrfmtcat(query, ") on duplicate key update " @@ -536,6 +549,15 @@ no_rollup_change: xstrfmtcat(query, ", gres_req='%s'", gres_req); if (gres_alloc) xstrfmtcat(query, ", gres_alloc='%s'", gres_alloc); + if (array_recs && array_recs->task_id_str) + xstrfmtcat(query, ", array_task_str='%s', " + "array_max_tasks=%u, array_task_pending=%u", + array_recs->task_id_str, + array_recs->max_run_tasks, + array_recs->task_cnt); + else + xstrfmtcat(query, ", array_task_str=NULL, " + "array_task_pending=0"); if (debug_flags & DEBUG_FLAG_DB_JOB) DB_DEBUG(mysql_conn->conn, "query\n%s", query); @@ -573,6 +595,16 @@ no_rollup_change: xstrfmtcat(query, "gres_req='%s', ", gres_req); if (gres_alloc) xstrfmtcat(query, "gres_alloc='%s', ", gres_alloc); + if (array_recs && array_recs->task_id_str) + xstrfmtcat(query, "array_task_str='%s', " + "array_max_tasks=%u, " + "array_task_pending=%u, ", + array_recs->task_id_str, + array_recs->max_run_tasks, + array_recs->task_cnt); + else + xstrfmtcat(query, "array_task_str=NULL, " + "array_task_pending=0, "); xstrfmtcat(query, "time_start=%ld, job_name='%s', state=%u, " "cpus_alloc=%u, nodes_alloc=%u, id_qos=%u, " diff --git a/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c b/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c index e21978075ce..100a9b44969 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c @@ -55,6 +55,8 @@ typedef struct { * enum below also t1 is job_table */ char *job_req_inx[] = { "t1.account", + "t1.array_max_tasks", + "t1.array_task_str", "t1.cpus_alloc", "t1.cpus_req", "t1.derived_ec", @@ -98,6 +100,8 @@ char *job_req_inx[] = { enum { JOB_REQ_ACCOUNT1, + JOB_REQ_ARRAY_MAX, + JOB_REQ_ARRAY_STR, JOB_REQ_ALLOC_CPUS, JOB_REQ_REQ_CPUS, JOB_REQ_DERIVED_EC, @@ -539,6 +543,13 @@ static int _cluster_get_jobs(mysql_conn_t *mysql_conn, else if (row[JOB_REQ_ACCOUNT1] && row[JOB_REQ_ACCOUNT1][0]) job->account = xstrdup(row[JOB_REQ_ACCOUNT1]); + if (row[JOB_REQ_ARRAY_STR] && row[JOB_REQ_ARRAY_STR][0]) + job->array_task_str = xstrdup(row[JOB_REQ_ARRAY_STR]); + + if (row[JOB_REQ_ARRAY_MAX]) + job->array_max_tasks = + slurm_atoul(row[JOB_REQ_ARRAY_MAX]); + if (row[JOB_REQ_BLOCKID]) job->blockid = xstrdup(row[JOB_REQ_BLOCKID]); diff --git a/src/sacct/print.c b/src/sacct/print.c index 42fab3ee5c8..f8d35603d9a 100644 --- a/src/sacct/print.c +++ b/src/sacct/print.c @@ -119,6 +119,80 @@ static void _print_small_double( snprintf(outbuf, buf_size, "0"); } +/* Translate bitmap representation from hex to decimal format, replacing + * array_task_str. */ +static void _xlate_task_str(slurmdb_job_rec_t *job_ptr) +{ + static int bitstr_len = -1; + int buf_size, len; + int i, i_first, i_last, i_prev, i_step = 0; + bitstr_t *task_bitmap; + char *in_buf = job_ptr->array_task_str; + char *out_buf = NULL; + + if (!in_buf) + return; + + i = strlen(in_buf); + task_bitmap = bit_alloc(i * 4); + bit_unfmt_hexmask(task_bitmap, in_buf); + + /* Check first for a step function */ + i_first = bit_ffs(task_bitmap); + i_last = bit_fls(task_bitmap); + if (((i_last - i_first) > 10) && + !bit_test(task_bitmap, i_first + 1)) { + bool is_step = true; + i_prev = i_first; + for (i = i_first + 1; i <= i_last; i++) { + if (!bit_test(task_bitmap, i)) + continue; + if (i_step == 0) { + i_step = i - i_prev; + } else if ((i - i_prev) != i_step) { + is_step = false; + break; + } + i_prev = i; + } + if (is_step) { + xstrfmtcat(out_buf, "%d-%d:%d", + i_first, i_last, i_step); + } + } + + if (bitstr_len > 0) { + /* Print the first bitstr_len bytes of the bitmap string */ + buf_size = bitstr_len; + out_buf = xmalloc(buf_size); + bit_fmt(out_buf, buf_size, task_bitmap); + len = strlen(out_buf); + if (len > (buf_size - 3)) + for (i = 0; i < 3; i++) + out_buf[buf_size - 2 - i] = '.'; + } else { + /* Print the full bitmap's string representation. + * For huge bitmaps this can take roughly one minute, + * so let the client do the work */ + buf_size = bit_size(task_bitmap) * 8; + while (1) { + out_buf = xmalloc(buf_size); + bit_fmt(out_buf, buf_size, task_bitmap); + len = strlen(out_buf); + if ((len > 0) && (len < (buf_size - 32))) + break; + xfree(out_buf); + buf_size *= 2; + } + } + + if (job_ptr->array_max_tasks) + xstrfmtcat(out_buf, "%c%u", '%', job_ptr->array_max_tasks); + + xfree(job_ptr->array_task_str); + job_ptr->array_task_str = out_buf; +} + void print_fields(type_t type, void *object) { if (!object) { @@ -660,7 +734,13 @@ void print_fields(type_t type, void *object) job = step->job_ptr; if (job) { - if (job->array_task_id != NO_VAL) + if (job->array_task_str) { + _xlate_task_str(job); + snprintf(id, FORMAT_STRING_SIZE, + "%u_[%s]", + job->array_job_id, + job->array_task_str); + } else if (job->array_task_id != NO_VAL) snprintf(id, FORMAT_STRING_SIZE, "%u_%u", job->array_job_id, diff --git a/src/slurmdbd/proc_req.c b/src/slurmdbd/proc_req.c index d594cbcb3d8..87a36da8e5a 100644 --- a/src/slurmdbd/proc_req.c +++ b/src/slurmdbd/proc_req.c @@ -2820,9 +2820,11 @@ static void _process_job_start(slurmdbd_conn_t *slurmdbd_conn, { struct job_record job, *job_ptr; struct job_details details; + job_array_struct_t array_recs; memset(&job, 0, sizeof(struct job_record)); memset(&details, 0, sizeof(struct job_details)); + memset(&array_recs, 0, sizeof(job_array_struct_t)); memset(id_rc_msg, 0, sizeof(dbd_id_rc_msg_t)); job.total_cpus = job_start_msg->alloc_cpus; @@ -2830,6 +2832,9 @@ static void _process_job_start(slurmdbd_conn_t *slurmdbd_conn, job.account = _replace_double_quotes(job_start_msg->account); job.array_job_id = job_start_msg->array_job_id; job.array_task_id = job_start_msg->array_task_id; + array_recs.task_id_str = job_start_msg->array_task_str; + array_recs.max_run_tasks = job_start_msg->array_max_tasks; + array_recs.task_cnt = job_start_msg->array_task_pending; job.assoc_id = job_start_msg->assoc_id; job.comment = job_start_msg->block_id; if (job_start_msg->db_index != NO_VAL) @@ -2856,6 +2861,7 @@ static void _process_job_start(slurmdbd_conn_t *slurmdbd_conn, job.wckey = _replace_double_quotes(job_start_msg->wckey); details.submit_time = job_start_msg->submit_time; + job.array_recs = &array_recs; job.details = &details; job_ptr = &job; -- GitLab