diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 92aaf81b654755a55e1d67f65a60b06bcba48ed5..ace3e4329e0d0899020d29c119e71f07a1d21c24 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -125,12 +125,32 @@ is submitted. Currently this consists of any GRES, BB (burst buffer) or license along with CPU, Memory, Node, Energy, FS/[Disk|Lustre], IC/OFED, Pages, and VMem. By default Billing, CPU, Energy, Memory, Node, FS/Disk, Pages and VMem are tracked. + AccountingStorageTRES=gres/craynetwork,license/iop1 will track billing, cpu, energy, memory, nodes, FS/disk along with a gres called craynetwork as well as a license called iop1. Whenever these resources are used on the cluster they are recorded. The TRES are automatically set up in the database on the start of the slurmctld. +If multiple GRES of different types are tracked (e.g. GPUs of different types), +then job requests with matching type specifications will be recorded. +Given a configuration of +"AccountingStorageTRES=gres/gpu,gres/gpu:tesla,gres/gpu:volta" +Then "gres/gpu:tesla" and "gres/gpu:volta" will track only jobs that explicitly +request those two GPU types, while "gres/gpu" will track allocated GPUs of any +type ("tesla", "volta" or any other GPU type). + +Given a configuration of +"AccountingStorageTRES=gres/gpu:tesla,gres/gpu:volta" +Then "gres/gpu:tesla" and "gres/gpu:volta" will track jobs that explicitly +request those GPU types. +If a job requests GPUs, but does not explicitly specify the GPU type, then +its resource allocation will be accounted for as either "gres/gpu:tesla" or +"gres/gpu:volta", although the accounting may not match the actual GPU type +allocated to the job and the GPUs allocated to the job could be heterogeneous. +In an environment containing various GPU types, use of a job_submit plugin +may be desired in order to force jobs to explicitly specify some GPU type. + .TP \fBAccountingStorageType\fR The accounting storage mechanism type. 
Acceptable values at diff --git a/slurm/slurmdb.h b/slurm/slurmdb.h index 208abe9f529962d709e90d8cf7631eb5c55dfde3..101765e7e7170dce9aaef0a5775866da4734fea1 100644 --- a/slurm/slurmdb.h +++ b/slurm/slurmdb.h @@ -249,12 +249,13 @@ typedef struct { uint64_t alloc_secs; /* total amount of secs allocated if used in an accounting_list */ uint32_t rec_count; /* number of records alloc_secs is, DON'T PACK */ - uint64_t count; /* Count of tres on a given cluster, 0 if - listed generically. */ - uint32_t id; /* Database ID for the tres */ - char *name; /* Name of tres if type is generic like GRES - or License. */ - char *type; /* Type of tres (CPU, MEM, etc) */ + uint64_t count; /* Count of TRES on a given cluster, 0 if + * listed generically. */ + uint32_t id; /* Database ID for the TRES */ + char *name; /* Name of TRES if type is generic like GRES + * or License. May include optional GRES type + * (e.g. "gpu" or "gpu:tesla") */ + char *type; /* Type of TRES (CPU, MEM, etc) */ } slurmdb_tres_rec_t; /* slurmdb_assoc_cond_t is used in other structures below so diff --git a/src/common/assoc_mgr.c b/src/common/assoc_mgr.c index 913daa5d7663c9f0b7d660fc738170906bc8867b..f83959bb880331c6b7a6d64fce457cd0f21fc2e9 100644 --- a/src/common/assoc_mgr.c +++ b/src/common/assoc_mgr.c @@ -6106,7 +6106,7 @@ extern void assoc_mgr_normalize_assoc_shares(slurmdb_assoc_rec_t *assoc) /* * Find the position of the given TRES ID or type/name in the - * assoc_mgr_tres_array. If the ID isn't found -1 is returned. + * assoc_mgr_tres_array. If the TRES name or ID isn't found -1 is returned. 
*/ extern int assoc_mgr_find_tres_pos(slurmdb_tres_rec_t *tres_rec, bool locked) { @@ -6129,9 +6129,9 @@ extern int assoc_mgr_find_tres_pos(slurmdb_tres_rec_t *tres_rec, bool locked) tres_pos = i; break; } else if (!xstrcasecmp(assoc_mgr_tres_array[i]->type, - tres_rec->type) && - !xstrcasecmp(assoc_mgr_tres_array[i]->name, - tres_rec->name)) { + tres_rec->type) && + !xstrcasecmp(assoc_mgr_tres_array[i]->name, + tres_rec->name)) { tres_pos = i; break; } @@ -6143,8 +6143,51 @@ extern int assoc_mgr_find_tres_pos(slurmdb_tres_rec_t *tres_rec, bool locked) return tres_pos; } -/* The assoc_mgr tres read lock needs to be locked before calling this - * function and while using the returned record */ +/* + * Find the position of the given TRES name in the + * assoc_mgr_tres_array. Ignore anything after ":" in the TRES name. + * So tres_rec->name of "gpu" can match accounting TRES name of "gpu:tesla". + * If the TRES name isn't found -1 is returned. + */ +extern int assoc_mgr_find_tres_pos2(slurmdb_tres_rec_t *tres_rec, bool locked) +{ + int i, len, tres_pos = -1; + assoc_mgr_lock_t locks = { .tres = READ_LOCK }; + + if (!tres_rec->type) + return tres_pos; + + if (!locked) + assoc_mgr_lock(&locks); + + xassert(assoc_mgr_tres_array); + xassert(g_tres_count); + xassert(assoc_mgr_tres_array[g_tres_count - 1]); + + len = strlen(tres_rec->name); + for (i = 0; i < g_tres_count; i++) { + if (xstrcasecmp(assoc_mgr_tres_array[i]->type, tres_rec->type)) + continue; + if (xstrncasecmp(assoc_mgr_tres_array[i]->name, tres_rec->name, + len) || + (assoc_mgr_tres_array[i]->name[len] != ':')) + continue; + tres_pos = i; + break; + } + + if (!locked) + assoc_mgr_unlock(&locks); + + return tres_pos; +} + +/* + * Calls assoc_mgr_find_tres_pos and returns the pointer in the + * assoc_mgr_tres_array. + * NOTE: The assoc_mgr tres read lock needs to be locked before calling this + * function and while using the returned record. 
+ */ extern slurmdb_tres_rec_t *assoc_mgr_find_tres_rec(slurmdb_tres_rec_t *tres_rec) { int pos = assoc_mgr_find_tres_pos(tres_rec, 1); @@ -6155,6 +6198,23 @@ extern slurmdb_tres_rec_t *assoc_mgr_find_tres_rec(slurmdb_tres_rec_t *tres_rec) return assoc_mgr_tres_array[pos]; } +/* + * Calls assoc_mgr_find_tres_pos2 and returns the pointer in the + * assoc_mgr_tres_array. Ignores GRES "type" option. + * NOTE: The assoc_mgr tres read lock needs to be locked before calling this + * function and while using the returned record. + */ +extern slurmdb_tres_rec_t *assoc_mgr_find_tres_rec2( + slurmdb_tres_rec_t *tres_rec) +{ + int pos = assoc_mgr_find_tres_pos2(tres_rec, 1); + + if (pos == -1) + return NULL; + else + return assoc_mgr_tres_array[pos]; +} + extern int assoc_mgr_set_tres_cnt_array(uint64_t **tres_cnt, char *tres_str, uint64_t init_val, bool locked) { diff --git a/src/common/assoc_mgr.h b/src/common/assoc_mgr.h index 7f433a4547375a452e9a2fb6317aa40064f8cdf4..0744933ac59cff3f87beec01141412c32dfbc5c6 100644 --- a/src/common/assoc_mgr.h +++ b/src/common/assoc_mgr.h @@ -445,11 +445,20 @@ extern void assoc_mgr_normalize_assoc_shares(slurmdb_assoc_rec_t *assoc); /* * Find the position of the given TRES ID or type/name in the - * assoc_mgr_tres_array. If the ID isn't found -1 is returned. + * assoc_mgr_tres_array. If the TRES name or ID isn't found -1 is returned. */ extern int assoc_mgr_find_tres_pos(slurmdb_tres_rec_t *tres_rec, bool locked); -/* calls assoc_mgr_find_tres_pos and returns the pointer in the +/* + * Find the position of the given TRES name in the + * assoc_mgr_tres_array. Ignore anything after ":" in the TRES name. + * So tres_rec->name of "gpu" can match accounting TRES name of "gpu:tesla". + * If the TRES name isn't found -1 is returned. + */ +extern int assoc_mgr_find_tres_pos2(slurmdb_tres_rec_t *tres_rec, bool locked); + +/* + * Calls assoc_mgr_find_tres_pos and returns the pointer in the * assoc_mgr_tres_array. 
* NOTE: The assoc_mgr tres read lock needs to be locked before calling this * function and while using the returned record. @@ -457,6 +466,15 @@ extern int assoc_mgr_find_tres_pos(slurmdb_tres_rec_t *tres_rec, bool locked); extern slurmdb_tres_rec_t *assoc_mgr_find_tres_rec( slurmdb_tres_rec_t *tres_rec); +/* + * Calls assoc_mgr_find_tres_pos2 and returns the pointer in the + * assoc_mgr_tres_array. Ignores GRES "type" option. + * NOTE: The assoc_mgr tres read lock needs to be locked before calling this + * function and while using the returned record. + */ +extern slurmdb_tres_rec_t *assoc_mgr_find_tres_rec2( + slurmdb_tres_rec_t *tres_rec); + /* fills in allocates and sets tres_cnt based off tres_str * OUT tres_cnt - array to be filled in g_tres_cnt in length * IN tres_str - simple format of tres used with id and count set diff --git a/src/common/gres.c b/src/common/gres.c index a8e62ace5ffe5b4c93f2b01224ddd955c3f9e303..2de28499a77ab7b0d1c8e8ceb347d7ea5998860f 100644 --- a/src/common/gres.c +++ b/src/common/gres.c @@ -11143,25 +11143,48 @@ extern char *gres_2_tres_str(List gres_list, bool is_job, bool locked) tres_str ? "," : "", tres_rec->id, count); - /* - * Now lets put of the : name tres if we are tracking - * it as well. This would be handy for gres like - * gpu:tesla, where you might want to track both as TRES. - */ - if (col_name && (i < gres_context_cnt)) { - tres_req.name = xstrdup_printf( - "%s%s", - gres_context[i].gres_name_colon, - col_name); - tres_rec = assoc_mgr_find_tres_rec(&tres_req); - xfree(tres_req.name); - if (tres_rec && - slurmdb_find_tres_count_in_string( - tres_str, tres_rec->id) == INFINITE64) - /* New gres */ - xstrfmtcat(tres_str, "%s%u=%"PRIu64, - tres_str ? "," : "", - tres_rec->id, count); + if (i < gres_context_cnt) { + if (col_name) { + /* + * Now let's add the "name:type" TRES if we are + * tracking it as well. This would be handy + * for GRES like "gpu:tesla", where you might + * want to track both as TRES. 
+ */ + tres_req.name = xstrdup_printf( + "%s%s", + gres_context[i].gres_name_colon, + col_name); + tres_rec = assoc_mgr_find_tres_rec(&tres_req); + xfree(tres_req.name); + if (tres_rec && + slurmdb_find_tres_count_in_string( + tres_str, tres_rec->id) == INFINITE64) + /* New GRES */ + xstrfmtcat(tres_str, "%s%u=%"PRIu64, + tres_str ? "," : "", + tres_rec->id, count); + } else { + /* + * Job allocated GRES without "type" + * specification, but Slurm is only accounting + * for this GRES by specific "type", so pick + * some valid "type" to get some accounting. + * Although the reported "type" may not be + * accurate, it is better than nothing... + */ + tres_req.name = xstrdup_printf( + "%s", gres_context[i].gres_name); + tres_rec = assoc_mgr_find_tres_rec2(&tres_req); + xfree(tres_req.name); + if (tres_rec && + slurmdb_find_tres_count_in_string( + tres_str, tres_rec->id) == INFINITE64) + /* New GRES */ + xstrfmtcat(tres_str, "%s%u=%"PRIu64, + tres_str ? "," : "", + tres_rec->id, count); + } } } list_iterator_destroy(itr); @@ -11206,8 +11229,17 @@ static void _set_type_tres_cnt(gres_state_type_enum_t state_type, assoc_mgr_lock(&locks); slurm_mutex_lock(&gres_context_lock); + /* Initialize all GRES counters to zero. Increment them later. 
*/ + for (i = 0; i < gres_context_cnt; i++) { + tres_rec.name = gres_context[i].gres_name; + if (tres_rec.name && + ((tres_pos = assoc_mgr_find_tres_pos(&tres_rec,true)) !=-1)) + tres_cnt[tres_pos] = 0; + } + itr = list_iterator_create(gres_list); while ((gres_state_ptr = list_next(itr))) { + bool set_total = false; for (i = 0; i < gres_context_cnt; i++) { if (gres_context[i].plugin_id == gres_state_ptr->plugin_id) { @@ -11215,7 +11247,6 @@ static void _set_type_tres_cnt(gres_state_type_enum_t state_type, break; } } - if (!tres_rec.name) { debug("%s: couldn't find name", __func__); continue; @@ -11242,13 +11273,23 @@ static void _set_type_tres_cnt(gres_state_type_enum_t state_type, state_type); continue; } - /* Set main TRES's count (i.e. if no GRES "type"). */ - if ((tres_pos = assoc_mgr_find_tres_pos(&tres_rec, true)) != -1) - tres_cnt[tres_pos] = count; + /* + * Set main TRES's count (i.e. if no GRES "type" is being + * accounted for). We need to increment counter since the job + * may have been allocated multiple GRES types, but Slurm is + * only configured to track the total count. For example, a job + * allocated 1 GPU of type "tesla" and 1 GPU of type "volta", + * but we want to record that the job was allocated a total of + * 2 GPUs. + */ + if ((tres_pos = assoc_mgr_find_tres_pos(&tres_rec,true)) != -1){ + tres_cnt[tres_pos] += count; + set_total = true; + } /* * Set TRES count for GRES model types. This would be handy for - * GRES like gpu:tesla, where you might want to track both as + * GRES like "gpu:tesla", where you might want to track both as * TRES. 
*/ switch (state_type) { @@ -11263,11 +11304,25 @@ static void _set_type_tres_cnt(gres_state_type_enum_t state_type, "%s%s", gres_context[i].gres_name_colon, col_name); - if ((tres_pos = assoc_mgr_find_tres_pos( &tres_rec, true)) != -1) tres_cnt[tres_pos] = count; xfree(tres_rec.name); + } else if (!set_total) { + /* + * Job allocated GRES without "type" + * specification, but Slurm is only accounting + * for this GRES by specific "type", so pick + * some valid "type" to get some accounting. + * Although the reported "type" may not be + * accurate, it is better than nothing... + */ + tres_rec.name = xstrdup_printf( + "%s", gres_context[i].gres_name); + if ((tres_pos = assoc_mgr_find_tres_pos2( + &tres_rec, true)) != -1) + tres_cnt[tres_pos] = count; + xfree(tres_rec.name); } break; } diff --git a/src/common/gres.h b/src/common/gres.h index e61473af57d20bd957cc4760fd73055508519eb8..b2ef0ce89d26b0263f10cf4a502e692be1faeab3 100644 --- a/src/common/gres.h +++ b/src/common/gres.h @@ -1180,7 +1180,8 @@ extern int gres_get_step_info(List step_gres_list, char *gres_name, extern gres_job_state_t *gres_get_job_state(List gres_list, char *name); extern gres_step_state_t *gres_get_step_state(List gres_list, char *name); -/* Translate a gres_list into a tres_str +/* + * Translate a gres_list into a tres_str * IN gres_list - filled in with gres_job_state_t or gres_step_state_t's * IN is_job - if is job function expects gres_job_state_t's else * gres_step_state_t's diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index b3e9cd62c7443c2bd0cb56e29d0e709adb790e9f..5b3eec721bafa85d06f863f4647c814447e319f7 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -946,7 +946,7 @@ struct step_record { dynamic_plugin_data_t *switch_job; /* switch context, opaque */ time_t time_last_active; /* time step was last found on node */ time_t tot_sus_time; /* total time in suspended state */ - char *tres_alloc_str; /* simple tres string for step */ + char 
*tres_alloc_str; /* simple TRES string for step */ char *tres_bind; /* Task to TRES binding directives */ char *tres_fmt_alloc_str; /* formatted tres string for step */ char *tres_freq; /* TRES frequency directives */