From 44e764dae5503680a92c74b706cd61c2e5127fc8 Mon Sep 17 00:00:00 2001 From: Danny Auble <da@llnl.gov> Date: Tue, 30 Dec 2008 22:32:37 +0000 Subject: [PATCH] svn merge -r16104:16120 https://eris.llnl.gov/svn/slurm/branches/slurm-1.3 --- doc/man/man1/sreport.1 | 4 + doc/man/man5/slurm.conf.5 | 54 ++++++++----- src/common/slurm_protocol_pack.c | 1 - .../mysql/accounting_storage_mysql.c | 76 ++++++++----------- src/slurmctld/read_config.c | 6 +- src/sreport/common.c | 24 ++++++ src/sreport/sreport.c | 2 + src/sreport/sreport.h | 1 + src/sreport/user_reports.c | 69 ++++++++++++++++- 9 files changed, 168 insertions(+), 69 deletions(-) diff --git a/doc/man/man1/sreport.1 b/doc/man/man1/sreport.1 index e60fbeecbff..219ab933c26 100644 --- a/doc/man/man1/sreport.1 +++ b/doc/man/man1/sreport.1 @@ -239,6 +239,10 @@ List of partitions jobs ran on to include in report. Default is all. When used with the Sizes report will print number of jobs ran instead of time used. .TP +.B TopCount=<OPT> +Used in the TopUsage report. Change the number of users displayed. +Default is 10. +.TP .B Users=<OPT> List of users jobs to include in report. Default is all. .RE diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 4fc62e540f2..5ed417e62b7 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -941,25 +941,45 @@ The default value is 30 seconds. .TP \fBSchedulerType\fR -Identifies the type of scheduler to be used. Acceptable values include -"sched/builtin" for the built\-in FIFO scheduler, -"sched/backfill" for a backfill scheduling module to augment -the default FIFO scheduling, -"sched/gang" for gang scheduler (time\-slicing of parallel jobs), -"sched/hold" to hold all newly arriving jobs if a file "/etc/slurm.hold" -exists otherwise use the built\-in FIFO scheduler, and -"sched/wiki" for the Wiki interface to the Maui Scheduler. -The default value is "sched/builtin". +Identifies the type of scheduler to be used. +Note the \fBslurmctld\fR daemon must be restarted for a change in +scheduler type to become effective (reconfiguring a running daemon has +no effect for this parameter). +The \fBscontrol\fR command can be used to manually change job priorities +if desired. +Acceptable values include: +.RS +.TP +\fBsched/builtin\fR +for the built\-in FIFO (First In First Out) scheduler. +This is the default. +.TP +\fBsched/backfill\fR +for a backfill scheduling module to augment the default FIFO scheduling. Backfill scheduling will initiate lower\-priority jobs if doing -so does not delay the expected initiation time of any higher +so does not delay the expected initiation time of any higher priority job. -When initially setting the value to "sched/wiki", any pending jobs -must have their priority set to zero (held). -When changing the value from "sched/wiki", all pending jobs -should have their priority change from zero to some large number. -The \fBscontrol\fR command can be used to change job priorities. -The \fBslurmctld\fR daemon must be restarted for a change in -scheduler type to become effective. +Effectiveness of backfill scheduling is dependent upon users specifying +job time limits, otherwise all jobs will have the same time limit and +backfilling is impossible. +.TP +\fBsched/gang\fR +for gang scheduler (time\-slicing of parallel jobs). This also supports +preemption of jobs in lower priority partitions (queues). +See the \fIPreemption\fR web page for details: +.br +\fIhttps://computing.llnl.gov/linux/slurm/preempt.html\fR +.TP +\fBsched/hold\fR +to hold all newly arriving jobs if a file "/etc/slurm.hold" +exists otherwise use the built\-in FIFO scheduler +.TP +\fBsched/wiki\fR +for the Wiki interface to the Maui Scheduler +.TP +\fBsched/wiki2\fR +for the Wiki interface to the Moab Cluster Suite +.RE .TP \fBSelectType\fR diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 2abbe679a2f..e15137dc66d 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -2034,7 +2034,6 @@ unpack_error: return SLURM_ERROR; } - static int _unpack_partition_info_msg(partition_info_msg_t ** msg, Buf buffer) { diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c index be6373d80e2..49775bb4d46 100644 --- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c +++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c @@ -1131,6 +1131,10 @@ static int _setup_association_cond_limits(acct_association_cond_t *assoc_cond, } list_iterator_destroy(itr); xstrcat(*extra, ")"); + } else if(assoc_cond->user_list) { + /* we want all the users, but no non-user associations */ + set = 1; + xstrfmtcat(*extra, " && (%s.user!='')", prefix); } if(assoc_cond->partition_list @@ -6790,7 +6794,6 @@ extern List acct_storage_p_get_users(mysql_conn_t *mysql_conn, uid_t uid, MYSQL_ROW row; uint16_t private_data = 0; acct_user_rec_t user; - acct_wckey_cond_t *wckey_cond = NULL; /* if this changes you will need to edit the corresponding enum */ char *user_req_inx[] = { @@ -6925,33 +6928,6 @@ empty: user_list = list_create(destroy_acct_user_rec); - if(user_cond && user_cond->with_assocs) { - /* We are going to be freeing the inners of - this list in the user->name so we don't - free it here - */ - if(!user_cond->assoc_cond) - user_cond->assoc_cond = xmalloc( - sizeof(acct_association_cond_t)); - - if(user_cond->assoc_cond->user_list) - list_destroy(user_cond->assoc_cond->user_list); - user_cond->assoc_cond->user_list = list_create(NULL); - } - - if(user_cond && user_cond->with_wckeys) { - /* We are going to be freeing the inners of - this list in the user->name so we don't - free it here - */ - wckey_cond = xmalloc(sizeof(acct_wckey_cond_t)); - wckey_cond->user_list = list_create(NULL); - - if(user_cond->assoc_cond && user_cond->assoc_cond->cluster_list) - wckey_cond->cluster_list = - user_cond->assoc_cond->cluster_list; - } - while((row = mysql_fetch_row(result))) { acct_user_rec_t *user = xmalloc(sizeof(acct_user_rec_t)); /* uid_t pw_uid; */ @@ -6978,23 +6954,26 @@ empty: if(user_cond && user_cond->with_coords) _get_user_coords(mysql_conn, user); - - - if(user_cond && user_cond->with_assocs) - list_append(user_cond->assoc_cond->user_list, - user->name); - - if(user_cond && user_cond->with_wckeys) - list_append(wckey_cond->user_list, user->name); } mysql_free_result(result); - if(user_cond && user_cond->with_assocs - && list_count(user_cond->assoc_cond->user_list)) { + if(user_cond && user_cond->with_assocs) { ListIterator assoc_itr = NULL; acct_user_rec_t *user = NULL; acct_association_rec_t *assoc = NULL; - List assoc_list = acct_storage_p_get_associations( + List assoc_list = NULL; + + /* Make sure we don't get any non-user associations + * this is done by at least having a user_list + * defined */ + if(!user_cond->assoc_cond) + user_cond->assoc_cond = + xmalloc(sizeof(acct_association_cond_t)); + + if(!user_cond->assoc_cond->user_list) + user_cond->assoc_cond->user_list = list_create(NULL); + + assoc_list = acct_storage_p_get_associations( mysql_conn, uid, user_cond->assoc_cond); if(!assoc_list) { @@ -7024,15 +7003,22 @@ empty: } get_wckeys: - if(wckey_cond) { + if(user_cond && user_cond->with_wckeys) { ListIterator wckey_itr = NULL; acct_user_rec_t *user = NULL; acct_wckey_rec_t *wckey = NULL; - List wckey_list = acct_storage_p_get_wckeys( - mysql_conn, uid, wckey_cond); - - wckey_cond->cluster_list = NULL; - destroy_acct_wckey_cond(wckey_cond); + List wckey_list = NULL; + acct_wckey_cond_t wckey_cond; + + memset(&wckey_cond, 0, sizeof(acct_wckey_cond_t)); + if(user_cond->assoc_cond) { + wckey_cond.user_list = + user_cond->assoc_cond->user_list; + wckey_cond.cluster_list = + user_cond->assoc_cond->cluster_list; + } + wckey_list = acct_storage_p_get_wckeys( + mysql_conn, uid, &wckey_cond); if(!wckey_list) { error("no wckeys"); diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index 58e5d65134b..e7ce99fbc3e 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -314,7 +314,7 @@ static int _state_str2int(const char *state_str) return state_val; } -#ifdef HAVE_BG +#ifdef HAVE_3D /* Used to get the general name of the machine, used primarily * for bluegene systems. Not in general use because some systems * have multiple prefix's such as foo[1-1000],bar[1-1000]. @@ -392,7 +392,7 @@ static int _build_single_nodeline_info(slurm_conf_node_t *node_ptr, goto cleanup; } -#ifdef HAVE_BG +#ifdef HAVE_3D if (conf->node_prefix == NULL) _set_node_prefix(node_ptr->nodenames, conf); #endif @@ -574,7 +574,7 @@ static int _build_all_nodeline_info(slurm_ctl_conf_t *conf) _build_single_nodeline_info(node, config_ptr, conf); } xfree(highest_node_name); -#ifdef HAVE_BG +#ifdef HAVE_3D { char *node_000 = NULL; struct node_record *node_rec = NULL; diff --git a/src/sreport/common.c b/src/sreport/common.c index e6fb85f3e75..4a3c691e484 100644 --- a/src/sreport/common.c +++ b/src/sreport/common.c @@ -433,3 +433,27 @@ extern int sort_assoc_dec(sreport_assoc_rec_t *assoc_a, return 0; } +extern int get_uint(char *in_value, uint32_t *out_value, char *type) +{ + char *ptr = NULL, *meat = NULL; + long num; + + if(!(meat = strip_quotes(in_value, NULL))) { + error("Problem with strip_quotes"); + return SLURM_ERROR; + } + num = strtol(meat, &ptr, 10); + if ((num == 0) && ptr && ptr[0]) { + error("Invalid value for %s (%s)", type, meat); + xfree(meat); + return SLURM_ERROR; + } + xfree(meat); + + if (num < 0) + *out_value = INFINITE; /* flag to clear */ + else + *out_value = (uint32_t) num; + return SLURM_SUCCESS; +} + diff --git a/src/sreport/sreport.c b/src/sreport/sreport.c index 6a461afe317..4136d412754 100644 --- a/src/sreport/sreport.c +++ b/src/sreport/sreport.c @@ -648,6 +648,8 @@ sreport [<OPTION>] [<COMMAND>] \n\ - Group - Group all accounts together for each user.\n\ Default is a separate entry for each user\n\ and account reference. \n\ + - TopCount=<OPT> - Used in the TopUsage report. Change the \n\ + number of users displayed. Default is 10.\n\ - Users=<OPT> - List of users jobs to include in report. \n\ Default is all. \n\ \n\ diff --git a/src/sreport/sreport.h b/src/sreport/sreport.h index 05ff40cfe41..a1f2dc61775 100644 --- a/src/sreport/sreport.h +++ b/src/sreport/sreport.h @@ -153,5 +153,6 @@ extern int sort_cluster_dec(sreport_cluster_rec_t *cluster_a, extern int sort_assoc_dec(sreport_assoc_rec_t *assoc_a, sreport_assoc_rec_t *assoc_b); +extern int get_uint(char *in_value, uint32_t *out_value, char *type); #endif /* HAVE_SREPORT_H */ diff --git a/src/sreport/user_reports.c b/src/sreport/user_reports.c index 56e6bf12fec..784cc63fa4c 100644 --- a/src/sreport/user_reports.c +++ b/src/sreport/user_reports.c @@ -49,7 +49,7 @@ enum { static List print_fields_list = NULL; /* types are of print_field_t */ static bool group_accts = false; -static int top_limit = 10; +static uint32_t top_limit = 10; static int _set_cond(int *start, int argc, char *argv[], acct_user_cond_t *user_cond, List format_list) @@ -131,6 +131,11 @@ static int _set_cond(int *start, int argc, char *argv[], MAX(command_len, 1))) { assoc_cond->usage_start = parse_time(argv[i]+end, 1); set = 1; + } else if (!strncasecmp (argv[i], "TopCount", + MAX(command_len, 1))) { + if (get_uint(argv[i]+end, &top_limit, "TopCount") + != SLURM_SUCCESS) + exit_code = 1; } else { exit_code=1; fprintf(stderr, " Unknown condition: %s\n" @@ -240,17 +245,20 @@ extern int user_top(int argc, char *argv[]) { int rc = SLURM_SUCCESS; acct_user_cond_t *user_cond = xmalloc(sizeof(acct_user_cond_t)); + acct_cluster_cond_t cluster_cond; ListIterator itr = NULL; ListIterator itr2 = NULL; ListIterator itr3 = NULL; ListIterator cluster_itr = NULL; List format_list = list_create(slurm_destroy_char); List user_list = NULL; + List usage_cluster_list = NULL; List cluster_list = list_create(destroy_sreport_cluster_rec); char *object = NULL; int i=0; acct_user_rec_t *user = NULL; + acct_cluster_rec_t *cluster = NULL; acct_association_rec_t *assoc = NULL; acct_accounting_rec_t *assoc_acct = NULL; sreport_user_rec_t *sreport_user = NULL; @@ -274,6 +282,58 @@ extern int user_top(int argc, char *argv[]) fprintf(stderr, " Problem with user query.\n"); goto end_it; } + /* We have to get the clusters here or we will be unable to + get the correct total time for the cluster if associations + are not enforced. + */ + memset(&cluster_cond, 0, sizeof(acct_cluster_cond_t)); + cluster_cond.with_usage = 1; + cluster_cond.with_deleted = 1; + cluster_cond.usage_end = user_cond->assoc_cond->usage_end; + cluster_cond.usage_start = user_cond->assoc_cond->usage_start; + cluster_cond.cluster_list = user_cond->assoc_cond->cluster_list; + usage_cluster_list = acct_storage_g_get_clusters( + db_conn, my_uid, &cluster_cond); + if(!usage_cluster_list) { + exit_code=1; + fprintf(stderr, " Problem with cluster query.\n"); + goto end_it; + } + + itr = list_iterator_create(usage_cluster_list); + while((cluster = list_next(itr))) { + cluster_accounting_rec_t *accting = NULL; + + /* check to see if this cluster is around during the + time we are looking at */ + if(!cluster->accounting_list + || !list_count(cluster->accounting_list)) + continue; + + sreport_cluster = xmalloc(sizeof(sreport_cluster_rec_t)); + + list_append(cluster_list, sreport_cluster); + + sreport_cluster->name = xstrdup(cluster->name); + sreport_cluster->user_list = + list_create(destroy_sreport_user_rec); + + /* get the amount of time and the average cpu count + during the time we are looking at */ + cluster_itr = list_iterator_create(cluster->accounting_list); + while((accting = list_next(cluster_itr))) { + sreport_cluster->cpu_secs += accting->alloc_secs + + accting->down_secs + accting->idle_secs + + accting->resv_secs; + sreport_cluster->cpu_count += accting->cpu_count; + } + list_iterator_destroy(cluster_itr); + + sreport_cluster->cpu_count /= + list_count(cluster->accounting_list); + } + list_iterator_destroy(itr); + list_destroy(usage_cluster_list); if(print_fields_have_header) { char start_char[20]; @@ -367,6 +427,9 @@ extern int user_top(int argc, char *argv[]) } } if(!sreport_cluster) { + error("This cluster '%s' hasn't " + "registered yet, but we have jobs " + "that ran?", assoc->cluster); sreport_cluster = xmalloc(sizeof(sreport_cluster_rec_t)); list_append(cluster_list, sreport_cluster); @@ -399,8 +462,8 @@ extern int user_top(int argc, char *argv[]) while((assoc_acct = list_next(itr3))) { sreport_user->cpu_secs += (uint64_t)assoc_acct->alloc_secs; - sreport_cluster->cpu_secs += - (uint64_t)assoc_acct->alloc_secs; +/* sreport_cluster->cpu_secs += */ +/* (uint64_t)assoc_acct->alloc_secs; */ } list_iterator_destroy(itr3); } -- GitLab