diff --git a/doc/man/man1/sreport.1 b/doc/man/man1/sreport.1 index e60fbeecbff7b0c7df684d1da94fe94d73d47c69..219ab933c2680c75ecc323198803fa3c9acb9778 100644 --- a/doc/man/man1/sreport.1 +++ b/doc/man/man1/sreport.1 @@ -239,6 +239,10 @@ List of partitions jobs ran on to include in report. Default is all. When used with the Sizes report will print number of jobs ran instead of time used. .TP +.B TopCount=<OPT> +Used in the TopUsage report. Change the number of users displayed. +Default is 10. +.TP .B Users=<OPT> List of users jobs to include in report. Default is all. .RE diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 4fc62e540f26e9c12b005c1e659c8f81478d2a91..5ed417e62b78c8239676461b33b82fd788b6d2d1 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -941,25 +941,45 @@ The default value is 30 seconds. .TP \fBSchedulerType\fR -Identifies the type of scheduler to be used. Acceptable values include -"sched/builtin" for the built\-in FIFO scheduler, -"sched/backfill" for a backfill scheduling module to augment -the default FIFO scheduling, -"sched/gang" for gang scheduler (time\-slicing of parallel jobs), -"sched/hold" to hold all newly arriving jobs if a file "/etc/slurm.hold" -exists otherwise use the built\-in FIFO scheduler, and -"sched/wiki" for the Wiki interface to the Maui Scheduler. -The default value is "sched/builtin". +Identifies the type of scheduler to be used. +Note the \fBslurmctld\fR daemon must be restarted for a change in +scheduler type to become effective (reconfiguring a running daemon has +no effect for this parameter). +The \fBscontrol\fR command can be used to manually change job priorities +if desired. +Acceptable values include: +.RS +.TP +\fBsched/builtin\fR +for the built\-in FIFO (First In First Out) scheduler. +This is the default. +.TP +\fBsched/backfill\fR +for a backfill scheduling module to augment the default FIFO scheduling. 
Backfill scheduling will initiate lower\-priority jobs if doing -so does not delay the expected initiation time of any higher +so does not delay the expected initiation time of any higher priority job. -When initially setting the value to "sched/wiki", any pending jobs -must have their priority set to zero (held). -When changing the value from "sched/wiki", all pending jobs -should have their priority change from zero to some large number. -The \fBscontrol\fR command can be used to change job priorities. -The \fBslurmctld\fR daemon must be restarted for a change in -scheduler type to become effective. +Effectiveness of backfill scheduling is dependent upon users specifying +job time limits, otherwise all jobs will have the same time limit and +backfilling is impossible. +.TP +\fBsched/gang\fR +for gang scheduler (time\-slicing of parallel jobs). This also supports +preemption of jobs in lower priority partitions (queues). +See the \fIPreemption\fR web page for details: +.br +\fIhttps://computing.llnl.gov/linux/slurm/preempt.html\fR +.TP +\fBsched/hold\fR +to hold all newly arriving jobs if a file "/etc/slurm.hold" +exists otherwise use the built\-in FIFO scheduler +.TP +\fBsched/wiki\fR +for the Wiki interface to the Maui Scheduler +.TP +\fBsched/wiki2\fR +for the Wiki interface to the Moab Cluster Suite +.RE .TP \fBSelectType\fR diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 2abbe679a2f3e927d3813aeaa1cc5769bbf7c9c4..e15137dc66d1e46298f128f40de7332a35c021fa 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -2034,7 +2034,6 @@ unpack_error: return SLURM_ERROR; } - static int _unpack_partition_info_msg(partition_info_msg_t ** msg, Buf buffer) { diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c index be6373d80e2e5bb0097eb0cbdb4b8d9bd51a2224..49775bb4d46a8fb262b0c4914e0b507b1f09bf9e 100644 --- 
a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c +++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c @@ -1131,6 +1131,10 @@ static int _setup_association_cond_limits(acct_association_cond_t *assoc_cond, } list_iterator_destroy(itr); xstrcat(*extra, ")"); + } else if(assoc_cond->user_list) { + /* we want all the users, but no non-user associations */ + set = 1; + xstrfmtcat(*extra, " && (%s.user!='')", prefix); } if(assoc_cond->partition_list @@ -6790,7 +6794,6 @@ extern List acct_storage_p_get_users(mysql_conn_t *mysql_conn, uid_t uid, MYSQL_ROW row; uint16_t private_data = 0; acct_user_rec_t user; - acct_wckey_cond_t *wckey_cond = NULL; /* if this changes you will need to edit the corresponding enum */ char *user_req_inx[] = { @@ -6925,33 +6928,6 @@ empty: user_list = list_create(destroy_acct_user_rec); - if(user_cond && user_cond->with_assocs) { - /* We are going to be freeing the inners of - this list in the user->name so we don't - free it here - */ - if(!user_cond->assoc_cond) - user_cond->assoc_cond = xmalloc( - sizeof(acct_association_cond_t)); - - if(user_cond->assoc_cond->user_list) - list_destroy(user_cond->assoc_cond->user_list); - user_cond->assoc_cond->user_list = list_create(NULL); - } - - if(user_cond && user_cond->with_wckeys) { - /* We are going to be freeing the inners of - this list in the user->name so we don't - free it here - */ - wckey_cond = xmalloc(sizeof(acct_wckey_cond_t)); - wckey_cond->user_list = list_create(NULL); - - if(user_cond->assoc_cond && user_cond->assoc_cond->cluster_list) - wckey_cond->cluster_list = - user_cond->assoc_cond->cluster_list; - } - while((row = mysql_fetch_row(result))) { acct_user_rec_t *user = xmalloc(sizeof(acct_user_rec_t)); /* uid_t pw_uid; */ @@ -6978,23 +6954,26 @@ empty: if(user_cond && user_cond->with_coords) _get_user_coords(mysql_conn, user); - - - if(user_cond && user_cond->with_assocs) - list_append(user_cond->assoc_cond->user_list, - user->name); - - if(user_cond 
&& user_cond->with_wckeys) - list_append(wckey_cond->user_list, user->name); } mysql_free_result(result); - if(user_cond && user_cond->with_assocs - && list_count(user_cond->assoc_cond->user_list)) { + if(user_cond && user_cond->with_assocs) { ListIterator assoc_itr = NULL; acct_user_rec_t *user = NULL; acct_association_rec_t *assoc = NULL; - List assoc_list = acct_storage_p_get_associations( + List assoc_list = NULL; + + /* Make sure we don't get any non-user associations + * this is done by at least having a user_list + * defined */ + if(!user_cond->assoc_cond) + user_cond->assoc_cond = + xmalloc(sizeof(acct_association_cond_t)); + + if(!user_cond->assoc_cond->user_list) + user_cond->assoc_cond->user_list = list_create(NULL); + + assoc_list = acct_storage_p_get_associations( mysql_conn, uid, user_cond->assoc_cond); if(!assoc_list) { @@ -7024,15 +7003,22 @@ empty: } get_wckeys: - if(wckey_cond) { + if(user_cond && user_cond->with_wckeys) { ListIterator wckey_itr = NULL; acct_user_rec_t *user = NULL; acct_wckey_rec_t *wckey = NULL; - List wckey_list = acct_storage_p_get_wckeys( - mysql_conn, uid, wckey_cond); - - wckey_cond->cluster_list = NULL; - destroy_acct_wckey_cond(wckey_cond); + List wckey_list = NULL; + acct_wckey_cond_t wckey_cond; + + memset(&wckey_cond, 0, sizeof(acct_wckey_cond_t)); + if(user_cond->assoc_cond) { + wckey_cond.user_list = + user_cond->assoc_cond->user_list; + wckey_cond.cluster_list = + user_cond->assoc_cond->cluster_list; + } + wckey_list = acct_storage_p_get_wckeys( + mysql_conn, uid, &wckey_cond); if(!wckey_list) { error("no wckeys"); diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index 58e5d65134bb86e4bd72cb2d64fa0221a0b86b09..e7ce99fbc3e2526cecb07ffc2934d6b41851e428 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -314,7 +314,7 @@ static int _state_str2int(const char *state_str) return state_val; } -#ifdef HAVE_BG +#ifdef HAVE_3D /* Used to get the general name of the machine, used 
primarily * for bluegene systems. Not in general use because some systems * have multiple prefix's such as foo[1-1000],bar[1-1000]. @@ -392,7 +392,7 @@ static int _build_single_nodeline_info(slurm_conf_node_t *node_ptr, goto cleanup; } -#ifdef HAVE_BG +#ifdef HAVE_3D if (conf->node_prefix == NULL) _set_node_prefix(node_ptr->nodenames, conf); #endif @@ -574,7 +574,7 @@ static int _build_all_nodeline_info(slurm_ctl_conf_t *conf) _build_single_nodeline_info(node, config_ptr, conf); } xfree(highest_node_name); -#ifdef HAVE_BG +#ifdef HAVE_3D { char *node_000 = NULL; struct node_record *node_rec = NULL; diff --git a/src/sreport/common.c b/src/sreport/common.c index e6fb85f3e751ea8b708f81f0c823cfda03a5e564..4a3c691e4840cc0b5493a2ee3f6563c8663f1e78 100644 --- a/src/sreport/common.c +++ b/src/sreport/common.c @@ -433,3 +433,40 @@ extern int sort_assoc_dec(sreport_assoc_rec_t *assoc_a, return 0; }
+/*
+ * get_uint - parse a base-10 integer option value into a uint32_t
+ * IN in_value - option text; it is passed through strip_quotes() first
+ * OUT out_value - parsed number, or INFINITE when the number is negative
+ * IN type - option name, used only in the error message
+ * RET SLURM_SUCCESS, or SLURM_ERROR (after logging) when the text is
+ *	not a whole number or does not fit in 32 bits; *out_value is left
+ *	untouched on error
+ */
+extern int get_uint(char *in_value, uint32_t *out_value, char *type)
+{
+	char *ptr = NULL, *meat = NULL;
+	long num;
+
+	if(!(meat = strip_quotes(in_value, NULL))) {
+		error("Problem with strip_quotes");
+		return SLURM_ERROR;
+	}
+	num = strtol(meat, &ptr, 10);
+	/* Reject input with no digits at all (ptr == meat, e.g. "" or
+	 * "abc"), input with trailing junk (e.g. "10abc"), and values
+	 * that would be silently truncated by the uint32_t cast below. */
+	if ((ptr == meat) || ptr[0]
+	    || ((num > 0) && ((unsigned long) num > 0xffffffffUL))) {
+		error("Invalid value for %s (%s)", type, meat);
+		xfree(meat);
+		return SLURM_ERROR;
+	}
+	xfree(meat);
+
+	if (num < 0)
+		*out_value = INFINITE;	/* flag to clear */
+	else
+		*out_value = (uint32_t) num;
+	return SLURM_SUCCESS;
+}
+
diff --git a/src/sreport/sreport.c b/src/sreport/sreport.c index 6a461afe317d6b79a4b5a189c8889da52046ef85..4136d412754e70eb76957accf3060503597282b8 100644 --- a/src/sreport/sreport.c +++ b/src/sreport/sreport.c @@ -648,6 +648,8 @@ sreport [<OPTION>] [<COMMAND>] \n\ - Group - Group all accounts together for each user.\n\ Default is a separate entry for each user\n\ and account reference. \n\ + - TopCount=<OPT> - Used in the TopUsage report. Change the \n\ + number of users displayed.
Default is 10.\n\ - Users=<OPT> - List of users jobs to include in report. \n\ Default is all. \n\ \n\ diff --git a/src/sreport/sreport.h b/src/sreport/sreport.h index 05ff40cfe413866b89f70a8316a75ef72e7f8a4f..a1f2dc6177526560e66c9a3e68d9244849b13717 100644 --- a/src/sreport/sreport.h +++ b/src/sreport/sreport.h @@ -153,5 +153,6 @@ extern int sort_cluster_dec(sreport_cluster_rec_t *cluster_a, extern int sort_assoc_dec(sreport_assoc_rec_t *assoc_a, sreport_assoc_rec_t *assoc_b); +extern int get_uint(char *in_value, uint32_t *out_value, char *type); #endif /* HAVE_SREPORT_H */ diff --git a/src/sreport/user_reports.c b/src/sreport/user_reports.c index 56e6bf12fecc15273813bf3ad812745a2c99d4bd..784cc63fa4c0887b6a1237418f3754aa9b46a3df 100644 --- a/src/sreport/user_reports.c +++ b/src/sreport/user_reports.c @@ -49,7 +49,7 @@ enum { static List print_fields_list = NULL; /* types are of print_field_t */ static bool group_accts = false; -static int top_limit = 10; +static uint32_t top_limit = 10; static int _set_cond(int *start, int argc, char *argv[], acct_user_cond_t *user_cond, List format_list) @@ -131,6 +131,11 @@ static int _set_cond(int *start, int argc, char *argv[], MAX(command_len, 1))) { assoc_cond->usage_start = parse_time(argv[i]+end, 1); set = 1; + } else if (!strncasecmp (argv[i], "TopCount", + MAX(command_len, 1))) { + if (get_uint(argv[i]+end, &top_limit, "TopCount") + != SLURM_SUCCESS) + exit_code = 1; } else { exit_code=1; fprintf(stderr, " Unknown condition: %s\n" @@ -240,17 +245,20 @@ extern int user_top(int argc, char *argv[]) { int rc = SLURM_SUCCESS; acct_user_cond_t *user_cond = xmalloc(sizeof(acct_user_cond_t)); + acct_cluster_cond_t cluster_cond; ListIterator itr = NULL; ListIterator itr2 = NULL; ListIterator itr3 = NULL; ListIterator cluster_itr = NULL; List format_list = list_create(slurm_destroy_char); List user_list = NULL; + List usage_cluster_list = NULL; List cluster_list = list_create(destroy_sreport_cluster_rec); char *object = NULL; int 
i=0; acct_user_rec_t *user = NULL; + acct_cluster_rec_t *cluster = NULL; acct_association_rec_t *assoc = NULL; acct_accounting_rec_t *assoc_acct = NULL; sreport_user_rec_t *sreport_user = NULL; @@ -274,6 +282,58 @@ extern int user_top(int argc, char *argv[]) fprintf(stderr, " Problem with user query.\n"); goto end_it; } + /* We have to get the clusters here or we will be unable to + get the correct total time for the cluster if associations + are not enforced. + */ + memset(&cluster_cond, 0, sizeof(acct_cluster_cond_t)); + cluster_cond.with_usage = 1; + cluster_cond.with_deleted = 1; + cluster_cond.usage_end = user_cond->assoc_cond->usage_end; + cluster_cond.usage_start = user_cond->assoc_cond->usage_start; + cluster_cond.cluster_list = user_cond->assoc_cond->cluster_list; + usage_cluster_list = acct_storage_g_get_clusters( + db_conn, my_uid, &cluster_cond); + if(!usage_cluster_list) { + exit_code=1; + fprintf(stderr, " Problem with cluster query.\n"); + goto end_it; + } + + itr = list_iterator_create(usage_cluster_list); + while((cluster = list_next(itr))) { + cluster_accounting_rec_t *accting = NULL; + + /* check to see if this cluster is around during the + time we are looking at */ + if(!cluster->accounting_list + || !list_count(cluster->accounting_list)) + continue; + + sreport_cluster = xmalloc(sizeof(sreport_cluster_rec_t)); + + list_append(cluster_list, sreport_cluster); + + sreport_cluster->name = xstrdup(cluster->name); + sreport_cluster->user_list = + list_create(destroy_sreport_user_rec); + + /* get the amount of time and the average cpu count + during the time we are looking at */ + cluster_itr = list_iterator_create(cluster->accounting_list); + while((accting = list_next(cluster_itr))) { + sreport_cluster->cpu_secs += accting->alloc_secs + + accting->down_secs + accting->idle_secs + + accting->resv_secs; + sreport_cluster->cpu_count += accting->cpu_count; + } + list_iterator_destroy(cluster_itr); + + sreport_cluster->cpu_count /= + 
list_count(cluster->accounting_list); + } + list_iterator_destroy(itr); + list_destroy(usage_cluster_list); if(print_fields_have_header) { char start_char[20]; @@ -367,6 +427,9 @@ extern int user_top(int argc, char *argv[]) } } if(!sreport_cluster) { + error("This cluster '%s' hasn't " + "registered yet, but we have jobs " + "that ran?", assoc->cluster); sreport_cluster = xmalloc(sizeof(sreport_cluster_rec_t)); list_append(cluster_list, sreport_cluster); @@ -399,8 +462,8 @@ extern int user_top(int argc, char *argv[]) while((assoc_acct = list_next(itr3))) { sreport_user->cpu_secs += (uint64_t)assoc_acct->alloc_secs; - sreport_cluster->cpu_secs += - (uint64_t)assoc_acct->alloc_secs; +/* sreport_cluster->cpu_secs += */ +/* (uint64_t)assoc_acct->alloc_secs; */ } list_iterator_destroy(itr3); }