diff --git a/NEWS b/NEWS
index 6acb1c7ee9a70fea327e29deeaa74e2e4993b82e..ad6b617614a23f38bdc841c604dc02d7fbfd233d 100644
--- a/NEWS
+++ b/NEWS
@@ -227,12 +227,17 @@ documents those changes that are of interest to users and admins.
     If multiple hostslists are given on stdin they are combined to a
     union hostlist before being used in the way requested by the
     options.
- -- when using -j option in sacct no user restriction will applied unless
+ -- When using -j option in sacct no user restriction will be applied unless
     specified with the -u option.
- -- significant speed up for association based reports in sreport
+ -- For sched/wiki and sched/wiki2, change logging of wiki message traffic
+    from debug() to debug2(). Only seen if SlurmctldDebug is configured to
+    6 or higher.
+ -- Significant speed up for association based reports in sreport
  -- BLUEGENE - fix for checking if job can run with downed nodes.
     Previously sbatch etc would tell you node configuration not available
     now jobs are accepted but held until nodes are back up.
+ -- Fix in accounting so that nodes removed from the system while they
+    were down are recorded correctly.
 
 * Changes in SLURM 1.3.13
 =========================
diff --git a/contribs/slurmdb-direct/moab_2_slurmdb.pl b/contribs/slurmdb-direct/moab_2_slurmdb.pl
index 6dfe6d7e2ede9ce8df482e1e8a1f171eb5caa3b1..650663efd1d2858d743e86c14442602e4c6e52ba 100755
--- a/contribs/slurmdb-direct/moab_2_slurmdb.pl
+++ b/contribs/slurmdb-direct/moab_2_slurmdb.pl
@@ -46,7 +46,7 @@ use strict;
 
 use FindBin;
 use Getopt::Long 2.24 qw(:config no_ignore_case require_order);
-use lib "${FindBin::Bin}/../lib/perl";
+#use lib "${FindBin::Bin}/../lib/perl";
 use lib qw(/home/da/slurm/1.3/snowflake/lib/perl/5.8.8);
 use autouse 'Pod::Usage' => qw(pod2usage);
 use Slurm ':all';
@@ -56,12 +56,30 @@ BEGIN {
 	require "config.slurmdb.pl";
 }
 our ($logLevel, $db_conn_line, $db_job_table, $db_user, $db_passwd);
 my $set = 0;
+my $submit_set = 0;
+my $migrate_set = 0;
+my $start_set = 0;
+my $end_set = 0;
-my $sql = "INSERT INTO $db_job_table " .
-	"(jobid, associd, wckeyid, uid, gid, nodelist, " .
-	"cluster, account, partition, wckey, eligible, " .
-	"submit, start, name, track_steps, state, priority, " .
-	"req_cpus, alloc_cpus) VALUES ";
+my $submit_sql = "INSERT INTO $db_job_table " .
+	"(jobid, associd, wckeyid, track_steps, priority, uid, gid, cluster, " .
+	"account, partition, wckey, name, state, req_cpus, submit) VALUES ";
+
+my $migrate_sql = "INSERT INTO $db_job_table " .
+	"(jobid, associd, wckeyid, track_steps, priority, uid, gid, cluster, " .
+	"account, partition, wckey, name, state, req_cpus, " .
+	"submit, eligible) VALUES ";
+
+my $start_sql = "INSERT INTO $db_job_table " .
+	"(jobid, associd, wckeyid, track_steps, priority, uid, gid, cluster, " .
+	"account, partition, wckey, name, state, req_cpus, " .
+	"submit, eligible, start, nodelist, alloc_cpus) VALUES ";
+
+my $end_sql = "INSERT INTO $db_job_table " .
+	"(jobid, associd, wckeyid, track_steps, priority, uid, gid, cluster, " .
+	"account, partition, wckey, name, state, req_cpus, " .
+	"submit, eligible, start, nodelist, alloc_cpus, " .
+	"end, comp_code) VALUES ";
 
 foreach my $line (<STDIN>) {
 	chomp $line;
@@ -78,8 +96,8 @@ foreach my $line (<STDIN>) {
 	    $group,
 	    $wall_limit,
 	    $state,
-	    $class,
-	    $sub_time,
+	    $partition,
+	    $eligible_time,
 	    $dispatch_time,
 	    $start_time,
 	    $end_time,
@@ -91,7 +109,7 @@ foreach my $line (<STDIN>) {
 	    $node_disk_comp,
 	    $node_disk,
 	    $node_features,
-	    $queue_time,
+	    $submit_time,
 	    $alloc_tasks,
 	    $tasks_per_node,
 	    $qos,
@@ -101,12 +119,12 @@ foreach my $line (<STDIN>) {
 	    $rm_ext,
 	    $bypass_cnt,
 	    $cpu_secs,
-	    $partition,
+	    $cluster,
 	    $procs_per_task,
 	    $mem_per_task,
 	    $disk_per_task,
 	    $swap_per_task,
-	    $eligible_time,
+	    $other_time,
 	    $timeout,
 	    $alloc_hostlist,
 	    $rm_name,
@@ -125,32 +143,133 @@ foreach my $line (<STDIN>) {
 	    @extra) = split /\s+/, $line;
 	next if !$type;
 	next if $type ne "job";
-	next if $event eq "JOBMIGRATE";
 
 	my $uid = getpwnam($user);
 	my $gid = getgrnam($group);
 	$uid = -2 if !$uid;
 	$gid = -2 if !$gid;
+
+	# figure out the wckey
+	my $wckey = "";
+	if ($rm_ext =~ /wckey:(\w*)/) {
+		$wckey = $1;
+	}
+
+	if($partition =~ /\[(\w*)/) {
+		$partition = $1;
+	}
+
+	#figure out the cluster
+	if($cluster eq "ALL") {
+		if ($node_features =~ /\[(\w*)\]/) {
+			$cluster = $1;
+		} elsif ($rm_ext =~ /partition:(\w*)/) {
+			$cluster = $1;
+		} elsif ($rm_ext =~ /feature:(\w*)/) {
+			$cluster = $1;
+		} else {
+			$cluster = "";
+		}
+	}
+
+	if($message =~ /job\\20exceeded\\20wallclock\\20limit/) {
+		$event = "JOBTIMEOUT";
+	}
 
 	my $alloc_hl = Slurm::Hostlist::create($alloc_hostlist);
 	if($alloc_hl) {
 		Slurm::Hostlist::uniq($alloc_hl);
		$alloc_hl = Slurm::Hostlist::ranged_string($alloc_hl);
 	}
+
+	if($event eq "JOBSUBMIT") {
+		$submit_sql .= ", " if $submit_set;
+		$submit_sql .= "($id, 0, 0, 0, 0, $uid, $gid, \"$cluster\", " .
+			"\"$account\", \"$partition\", \"$wckey\", " .
+			"\"$executable\", 0, $req_tasks, $submit_time)";
+		$submit_set = 1;
+		$set = 1;
+	} elsif ($event eq "JOBMIGRATE") {
+		$migrate_sql .= ", " if $migrate_set;
+		# here, for some reason, the eligible time field does not
+		# hold the real eligible time, so we use the end time, which
+		# appears to be the best guess.
+		$migrate_sql .= "($id, 0, 0, 0, 0, $uid, $gid, \"$cluster\", " .
+			"\"$account\", \"$partition\", \"$wckey\", " .
+			"\"$executable\", 0, $req_tasks, $submit_time, " .
+			"$end_time)";
+		$migrate_set = 1;
+		$set = 1;
+	} elsif ($event eq "JOBSTART") {
+		$start_sql .= ", " if $start_set;
+
+		# req_tasks is used for alloc_tasks on purpose.
+		# alloc_tasks isn't always correct.
+		$start_sql .= "($id, 0, 0, 0, 0, $uid, $gid, \"$cluster\", " .
+			"\"$account\", \"$partition\", \"$wckey\", " .
+			"\"$executable\", 1, $req_tasks, $submit_time, " .
+			"$eligible_time, $start_time, \"$alloc_hl\", " .
+			"$req_tasks)";
+		$start_set = 1;
+		$set = 1;
+	} elsif (($event eq "JOBEND") || ($event eq "JOBCANCEL")
+		 || ($event eq "JOBFAILURE") || ($event eq "JOBTIMEOUT")) {
+		if($event eq "JOBEND") {
+			$state = 3;
+		} elsif($event eq "JOBCANCEL") {
+			$state = 4;
+		} elsif($event eq "JOBFAILURE") {
+			$state = 5;
+		} else {
+			$state = 6;
+		}
 
-	$sql .= ", " if $set;
-	$sql .= "($id, 0, 0, $uid, $gid, '$alloc_hl', )";
-	$set = 1;
+		$end_sql .= ", " if $end_set;
+		$end_sql .= "($id, 0, 0, 0, 0, $uid, $gid, \"$cluster\", " .
+			"\"$account\", \"$partition\", \"$wckey\", " .
+			"\"$executable\", $state, $req_tasks, $submit_time, " .
+			"$eligible_time, $start_time, \"$alloc_hl\", " .
+			"$req_tasks, $end_time, $comp_code)";
+		$end_set = 1;
+		$set = 1;
+	} else {
+		print "ERROR: unknown event of $event\n";
+		next;
+	}
 }
 
 exit 0 if !$set;
 
-$sql .= " on duplicate key update nodelist=VALUES(nodelist), account=VALUES(account), partition=VALUES(partition), wckey=VALUES(wckey), start=VALUES(start), alloc_cpus=VALUES(alloc_cpus)";
-print "$sql\n";
-exit 0;
 $db_user = (getpwuid($<))[0] if !$db_user;
 my $dbhandle = DBI->connect($db_conn_line, $db_user, $db_passwd,
 			    {AutoCommit => 1, RaiseError => 1});
-$dbhandle->do($sql);
+if($submit_set) {
+	$submit_sql .= " on duplicate key update jobid=VALUES(jobid)";
+	#print "submit\n$submit_sql\n\n";
+	$dbhandle->do($submit_sql);
+}
+
+if($migrate_set) {
+	$migrate_sql .= " on duplicate key update eligible=VALUES(eligible)";
+	#print "migrate\n$migrate_sql\n\n";
+	$dbhandle->do($migrate_sql);
+}
+
+if($start_set) {
+	$start_sql .= " on duplicate key update nodelist=VALUES(nodelist), " .
+		"account=VALUES(account), partition=VALUES(partition), " .
+		"wckey=values(wckey), start=VALUES(start), " .
+		"name=VALUES(name), state=values(state), " .
+		"alloc_cpus=values(alloc_cpus)";
+	#print "start\n$start_sql\n\n";
+	$dbhandle->do($start_sql);
+}
+
+if($end_set) {
+	$end_sql .= " on duplicate key update end=VALUES(end), " .
+		"state=VALUES(state), comp_code=VALUES(comp_code)";
+	#print "end\n$end_sql\n\n";
+	$dbhandle->do($end_sql);
+}
 
 exit 0;
diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
index 47f8b48c37e766a51a65afa07beff733ad4f4703..242233a7514abaa3e7be68d982e1ef78de53e504 100644
--- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
+++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
@@ -10063,12 +10063,16 @@ extern int clusteracct_storage_p_cluster_procs(mysql_conn_t *mysql_conn,
 	debug("%s has changed from %s cpus to %u",
 	      cluster, row[0], procs);
 
+	/* reset all the entries for this cluster since the procs
+	   changed; some of the downed nodes may have gone away.
+	   Request them again with ACCOUNTING_FIRST_REG */
 	query = xstrdup_printf(
 		"update %s set period_end=%d where cluster=\"%s\" "
-		"and period_end=0 and node_name=''",
+		"and period_end=0",
 		event_table, event_time, cluster);
 	rc = mysql_db_query(mysql_conn->db_conn, query);
 	xfree(query);
+	first = 1;
 	if(rc != SLURM_SUCCESS)
 		goto end_it;
 add_it:
diff --git a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c
index 82f5f0af85be65cc70d9f9d6cd1b91d1be7186f8..11fe731be5728e146c2ee8d57af49ea39b5a15a1 100644
--- a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c
+++ b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c
@@ -358,6 +358,13 @@ extern int setup_job_cond_limits(acct_job_cond_t *job_cond, char **extra)
 				   "(t1.eligible < %d "
 				   "&& (t1.end >= %d || t1.end = 0)))",
 				   job_cond->usage_end, job_cond->usage_start);
+	} else if(job_cond->usage_end) {
+		if(*extra)
+			xstrcat(*extra, " && (");
+		else
+			xstrcat(*extra, " where (");
+		xstrfmtcat(*extra,
+			   "(t1.eligible < %d))", job_cond->usage_end);
 	}
 
 	if(job_cond->state_list && list_count(job_cond->state_list)) {
diff --git a/src/plugins/sched/wiki/msg.c b/src/plugins/sched/wiki/msg.c
index 9c48666b349c8f6a84ff1259cd2e840d2cbb8a1e..a9560d0cd67ecfd04f5f5ba88ec7cf3a83e6ecce 100644
--- a/src/plugins/sched/wiki/msg.c
+++ b/src/plugins/sched/wiki/msg.c
@@ -475,7 +475,7 @@ static char * _recv_msg(slurm_fd new_fd)
 		return NULL;
 	}
 
-	debug("wiki msg recv:%s", buf);
+	debug2("wiki msg recv:%s", buf);
 	return buf;
 }
 
@@ -489,7 +489,7 @@ static size_t _send_msg(slurm_fd new_fd, char *buf, size_t size)
 	char header[10];
 	size_t data_sent;
 
-	debug("wiki msg send:%s", buf);
+	debug2("wiki msg send:%s", buf);
 
 	(void) sprintf(header, "%08lu\n", (unsigned long) size);
 	if (_write_bytes((int) new_fd, header, 9) != 9) {
diff --git a/src/plugins/sched/wiki2/msg.c b/src/plugins/sched/wiki2/msg.c
index 2c1f0639db345755bd6925e9ad8c218bea6c6249..578247f477da78a994753b31967ed5973a389c09 100644
--- a/src/plugins/sched/wiki2/msg.c
+++ b/src/plugins/sched/wiki2/msg.c
@@ -519,7 +519,7 @@ static char * _recv_msg(slurm_fd new_fd)
 		return NULL;
 	}
 
-	debug("wiki msg recv:%s", buf);
+	debug2("wiki msg recv:%s", buf);
 	return buf;
 }
 
@@ -533,7 +533,7 @@ static size_t _send_msg(slurm_fd new_fd, char *buf, size_t size)
 	char header[10];
 	size_t data_sent;
 
-	debug("wiki msg send:%s", buf);
+	debug2("wiki msg send:%s", buf);
 
 	(void) sprintf(header, "%08lu\n", (unsigned long) size);
 	if (_write_bytes((int) new_fd, header, 9) != 9) {
diff --git a/src/sacct/options.c b/src/sacct/options.c
index 7ece979b0bae13145d7fdcf2cebfa0736c7327bb..0da3735825fa781fddd0cbdbe47c538706cd4170 100644
--- a/src/sacct/options.c
+++ b/src/sacct/options.c
@@ -406,7 +406,7 @@ sacct [<OPTION>] \n\
                Print a list of fields that can be specified with the \n\
                '--format' option \n\
      -E, --endtime: \n\
-               Select jobs ending before this time. \n\
+               Select jobs started before this time. \n\
      -f, --file=file: \n\
                Read data from the specified file, rather than SLURM's \n\
                current accounting log file. (Only appliciable when \n\
@@ -455,7 +455,7 @@ sacct [<OPTION>] \n\
                completed (cd), failed (f), timeout (to), and \n\
                node_fail (nf). \n\
      -S, --starttime: \n\
-               Select jobs starting after this time. \n\
+               Select jobs eligible after this time. \n\
      -u, --uid, --user: \n\
                Use this comma seperated list of uids or user names \n\
                to select jobs to display.  By default, the running \n\
@@ -468,6 +468,12 @@ sacct [<OPTION>] \n\
      -X, --allocations: \n\
                Only show cumulative statistics for each job, not the \n\
                intermediate steps. \n\
+                                                                      \n\
+     Note, valid start/end time formats are... \n\
+               HH:MM[:SS] [AM|PM] \n\
+               MMDD[YY] or MM/DD[/YY] or MM.DD[.YY] \n\
+               MM/DD[/YY]-HH:MM[:SS] \n\
+               YYYY-MM-DD[THH[:MM[:SS]]] \n\
 \n");
 
 	return;
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 75e42efc915d4b920a1d06bfdecc85e11a8ee10d..e8c826168b5d7a05c4bb35c6aaa650bfb01bcdb0 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -434,16 +434,9 @@ int main(int argc, char *argv[])
 		}
 		unlock_slurmctld(config_write_lock);
 
-		if ((recover == 0) ||
-		    (!stat("/tmp/slurm_accounting_first", &stat_buf))) {
-			/* When first starting to write node state
-			 * information to Gold or SlurmDBD, create
-			 * a file called "/tmp/slurm_accounting_first"
-			 * to capture node initialization information */
-
+		if (recover == 0)
 			_accounting_mark_all_nodes_down("cold-start");
-			unlink("/tmp/slurm_accounting_first");
-		}
+
 	} else {
 		error("this host (%s) not valid controller (%s or %s)",
 		      node_name, slurmctld_conf.control_machine,
@@ -1679,8 +1672,9 @@ static void *_assoc_cache_mgr(void *no_data)
 	struct job_record *job_ptr = NULL;
 	acct_qos_rec_t qos_rec;
 	acct_association_rec_t assoc_rec;
+	/* Write lock on jobs, read lock on nodes and partitions */
 	slurmctld_lock_t job_write_lock =
-		{ READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };
+		{ NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK };
 
 	while(running_cache == 1) {
 		slurm_mutex_lock(&assoc_cache_mutex);
diff --git a/src/sreport/sreport.c b/src/sreport/sreport.c
index 19f0c754381d1cd1e45d335b9e23140e635aac19..1ba60a33d81902d2e52b9206c96777f5a7a49d52 100644
--- a/src/sreport/sreport.c
+++ b/src/sreport/sreport.c
@@ -701,6 +701,12 @@ sreport [<OPTION>] [<COMMAND>] \n\
            - Account, Cluster, Login, Proper, Used \n\
                                                    \n\
                                                    \n\
+     Note, valid start/end time formats are... \n\
+           HH:MM[:SS] [AM|PM] \n\
+           MMDD[YY] or MM/DD[/YY] or MM.DD[.YY] \n\
+           MM/DD[/YY]-HH:MM[:SS] \n\
+                                                   \n\
+                                                   \n\
      All commands and options are case-insensitive. \n\n");
 
 }
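
The rewritten moab_2_slurmdb.pl above keeps one growing INSERT statement per Moab
event type and relies on MySQL's INSERT ... ON DUPLICATE KEY UPDATE to merge the
four passes into a single row per job: the columns named in each statement's
trailing update clause are all that a later event is allowed to overwrite, so
JOBSUBMIT behaves as insert-if-absent, JOBSTART fills in start, nodelist and
alloc_cpus, and JOBEND/JOBCANCEL/JOBFAILURE/JOBTIMEOUT only touch end, state and
comp_code.  The sketch below exercises that pattern in isolation; it is not part
of the patch, and the DSN, credentials and throw-away table are placeholders (it
assumes the DBD::mysql driver and a reachable MySQL server).

#!/usr/bin/env perl
# Minimal sketch of the upsert pattern used by moab_2_slurmdb.pl above.
use strict;
use warnings;
use DBI;

# Placeholder connection; the real script reads this from config.slurmdb.pl.
my $dbh = DBI->connect("DBI:mysql:database=slurm_acct_db;host=localhost",
		       "slurm", "", {AutoCommit => 1, RaiseError => 1});

# Throw-away table with the same key idea as the job table: one row per jobid.
$dbh->do("CREATE TEMPORARY TABLE demo_job (" .
	 "jobid INT UNSIGNED PRIMARY KEY, state SMALLINT NOT NULL, " .
	 "start INT UNSIGNED NOT NULL DEFAULT 0, " .
	 "end INT UNSIGNED NOT NULL DEFAULT 0, comp_code INT NOT NULL DEFAULT 0)");

# JOBSTART-style statement: creates the row (or refreshes start on a replay).
$dbh->do("INSERT INTO demo_job (jobid, state, start) " .
	 "VALUES (1234, 1, 1236000000) " .
	 "ON DUPLICATE KEY UPDATE start=VALUES(start), state=VALUES(state)");

# JOBEND-style statement: same jobid, so the duplicate-key clause fires and
# only the completion columns are touched; the recorded start time survives.
$dbh->do("INSERT INTO demo_job (jobid, state, end, comp_code) " .
	 "VALUES (1234, 3, 1236003600, 0) " .
	 "ON DUPLICATE KEY UPDATE end=VALUES(end), state=VALUES(state), " .
	 "comp_code=VALUES(comp_code)");

print "start/end for job 1234: ",
      join(" / ", $dbh->selectrow_array(
	      "SELECT start, end FROM demo_job WHERE jobid=1234")), "\n";
# start/end for job 1234: 1236000000 / 1236003600

Keeping four separate statements, rather than the single one the old script
built, is what lets each event class carry its own update clause while still
issuing at most four bulk queries per input batch.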
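
The wckey and cluster are not distinct fields in the Moab record, so the script
recovers them with the small regular-expression probes added above: the wckey
from a "wckey:" tag in the RM extension string, and the cluster (when Moab
reports it as "ALL") from the bracketed node feature list or from
"partition:"/"feature:" tags.  The snippet below just exercises those probes;
the sample input strings are invented for illustration and are not taken from
the patch.

#!/usr/bin/env perl
# Illustration of the field parsing added above, on made-up sample values.
use strict;
use warnings;

my $rm_ext        = "partition:snowflake;wckey:lc_proj";   # hypothetical
my $node_features = "[odin]";                               # hypothetical
my $cluster       = "ALL";

my $wckey = "";
if ($rm_ext =~ /wckey:(\w*)/) {
	$wckey = $1;
}

# When Moab reports the cluster as "ALL", fall back to the node feature
# list, then to partition:/feature: hints in the RM extension string.
if ($cluster eq "ALL") {
	if ($node_features =~ /\[(\w*)\]/) {
		$cluster = $1;
	} elsif ($rm_ext =~ /partition:(\w*)/) {
		$cluster = $1;
	} elsif ($rm_ext =~ /feature:(\w*)/) {
		$cluster = $1;
	} else {
		$cluster = "";
	}
}
print "wckey=$wckey cluster=$cluster\n";   # wckey=lc_proj cluster=odin

The \20 sequences in the wallclock-limit match appear to be Moab's escaping of
spaces (0x20) in the message field, which is why that test reclassifies such
events as JOBTIMEOUT before the state mapping runs.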
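
On the new start/end time note added to the sacct and sreport help text: per the
revised option descriptions, an invocation such as
"sacct -S 2009-01-15T00:00:00 -E 01/31/09-23:59:59" (the dates here are
arbitrary examples) combines two of the documented formats and selects jobs that
became eligible after midnight on January 15 and started before the end of
January 31.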