diff --git a/contribs/sjstat b/contribs/sjstat index b4e3cfeb1a41bb19a8164156977c9d1a51d32df7..f6df8b096a858225c563b62284af603da32b1f19 100755 --- a/contribs/sjstat +++ b/contribs/sjstat @@ -9,36 +9,36 @@ # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). # Written by Phil Eckert <eckert21@llnl.gov>. # CODE-OCEC-09-009. All rights reserved. -# +# # This file is part of SLURM, a resource management program. # For details, see <https://computing.llnl.gov/linux/slurm/>. # Please also read the included file: DISCLAIMER. -# +# # SLURM is free software; you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free # Software Foundation; either version 2 of the License, or (at your option) # any later version. # -# In addition, as a special exception, the copyright holders give permission +# In addition, as a special exception, the copyright holders give permission # to link the code of portions of this program with the OpenSSL library under -# certain conditions as described in each individual source file, and -# distribute linked combinations including the two. You must obey the GNU -# General Public License in all respects for all of the code used other than -# OpenSSL. If you modify file(s) with this exception, you may extend this -# exception to your version of the file(s), but you are not obligated to do +# certain conditions as described in each individual source file, and +# distribute linked combinations including the two. You must obey the GNU +# General Public License in all respects for all of the code used other than +# OpenSSL. If you modify file(s) with this exception, you may extend this +# exception to your version of the file(s), but you are not obligated to do # so. If you do not wish to do so, delete this exception statement from your -# version. If you delete this exception statement from all source files in +# version. 
If you delete this exception statement from all source files in # the program, then also delete it here. -# +# # SLURM is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more # details. -# +# # You should have received a copy of the GNU General Public License along # with SLURM; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# +# # Based off code with permission copyright 2006, 2007 Cluster Resources, Inc. ############################################################################### @@ -69,6 +69,16 @@ use autouse 'Pod::Usage' => qw(pod2usage); my ($help, $man, $pool, $running, $verbose); my (%MaxNodes, %MaxTime); +# +# Check SLURM status. +# + isslurmup(); + +# +# See if bluegene system. +# + my $bglflag = (`scontrol show config | grep -i bluegene`) ? 1 : 0; + # # Get user options. # @@ -98,6 +108,25 @@ use autouse 'Pod::Usage' => qw(pod2usage); exit; +# +# Usable nodes (total minus out) on bluegene; counts may carry a "K" (x1024) suffix. +# +sub Usable +{ + my ($tot, $out) = @_; + + $tot *= 1024.0 if ($tot =~ s/K//); + $out *= 1024.0 if ($out =~ s/K//); + + my $usable = $tot - $out; + if ($usable > 1024.0) { + $usable /= 1024.0; + $usable .= 'K'; + } + + return($usable); +} + # # Get the SLURM partitions information. # @@ -109,7 +138,7 @@ sub do_sinfo # # Get the partition and node info. # - my $options = "\"%9P %6m %.4c %.16F %f\""; + my $options = "\"%9P %6m %.4c %.22F %f\""; my $ct = 0; my @sin = `sinfo -e -o $options`; @@ -128,11 +157,13 @@ sub do_sinfo $s_idle[$ct] = $fields[1]; $s_out[$ct] = $fields[2]; $s_total[$ct] = $fields[3]; + + if ($bglflag) { + $s_usable[$ct] = Usable($s_total[$ct], $s_out[$ct]); + } else { $s_usable[$ct] = $s_total[$ct] - $s_out[$ct]; -# -# Handle "k" factor for Blue Gene. 
-# - $s_usable[$ct] .= 'K' if ($s_total[$ct] =~ /K/); + } + $s_feat[$ct] = ($line[4] .= " "); $s_feat[$ct] =~ s/\(null\)//g; $ct++; @@ -140,10 +171,10 @@ sub do_sinfo printf("\nScheduling pool data:\n"); if ($verbose) { - printf("------------------------------------------------------------------------------\n"); - printf(" Total Usable Free Node Time Other \n"); - printf("Pool Memory Cpus Nodes Nodes Nodes Limit Limit traits \n"); - printf("------------------------------------------------------------------------------\n"); + printf("----------------------------------------------------------------------------------\n"); + printf(" Total Usable Free Node Time Other \n"); + printf("Pool Memory Cpus Nodes Nodes Nodes Limit Limit traits \n"); + printf("----------------------------------------------------------------------------------\n"); } else { printf("-------------------------------------------------------------\n"); printf("Pool Memory Cpus Total Usable Free Other Traits \n"); @@ -154,17 +185,15 @@ sub do_sinfo if ($verbose) { my $p = $s_part[$i]; $p =~ s/\*//; - printf("%-9s %6dMb %5d %6s %7s %6s %6s %6s %-s\n", + printf("%-9s %6dMb %5s %6s %7s %6s %6s %10s %-s\n", $s_part[$i], $s_mem[$i], $s_cpu[$i], - $s_total[$i], - $s_total[$i] - $s_out[$i], + $s_total[$i], $s_usable[$i], $s_idle[$i], $MaxNodes{$p}, $MaxTime{$p}, $s_feat[$i]); } else { - printf("%-9s %6dMb %5d %6s %6s %6s %-s\n", + printf("%-9s %6dMb %5s %6s %6s %6s %-s\n", $s_part[$i], $s_mem[$i], $s_cpu[$i], - $s_total[$i], - $s_total[$i] - $s_out[$i], + $s_total[$i], $s_usable[$i], $s_idle[$i], $s_feat[$i]); } } @@ -189,10 +218,10 @@ sub do_squeue my $rval = system("scontrol show config | grep cons_res >> /dev/null"); if ($rval) { $type = "Nodes"; - $options = "\"%8i %8u %.6D %2t %.11S %.12l %.9P %.11M %1000R\""; + $options = "\"%8i %8u %.6D %2t %S %.12l %.9P %.11M %1000R\""; } else { $type = "Procs"; - $options = "\"%8i %8u %.6C %2t %.11S %.12l %.9P %.11M %1000R\""; + $options = "\"%8i %8u %.6C %2t %S 
%.12l %.9P %.11M %1000R\""; } # @@ -211,6 +240,8 @@ sub do_squeue $s_user[$ct] = $line[1]; $s_nodes[$ct] = $line[2]; $s_status[$ct] = $line[3]; + $line[4] =~ s/^.....//; + $line[4] = "N/A" if ($line[3] =~ /PD/); $s_begin[$ct] = $line[4]; $s_limit[$ct] = $line[5]; if ($line[5] eq "UNLIMITED") { @@ -233,10 +264,10 @@ sub do_squeue printf("Running job data:\n"); if ($verbose) { - printf("------------------------------------------------------------------------------------------------\n"); - printf(" Time Time Time \n"); - printf("JobID User $type Pool Status Used Limit Started Master/Other \n"); - printf("------------------------------------------------------------------------------------------------\n"); + printf("---------------------------------------------------------------------------------------------------\n"); + printf(" Time Time Time \n"); + printf("JobID User $type Pool Status Used Limit Started Master/Other \n"); + printf("---------------------------------------------------------------------------------------------------\n"); } else { printf("----------------------------------------------------------------------\n"); printf("JobID User $type Pool Status Used Master/Other \n"); @@ -245,7 +276,7 @@ sub do_squeue for (my $i = 0; $i < $ct; $i++) { if ($verbose) { - printf("%-8s %-8s %6s %-9s %-7s %10s %11s %11s %.12s\n", + printf("%-8s %-8s %6s %-9s %-7s %10s %11s %14s %.12s\n", $s_job[$i], $s_user[$i], $s_nodes[$i], $s_pool[$i], $s_status[$i], $s_used[$i], $s_limit[$i], $s_begin[$i], @@ -277,13 +308,12 @@ sub do_scontrol_part foreach my $tmp (@scon) { chomp $tmp; my @line = split(' ',$tmp); - ($part) = ($tmp =~ m/PartitionName=(\S+)\s+/) if ($tmp =~ /PartitionName=/); + ($part) = ($tmp =~ m/PartitionName=(\S+)/) if ($tmp =~ /PartitionName=/); ($MaxTime{$part}) = ($tmp =~ m/MaxTime=(\S+)\s+/) if ($tmp =~ /MaxTime=/); ($MaxNodes{$part}) = ($tmp =~ m/MaxNodes=(\S+)\s+/) if ($tmp =~ /MaxNodes=/); $MaxTime{$part} =~ s/UNLIMITED/UNLIM/ if ($MaxTime{$part}); 
$MaxNodes{$part} =~ s/UNLIMITED/UNLIM/ if ($MaxNodes{$part}); - } return; @@ -380,12 +410,26 @@ sub usage } +# +# Determine if SLURM is available. +# +sub isslurmup +{ + my $rval = system("scontrol show part > /dev/null 2>&1"); + if ($rval) { + printf("\n SLURM is not communicating.\n\n"); + exit(1); + } + + return; +} + __END__ =head1 NAME -B<sjstat> - List attributes of jobs under SLURM control +B<sjstat> - List attributes of jobs under the control of SLURM =head1 SYNOPSIS @@ -393,7 +437,7 @@ B<sjstat> [B<-h> ] [B<-c>] [B<-r> ] [B<-v>] =head1 DESCRIPTION -The B<sjstat> command is used to display statistics of jobs under control of SLURM. +The B<sjstat> command is used to display statistics of jobs under control of SLURM. The output is designed to give information on the resource usage and availablilty, as well as information about jobs that are currently active on the machine. This output is built using the SLURM utilities, sinfo, squeue and scontrol, the man pages for these @@ -462,7 +506,7 @@ The following is a basic request for status. The Running job data contains information pertaining to the: - JobID either the SLURM job id + JobID the SLURM job id User owner of the job Nodes nodes required, or in use by the job (Note: On cpu scheduled machines, this field @@ -495,10 +539,11 @@ The following is a basic request for status. 
pbatch* 15000Mb 8 1072 1070 174 UNLIM UNLIM (null) Running job data: - ---------------------------------------------------------------------------------------------- - JobID User Nodes Pool Status Used Limit Start Master/Other - ---------------------------------------------------------------------------------------------- - 395 sam 200 pbatch PD 0:00 30:00 N/A (JobHeld) + --------------------------------------------------------------------------------------------------- + Time Time Time + JobID User Nodes Pool Status Used Limit Started Master/Other + --------------------------------------------------------------------------------------------------- + 38562 tom 4 pbatch PD 0:00 1:00:00 N/A (JobHeld) The added fields to the "Scheduling pool data" are: @@ -510,11 +555,8 @@ The following is a basic request for status. Limit Time limit of job. Start Start time of job. -=head1 AUTHOR - -Written by Philip D. Eckert - =head1 REPORTING BUGS Report bugs to <eckert2@llnl.gov> +=cut