Commit d5e46a8d authored by fwinkler

Added new scripts for post processing.

parent 41c91f80
@@ -58,7 +58,7 @@ while IFS= read -r line ; do
     # check if
     if [ "x$backup_avail" != "x" ] && [ $backup_avail -eq 0 ]; then
       #echo "Shard ${shard_id} is not in backup"
-      echo -e "Backup shard ${shard_id} of ${shard_arr[1]} (${shard_arr[2]}): about $shard_duration hours ($start_seconds s -> $end_seconds s)" |2>&1 tee -a $DEBUG_PATH
+      echo -e "Backup shard ${shard_id} of ${shard_arr[1]} (${shard_arr[2]}): about $shard_duration hours ($start_seconds s -> $end_seconds s)"
       backup_dir="${backup_path}/${shard_id}_${start_seconds}"
@@ -70,27 +70,27 @@ while IFS= read -r line ; do
       #if directory exists, directory is not empty, no file name contains "pending", and a "tar.gz" file exists, then add backup to backup table
       if [ -d "${backup_dir}" ] && [ "$(ls -A ${backup_dir})" ] && [ $(ls -l ${backup_dir} | grep -c pending) -eq 0 ] && [ $(ls -l ${backup_dir} | grep -c tar.gz) -gt 0 ]; then
         success=1
-        echo -e "Backup of shard ${shard_id} into ${backup_dir} successful!" |2>&1 tee -a $DEBUG_PATH
+        echo -e "Backup of shard ${shard_id} into ${backup_dir} successful!"
       else
         success=0
-        echo -e "Backup of shard ${shard_id} into ${backup_dir} failed!" |2>&1 tee -a $DEBUG_PATH
+        echo -e "Backup of shard ${shard_id} into ${backup_dir} failed!"
         if [ -d "${backup_dir}" ]; then
-          echo -e "Backup directory ${backup_dir} exists." |2>&1 tee -a $DEBUG_PATH
+          echo -e "Backup directory ${backup_dir} exists."
         fi
         if [ "$(ls -A ${backup_dir})" ]; then
-          echo -e "Backup directory ${backup_dir} is not empty." |2>&1 tee -a $DEBUG_PATH
+          echo -e "Backup directory ${backup_dir} is not empty."
         else
-          echo -e "Backup directory ${backup_dir} is empty: ls -A ${backup_dir} = $(ls -A ${backup_dir})" |2>&1 tee -a $DEBUG_PATH
+          echo -e "Backup directory ${backup_dir} is empty: ls -A ${backup_dir} = $(ls -A ${backup_dir})"
         fi
         if [ $(ls -l ${backup_dir} | grep -c pending) -eq 0 ]; then
-          echo -e "No filename contains pending." |2>&1 tee -a $DEBUG_PATH
+          echo -e "No filename contains pending."
         fi
         if [ $(ls -l ${backup_dir} | grep -c tar.gz) -gt 0 ]; then
-          echo -e "An archive tar.gz exists" |2>&1 tee -a $DEBUG_PATH
+          echo -e "An archive tar.gz exists"
         fi
         [[ ${backup_dir} = *influx* ]] && [[ ${backup_dir} = *backup* ]] && rm -rf ${backup_dir}
@@ -100,10 +100,10 @@ while IFS= read -r line ; do
       mysql_command "${query}"
     else
-      echo -e "Shard $shard_id is already in backup ($backup_avail)." |2>&1 tee -a $DEBUG_PATH
+      echo -e "Shard $shard_id is already in backup ($backup_avail)."
     fi
   else
-    echo -e "Shard $shard_id is incomplete" |2>&1 tee -a $DEBUG_PATH
+    echo -e "Shard $shard_id is incomplete"
   fi
 done <<< "$(echo "$shards")"
#!/usr/bin/env python
from __future__ import print_function
import argparse
import os
import subprocess
import sys
import hostlist
#import mysql.connector as mariadb
import pymysql as mariadb
import ast
import operator # max function
import json
import time
from datetime import datetime
from datetime import timedelta
from influxdb import InfluxDBClient
debug_file = sys.stderr
debug_flag = False
def print_info(*args, **kwargs):
global debug_file
print(*args, file=debug_file, **kwargs)
def print_debug(*args, **kwargs):
global debug_flag
if debug_flag:
print("DEBUG:", *args, file=sys.stderr, **kwargs)
### conversion of large values
unitthresholds = (
(10**16, 'P'),
(10**13, 'T'),
(10**10, 'G'),
(10**7, 'M'),
(10**4, 'K'),
(1, '')
)
# conversion factor for unit prefixes: binary, decimal
unit2fac = {
"P": (1<<50L, 10**15),
"T": (1<<40L, 10**12),
"G": (1<<30L, 10**9),
"M": (1<<20L, 10**6),
"K": (1<<10L, 10**3),
"": (1, 1)
}
def getUnitAndFactor(unit, value):
metric_divide = 1
for (factor, quantifier) in unitthresholds:
if value >= factor:
if unit.startswith('Xi'):
metric_divide = unit2fac[quantifier][0]
else:
metric_divide = unit2fac[quantifier][1]
unit = unit.replace('X', quantifier, 1)
break
if unit.startswith('iB'):
unit = unit.replace('iB', 'bytes', 1)
return ( unit, metric_divide )
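# Illustrative examples (values assumed): a rate of 3.2e10 with the unit
# template "XiB/s" falls into the 'G' bucket and uses the binary factor,
#   getUnitAndFactor("XiB/s", 3.2e10) -> ("GiB/s", 1<<30)
# whereas a decimal template such as "XFLOPs" uses the decimal factor:
#   getUnitAndFactor("XFLOPs", 3.2e10) -> ("GFLOPs", 10**9)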
### Taurus partition - available CPU cores per node
partition_cpu_num = {
"haswell":24,
"west":12,
"sandy":16,
"broadwell":28,
"gpu1":16,
"gpu2":24,
"smp1":32,
"smp2":64,
"knl":64,
"hpdlf":12,
"ml":174
}
partition_gpu_num = {
"gpu1":2,
"gpu2":4,
"ml":6
}
# non-contiguous timeline data
# per socket data
# per cpu data
# per node data
#measurements_metrics = {
#'likwid_cpu': ('flops_any', 'cpi'), # shared hosts clause
#'likwid_socket' : ('mem_bw', 'rapl_power'), # socket hostname clause
#'cpu' : ('used',), # shared hosts clause
#'memory' : ('used',),
#'infiniband' : ('bw',),
#'lustre_scratch2' : ('read_bw', 'write_bw', 'read_requests', 'write_requests', 'open', 'close', 'fsync', 'seek'), # no 'create' in measurement
#'lustre_highiops' : ('read_bw', 'write_bw', 'read_requests', 'write_requests', 'open', 'close', 'fsync', 'seek'), # no 'create' in measurement
#'nvml' : ('gpu_used','mem_used','power','temp')
#}
ipc_avail = False # Do we have IPC or CPI values?
mflops = False # Are flops_any in MFLOPS?
# base performance metrics
perf_foot_metrics = {
'likwid_cpu': (('ipc','mean'),('flops_any','mean')),
'likwid_socket' : (('mem_bw','mean'), ('rapl_power','mean')),
'cpu' : (('used','mean'),), # shared hosts clause
'memory' : (('used','max'),),
'infiniband' : (('bw','mean'),),
'lustre_scratch2' : (('read_bw','mean'), ('write_bw','mean')),
'lustre_highiops' : (('read_bw','mean'), ('write_bw','mean')),
'nvml' : (('gpu_used','mean'), ('mem_used','max'),('power','mean'))
}
# maximum per job values for selected metrics
perf_max_per_job = {
'likwid_cpu': ('flops_any',),
'cpu' : ('used',),
'infiniband' : ('bw',),
'lustre_scratch2' : ('read_bw', 'write_bw', 'read_requests', 'write_requests', 'open', 'close', 'fsync', 'seek'),
'lustre_highiops' : ('read_bw', 'write_bw', 'read_requests', 'write_requests', 'open', 'close', 'fsync', 'seek'),
'nvml' : ('gpu_used',)
}
# Generate more comprehensive footprint based on per node select statements
# performance metrics and their aggregate
# structure: measurement : ((metric1,aggregate1[,aggregate2,...]),(metric2,aggregate1[,aggregate2,...]),...)
perf_per_node = {
'likwid_cpu': (('flops_any','mean','max'), ('ipc','mean','max')),
'likwid_socket' : (('mem_bw','mean','max'), ('rapl_power','mean','max')),
'cpu' : (('used','mean','max'),),
'memory' : (('used','max'),),
'infiniband' : (('bw','mean'),),
'lustre_scratch2' : (('read_bw','mean'), ('write_bw','mean'), ('read_requests','mean'), ('write_requests','mean'), ('open','mean'), ('close','mean'), ('fsync','mean'), ('seek','mean')),
'lustre_highiops' : (('read_bw','mean'), ('write_bw','mean'), ('read_requests','mean'), ('write_requests','mean'), ('open','mean'), ('close','mean'), ('fsync','mean'), ('seek','mean')),
'nvml' : (('gpu_used','mean','max'), ('mem_used','max'),('power','mean','max'),('temp','mean','max'))
}
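# Illustrative expansion (assumed query shape): the first likwid_cpu entry
# above requests both the mean and the max of flops_any per node, i.e. a
# select of roughly the form
#   select mean(flops_any), max(flops_any) from likwid_cpu
#     where (hostname='taurusi5579') and <time clause>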
metric_name_mapping = {
('likwid_cpu','flops_any'):'flops',
('likwid_cpu','cpi'):'cpi',
('likwid_cpu','ipc'):'ipc',
('likwid_socket','mem_bw'):'mem_bw',
#('likwid_socket','rapl_power','mean'):'cpu_power_avg',
('cpu','used'):'cpu_used',
('memory','used'):'memory', #host_mem_max
('infiniband','bw'):'ib_bw',
('lustre_scratch2','read_bw'):'lustre_scratch2_read_bw',
('lustre_scratch2','write_bw'):'lustre_scratch2_write_bw',
('lustre_highiops','read_bw'):'lustre_highiops_read_bw',
('lustre_highiops','write_bw'):'lustre_highiops_write_bw'
#('nvml','gpu_used'):'gpu_used',
    #('nvml','mem_used'):'gpu_mem_used',
    #('nvml','power'):'gpu_power',
}
map_influx2maria = {
('likwid_cpu','flops_any'):('flops_any', 'per_core','footprint_base'),
('likwid_cpu','cpi'):('ipc','per_core','footprint_base'),
('likwid_cpu','ipc'):('ipc','per_core','footprint_base'),
('likwid_socket','mem_bw'):('mem_bw','per_socket','footprint_base'),
('likwid_socket','rapl_power'):('cpu_power','per_socket','footprint_base'),
('cpu','used'):('cpu_used', 'per_core','footprint_base'),
('memory','used'):('host_mem_used','per_node','footprint_base'),
('infiniband','bw'):('ib_bw','per_node','footprint_base'),
('lustre_scratch2','read_bw'):('lustre_scratch2_read_bytes','per_node','footprint_fileio'),
('lustre_scratch2','write_bw'):('lustre_scratch2_write_bytes','per_node','footprint_fileio'),
('lustre_highiops','read_bw'):('lustre_highiops_read_bytes','per_node','footprint_fileio'),
('lustre_highiops','write_bw'):('lustre_highiops_write_bytes','per_node','footprint_fileio'),
('nvml','gpu_used'):('used','per_gpu','footprint_gpu'),
('nvml','mem_used'):('mem_used','per_gpu','footprint_gpu'),
('nvml','power'):('power','per_gpu','footprint_gpu')
}
def get_table_and_field_name(measurement, metric, aggregate):
maria_name_tuple = map_influx2maria[(measurement,metric)]
table_name = maria_name_tuple[2]
field_name = maria_name_tuple[0]
# exception for the file IO table (no "mean_per_node" as column suffix)
if table_name != 'footprint_fileio':
field_name += '_' + aggregate + '_' + maria_name_tuple[1]
#table_name = "footprint_base"
#field_name = ""
#if measurement == 'likwid_socket':
#field_name += '_per_socket'
#elif 'nvml' == measurement:
#field_name += '_per_gpu'
#table_name = "footprint_gpu"
#elif 'cpu' in measurement:
#field_name += '_per_cpu'
#else:
#field_name += '_per_node'
#if 'highiops' in measurement:
#table_name = "footprint_lustre_highiops"
#elif 'scratch2' in measurement:
#table_name = "footprint_lustre_scratch2"
return (table_name,field_name)
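# Illustrative examples (assumed inputs): socket-level memory bandwidth maps
# to the base table with an aggregate/granularity suffix, while file-IO
# metrics keep their bare column name:
#   get_table_and_field_name('likwid_socket', 'mem_bw', 'mean')
#     -> ('footprint_base', 'mem_bw_mean_per_socket')
#   get_table_and_field_name('lustre_scratch2', 'read_bw', 'mean')
#     -> ('footprint_fileio', 'lustre_scratch2_read_bytes')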
def get_metric_key(measurement, metric):
if measurement == 'cpu':
return "cpu_used"
elif measurement == 'memory':
return "main_mem_used"
elif measurement.startswith("lustre_"):
return measurement + "_" + metric
elif measurement == 'infiniband':
return "ib_" + metric
elif measurement == 'nvml' and metric != 'gpu_used':
return "gpu_" + metric
else:
return metric
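# Illustrative examples (assumed inputs):
#   get_metric_key('cpu', 'used')             -> 'cpu_used'
#   get_metric_key('lustre_scratch2', 'open') -> 'lustre_scratch2_open'
#   get_metric_key('nvml', 'power')           -> 'gpu_power'
#   get_metric_key('likwid_socket', 'mem_bw') -> 'mem_bw'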
PER_NODE, PER_SOCKET, PER_CORE = 0, 1, 2
num_nodes, num_sockets, num_cores = -1, -1, -1
metric_desc_tab = {
'mem_bw':("Memory bandwidth","XiB/s", PER_SOCKET),
'flops_any':("Normalized FLOPS","XFLOPs", PER_CORE),
'cpi':("Cycles / instr.","CPI", PER_CORE),
'ipc':("Instr./Cycle","IPC", PER_CORE),
'write_bw':("Lustre write bandwidth","XiB/s", PER_NODE),
'read_bw':("Lustre read bandwidth","XiB/s", PER_NODE),
'main_mem_used':("Main memory used","XiB", PER_NODE),
'ib_bw':("IB bandwidth","XiB/s", PER_NODE),
'cpu_used':("CPU used/active","", PER_CORE),
'rapl_power':("CPU power","W", PER_SOCKET),
'gpu_used':("GPU used/active","", PER_NODE),
'gpu_mem_used':("GPU memory used","XiB", PER_NODE),
'gpu_power':("GPU power","W", PER_NODE),
'gpu_temp':("GPU temperature","C", PER_NODE),
'lustre_highiops_open':("highiops open","1/s", PER_NODE),
'lustre_highiops_close':("highiops close","1/s", PER_NODE),
'lustre_highiops_seek':("highiops seek","1/s", PER_NODE),
'lustre_highiops_read_bw':("highiops read bw","XiB/s", PER_NODE),
'lustre_highiops_write_bw':("highiops write bw","XiB/s", PER_NODE),
'lustre_highiops_read_requests':("highiops read req","1/s", PER_NODE),
'lustre_highiops_write_requests':("highiops write req","1/s", PER_NODE),
'lustre_scratch2_open':("scratch2 open","1/s", PER_NODE),
'lustre_scratch2_close':("scratch2 close","1/s", PER_NODE),
'lustre_scratch2_seek':("scratch2 seek","1/s", PER_NODE),
'lustre_scratch2_read_bw':("scratch2 read bw","XiB/s", PER_NODE),
'lustre_scratch2_write_bw':("scratch2 write bw","XiB/s", PER_NODE),
'lustre_scratch2_read_requests':("scratch2 read req","1/s", PER_NODE),
'lustre_scratch2_write_requests':("scratch2 write req","1/s", PER_NODE),
}
metric_output_order = ['cpu_used', 'main_mem_used', 'ipc', 'flops_any', 'mem_bw', 'rapl_power', 'gpu_used', 'gpu_mem_used',
'gpu_power', 'gpu_temp', 'ib_bw',
'lustre_highiops_read_bw','lustre_highiops_write_bw','lustre_highiops_open','lustre_highiops_close','lustre_highiops_seek','lustre_highiops_read_requests','lustre_highiops_write_requests',
'lustre_scratch2_read_bw','lustre_scratch2_write_bw','lustre_scratch2_open','lustre_scratch2_close','lustre_scratch2_seek','lustre_scratch2_read_requests','lustre_scratch2_write_requests']
def connect_to_mariadb():
#connection data for MariaDB
HOST = os.environ["MARIADB_HOST"]
PORT = os.environ["MARIADB_PORT"]
USER = os.environ["MARIADB_USER"]
PASSWORD = os.environ["MARIADB_PASSWORD"]
DATABASE = os.environ["MARIADB_DATABASE"]
return mariadb.connect( host=HOST, port=int(PORT),
user=USER, passwd=PASSWORD,
db=DATABASE, connect_timeout=10)
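# Usage sketch (environment values assumed, shown here only as examples):
#   export MARIADB_HOST=localhost MARIADB_PORT=3306 MARIADB_USER=pika \
#          MARIADB_PASSWORD=secret MARIADB_DATABASE=pika
# With these set before the script runs, connect_to_mariadb() returns a
# PyMySQL connection with a 10 s connect timeout.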
influxdb_connection = None
influxdb_connection_st = None
influxdb_connection_lt = None
def connect_to_influxdb(isShortTerm=False):
if isShortTerm:
# connection data for InfluxDB
host = os.environ["INFLUXDB_HOST"]
port = os.environ["INFLUXDB_PORT"]
user = os.environ["INFLUXDB_USER"]
password = os.environ["INFLUXDB_PASSWORD"]
database = os.environ["INFLUXDB_DATABASE"]
else:
host = os.environ["INFLUXDB_LT_HOST"]
port = os.environ["INFLUXDB_LT_PORT"]
user = os.environ["INFLUXDB_LT_USER"]
password = os.environ["INFLUXDB_LT_PASSWORD"]
database = os.environ["INFLUXDB_LT_DATABASE"]
# connect to database
return InfluxDBClient(host, port, user, password, database)
def setInfluxConnection(job_start):
global influxdb_connection
# if job started within the last two weeks
if job_start > (time.time() - 1209600):
global influxdb_connection_st
        if influxdb_connection_st is None:
influxdb_connection_st = connect_to_influxdb(True)
influxdb_connection = influxdb_connection_st
else:
global influxdb_connection_lt
        if influxdb_connection_lt is None:
influxdb_connection_lt = connect_to_influxdb(False)
influxdb_connection = influxdb_connection_lt
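# Usage sketch (illustrative): route queries by job age and reuse the cached
# clients on subsequent calls.
#   setInfluxConnection(time.time() - 3*86400)   # < 2 weeks old -> short-term DB
#   setInfluxConnection(time.time() - 30*86400)  # older -> long-term DB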
"""
brief divide the value depending on the metric type
"""
def nat_divide(value, metric_key):
metric_divide = 1
if metric_key in metric_desc_tab:
if metric_desc_tab[metric_key][2] == PER_CORE:
metric_divide = num_cores
elif metric_desc_tab[metric_key][2] == PER_NODE:
metric_divide = num_nodes
elif metric_desc_tab[metric_key][2] == PER_SOCKET:
metric_divide = num_sockets
return value/float(metric_divide)
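# Illustrative example (job size assumed): with num_nodes = 2, the PER_NODE
# metric 'ib_bw' is halved, turning a job-wide sum into a per-node average:
#   nat_divide(4.0e9, 'ib_bw') -> 2.0e9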
def convert_cpulist_to_cpulistdict(cpulist):
# taurusi5579[21-23],taurusi5592[0-3,10-16,22-23] -->
# {'taurusi5579':'[21-23]','taurusi5592':'[0-3,10-16,22-23]'}
cpulist_1 = cpulist.replace("[","':'[")
cpulist_2 = cpulist_1.replace("],","]','")
cpulist = str("{'") + cpulist_2 + str("'}")
cpulist_dict = ast.literal_eval(cpulist)
return cpulist_dict
def get_cores_num(partition):
if "haswell" in partition:
cpu_num = partition_cpu_num["haswell"]
elif "west" in partition:
cpu_num = partition_cpu_num["west"]
elif "sandy" in partition:
cpu_num = partition_cpu_num["sandy"]
elif "broadwell" in partition:
cpu_num = partition_cpu_num["broadwell"]
elif "gpu1" in partition:
cpu_num = partition_cpu_num["gpu1"]
elif "gpu2" in partition:
cpu_num = partition_cpu_num["gpu2"]
elif "smp1" in partition:
cpu_num = partition_cpu_num["smp1"]
elif "smp2" in partition:
cpu_num = partition_cpu_num["smp2"]
elif "knl" in partition:
cpu_num = partition_cpu_num["knl"]
elif partition in partition_cpu_num:
cpu_num = partition_cpu_num[partition]
else:
cpu_num = 24 #default
return cpu_num
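# Illustrative examples (partition names assumed): substring matching resolves
# suffixed partition names, with 24 cores as the fallback:
#   get_cores_num('haswell64') -> 24   (matches "haswell")
#   get_cores_num('ml')        -> 174  (direct dictionary lookup)
#   get_cores_num('romeo')     -> 24   (default)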
def get_socket_cpu(cpu, partition):
#print partition
# get number of cpus from partition dictionary
cpu_num = get_cores_num(partition)
# get cpu index of second socket (first socket has cpu index 0)
socket_cpu_index = int(cpu_num/2)
#print cpu
#print socket_cpu_index
if cpu < socket_cpu_index:
return 0
else:
return socket_cpu_index
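# Illustrative example (assumed partition size): "haswell" nodes have 24 cores
# on two sockets, so the second socket starts at CPU 12:
#   get_socket_cpu(5, "haswell")  -> 0
#   get_socket_cpu(17, "haswell") -> 12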
# returns "where" clause with socket cpus for all hosts and total number of sockets
def get_socket_hostnames_clause(cpulist_dict, partition):
global num_sockets
num_sockets = 0
counter = 1
hostnames_clause = "("
num_nodes = len(cpulist_dict)
for node in cpulist_dict:
# get cpus
cpus = hostlist.expand_hostlist(cpulist_dict[node])
if len(cpus) == 0:
break
# get first and last cpu from list
first_cpu = cpus[0]
last_cpu = cpus[len(cpus)-1]
socket_cpu_1 = get_socket_cpu(int(first_cpu), partition)
socket_cpu_2 = get_socket_cpu(int(last_cpu), partition)
if socket_cpu_1 == socket_cpu_2:
cpu_clause = "(cpu='" + str(socket_cpu_1) + "')"
num_sockets += 1
else:
cpu_clause = "(cpu='" + str(socket_cpu_1) + "' or cpu='" + str(socket_cpu_2) + "')"
num_sockets += 2
#print cpu_clause
if counter < num_nodes:
hostnames_clause += "(hostname='" + str(node) + "') and " + str(cpu_clause) + ") or ("
else:
hostnames_clause += "(hostname='" + str(node) + "') and " + str(cpu_clause) + ")"
counter = counter + 1
#print hostnames_clause
return hostnames_clause
def get_cpu_clause(cpulist):
cpu_idx_last = len(cpulist)-1
cpu_idx = 0
cpu_clause = "("
for cpu in cpulist:
if cpu_idx < cpu_idx_last:
cpu_clause += "cpu='" + str(cpu) + "' or "
else:
cpu_clause += "cpu='" + str(cpu) + "')"
cpu_idx += 1
return cpu_clause
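# Illustrative example (assumed CPU list):
#   get_cpu_clause(['0', '1', '12']) -> "(cpu='0' or cpu='1' or cpu='12')"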
def get_cpu_hostnames_clause(cpulist_dict):
num_nodes = len(cpulist_dict)
counter = 1
hostnames_clause = "("
for node in cpulist_dict:
try:
cpus = hostlist.expand_hostlist(cpulist_dict[node])
        except Exception:
print_info("Could not expand CPU list for node ", str(node), "(" + str(cpulist_dict[node]) + ")")
break
cpu_clause = get_cpu_clause(cpus)
if counter < num_nodes:
hostnames_clause += "(hostname='" + str(node) + "') and " + cpu_clause + ") or ("
else:
hostnames_clause += "(hostname='" + str(node) + "') and " + cpu_clause + ")"
counter = counter + 1
#print hostnames_clause
return hostnames_clause
def get_pur_hostnames_clause(nodelist):
num_nodes = len(nodelist)
counter = 1
hostnames_clause = "("
for node in nodelist:
if counter < num_nodes:
hostnames_clause += "hostname='" + str(node) + "' or "
else:
hostnames_clause += "hostname='" + str(node) + "')"
counter = counter + 1
return hostnames_clause
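# Illustrative example (assumed node names):
#   get_pur_hostnames_clause(['taurusi5579', 'taurusi5592'])
#     -> "(hostname='taurusi5579' or hostname='taurusi5592')"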
def get_hostnames_clauses(nodelist, cpulist_dict, partition, exclusive):
    # update the global socket count in the exclusive branch below
    global num_sockets
    # host names for measurements without CPU tags
    pure_hostnames_clause = get_pur_hostnames_clause(nodelist)
# check if host names clause needs CPUs and sockets for non exclusive jobs
if exclusive == 0:
# check input data:
if len(nodelist) != len(cpulist_dict):
print_info("len(nodelist) != len(cpulist_dict)")
cpu_hostnames_clause = get_cpu_hostnames_clause(cpulist_dict)
if not cpu_hostnames_clause:
print_info("Could not generate shared hostname clause")
return None
socket_hostnames_clause = get_socket_hostnames_clause(cpulist_dict, partition)
else:
cpu_hostnames_clause = pure_hostnames_clause
socket_hostnames_clause = cpu_hostnames_clause
num_sockets = num_nodes * 2
return (pure_hostnames_clause, socket_hostnames_clause, cpu_hostnames_clause)
def get_results_from_influxdb(query_str):
global influxdb_connection
#get all points
try:
query_result = influxdb_connection.query(query_str).get_points()
    except Exception:
print_info("Influx query failed! ", query_str)
return None
points = list(query_result)
#print query_str, ":", points
if len(points) > 0:
return points[0]
else:
#if not ("scratch2" in query_str or "highiops" in query_str):
# print "No points on", query_str
return None
def get_mean_ipc_from_cpi(where_clause):
query_str = "select median(ipc) as medianipc, mean(ipc) as meanipc from (select 1/cpi as ipc from likwid_cpu where " + where_clause + ")"
print_debug("get_mean_ipc_from_cpi: %s" % (query_str,))
result_value = -1
result = get_results_from_influxdb(query_str)
if result and "meanipc" in result:
result_value = float(result['meanipc'])
if result_value > 50 and "medianipc" in result:
result_value = float(result['medianipc'])
print_info("Mean IPC = %f -> use median IPC = %f" % (float(result['meanipc']), result_value))
if result_value > 50:
print_info("Do not store IPC of %f" % (result_value,))
return -1
return result_value
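# Illustrative query (where clause assumed): for one node and a time range the
# generated statement looks like
#   select median(ipc) as medianipc, mean(ipc) as meanipc from
#     (select 1/cpi as ipc from likwid_cpu
#      where (hostname='taurusi5579') and time >= 1500000000s and time <= 1500003600s)
# The median serves as a fallback when sporadic near-zero CPI samples push the
# mean IPC above the plausibility limit of 50.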
# gather the native average and max values on a per node basis
def get_per_node_footprint(nodelist, cpulist_dict, exclusive, partition, time_clause):
global ipc_avail
global perf_per_node
footprint = {} #used as call by reference
if exclusive:
nodes = nodelist
else:
nodes = cpulist_dict
if len(nodelist) != len(cpulist_dict):
print_info("len(nodelist) != len(cpulist_dict)")
print_info("Node list:", nodelist)
print_info("CPU list:", cpulist_dict)
# get metrics for each node individually
# detect outlier nodes based on the average values per node
min_avg_values = {} # min average per node