Commit ac89a778 authored by Julius Metz

1. Version

parent 2c7bff08
venv
__pycache__
\ No newline at end of file
__pycache__
trash
*.raw.gz
testsites
\ No newline at end of file
def hardvalue(filter_data, filtervalue):
def filter_hardvalue(filter_data, filtervalue):
"""sort commands from greatest to smallest value in relation to the overall value (percentage).
Than add all commands to filter_info until filtervalue is smaller as all percentages of the commands.
......@@ -40,7 +40,7 @@ def hardvalue(filter_data, filtervalue):
return filter_info
def average(filter_data, filtervalue):
def filter_average(filter_data, filtervalue):
"""filters out the commands which are below the filtervalue(percentage)
in relation to the overall average.
......
This diff is collapsed.
import plotly
import datetime
def build_plot(plot_data, **kwargs):
    """Wrap plot_data in a plotly layout and render it as html div.

    Arguments:
        plot_data {list} -- traces in plotly layout

    Keyword Arguments:
        height -- plot height (default: {500})
        width -- plot width (default: {None})
        title -- plot title (default: {None})
        xtitle -- title of the x axis (default: {None})
        ytitle -- title of the y axis (default: {None})
        ytype -- type of the y axis, e.g. 'log' (default: {None})
        showlegend -- show the legend (default: {True})

    Returns:
        str -- plotly html div (plotly.js itself is not included)
    """
    option = kwargs.get
    layout = {
        'height': option('height', 500),
        'width': option('width', None),
        'title': {'text': option('title', None)},
        'xaxis': {'title': option('xtitle', None)},
        'yaxis': {
            'title': option('ytitle', None),
            'type': option('ytype', None),
        },
        'showlegend': option('showlegend', True),
    }
    figure = {'data': plot_data, 'layout': layout}
    return plotly.offline.plot(figure, include_plotlyjs=False, output_type='div')
def default_plot(cmds_data, filter_info, plot_settings, **kwargs):
    """Build one scattergl marker trace per command and render them with build_plot.

    Arguments:
        cmds_data {dict} -- per command a dict with 'datetime' and the value lists
        filter_info {dict} -- per value key the list of commands to show
        plot_settings {dict} -- keyword settings handed to build_plot

    Keyword Arguments:
        needed_key -- value key that is plotted on the y axis (required)

    Raises:
        Exception: raise if no needed_key is given in kwargs

    Returns:
        the result of build_plot
    """
    if 'needed_key' not in kwargs:
        raise Exception('no needed_key in default_plot generater settings')
    needed_key = kwargs['needed_key']

    traces = [
        {
            'type': 'scattergl',
            'mode': 'markers',
            'x': cmds_data[cmd].get('datetime', []),
            'y': cmds_data[cmd].get(needed_key, []),
            'name': cmd,
        }
        for cmd in filter_info[needed_key]
    ]
    return build_plot(traces, **plot_settings)
import os
import re
import subprocess
import shlex
......@@ -5,6 +6,8 @@ from pathlib import Path
import copy
import time
import datetime
import multiprocessing as mp
import shutil
import click
import plotly
......@@ -14,87 +17,7 @@ from yattag import Doc
import plots_generators
import filter_func
import value_merger
# Names of the functions in filter_func that can be chosen with --filtertype;
# the first entry is the command line default.
FILTER_FUNCTIONS = ['hardvalue', 'average']

# Command line options listed per interpreter name.
# NOTE(review): presumably the options whose following argument has to be
# skipped when the command name is derived -- the consumer is not visible
# here, confirm.
NAME_SPEZIAL_PARAMETER_CONFIG = {
    'java': ['-cp', '-classpath'],
    'bash': [],
    'sh': [],
    'perl': [],
    'python': ['-m', '-W', '-X', '--check-hash-based-pycs', '-c'],
}

# 'default_merger' is used for every NEEDED_VALUES entry without own 'merger'.
NEEDED_VALUES_DEFAULTS = {
    'default_value': 0,
    'default_merger': 'addition_int',
}

# Per value title: 'keys' are the column titles read from each parsed entry,
# 'merger' is the name of the function in value_merger that combines them.
# NOTE(review): 'x2oneaddtion_int' is spelled differently from the other
# 'x2oneaddition_*' names -- value_merger is not visible here, confirm.
NEEDED_VALUES = {
    'PCT': {'keys': ['PCT']},
    'VmRSS': {'keys': ['VmRSS'], 'merger': 'x2oneaddition_float_sizedown2'},
    'RKBC': {'keys': ['RKBC'], 'merger': 'x2oneaddition_float_sizedown1'},
    'WKBC': {'keys': ['WKBC'], 'merger': 'x2oneaddition_float_sizedown1'},
    'syscalls': {'keys': ['RSYS', 'WSYS'], 'merger': 'x2oneaddtion_int'},
}

# Entries whose command matches one of these expressions (re.search) are
# left out while parsing.
COMAND_BLACKLIST_REGEX = [
    r'^[^ ]+perl .+collectl',
]
# One entry per plot:
#   'generator'          -- name of the function in plots_generators
#   'name'               -- key under which the plot is stored
#   'generator_settings' -- keyword arguments for the generator
#   'plotly_settings'    -- layout settings handed on by the generator
PLOT_CONFIG = [
    {
        'generator': 'default_plot',
        'name': 'cpu',
        'generator_settings': {'needed_key': 'PCT'},
        'plotly_settings': {
            'title': 'CPU load',
            'xtitle': 'Date',
            'ytitle': 'CPU load',
        },
    },
    {
        'generator': 'default_plot',
        'name': 'ram',
        'generator_settings': {'needed_key': 'VmRSS'},
        'plotly_settings': {
            'title': 'Memory Usage',
            'xtitle': 'Date',
            'ytitle': 'RAM usage GiB',
        },
    },
    {
        'generator': 'default_plot',
        'name': 'ior',
        'generator_settings': {'needed_key': 'RKBC'},
        'plotly_settings': {
            'title': 'read io',
            'xtitle': 'Date',
            'ytitle': 'I/O MiB/s',
            'ytype': 'log',
        },
    },
    {
        'generator': 'default_plot',
        'name': 'iow',
        'generator_settings': {'needed_key': 'WKBC'},
        'plotly_settings': {
            'title': 'write io',
            'xtitle': 'Date',
            'ytitle': 'I/O MiB/s',
            'ytype': 'log',
        },
    },
    {
        'generator': 'default_plot',
        'name': 'ios',
        'generator_settings': {'needed_key': 'syscalls'},
        'plotly_settings': {
            'title': 'I/O syscalls',
            'xtitle': 'Date',
            'ytitle': 'I/O syscalls/s',
            'ytype': 'log',
        },
    },
]
from sitegenerator_Config import *
def datestr2date(datestr):
......@@ -107,10 +30,10 @@ def datestr2date(datestr):
datetime.date -- date of the string
"""
return datetime.date(
int(datestr[:4]),
int(datestr[4:6]),
int(datestr[6:8]),
)
int(datestr[:4]),
int(datestr[4:6]),
int(datestr[6:8]),
)
def get_cmdname(cmd, coarsest=False):
......@@ -175,29 +98,26 @@ def parse_file(path, collectl, merge, coarsest):
for entry in output:
splited_entry = entry.split(' ', len(head_indexes_dict)-1)
cmd = splited_entry[-1]
for regexpr in COMAND_BLACKLIST_REGEX:
if re.search(regexpr, cmd):
break
else:
if merge:
cmd = get_cmdname(cmd, coarsest=coarsest)
if not cmd in entrys_data:
entrys_data[cmd] = {}
tmp_datetime = datetime.datetime.combine(
datestr2date(splited_entry[head_indexes_dict['Date']]),
datetime.time.fromisoformat(splited_entry[head_indexes_dict['Time']]),
if merge:
cmd = get_cmdname(cmd, coarsest=coarsest)
if cmd in COMAND_BLACKLIST_REGEX:
continue
if not cmd in entrys_data:
entrys_data[cmd] = {}
tmp_datetime = datetime.datetime.combine(
datestr2date(splited_entry[head_indexes_dict['Date']]),
datetime.time.fromisoformat(splited_entry[head_indexes_dict['Time']]),
)
if not tmp_datetime in entrys_data[cmd]:
entrys_data[cmd][tmp_datetime] = copy.deepcopy(empty_dict_with_value_titles)
for value_title, value_title_settings in NEEDED_VALUES.items():
entrys_data[cmd][tmp_datetime][value_title] = getattr(
value_merger,
value_title_settings.get('merger', NEEDED_VALUES_DEFAULTS['default_merger']),
)(
entrys_data[cmd][tmp_datetime][value_title],
*[splited_entry[head_indexes_dict[key]] for key in value_title_settings['keys']],
)
if not tmp_datetime in entrys_data[cmd]:
entrys_data[cmd][tmp_datetime] = copy.deepcopy(empty_dict_with_value_titles)
for value_title, value_title_settings in NEEDED_VALUES.items():
entrys_data[cmd][tmp_datetime][value_title] = getattr(
value_merger,
value_title_settings.get('merger', NEEDED_VALUES_DEFAULTS['default_merger']),
)(
entrys_data[cmd][tmp_datetime][value_title],
*[splited_entry[head_indexes_dict[key]] for key in value_title_settings['keys']],
)
#float(splited_entry[head_indexes_dict[head_title]])
print('parsing/merge took {:.1f}s'.format(time.time()- parsing_starttime))
dictbuild_starttime = time.time()
......@@ -223,21 +143,89 @@ def parse_file(path, collectl, merge, coarsest):
return entry_data_plotfriendly, plot_filter_data
def build_html(plots_dict, plotlypath):
def build_plot(plot_data, plotly_format_vars, **kwargs):
    """Make plotlayout with values from kwargs and plotly_format_vars.
    Build plot as html div.

    The texts 'title', 'xtitle' and 'ytitle' are str.format templates which
    are filled with plotly_format_vars. A text that is not given stays None.

    Arguments:
        plot_data {list} -- plot data in plotly layout
        plotly_format_vars {dict} -- values for the placeholders of the texts

    Keyword Arguments:
        height -- plot height (default: {500})
        width -- plot width (default: {None})
        title -- template of the plot title (default: {None})
        xtitle -- template of the x axis title (default: {None})
        ytitle -- template of the y axis title (default: {None})
        ytype -- type of the y axis, e.g. 'log' (default: {None})
        showlegend -- show the legend (default: {True})

    Raises:
        KeyError: raise if a template uses a placeholder that is missing
            in plotly_format_vars

    Returns:
        str -- plotly html div
    """
    def formatted(key):
        template = kwargs.get(key)
        # a text that was not configured must stay None; calling
        # None.format(...) would raise AttributeError
        if template is None:
            return None
        return template.format(**plotly_format_vars)

    plot = {
        'data': plot_data,
        'layout': {
            'height': kwargs.get('height', 500),
            'width': kwargs.get('width', None),
            'title': {
                'text': formatted('title'),
            },
            'xaxis': {
                'title': formatted('xtitle'),
            },
            'yaxis': {
                'title': formatted('ytitle'),
                'type': kwargs.get('ytype', None),
            },
            'showlegend': kwargs.get('showlegend', True),
        },
    }
    return plotly.offline.plot(plot, include_plotlyjs=False, output_type='div')
def scatter_plot(cmds_data, filter_info, cmd_color, plot_settings, plotly_format_vars, **kwargs):
    """Create one scattergl marker trace per command that is listed in filter_info
    for needed_key, then hand the traces to build_plot to get the html plot code.

    Arguments:
        cmds_data {dict} -- data for the plot example: {'Command': {'datetime':[...], 'cpu': [...]}}
        filter_info {dict} -- which cmds will be shown per key example: {'cpu': [cmds]}
        cmd_color {dict} -- fixed plot color of each cmd example: {'Command': 'rgb(0, 0, 128)'}
        plot_settings {dict} -- settings for build_plot
        plotly_format_vars {dict} -- values for build_plot

    Keyword Arguments:
        needed_key -- key of the values that are plotted on the y axis (required)

    Raises:
        Exception: raise if no needed_key is given in kwargs

    Returns:
        str -- plotly html div
    """
    if 'needed_key' not in kwargs:
        raise Exception('no needed_key in default_plot generater settings')
    needed_key = kwargs['needed_key']

    traces = [
        {
            'type': 'scattergl',
            'mode': 'markers',
            'x': cmds_data[cmd].get('datetime', []),
            'y': cmds_data[cmd].get(needed_key, []),
            'name': cmd,
            'marker': {
                'color': cmd_color[cmd],
            },
        }
        for cmd in filter_info[needed_key]
    ]
    return build_plot(traces, plotly_format_vars, **plot_settings)
def build_html(plots_dict):
doc, tag, text = Doc().tagtext()
doc.asis('<!DOCTYPE html>')
with tag('html'):
with tag('head'):
with tag('script'):
with Path(plotlypath).open(mode='r') as f:
doc.asis(f.read())
with tag('script', src='plotly.js'):
pass
with tag('script'):
doc.asis('''toggleplot = (name, index) => {
const elem = document.querySelector('#'+name+' div:nth-child('+index+')');
if (window.getComputedStyle(elem).display === "none") {
elem.style.display = 'contents';
const elems = document.getElementsByClassName(name);
if (window.getComputedStyle(elems[index]).display === "none") {
elems[index].style.display = 'contents';
} else {
elem.style.display = 'none';
elems[index].style.display = 'none';
}
}''')
with tag('body'):
......@@ -254,56 +242,108 @@ def build_html(plots_dict, plotlypath):
with tag('h2'):
text(name)
for i, plot in enumerate(plots):
with tag('button', onclick='toggleplot("{}", {})'.format(name, i+1)):
doc.asis('toggle host')
doc.asis(plot)
with tag('p'):
with tag('button', onclick='toggleplot("{}", {})'.format(name, i)):
text('toggle host')
with tag('div', klass=name):
doc.asis(plot)
return doc.getvalue()
def data_from_file(arguments):
    """Read the host name of a collectl file, parse the file and build the filter infos.

    All values come in one tuple, so the function can be used with Pool.map.

    Arguments:
        arguments {tuple} -- (path, collectl, merge, coarsest, filtercmds,
            filtervalue, filtertype); filtertype is the name of a function
            in filter_func

    Raises:
        subprocess.CalledProcessError: raise if awk exits with an error

    Returns:
        tuple -- (host, data, filter_infos)
    """
    path, collectl, merge, coarsest, filtercmds, filtervalue, filtertype = arguments

    # The with block closes the pipe and waits for zcat, so no zombie process
    # and no open file descriptor is left behind for every parsed file.
    with subprocess.Popen(('zcat', str(path)), stdout=subprocess.PIPE) as zcat:
        host = subprocess.check_output(
            ('awk', '/^# Host:/{print $3;exit}'),
            stdin=zcat.stdout,
        ).decode().strip()
        # awk stops at the first '# Host:' line, the rest of the file is not needed
        zcat.terminate()

    data, filter_data = parse_file(path, collectl, merge, coarsest)
    if filtercmds:
        filter_infos = getattr(filter_func, filtertype)(filter_data, filtervalue)
    else:
        # no filtering: every command is shown in every plot
        filter_infos = {key: list(data.keys()) for key in NEEDED_VALUES}
    return host, data, filter_infos
@click.command()
@click.option('--file', '-f', required=True)
@click.option('--files', '-f', multiple=True)
@click.option('--sourcedir', '-s', multiple=True)
@click.option('--collectl', '-c', required=False, default='collectl')
@click.option('--plotlypath', '-p', required=True)
@click.option('--plotlypath', '-p', default='plotly.min.js')
@click.option('--destination', '-d', required=False, default='.')
@click.option('--merge/--notmerge', default=True)
@click.option('--coarsest/--notcoarsest', default=False)
@click.option('--filtercmds/--notfiltercmds', default=True)
@click.option('--filtervalue', '-v', type=int, default=90)
@click.option('--filtertype', '-t',
type=click.Choice(FILTER_FUNCTIONS, case_sensitive=False),
default=FILTER_FUNCTIONS[0])
# Single file entry point: parse one collectl file, build every plot that is
# configured in PLOT_CONFIG and write them as 'plots.html' into destination.
def main(file, collectl, plotlypath, destination, merge, coarsest, filtercmds, filtervalue, filtertype):
path = Path(file)
# NOTE(review): the indentation is lost in this view; presumably everything
# below is nested under this check, so a missing file writes nothing -- confirm.
if path.exists():
data, filter_data = parse_file(path, collectl, merge, coarsest)
# filtertype is the name of a function in filter_func
if filtercmds:
filter_infos = getattr(filter_func, filtertype)(filter_data, filtervalue)
else:
# no filtering: every command is listed for every value
filter_infos = {key: list(data.keys()) for key in NEEDED_VALUES.keys()}
# one list of html plots per configured plot name
plots_dict = {config['name']: [] for config in PLOT_CONFIG}
for plot_config in PLOT_CONFIG:
# the generator function is looked up by name in plots_generators
plots_dict[plot_config['name']].append(getattr(plots_generators, plot_config['generator'])(
data,
filter_infos,
plot_config['plotly_settings'],
**plot_config['generator_settings'],
))
with Path(destination, 'plots.html').open(mode='w') as f:
f.write(build_html(plots_dict, plotlypath))
type=click.Choice(FILTER_FUNCTIONS.keys(), case_sensitive=False),
default=DEFAULT_FILTER)
def main(files, sourcedir, collectl, plotlypath, destination, merge, coarsest, filtercmds, filtervalue, filtertype):
    """Collect the collectl files, build the plots per host and write one html page per host.

    Arguments:
        files {tuple} -- paths of single collectl files
        sourcedir {tuple} -- directories which are searched for '.raw.gz' files,
            '.' is used if neither files nor sourcedir is given
        collectl {str} -- handed on to parse_file
        plotlypath {str} -- plotly javascript file, copied as 'plotly.js' into destination
        destination {str} -- directory for the generated files
        merge {bool} -- handed on to parse_file
        coarsest {bool} -- handed on to parse_file
        filtercmds {bool} -- filter the commands or show all of them
        filtervalue {int} -- value for the filter function
        filtertype {str} -- key of FILTER_FUNCTIONS

    Raises:
        Exception: raise if no valid source file is found
    """
    if not files and not sourcedir:
        sourcedir = ['.']

    source_paths = []
    for directory in sourcedir:
        if Path(directory).is_dir():
            source_paths.extend(
                Path(directory, sourcefile)
                for sourcefile in os.listdir(directory)
                if sourcefile.endswith('.raw.gz')
            )
    source_paths.extend(path for path in map(Path, files) if path.is_file())

    if not source_paths:
        # The exception was only created and never raised, so a run without
        # sources went on and failed later in mp.Pool(0).
        raise Exception("no valid source found")

    # data_from_file gets all values as one tuple (Pool.map passes one argument)
    jobs = [
        (source_path, collectl, merge, coarsest, filtercmds, filtervalue, FILTER_FUNCTIONS[filtertype])
        for source_path in source_paths
    ]
    # the with block also cleans up the worker processes
    with mp.Pool(min(len(jobs), mp.cpu_count())) as pool:
        results = pool.map(data_from_file, jobs)

    hosts_data = {}
    shown_cmds = set()
    for host, data, filter_infos in results:
        hosts_data[host] = {'data': data, 'filter_infos': filter_infos}
        for cmds in filter_infos.values():
            shown_cmds.update(cmds)
    # sorted, so a command gets the same color in every run
    cmd_colors = {
        cmd: PLOTLY_COLORS[i % len(PLOTLY_COLORS)]
        for i, cmd in enumerate(sorted(shown_cmds))
    }

    start_plots_build = time.time()
    plots_dict = {}
    for host, host_data in hosts_data.items():
        plots_dict[host] = {config['name']: [] for config in PLOT_CONFIG}
        for plot_config in PLOT_CONFIG:
            plots_dict[host][plot_config['name']].append(scatter_plot(
                host_data['data'],
                host_data['filter_infos'],
                cmd_colors,
                plot_config['plotly_settings'],
                {'host': host},
                **plot_config['generator_settings'],
            ))
    print("plots build in {:.1f}s".format(time.time() - start_plots_build))

    try:
        shutil.copy(plotlypath, str(Path(destination, 'plotly.js')))
    except shutil.SameFileError:
        # plotlypath already is destination/plotly.js
        pass

    time_sites = time.time()
    for host, site_plots in plots_dict.items():
        with Path(destination, host+'-plots.html').open(mode='w') as f:
            f.write(build_html(site_plots))
    print("write/build of websites took {:.1f}s".format(time.time() - time_sites))
if __name__ == '__main__':
......
# Choice on the command line (--filtertype) -> name of the function in filter_func.
FILTER_FUNCTIONS = {
    'hardvalue': 'filter_hardvalue',
    'average': 'filter_average',
}
# Key of FILTER_FUNCTIONS that is used if --filtertype is not given.
DEFAULT_FILTER = 'average'

# Command line options listed per interpreter name.
# NOTE(review): presumably the options whose following argument has to be
# skipped when the command name is derived -- the consumer is not visible
# here, confirm.
NAME_SPEZIAL_PARAMETER_CONFIG = {
    'java': ['-cp', '-classpath'],
    'bash': [],
    'sh': [],
    'perl': [],
    'python': ['-m', '-W', '-X', '--check-hash-based-pycs', '-c'],
}

# 'default_merger' is used for every NEEDED_VALUES entry without own 'merger'.
NEEDED_VALUES_DEFAULTS = {
    'default_value': 0,
    'default_merger': 'addition_int',
}

# Per value title: 'keys' are the column titles read from each parsed entry,
# 'merger' is the name of the function in value_merger that combines them.
# NOTE(review): 'x2oneaddtion_int' is spelled differently from the other
# 'x2oneaddition_*' names -- value_merger is not visible here, confirm.
NEEDED_VALUES = {
    'PCT': {'keys': ['PCT']},
    'VmRSS': {'keys': ['VmRSS'], 'merger': 'x2oneaddition_float_sizedown2'},
    'RKBC': {'keys': ['RKBC'], 'merger': 'x2oneaddition_float_sizedown1'},
    'WKBC': {'keys': ['WKBC'], 'merger': 'x2oneaddition_float_sizedown1'},
    'syscalls': {'keys': ['RSYS', 'WSYS'], 'merger': 'x2oneaddtion_int'},
}

# Commands that are left out while parsing.
# NOTE(review): despite the name, the visible parser checks plain list
# membership (cmd in COMAND_BLACKLIST_REGEX), not a regular expression -- confirm.
COMAND_BLACKLIST_REGEX = [
    'collectl',
]

# Colors that are assigned to the commands in turn (reused when more commands
# than colors exist).
PLOTLY_COLORS = [
    'rgb(31, 119, 180)',
    'rgb(255, 127, 14)',
    'rgb(44, 160, 44)',
    'rgb(214, 39, 40)',
    'rgb(148, 103, 189)',
    'rgb(140, 86, 75)',
    'rgb(227, 119, 194)',
    'rgb(127, 127, 127)',
    'rgb(188, 189, 34)',
    'rgb(23, 190, 207)',
]
# One entry per plot:
#   'name'               -- key under which the plot is stored
#   'generator_settings' -- keyword arguments for the plot function
#   'plotly_settings'    -- layout settings; '{host}' in a text is a
#                           str.format placeholder for the host name
PLOT_CONFIG = [
    {
        'name': 'cpu',
        'generator_settings': {'needed_key': 'PCT'},
        'plotly_settings': {
            'title': 'CPU load {host}',
            'xtitle': 'Date',
            'ytitle': 'CPU load',
        },
    },
    {
        'name': 'ram',
        'generator_settings': {'needed_key': 'VmRSS'},
        'plotly_settings': {
            'title': 'Memory Usage {host}',
            'xtitle': 'Date',
            'ytitle': 'RAM usage GiB',
        },
    },
    {
        'name': 'ior',
        'generator_settings': {'needed_key': 'RKBC'},
        'plotly_settings': {
            'title': 'read io {host}',
            'xtitle': 'Date',
            'ytitle': 'I/O MiB/s',
            'ytype': 'log',
        },
    },
    {
        'name': 'iow',
        'generator_settings': {'needed_key': 'WKBC'},
        'plotly_settings': {
            'title': 'write io {host}',
            'xtitle': 'Date',
            'ytitle': 'I/O MiB/s',
            'ytype': 'log',
        },
    },
    {
        'name': 'ios',
        'generator_settings': {'needed_key': 'syscalls'},
        'plotly_settings': {
            'title': 'I/O syscalls {host}',
            'xtitle': 'Date',
            'ytitle': 'I/O syscalls/s',
            'ytype': 'log',
        },
    },
]
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment