Commit 1c733ec4 authored by Julius Metz's avatar Julius Metz

performance changes

parent 94f04ff3
import os
import re
import gzip
import subprocess
import shlex
from pathlib import Path
import copy
import time
......@@ -9,9 +8,10 @@ import datetime
import multiprocessing as mp
import shutil
import importlib
import pickle
import json
import click
import plotly
from yattag import Doc
......@@ -38,6 +38,9 @@ DEFAULT_FILTER = 'average'
CONFIG_VARIABLES = ['NEEDED_VALUES_DEFAULTS', 'NEEDED_VALUES', 'PLOTS_CONFIG',
'NAME_SPEZIAL_PARAMETER', 'COMAND_BLACKLIST', 'PLOTLY_COLORS', 'PLOTLY_STATIC_COLORS']
# Pattern for splitting a command line into fields; each match carries the
# field text in exactly one of its two groups (callers take group 1 or group 2):
#   group 1 - a double-quoted field: the text between an opening `"` (at the
#             start of the string or after a non-backslash character) and the
#             first following `"` that is not preceded by a backslash; `""`
#             yields an empty capture.
#   group 2 - an unquoted field: starts at a non-whitespace character (at the
#             start of the string or after whitespace) and runs lazily up to
#             the first non-backslash character that is followed by a space or
#             the end of the string, so a backslash-escaped space stays inside
#             the field.
FIELDS_PATTERN = re.compile(r'(?:(?:\s*[^\\]|\A)\"(.*?[^\\]|)\"|(?:\s+|\A)(?=[^\s])(.*?[^\\])(?= |\Z))')
# Simpler variant without backslash-escape handling, kept for reference:
#(?:\"(.*?)\"|(\S+))
def datestr2date(datestr):
......@@ -69,7 +72,9 @@ def get_cmdname(cmd, spezial_parameters_of_all, coarsest=False):
str -- new cmd name
"""
# search for (for example) bash_function="python" in cmd="/usr/bin/python3 my_script.py"
cmd_splited = shlex.split(cmd)
# split command with regex
cmd_splited = [x[0] or x[1] for x in FIELDS_PATTERN.findall(cmd)]
#shlex.split(cmd)
bash_function = cmd_splited[0].split('/')[-1]
bash_function = re.search(r'[^\W\n]+', bash_function).group(0)
# check if bash_function is known, if not, return bash_function
......@@ -87,7 +92,7 @@ def get_cmdname(cmd, spezial_parameters_of_all, coarsest=False):
continue
if bash_function == 'bash' or bash_function == 'sh' and parameter == '-c':
return bash_function + ' -c'
# return shlex.join(cmd_splited[position+1:])
# return cmd_splited[position+1]
if parameter.startswith('-'):
continue
return parameter.split('/')[-1]
......@@ -171,8 +176,8 @@ def parse_file(path, collectl, shorten_cmds, coarsest, config):
)
# if datetime not yet existing, add new entry from template
if not tmp_datetime in entrys_data[cmd]:
entrys_data[cmd][tmp_datetime] = copy.deepcopy(
empty_dict_with_value_titles)
entrys_data[cmd][tmp_datetime] = pickle.loads(pickle.dumps(
empty_dict_with_value_titles, -1))
# get values from data as given in NEEDED_VALUES and run specified merger function
# to merge multiple values with same timestamp or rescale (e.g. to GB)
for value_title, value_title_settings in config['NEEDED_VALUES'].items():
......@@ -190,12 +195,12 @@ def parse_file(path, collectl, shorten_cmds, coarsest, config):
# create lists entry_data_plotfriendly[cmd][metric] = [ values, ... ] with the actual data to plot
# and sum up all metrics for each command to enable filtering of non-interesting commands later on
entry_data_plotfriendly = {}
plot_filter_data = copy.deepcopy(empty_dict_with_value_titles)
plot_filter_data = pickle.loads(pickle.dumps(empty_dict_with_value_titles, -1))
plot_filter_data['number_of_values'] = 0
plot_filter_data['commands'] = {}
for cmd, cmd_data in entrys_data.items():
plot_filter_data['commands'][cmd] = copy.deepcopy(
empty_dict_with_value_titles)
plot_filter_data['commands'][cmd] = pickle.loads(pickle.dumps(
empty_dict_with_value_titles, -1))
plot_filter_data['commands'][cmd]['number_of_values'] = 0
entry_data_plotfriendly[cmd] = {key: [] for key in config['NEEDED_VALUES']}
entry_data_plotfriendly[cmd]['datetime'] = []
......@@ -231,56 +236,52 @@ def make_plot(cmds_data, filter_info, cmd_color, plot_settings, plotly_format_va
str -- plotly html div
"""
plot_data = []
#startdate = min([date for cmd in filter_info[needed_key] for date in cmds_data[cmd]['datetime']])
for cmd in filter_info[needed_key]:
trace = {
'x': cmds_data[cmd].get('datetime', []),
'y': cmds_data[cmd].get(needed_key, []),
plot_data.append({
'x': [str(date) for date in cmds_data[cmd]['datetime']],
'y': cmds_data[cmd][needed_key],
'name': cmd,
'marker': {
'color': cmd_color[cmd],
},
**plot_settings['data'],
}
trace.update(
plot_settings['data'],
)
plot_data.append(trace)
plot = {
'data': plot_data,
'layout': copy.deepcopy(plot_settings['layout']),
}
if 'title' in plot['layout']:
if type(plot['layout']['title']) == str:
plot['layout']['title'] = plot['layout']['title'].format(
layout = pickle.loads(pickle.dumps(plot_settings['layout'], -1))
if 'title' in layout:
if type(layout['title']) == str:
layout['title'] = layout['title'].format(
**plotly_format_vars,
)
else:
plot['layout']['title']['text'] = plot['layout']['title']['text'].format(
layout['title']['text'] = layout['title']['text'].format(
**plotly_format_vars,
)
if 'yaxis' in plot['layout'] and 'title' in plot['layout']['yaxis']:
if type(plot['layout']['yaxis']['title']) == str:
plot['layout']['yaxis']['title'] = plot['layout']['yaxis']['title'].format(
if 'yaxis' in layout and 'title' in layout['yaxis']:
if type(layout['yaxis']['title']) == str:
layout['yaxis']['title'] = layout['yaxis']['title'].format(
**plotly_format_vars,
)
else:
plot['layout']['yaxis']['title']['text'] = plot['layout']['yaxis']['title']['text'].format(
layout['yaxis']['title']['text'] = layout['yaxis']['title']['text'].format(
**plotly_format_vars,
)
if 'xaxis' in plot['layout'] and 'title' in plot['layout']['xaxis']:
if type(plot['layout']['xaxis']['title']) == str:
plot['layout']['xaxis']['title'] = plot['layout']['xaxis']['title'].format(
if 'xaxis' in layout and 'title' in layout['xaxis']:
if type(layout['xaxis']['title']) == str:
layout['xaxis']['title'] = layout['xaxis']['title'].format(
**plotly_format_vars,
)
else:
plot['layout']['xaxis']['title']['text'] = plot['layout']['xaxis']['title']['text'].format(
layout['xaxis']['title']['text'] = layout['xaxis']['title']['text'].format(
**plotly_format_vars,
)
return plotly.offline.plot(plot, include_plotlyjs=False, output_type='div')
return {'data': json.dumps(plot_data), 'layout': json.dumps(layout)}
def build_html(plots_dict):
......@@ -307,6 +308,15 @@ def build_html(plots_dict):
elems[index].style.display = 'none';
}
}''')
with tag('script'):
doc.asis("document.onreadystatechange = () => {if (document.readyState === 'complete') {")
doc.asis('console.log("test");')
for name, plots in plots_dict.items():
for i, plot in enumerate(plots):
doc.asis("Plotly.newPlot(document.getElementById('{name}-plot-{number}'), JSON.parse('{data}'), JSON.parse('{layout}'));"\
.format(name=name, number=i, data=plot['data'], layout=plot['layout'])
)
doc.asis('}}')
with tag('body'):
with tag('div', id='index'):
with tag('h2'):
......@@ -320,12 +330,12 @@ def build_html(plots_dict):
with tag('div', id=name):
#with tag('h2'):
# text(name)
for i, plot in enumerate(plots):
for i in range(len(plots)):
#with tag('p'):
# with tag('button', onclick='toggleplot("{}", {})'.format(name, i)):
# text('toggle plot')
with tag('div', klass=name):
doc.asis(plot)
with tag('div', id='{}-plot-{}'.format(name, i)):
pass
return doc.getvalue()
......@@ -368,11 +378,11 @@ def data_from_file(arguments):
3. filter_infos = {metrics: [cmds, ...]
"""
path, collectl, shorten_cmds, coarsest, filtercmds, filtervalue, filtertype, config = arguments
zcat = subprocess.Popen(('zcat', str(path)), stdout=subprocess.PIPE)
host = subprocess.check_output(
('awk', '/^# Host:/{print $3;exit}'), stdin=zcat.stdout,
).decode().strip()
zcat.terminate()
host = ''
with gzip.open(path, 'r') as f:
for line in f:
if line.startswith(b'# Host:'):
host = re.search(r'# Host: *([^ ]+)', line.decode()).group(1)
data, filter_data = parse_file(path, collectl, shorten_cmds, coarsest, config)
if filtercmds:
filter_infos = getattr(filter_func, filtertype)(filter_data, filtervalue)
......@@ -417,7 +427,6 @@ def main(source, collectl, plotlypath, destination, configpath,
print('in destination "collectlplots" already exist')
exit(1)
config_module = default_plot_conf
if configpath:
if configpath.endswith('.py'):
......@@ -454,12 +463,14 @@ def main(source, collectl, plotlypath, destination, configpath,
pool.close()
hosts_data = {}
cmd_all = set()
for host, data, filter_infos in results:
hosts_data[host] = {'data': data, 'filter_infos': filter_infos}
cmd_all.update([cmd for cmds in filter_infos.values() for cmd in cmds])
cmd_colors = {cmd: config['PLOTLY_COLORS'][i % len(config['PLOTLY_COLORS'])] for i, cmd in enumerate(sorted(list(cmd_all)))}
cmd_colors.update(config['PLOTLY_STATIC_COLORS'])
# for each host and each plot (as given in config) call make_plot to create html div with plotly
start_plots_build = time.time()
plots_dict = {}
......
......@@ -12,7 +12,6 @@ For information of the option use "--help" or go to the option description.
### Requirements
```
Click=<7.0
plotly=<4.5.0
yattag=<1.13.2
```
......@@ -24,7 +23,7 @@ yattag=<1.13.2
| -c, --collectl | how to call collectl | is the collectl that will be called to get the data from the sources | collectl without a path |
| -p, --plotlypath | a path to a plotly JavaScript library | is needed for the plots in the html files | plotly.js next to the script |
| -d, --destination | a path to a directory | is where a directory with the html files will be created | current directory |
| --configpath | a path to config file | is a python file with the plot and merge settings see [here](config)| a default config |
| --configpath | a path to a config file | is a Python file with the plot and merge settings, see [here](#config) | a default config |
| --shorten /<br> --notshorten | - | enable or disable shortening of commands with parameters/options to just the file/command name. <br> examples:<br> python ~/scripts/script.py 1 --> script.py <br>ls -lisa --> ls | enabled |
| --coarsest /<br> --notcoarsest | - | enable or disable shortening of commands to just the command name.<br> If enabled, --shorten is ignored!<br> examples: <br> python ~/scripts/script.py 1 --> python <br> ls -lisa --> ls | disabled |
| --filtercmds / --notfiltercmds | - | enable or disable filtering | enabled |
......@@ -287,8 +286,8 @@ In default it gives only two filter but in value_merger.py it is possible to add
#### Arguments of a merger function
1. base value is the initial value that the other values are merged onto. It normally has the same type as the return value,
because the return value can become the next base value.
2. (args) All following arguments are the values(str) from the collectl that were assigned in the Config.
3. (kwargs) Are the Parameter that were assigned in the Config for the merger.
2. (args) All following arguments are the values (str) from collectl that were assigned in [NEEDED_VALUES](#needed_values).
3. (kwargs) The parameters that were assigned in [NEEDED_VALUES](#needed_values) for the merger.
#### Return
A merger function must return a value that can serve as the base value for the same merger.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment