Commit 1c733ec4 authored by Julius Metz's avatar Julius Metz

performance changes

parent 94f04ff3
import os
import re import re
import gzip
import subprocess import subprocess
import shlex
from pathlib import Path from pathlib import Path
import copy import copy
import time import time
...@@ -9,9 +8,10 @@ import datetime ...@@ -9,9 +8,10 @@ import datetime
import multiprocessing as mp import multiprocessing as mp
import shutil import shutil
import importlib import importlib
import pickle
import json
import click import click
import plotly
from yattag import Doc from yattag import Doc
...@@ -38,6 +38,9 @@ DEFAULT_FILTER = 'average' ...@@ -38,6 +38,9 @@ DEFAULT_FILTER = 'average'
CONFIG_VARIABLES = ['NEEDED_VALUES_DEFAULTS', 'NEEDED_VALUES', 'PLOTS_CONFIG', CONFIG_VARIABLES = ['NEEDED_VALUES_DEFAULTS', 'NEEDED_VALUES', 'PLOTS_CONFIG',
'NAME_SPEZIAL_PARAMETER', 'COMAND_BLACKLIST', 'PLOTLY_COLORS', 'PLOTLY_STATIC_COLORS'] 'NAME_SPEZIAL_PARAMETER', 'COMAND_BLACKLIST', 'PLOTLY_COLORS', 'PLOTLY_STATIC_COLORS']
#pattern for split of commands (with \" and \ )
FIELDS_PATTERN = re.compile(r'(?:(?:\s*[^\\]|\A)\"(.*?[^\\]|)\"|(?:\s+|\A)(?=[^\s])(.*?[^\\])(?= |\Z))')
#(?:\"(.*?)\"|(\S+))
def datestr2date(datestr): def datestr2date(datestr):
...@@ -69,7 +72,9 @@ def get_cmdname(cmd, spezial_parameters_of_all, coarsest=False): ...@@ -69,7 +72,9 @@ def get_cmdname(cmd, spezial_parameters_of_all, coarsest=False):
str -- new cmd name str -- new cmd name
""" """
# search for (for example) bash_function="python" in cmd="/usr/bin/python3 my_script.py" # search for (for example) bash_function="python" in cmd="/usr/bin/python3 my_script.py"
cmd_splited = shlex.split(cmd) # split command with regex
cmd_splited = [x[0] or x[1] for x in FIELDS_PATTERN.findall(cmd)]
#shlex.split(cmd)
bash_function = cmd_splited[0].split('/')[-1] bash_function = cmd_splited[0].split('/')[-1]
bash_function = re.search(r'[^\W\n]+', bash_function).group(0) bash_function = re.search(r'[^\W\n]+', bash_function).group(0)
# check if bash_function is known, if not, return bash_function # check if bash_function is known, if not, return bash_function
...@@ -87,7 +92,7 @@ def get_cmdname(cmd, spezial_parameters_of_all, coarsest=False): ...@@ -87,7 +92,7 @@ def get_cmdname(cmd, spezial_parameters_of_all, coarsest=False):
continue continue
if bash_function == 'bash' or bash_function == 'sh' and parameter == '-c': if bash_function == 'bash' or bash_function == 'sh' and parameter == '-c':
return bash_function + ' -c' return bash_function + ' -c'
# return shlex.join(cmd_splited[position+1:]) # return cmd_splited[position+1]
if parameter.startswith('-'): if parameter.startswith('-'):
continue continue
return parameter.split('/')[-1] return parameter.split('/')[-1]
...@@ -171,8 +176,8 @@ def parse_file(path, collectl, shorten_cmds, coarsest, config): ...@@ -171,8 +176,8 @@ def parse_file(path, collectl, shorten_cmds, coarsest, config):
) )
# if datetime not yet existing, add new entry from template # if datetime not yet existing, add new entry from template
if not tmp_datetime in entrys_data[cmd]: if not tmp_datetime in entrys_data[cmd]:
entrys_data[cmd][tmp_datetime] = copy.deepcopy( entrys_data[cmd][tmp_datetime] = pickle.loads(pickle.dumps(
empty_dict_with_value_titles) empty_dict_with_value_titles, -1))
# get values from data as given in NEEDED_VALUES and run specified merger function # get values from data as given in NEEDED_VALUES and run specified merger function
# to merge multiple values with same timestamp or rescale (e.g. to GB) # to merge multiple values with same timestamp or rescale (e.g. to GB)
for value_title, value_title_settings in config['NEEDED_VALUES'].items(): for value_title, value_title_settings in config['NEEDED_VALUES'].items():
...@@ -190,12 +195,12 @@ def parse_file(path, collectl, shorten_cmds, coarsest, config): ...@@ -190,12 +195,12 @@ def parse_file(path, collectl, shorten_cmds, coarsest, config):
# create lists entry_data_plotfriendly[cmd][metric] = [ values, ... ] with the actual data to plot # create lists entry_data_plotfriendly[cmd][metric] = [ values, ... ] with the actual data to plot
# and sum up all metrics for each command to enable filtering of non-interesting commands later on # and sum up all metrics for each command to enable filtering of non-interesting commands later on
entry_data_plotfriendly = {} entry_data_plotfriendly = {}
plot_filter_data = copy.deepcopy(empty_dict_with_value_titles) plot_filter_data = pickle.loads(pickle.dumps(empty_dict_with_value_titles, -1))
plot_filter_data['number_of_values'] = 0 plot_filter_data['number_of_values'] = 0
plot_filter_data['commands'] = {} plot_filter_data['commands'] = {}
for cmd, cmd_data in entrys_data.items(): for cmd, cmd_data in entrys_data.items():
plot_filter_data['commands'][cmd] = copy.deepcopy( plot_filter_data['commands'][cmd] = pickle.loads(pickle.dumps(
empty_dict_with_value_titles) empty_dict_with_value_titles, -1))
plot_filter_data['commands'][cmd]['number_of_values'] = 0 plot_filter_data['commands'][cmd]['number_of_values'] = 0
entry_data_plotfriendly[cmd] = {key: [] for key in config['NEEDED_VALUES']} entry_data_plotfriendly[cmd] = {key: [] for key in config['NEEDED_VALUES']}
entry_data_plotfriendly[cmd]['datetime'] = [] entry_data_plotfriendly[cmd]['datetime'] = []
...@@ -231,56 +236,52 @@ def make_plot(cmds_data, filter_info, cmd_color, plot_settings, plotly_format_va ...@@ -231,56 +236,52 @@ def make_plot(cmds_data, filter_info, cmd_color, plot_settings, plotly_format_va
str -- plotly html div str -- plotly html div
""" """
plot_data = [] plot_data = []
#startdate = min([date for cmd in filter_info[needed_key] for date in cmds_data[cmd]['datetime']])
for cmd in filter_info[needed_key]: for cmd in filter_info[needed_key]:
trace = { plot_data.append({
'x': cmds_data[cmd].get('datetime', []), 'x': [str(date) for date in cmds_data[cmd]['datetime']],
'y': cmds_data[cmd].get(needed_key, []), 'y': cmds_data[cmd][needed_key],
'name': cmd, 'name': cmd,
'marker': { 'marker': {
'color': cmd_color[cmd], 'color': cmd_color[cmd],
}, },
**plot_settings['data'],
} }
trace.update(
plot_settings['data'],
) )
plot_data.append(trace)
plot = { layout = pickle.loads(pickle.dumps(plot_settings['layout'], -1))
'data': plot_data, if 'title' in layout:
'layout': copy.deepcopy(plot_settings['layout']), if type(layout['title']) == str:
} layout['title'] = layout['title'].format(
if 'title' in plot['layout']:
if type(plot['layout']['title']) == str:
plot['layout']['title'] = plot['layout']['title'].format(
**plotly_format_vars, **plotly_format_vars,
) )
else: else:
plot['layout']['title']['text'] = plot['layout']['title']['text'].format( layout['title']['text'] = layout['title']['text'].format(
**plotly_format_vars, **plotly_format_vars,
) )
if 'yaxis' in plot['layout'] and 'title' in plot['layout']['yaxis']: if 'yaxis' in layout and 'title' in layout['yaxis']:
if type(plot['layout']['yaxis']['title']) == str: if type(layout['yaxis']['title']) == str:
plot['layout']['yaxis']['title'] = plot['layout']['yaxis']['title'].format( layout['yaxis']['title'] = layout['yaxis']['title'].format(
**plotly_format_vars, **plotly_format_vars,
) )
else: else:
plot['layout']['yaxis']['title']['text'] = plot['layout']['yaxis']['title']['text'].format( layout['yaxis']['title']['text'] = layout['yaxis']['title']['text'].format(
**plotly_format_vars, **plotly_format_vars,
) )
if 'xaxis' in plot['layout'] and 'title' in plot['layout']['xaxis']: if 'xaxis' in layout and 'title' in layout['xaxis']:
if type(plot['layout']['xaxis']['title']) == str: if type(layout['xaxis']['title']) == str:
plot['layout']['xaxis']['title'] = plot['layout']['xaxis']['title'].format( layout['xaxis']['title'] = layout['xaxis']['title'].format(
**plotly_format_vars, **plotly_format_vars,
) )
else: else:
plot['layout']['xaxis']['title']['text'] = plot['layout']['xaxis']['title']['text'].format( layout['xaxis']['title']['text'] = layout['xaxis']['title']['text'].format(
**plotly_format_vars, **plotly_format_vars,
) )
return plotly.offline.plot(plot, include_plotlyjs=False, output_type='div') return {'data': json.dumps(plot_data), 'layout': json.dumps(layout)}
def build_html(plots_dict): def build_html(plots_dict):
...@@ -307,6 +308,15 @@ def build_html(plots_dict): ...@@ -307,6 +308,15 @@ def build_html(plots_dict):
elems[index].style.display = 'none'; elems[index].style.display = 'none';
} }
}''') }''')
with tag('script'):
doc.asis("document.onreadystatechange = () => {if (document.readyState === 'complete') {")
doc.asis('console.log("test");')
for name, plots in plots_dict.items():
for i, plot in enumerate(plots):
doc.asis("Plotly.newPlot(document.getElementById('{name}-plot-{number}'), JSON.parse('{data}'), JSON.parse('{layout}'));"\
.format(name=name, number=i, data=plot['data'], layout=plot['layout'])
)
doc.asis('}}')
with tag('body'): with tag('body'):
with tag('div', id='index'): with tag('div', id='index'):
with tag('h2'): with tag('h2'):
...@@ -320,12 +330,12 @@ def build_html(plots_dict): ...@@ -320,12 +330,12 @@ def build_html(plots_dict):
with tag('div', id=name): with tag('div', id=name):
#with tag('h2'): #with tag('h2'):
# text(name) # text(name)
for i, plot in enumerate(plots): for i in range(len(plots)):
#with tag('p'): #with tag('p'):
# with tag('button', onclick='toggleplot("{}", {})'.format(name, i)): # with tag('button', onclick='toggleplot("{}", {})'.format(name, i)):
# text('toggle plot') # text('toggle plot')
with tag('div', klass=name): with tag('div', id='{}-plot-{}'.format(name, i)):
doc.asis(plot) pass
return doc.getvalue() return doc.getvalue()
...@@ -368,11 +378,11 @@ def data_from_file(arguments): ...@@ -368,11 +378,11 @@ def data_from_file(arguments):
3. filter_infos = {metrics: [cmds, ...] 3. filter_infos = {metrics: [cmds, ...]
""" """
path, collectl, shorten_cmds, coarsest, filtercmds, filtervalue, filtertype, config = arguments path, collectl, shorten_cmds, coarsest, filtercmds, filtervalue, filtertype, config = arguments
zcat = subprocess.Popen(('zcat', str(path)), stdout=subprocess.PIPE) host = ''
host = subprocess.check_output( with gzip.open(path, 'r') as f:
('awk', '/^# Host:/{print $3;exit}'), stdin=zcat.stdout, for line in f:
).decode().strip() if line.startswith(b'# Host:'):
zcat.terminate() host = re.search(r'# Host: *([^ ]+)', line.decode()).group(1)
data, filter_data = parse_file(path, collectl, shorten_cmds, coarsest, config) data, filter_data = parse_file(path, collectl, shorten_cmds, coarsest, config)
if filtercmds: if filtercmds:
filter_infos = getattr(filter_func, filtertype)(filter_data, filtervalue) filter_infos = getattr(filter_func, filtertype)(filter_data, filtervalue)
...@@ -417,7 +427,6 @@ def main(source, collectl, plotlypath, destination, configpath, ...@@ -417,7 +427,6 @@ def main(source, collectl, plotlypath, destination, configpath,
print('in destination "collectlplots" already exist') print('in destination "collectlplots" already exist')
exit(1) exit(1)
config_module = default_plot_conf config_module = default_plot_conf
if configpath: if configpath:
if configpath.endswith('.py'): if configpath.endswith('.py'):
...@@ -454,18 +463,20 @@ def main(source, collectl, plotlypath, destination, configpath, ...@@ -454,18 +463,20 @@ def main(source, collectl, plotlypath, destination, configpath,
pool.close() pool.close()
hosts_data = {} hosts_data = {}
cmd_all = set() cmd_all = set()
for host, data, filter_infos in results: for host, data, filter_infos in results:
hosts_data[host] = {'data': data, 'filter_infos': filter_infos} hosts_data[host] = {'data': data, 'filter_infos': filter_infos}
cmd_all.update([cmd for cmds in filter_infos.values() for cmd in cmds]) cmd_all.update([cmd for cmds in filter_infos.values() for cmd in cmds])
cmd_colors = {cmd: config['PLOTLY_COLORS'][i % len(config['PLOTLY_COLORS'])] for i, cmd in enumerate(sorted(list(cmd_all)))} cmd_colors = {cmd: config['PLOTLY_COLORS'][i % len(config['PLOTLY_COLORS'])] for i, cmd in enumerate(sorted(list(cmd_all)))}
cmd_colors.update(config['PLOTLY_STATIC_COLORS']) cmd_colors.update(config['PLOTLY_STATIC_COLORS'])
# for each host and each plot (as given in config) call make_plot to create html div with plotly # for each host and each plot (as given in config) call make_plot to create html div with plotly
start_plots_build = time.time() start_plots_build = time.time()
plots_dict = {} plots_dict = {}
for host, host_data in hosts_data.items(): for host, host_data in hosts_data.items():
plots_dict[host] = {plot_config['name']: [] for plot_config in config['PLOTS_CONFIG']} plots_dict[host] = {plot_config['name']: [] for plot_config in config['PLOTS_CONFIG']}
for plot_config in config['PLOTS_CONFIG']: for plot_config in config['PLOTS_CONFIG']:
plots_dict[host][plot_config['name']].append( make_plot( plots_dict[host][plot_config['name']].append( make_plot(
host_data['data'], host_data['data'],
host_data['filter_infos'], host_data['filter_infos'],
......
...@@ -12,7 +12,6 @@ For information of the option use "--help" or go to the option description. ...@@ -12,7 +12,6 @@ For information of the option use "--help" or go to the option description.
### Requirements ### Requirements
``` ```
Click=<7.0 Click=<7.0
plotly=<4.5.0
yattag=<1.13.2 yattag=<1.13.2
``` ```
...@@ -24,7 +23,7 @@ yattag=<1.13.2 ...@@ -24,7 +23,7 @@ yattag=<1.13.2
| -c, --collectl | how to call collectl | is the collectl that will be called to get the data from the sources | collectl without a path | | -c, --collectl | how to call collectl | is the collectl that will be called to get the data from the sources | collectl without a path |
| -p, --plotlypath | a path to a plotly javascript library | is needed for the plot in the html files | plotly.js next to script | | -p, --plotlypath | a path to a plotly javascript library | is needed for the plot in the html files | plotly.js next to script |
| -d, --destination | a path to a directory | is where a directory with the html files will be created | current directory | | -d, --destination | a path to a directory | is where a directory with the html files will be created | current directory |
| --configpath | a path to config file | is a python file with the plot and merge settings see [here](config)| a default config | | --configpath | a path to config file | is a python file with the plot and merge settings see [here](#config)| a default config |
| --shorten /<br> --notshorten | - | enable or disable shortening of commands with parameters/options only to file/command names. <br> examples:<br> python ~/scripts/script.py 1 --> script.py <br>ls -lisa --> ls | enabled | | --shorten /<br> --notshorten | - | enable or disable shortening of commands with parameters/options only to file/command names. <br> examples:<br> python ~/scripts/script.py 1 --> script.py <br>ls -lisa --> ls | enabled |
| --coarsest /<br> --notcoarsest | - | enable or disable shortening of commands only to command names.<br> If enabled, --shorten is ignored!<br> examples: <br> python ~/scripts/script.py 1 --> python <br> ls -lisa --> ls | disabled | | --coarsest /<br> --notcoarsest | - | enable or disable shortening of commands only to command names.<br> If enabled, --shorten is ignored!<br> examples: <br> python ~/scripts/script.py 1 --> python <br> ls -lisa --> ls | disabled |
| --filtercmds / --notfiltercmds | - | enable or disable filtering | enabled | | --filtercmds / --notfiltercmds | - | enable or disable filtering | enabled |
...@@ -287,8 +286,8 @@ In default it gives only two filter but in value_merger.py it is possible to add ...@@ -287,8 +286,8 @@ In default it gives only two filter but in value_merger.py it is possible to add
#### Arguments of a merger function #### Arguments of a merger function
1. base value is the initial value that merging starts from. It normally has the same type as the return value 1. base value is the initial value that merging starts from. It normally has the same type as the return value
because the return value may become the next base value. because the return value may become the next base value.
2. (args) All following arguments are the values(str) from the collectl that were assigned in the Config. 2. (args) All following arguments are the values(str) from the collectl that were assigned in [NEEDED_VALUES](#needed_values).
3. (kwargs) These are the parameters that were assigned in the Config for the merger. 3. (kwargs) These are the parameters that were assigned in [NEEDED_VALUES](#needed_values) for the merger.
#### Return #### Return
A merger function must return a value that can be the base value of its merger. A merger function must return a value that can be the base value of its merger.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment