Commit 45ba026b authored by Julius Metz's avatar Julius Metz

pep8, doku

parent ac89a778
......@@ -2,4 +2,5 @@ venv
__pycache__
trash
*.raw.gz
testsites
\ No newline at end of file
testsites
.pylintrc
\ No newline at end of file
......@@ -14,10 +14,9 @@ import plotly
from yattag import Doc
import plots_generators
import filter_func
import value_merger
from sitegenerator_Config import *
from Collectl2plotly_Config import *
def datestr2date(datestr):
......@@ -52,7 +51,7 @@ def get_cmdname(cmd, coarsest=False):
bash_function = cmd_splited[0].split('/')[-1]
bash_function = re.search(r'[^\W\n]+', bash_function).group(0)
spezial_parameter = NAME_SPEZIAL_PARAMETER_CONFIG.get(bash_function, None)
if coarsest or spezial_parameter == None:
if coarsest or spezial_parameter is None:
return bash_function
skip = False
for position, parameter in enumerate(cmd_splited[1:]):
......@@ -64,19 +63,32 @@ def get_cmdname(cmd, coarsest=False):
continue
if bash_function == 'bash' or bash_function == 'sh' and parameter == '-c':
return bash_function + ' -c'
#return shlex.join(cmd_splited[position+1:])
# return shlex.join(cmd_splited[position+1:])
if parameter.startswith('-'):
continue
return parameter.split('/')[-1]
return cmd
def parse_file(path, collectl, merge, coarsest):
def parse_file(path, collectl, shorten_cmds, coarsest):
"""start subproccess collectl than parse the output and merge.
After that build usefull dict from parsed data
Arguments:
path {Path} -- path to collectl file to parse
collectl {str} -- collectl command
shorten_cmds {bool} -- if True cmd will be shorted by get_cmdname
coarsest {bool} -- parameter of get_cmdname
Returns:
(dict, dict) -- 1.: parsed_data 2.: data for filter function
"""
collectl_starttime = time.time()
process = subprocess.run(
[collectl, '-P', '-p', path, '-sZ'], capture_output=True,
[collectl, '-P', '-p', path, '-sZ'], capture_output=True, check=True,
)
print('collectl make table took {:.1f}s'.format(time.time()- collectl_starttime))
print('collectl make table took {:.1f}s'.format(
time.time() - collectl_starttime))
parsing_starttime = time.time()
output = process.stdout.decode().splitlines()
head = output.pop(0).split(' ')
......@@ -88,17 +100,20 @@ def parse_file(path, collectl, merge, coarsest):
break
head[0] = head[0][1:]
head_indexes_dict = {head_title: index for index, head_title in enumerate(head)}
head_indexes_dict = {
head_title: index for index, head_title in enumerate(head)}
entrys_data = {}
empty_dict_with_value_titles = {
value_title: copy.deepcopy(
value_title_settings.get('default', NEEDED_VALUES_DEFAULTS['default_value'])
value_title_settings.get(
'default', NEEDED_VALUES_DEFAULTS['default_value']
)
) for value_title, value_title_settings in NEEDED_VALUES.items()
}
for entry in output:
splited_entry = entry.split(' ', len(head_indexes_dict)-1)
cmd = splited_entry[-1]
if merge:
if shorten_cmds or coarsest:
cmd = get_cmdname(cmd, coarsest=coarsest)
if cmd in COMAND_BLACKLIST_REGEX:
continue
......@@ -106,10 +121,13 @@ def parse_file(path, collectl, merge, coarsest):
entrys_data[cmd] = {}
tmp_datetime = datetime.datetime.combine(
datestr2date(splited_entry[head_indexes_dict['Date']]),
datetime.time.fromisoformat(splited_entry[head_indexes_dict['Time']]),
datetime.time.fromisoformat(
splited_entry[head_indexes_dict['Time']],
),
)
if not tmp_datetime in entrys_data[cmd]:
entrys_data[cmd][tmp_datetime] = copy.deepcopy(empty_dict_with_value_titles)
entrys_data[cmd][tmp_datetime] = copy.deepcopy(
empty_dict_with_value_titles)
for value_title, value_title_settings in NEEDED_VALUES.items():
entrys_data[cmd][tmp_datetime][value_title] = getattr(
value_merger,
......@@ -118,7 +136,7 @@ def parse_file(path, collectl, merge, coarsest):
entrys_data[cmd][tmp_datetime][value_title],
*[splited_entry[head_indexes_dict[key]] for key in value_title_settings['keys']],
)
print('parsing/merge took {:.1f}s'.format(time.time()- parsing_starttime))
print('parsing/merge took {:.1f}s'.format(time.time() - parsing_starttime))
dictbuild_starttime = time.time()
entry_data_plotfriendly = {}
......@@ -126,11 +144,13 @@ def parse_file(path, collectl, merge, coarsest):
plot_filter_data['number_of_values'] = 0
plot_filter_data['commands'] = {}
for cmd, cmd_data in entrys_data.items():
plot_filter_data['commands'][cmd] = copy.deepcopy(empty_dict_with_value_titles)
plot_filter_data['commands'][cmd] = copy.deepcopy(
empty_dict_with_value_titles)
plot_filter_data['commands'][cmd]['number_of_values'] = 0
entry_data_plotfriendly[cmd] = {key: [] for key in NEEDED_VALUES.keys()}
entry_data_plotfriendly[cmd] = {key: []
for key in NEEDED_VALUES}
entry_data_plotfriendly[cmd]['datetime'] = []
for cmd_data_time, cmd_data_values in cmd_data.items():
for cmd_data_time, cmd_data_values in cmd_data.items():
entry_data_plotfriendly[cmd]['datetime'].append(cmd_data_time)
for cmd_data_key, cmd_data_value in cmd_data_values.items():
entry_data_plotfriendly[cmd][cmd_data_key].append(cmd_data_value)
......@@ -139,7 +159,10 @@ def parse_file(path, collectl, merge, coarsest):
plot_filter_data['commands'][cmd]['number_of_values'] += 1
plot_filter_data[cmd_data_key] += cmd_data_value
plot_filter_data['number_of_values'] += 1
print('data dict/ filter_data_dict build took {:.1f}s'.format(time.time()- dictbuild_starttime))
print(
'data dict/ filter_data_dict build took {:.1f}s'.format(
time.time() - dictbuild_starttime),
)
return entry_data_plotfriendly, plot_filter_data
......@@ -175,8 +198,8 @@ def build_plot(plot_data, plotly_format_vars, **kwargs):
return plotly.offline.plot(plot, include_plotlyjs=False, output_type='div')
def scatter_plot(cmds_data, filter_info, cmd_color, plot_settings, plotly_format_vars, **kwargs):
"""Build a list of dicts in Plotlyconf style for scatter plot with the data of needed_key in kwargs
def scatter_plot(cmds_data, filter_info, cmd_color, plot_settings, plotly_format_vars, needed_key):
"""Build a list of dicts in Plotlyconf style for scatter plot with the data from cmds_data of needed_key
than call build_plot to generate html plot code.
Arguments:
......@@ -185,19 +208,12 @@ def scatter_plot(cmds_data, filter_info, cmd_color, plot_settings, plotly_format
cmd_color {dict} -- assign a fixed plot color to each cmd example: {'Command': 'rgb(0, 0, 128)'}
plot_settings {dict} -- settings for build_plot
plotly_format_vars {dict} -- values for build_plot
Raises:
Exception: raise if no needed_key is given in kwargs
needed_key {str} -- key of cmds_data for the values to be use
Returns:
str -- plotly html div
"""
plot_data = []
needed_key = None
if 'needed_key' in kwargs:
needed_key = kwargs['needed_key']
else:
raise Exception('no needed_key in default_plot generater settings')
for cmd in filter_info[needed_key]:
plot_data.append({
'type': 'scattergl',
......@@ -205,7 +221,7 @@ def scatter_plot(cmds_data, filter_info, cmd_color, plot_settings, plotly_format
'x': cmds_data[cmd].get('datetime', []),
'y': cmds_data[cmd].get(needed_key, []),
'name': cmd,
'marker':{
'marker': {
'color': cmd_color[cmd],
},
})
......@@ -213,6 +229,14 @@ def scatter_plot(cmds_data, filter_info, cmd_color, plot_settings, plotly_format
def build_html(plots_dict):
"""build html site with the plots in plotsdict
Arguments:
plots_dict {dict} -- key: name of plot value: [plot, ...]
Returns:
str -- html website
"""
doc, tag, text = Doc().tagtext()
doc.asis('<!DOCTYPE html>')
with tag('html'):
......@@ -233,7 +257,7 @@ def build_html(plots_dict):
with tag('h2'):
text('Index:')
with tag('ul'):
for name in plots_dict.keys():
for name in plots_dict:
with tag('li'):
with tag('a', href='#'+name):
text(name)
......@@ -244,69 +268,106 @@ def build_html(plots_dict):
for i, plot in enumerate(plots):
with tag('p'):
with tag('button', onclick='toggleplot("{}", {})'.format(name, i)):
text('toggle host')
text('toggle plot')
with tag('div', klass=name):
doc.asis(plot)
return doc.getvalue()
def get_sources(sources):
    """Collect all collectl source files (.raw.gz) from the given paths.

    A directory is searched (not recursively) for entries whose name ends
    with '.raw.gz'. A file is taken only if its own name ends with
    '.raw.gz'. Everything else (missing paths, other files) is ignored.

    Arguments:
        sources {iterable} -- files/directories, each as str or Path

    Returns:
        list -- Path objects of all .raw.gz files that were found
    """
    suffix = '.raw.gz'
    source_paths = []
    for source in sources:
        source_path = Path(source)
        if source_path.is_dir():
            source_paths.extend(
                Path(source_path, sourcefile)
                for sourcefile in os.listdir(source_path)
                if sourcefile.endswith(suffix)
            )
        # Test the name of the Path object instead of the raw argument so
        # that Path inputs are accepted as well as plain strings.
        elif source_path.is_file() and source_path.name.endswith(suffix):
            source_paths.append(source_path)
    return source_paths
def data_from_file(arguments):
    """Build the plot data and the filter infos of one collectl file.

    The host name is read from the '# Host:' header line of the gzipped
    file, then the file is parsed with parse_file and, if requested, the
    commands are filtered.

    Arguments:
        arguments {tuple} -- all arguments packed into one tuple:
            path {Path} -- collectl data file path
            collectl {str} -- collectl command
            shorten_cmds {bool} -- passed on to parse_file
            coarsest {bool} -- passed on to parse_file
            filtercmds {bool} -- if True the cmds are filtered, else not
            filtervalue {int} -- value passed to the filter function
            filtertype {str} -- name of the function in filter_func to call

    Returns:
        (str, dict, dict) -- host name, parsed data, filter infos
    """
    (path, collectl, shorten_cmds, coarsest,
     filtercmds, filtervalue, filtertype) = arguments

    # The context manager closes the pipe and waits for zcat, so no zombie
    # process or open file descriptor is left behind for each parsed file.
    with subprocess.Popen(('zcat', str(path)), stdout=subprocess.PIPE) as zcat:
        host = subprocess.check_output(
            ('awk', '/^# Host:/{print $3;exit}'), stdin=zcat.stdout,
        ).decode().strip()
        # awk exits after the first match; zcat has no reader any more.
        zcat.terminate()

    data, filter_data = parse_file(path, collectl, shorten_cmds, coarsest)
    if filtercmds:
        filter_infos = getattr(filter_func, filtertype)(filter_data, filtervalue)
    else:
        # Without filtering every command is kept for every value.
        filter_infos = {key: list(data.keys()) for key in NEEDED_VALUES}
    return host, data, filter_infos
@click.command()
@click.option('--files', '-f', multiple=True)
@click.option('--sourcedir', '-s', multiple=True)
@click.option('--collectl', '-c', required=False, default='collectl')
@click.option('--plotlypath', '-p', default='plotly.min.js')
@click.option('--destination', '-d', required=False, default='.')
@click.option('--merge/--notmerge', default=True)
@click.option('--coarsest/--notcoarsest', default=False)
@click.option('--filtercmds/--notfiltercmds', default=True)
@click.option('--filtervalue', '-v', type=int, default=90)
@click.option('--filtertype', '-t',
type=click.Choice(FILTER_FUNCTIONS.keys(), case_sensitive=False),
default=DEFAULT_FILTER)
def main(files, sourcedir, collectl, plotlypath, destination, merge, coarsest, filtercmds, filtervalue, filtertype):
source_paths = []
if not files and not sourcedir:
sourcedir = ['.']
for directory in sourcedir:
if Path(directory).is_dir():
source_paths.extend(
[Path(directory, sourcefile) for sourcefile in os.listdir(directory) if sourcefile.endswith('.raw.gz')]
)
for collectl_file in files:
path = Path(collectl_file)
if path.is_file():
source_paths.append(path)
@click.command(help='Generate htmlfiles with Plotlyplots with data from collectlfiles(".raw.gz")')
@click.option('--source', '-s', multiple=True, default=['.'], show_default=True, type=click.Path(exists=True), help='source for the plots. (.raw.gz file or directory with .raw.gz) multiple useable')
@click.option('--collectl', '-c', default='collectl', show_default=True, help='collectl command')
@click.option('--plotlypath', '-p', default='./plotly-latest.min.js', type=click.Path(exists=True), show_default=True, help='path to plotly.js')
@click.option('--destination', '-d', default='.', type=click.Path(exists=True), show_default=True, help='path to directory where directory with plots will be created')
@click.option('--shorten/--notshorten', default=True, help='commands will be shorted only to name')
@click.option('--coarsest/--notcoarsest', default=False, help='commands will be shorted only to type (bash, perl, ...)')
@click.option('--filtercmds/--notfiltercmds', default=True, help='filtering or not')
@click.option('--filtervalue', help='Value which is given to the filter.')
@click.option('--filtertype',
type=click.Choice(FILTER_FUNCTIONS.keys(), case_sensitive=False),
default=DEFAULT_FILTER, show_default=True,
help='Filter which is to be used.',
)
def main(source, collectl, plotlypath, destination,
shorten, coarsest, filtercmds, filtervalue, filtertype):
source_paths = get_sources(source)
if not source_paths:
Exception("no valid source found")
print('no valid source found')
exit(1)
if not Path(destination).is_dir():
print('destination is no valid directory')
exit(1)
plots_dir = Path(destination, 'collectlplots')
if plots_dir.is_dir():
print('in destination "collectldir" already exist')
exit(1)
data_colllect_functions = []
for source_path in source_paths:
data_colllect_functions.append((source_path,
collectl,
merge,
coarsest,
filtercmds,
filtervalue,
FILTER_FUNCTIONS[filtertype],
))
collectl,
shorten,
coarsest,
filtercmds,
filtervalue,
FILTER_FUNCTIONS[filtertype],
))
pool = mp.Pool(min(len(data_colllect_functions), mp.cpu_count()))
results = pool.map(data_from_file, data_colllect_functions)
......@@ -325,24 +386,26 @@ def main(files, sourcedir, collectl, plotlypath, destination, merge, coarsest, f
plots_dict[host] = {config['name']: [] for config in PLOT_CONFIG}
for plot_config in PLOT_CONFIG:
plots_dict[host][plot_config['name']].append(scatter_plot(
host_data['data'],
host_data['filter_infos'],
cmd_colors,
plot_config['plotly_settings'],
{'host': host},
**plot_config['generator_settings'],
host_data['data'],
host_data['filter_infos'],
cmd_colors,
plot_config['plotly_settings'],
{'host': host},
plot_config['needed_key'],
))
print("plots build in {:.1f}s".format(time.time() - start_plots_build))
plots_dir.mkdir()
try:
shutil.copy(plotlypath , str(Path(destination, 'plotly.js')))
shutil.copy(plotlypath, str(Path(plots_dir, 'plotly.js')))
except shutil.SameFileError:
pass
time_sites = time.time()
for host, site_plots in plots_dict.items():
with Path(destination, host+'-plots.html').open(mode='w') as f:
f.write(build_html(site_plots))
with Path(plots_dir, host+'-plots.html').open(mode='w') as plots_file:
plots_file.write(build_html(site_plots))
print("write/build of websites took {:.1f}s".format(time.time() - time_sites))
......
FILTER_FUNCTIONS = {'hardvalue': 'filter_hardvalue', 'average': 'filter_average'}
DEFAULT_FILTER = 'average'
"""Config of collectl2plotly
"""
NAME_SPEZIAL_PARAMETER_CONFIG = {
'java': ['-cp', '-classpath'],
'bash': [],
'sh': [],
'perl': [],
'python': ['-m', '-W', '-X', '--check-hash-based-pycs', '-c'],
}
### Merge configs
# hint: merger functions must be defined in value_merger.py
# Default merge settings
# default_value is the start value for merging if none is given in NEEDED_VALUES
# default_merger is the merger function that is called if none is given in NEEDED_VALUES
NEEDED_VALUES_DEFAULTS = {
'default_value': 0,
'default_merger': 'addition_int',
}
# Information about which values are needed for the plots
# key : name of the value that is needed by the plot config
# value: a dict with the following configuration options
# 'keys' : must be a list of the collectl table heads that are to be merged
# 'merger' : function that merges the values of the given keys into the new value for the plots
# 'default': start value for merging
NEEDED_VALUES = {
'PCT': {'keys':['PCT']},
'VmRSS': {'keys':['VmRSS'], 'merger': 'x2oneaddition_float_sizedown2'},
......@@ -22,19 +27,25 @@ NEEDED_VALUES = {
'syscalls': {'keys':['RSYS', 'WSYS'], 'merger': 'x2oneaddtion_int'},
}
COMAND_BLACKLIST_REGEX = [
'collectl',
]
PLOTLY_COLORS=['rgb(31, 119, 180)', 'rgb(255, 127, 14)',
'rgb(44, 160, 44)', 'rgb(214, 39, 40)',
'rgb(148, 103, 189)', 'rgb(140, 86, 75)',
'rgb(227, 119, 194)', 'rgb(127, 127, 127)',
'rgb(188, 189, 34)', 'rgb(23, 190, 207)']
### Plots Config
# List of diagrams to be created
# config of a plot
# 'name': the name of the plot; it will be displayed on the website
# 'needed_key': a key defined in NEEDED_VALUES whose values are used as the y values of the plot
# 'plotly_settings': plotly layout settings, none of them required
# hint: {host} will be replaced with the real hostname in title, xtitle, ytitle
# title: displayed above the plot
# xtitle: displayed on the x axis of the plot
# ytitle: displayed on the y axis of the plot
# ytype: type of the y axis, for example 'log'; for more see the Plotly documentation
# showlegend: whether the legend is displayed or not
# 'height': height of the plot
# 'width': width of the plot
PLOT_CONFIG = [{
'name': 'cpu',
'generator_settings': {'needed_key':'PCT'},
'needed_key': 'PCT',
'plotly_settings': {
'title': 'CPU load {host}',
'xtitle': 'Date',
......@@ -43,7 +54,7 @@ PLOT_CONFIG = [{
},
{
'name': 'ram',
'generator_settings': {'needed_key': 'VmRSS'},
'needed_key': 'VmRSS',
'plotly_settings': {
'title': 'Memory Usage {host}',
'xtitle': 'Date',
......@@ -52,7 +63,7 @@ PLOT_CONFIG = [{
},
{
'name': 'ior',
'generator_settings': {'needed_key':'RKBC'},
'needed_key': 'RKBC',
'plotly_settings': {
'title': 'read io {host}',
'xtitle': 'Date',
......@@ -62,7 +73,7 @@ PLOT_CONFIG = [{
},
{
'name': 'iow',
'generator_settings': {'needed_key':'WKBC'},
'needed_key': 'WKBC',
'plotly_settings': {
'title': 'write io {host}',
'xtitle': 'Date',
......@@ -72,7 +83,7 @@ PLOT_CONFIG = [{
},
{
'name': 'ios',
'generator_settings': {'needed_key':'syscalls'},
'needed_key': 'syscalls',
'plotly_settings': {
'title': 'I/O syscalls {host}',
'xtitle': 'Date',
......@@ -82,3 +93,48 @@ PLOT_CONFIG = [{
},
]
### Name parse configs
# dict of all commands for which the name is extracted from the command line
# key : command
# value: list of parameters that take a value which is not the name
NAME_SPEZIAL_PARAMETER_CONFIG = {
'java': ['-cp', '-classpath'],
'bash': [],
'sh': [],
'perl': [],
'python': ['-m', '-W', '-X', '--check-hash-based-pycs', '-c'],
}
### Filter Configs
# registered filter functions
# filter functions must be in filter_func.py
# key : Display name
# value: function name
FILTER_FUNCTIONS = {'hardvalue': 'filter_hardvalue', 'average': 'filter_average'}
# Default filter function if none is given explicitly
# must be a display name (key) from FILTER_FUNCTIONS
DEFAULT_FILTER = 'average'
### other Configs
# list of commands that are ignored
COMAND_BLACKLIST_REGEX = [
'collectl',
]
# list of colors for the plot entries
PLOTLY_COLORS=['rgb(31, 119, 180)', 'rgb(255, 127, 14)',
'rgb(44, 160, 44)', 'rgb(214, 39, 40)',
'rgb(148, 103, 189)', 'rgb(140, 86, 75)',
'rgb(227, 119, 194)', 'rgb(127, 127, 127)',
'rgb(188, 189, 34)', 'rgb(23, 190, 207)']
......@@ -6,11 +6,16 @@ def filter_hardvalue(filter_data, filtervalue):
Arguments:
filter_data {dict} -- dict with all data needed for filter func
filtervalue {int} -- percentage of the interesting values
filtervalue {str} -- percentage of the interesting values
Returns:
dict -- dict key= collectl heads value=list of cmd that have not been filtered out
"""
if filtervalue is None:
filtervalue = 95
else:
filtervalue = int(filtervalue)
filter_info = {}
tmp_sort_data = {}
......@@ -51,6 +56,11 @@ def filter_average(filter_data, filtervalue):
Returns:
dict -- dict key= collectl heads value=list of cmd that have not been filtered out
"""
if filtervalue is None:
filtervalue = 5
else:
filtervalue = int(filtervalue)
all_averages = {}
filter_info = {}
for cmd, cmd_data in filter_data['commands'].items():
......@@ -60,9 +70,8 @@ def filter_average(filter_data, filtervalue):
if not cmd_data_key in all_averages:
all_averages[cmd_data_key] = filter_data[cmd_data_key] / filter_data['number_of_values']
filter_info[cmd_data_key] = []
cmd_avarage = cmd_data[cmd_data_key] / cmd_data['number_of_values']
if int( 100 * cmd_avarage / all_averages[cmd_data_key]) < filtervalue:
cmd_avarage = cmd_data_value / cmd_data['number_of_values']
if int(100 * cmd_avarage / all_averages[cmd_data_key]) < filtervalue:
continue
filter_info[cmd_data_key].append(cmd)
return filter_info
# Collectl2plotly
## How to
To use it, first install the requirements via pip.
After that you can run it like this:
```
python Collectl2plotly <options>
```
For information about the options, use "--help" or see the option descriptions below.
### Requirements
```
Click<=7.0
plotly<=4.5.0
yattag<=1.13.2
```
## Options
| Option | takes | Description | Default |
| --------------- | ----- | ------------ | ------- |
| -s, --source | a path to a directory with .raw.gz files or to a .raw.gz file directly | Can be used multiple times! The given paths are used as the collectl sources from which the plot data is read. | If no source is given, the current directory is searched |
| -c, --collectl | how to call collectl | the collectl command that is called to get the data from the sources | collectl without a path |
| -p, --plotlypath | a path to a plotly JavaScript library | needed for the plots in the HTML files | ./plotly-latest.min.js |
| -d, --destination | a path to a directory | where the directory with the HTML files will be created | current directory |
| --shorten /<br> --notshorten | - | enable or disable shortening of commands with parameters/options to just the file/command name. <br> examples:<br> python ~/scripts/script.py 1 --> script.py <br>ls -lisa --> ls | enabled |
| --coarsest /<br> --notcoarsest | - | enable or disable shortening of commands to just the command name.<br> If enabled, --shorten is ignored!<br> examples: <br> python ~/scripts/script.py 1 --> python <br> ls -lisa --> ls | disabled |
| --filtercmds / --notfiltercmds | - | enable or disable filtering | enabled |
| --filtertype | filter type <br> see --help for details | selects which filter is used | see --help |
| --filtervalue | any value (string, int, ...) | passed to the filter function as a string | - |
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment