Commit 32565906 authored by Julius Metz

add function to merge 'same' commands and blacklist commands

parent 52cb77f7
......@@ -6,7 +6,6 @@ def cpu_plot(values, title=None, xtitle=None, ytitle=None):
plot = {
'data': [],
'layout': {
'showlegend': True,
'title': {
'text': title
},
......@@ -16,6 +15,7 @@ def cpu_plot(values, title=None, xtitle=None, ytitle=None):
'yaxis': {
'title': ytitle,
},
'showlegend': True,
},
}
for cmd, cmd_data in values.items():
......
import re
import subprocess
from pathlib import Path
import datetime
......@@ -6,6 +7,15 @@ import click
import plots_generators
# Pairs of (regex, merged name). merge_same_commands() tests each command
# string with re.search() against the regex; the data of matching commands is
# combined under the merged name and the original command entry is removed.
SAME_COMMAND_REGEX = [
(r'.+ org.apache.spark.launcher.Main .+', 'org.apache.spark.launcher.Main'),
(r'.+ org.apache.spark.executor.CoarseGrainedExecutorBackend .+', 'org.apache.spark.executor.CoarseGrainedExecutorBackend'),
(r'.+ org.apache.spark.deploy.worker.Worker .+', 'org.apache.spark.deploy.worker.Worker'),
]
# Keys whose values are added together (as floats) when commands are merged.
MERGE_VALUES = ['PCT']
# Key used to align entries of merged commands: values are only summed for
# entries that share the same identifier value.
MERGE_IDENTIFIER = 'datetime'
# Commands matching any of these regexes (re.search) are skipped by
# parse_file() and never stored.
COMAND_BLACKLIST_REGEX = [r'^/usr/bin/perl .+collectl']
# Header columns that are not stored as their own series; parse_file()
# combines 'Date' and 'Time' into the single 'datetime' series instead.
HEAD_BLACKLIST = ['Time', 'Date']
CONFIG = {
'cpu_plot':{
......@@ -33,7 +43,9 @@ def datestr2date(datestr):
def parse_file(path, collectl):
process = subprocess.run([collectl, '-P', '-p', path, '-sZ'], capture_output=True)
process = subprocess.run(
[collectl, '-P', '-p', path, '-sZ'], capture_output=True,
)
output = process.stdout.decode().splitlines()
head = output.pop(0).split(' ')
head[0] = head[0][1:]
......@@ -44,37 +56,75 @@ def parse_file(path, collectl):
splited_entry = entry.split(' ', len(head)-1)
cmd = splited_entry[-1]
if not cmd in entrys_data:
entrys_data[cmd] = {
head_elem : [] for head_elem in head[:-1] if head_elem not in HEAD_BLACKLIST
}
entrys_data[cmd]['datetime'] = []
for regexpr in COMAND_BLACKLIST_REGEX:
if re.search(regexpr, cmd):
break
else:
if not cmd in entrys_data:
entrys_data[cmd] = {
head_elem : [] for head_elem in head[:-1] if head_elem not in HEAD_BLACKLIST
}
entrys_data[cmd]['datetime'] = []
for i, head_elem in enumerate(head[:-1]):
if head_elem == 'Date':
tmp_date = datestr2date(splited_entry[i])
if head_elem == 'Time':
tmp_time = datetime.time.fromisoformat(splited_entry[i])
if not head_elem in HEAD_BLACKLIST:
entrys_data[cmd][head_elem].append(splited_entry[i])
entrys_data[cmd]['datetime'].append(
datetime.datetime.combine(tmp_date, tmp_time),
)
return entrys_data
for i, head_elem in enumerate(head[:-1]):
if head_elem == 'Date':
tmp_date = datestr2date(splited_entry[i])
if head_elem == 'Time':
tmp_time = datetime.time.fromisoformat(splited_entry[i])
def merge_same_commands(data):
"""checks which commands can be combined - this is done using SAME_COMMAND_REGEX
then combines the commands and removes the old ones.
if not head_elem in HEAD_BLACKLIST:
entrys_data[cmd][head_elem].append(splited_entry[i])
MERGE_VALUES specifies which variable keys are combined.
MERGE_IDENTIFIER specifies the unique variable key on which the merge is based.
All variable keys that are not in MERGE_VALUES or MERGE_IDENTIFIER are left out and not carried over!
entrys_data[cmd]['datetime'].append(
datetime.datetime.combine(tmp_date, tmp_time),
)
return entrys_data
Arguments:
data {dict} -- parsed data from collectl
"""
for command, cmd_data in list(data.items()):
for regexpr, name in SAME_COMMAND_REGEX:
if re.search(regexpr, command):
if not name in data:
data[name] = {
key: cmd_data[key] for key in MERGE_VALUES + [MERGE_IDENTIFIER]
}
else:
for i, identifier in enumerate(cmd_data[MERGE_IDENTIFIER]):
try:
index = data[name][MERGE_IDENTIFIER].index(identifier)
for key in MERGE_VALUES:
data[name][key][index] = float(data[name][key][index]) +\
float(cmd_data[key][i])
except ValueError:
for key in MERGE_VALUES + [MERGE_IDENTIFIER]:
data[name][key].append(cmd_data[key][i])
data.pop(command, None)
break
@click.command()
@click.option('--file', '-f', required=True)
@click.option('--collectl', '-c', required=False, default='collectl')
def main(file, collectl):
@click.option('--merge/--notmerge', default=True)
def main(file, collectl, merge):
path = Path(file)
if path.exists():
data = parse_file(path, collectl)
if merge:
merge_same_commands(data)
for generator, settings in CONFIG.items():
getattr(plots_generators, generator)(data, **settings)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment