import copy
import datetime
import gzip
import importlib
import importlib.util
import json
import multiprocessing as mp
import pickle
import re
import shutil
import subprocess
import sys
import time
from pathlib import Path

import click
from yattag import Doc

import Collectl2plotly.filter_func as filter_func
import Collectl2plotly.value_merger as value_merger
import Collectl2plotly.Collectl2plotly_Plots_Config as default_plot_conf


### Filter configs

# Registered filter functions.
# The functions themselves must be defined in filter_func.py.
# key  : display name (the value accepted by --filtertype)
# value: name of the function in filter_func.py
FILTER_FUNCTIONS = {'hardvalue': 'filter_hardvalue', 'average': 'filter_average'}

# Filter that is used if none is given explicitly.
# Must be a display name, i.e. a key of FILTER_FUNCTIONS.
DEFAULT_FILTER = 'average'

### Validation config

# Names that every config module has to define.
CONFIG_VARIABLES = [
    'NEEDED_VALUES_DEFAULTS',
    'NEEDED_VALUES',
    'PLOTS_CONFIG',
    'NAME_SPEZIAL_PARAMETER',
    'COMAND_BLACKLIST',
    'PLOTLY_COLORS',
    'PLOTLY_STATIC_COLORS',
]

### Other

# Config used to find the best interval and unit for the relative x axis,
# ordered from the smallest unit to the greatest.
# intervals = possible tick distances of the axis in this unit (required)
# max       = how many of this unit make up one of the next greater unit;
#             left out for the greatest supported unit
# unit      = str with the unit name
AXIS_INTERVAL_REL = [
    {
        'max': 60,
        'intervals': [1, 5, 10],
        'unit': 'sec',
    },
    {
        'max': 60,
        'intervals': [0.5, 1, 5, 10],
        'unit': 'min',
    },
    {
        'intervals': [0.25, 0.5, 1, 5, 10],
        'unit': 'h',
    },
]

# Pattern that splits a command line into its fields. It honours double
# quotes and backslash escapes: group 1 holds the content of a quoted field,
# group 2 an unquoted field.
# A simpler variant without escape handling would be: (?:\"(.*?)\"|(\S+))
FIELDS_PATTERN = re.compile(r'(?:(?:\s*[^\\]|\A)\"(.*?[^\\]|)\"|(?:\s+|\A)(?=[^\s])(.*?[^\\])(?= |\Z))')


def datestr2date(datestr):
    """Convert a compact date string to a date object.

    Arguments:
        datestr {str} -- date in the form YYYYMMDD, example: 20191224.
                         Characters after the eighth are ignored.

    Returns:
        datetime.date -- date described by the string

    Raises:
        ValueError -- if the digits do not form a valid date
    """
    year, month, day = int(datestr[:4]), int(datestr[4:6]), int(datestr[6:8])
    return datetime.date(year, month, day)


def get_cmdname(cmd, spezial_parameters_of_all, coarsest=False):
    """Search the complete command string for the name of the script or command that is run.

    Example: for cmd="/usr/bin/python3 my_script.py" the interpreter name is
    "python" and, if "python" is a key of spezial_parameters_of_all, the
    returned name is "my_script.py".

    Arguments:
        cmd {str} -- complete command string
        spezial_parameters_of_all {dict} -- key: interpreter/program name,
            value: collection of its options that consume the following field
            (that field is then skipped while searching for the script name)

    Keyword Arguments:
        coarsest {bool} -- return only the calling program (example: bash, python) if True (default: {False})

    Returns:
        str -- new cmd name; the unchanged cmd if no name could be derived
    """
    # split command into fields with the regex (group 1: quoted, group 2: plain)
    cmd_splited = [quoted or plain for quoted, plain in FIELDS_PATTERN.findall(cmd)]
    if not cmd_splited:
        # empty or unparsable command: nothing to shorten
        return cmd

    # program name without path, reduced to its first run of word characters
    # (e.g. "/usr/bin/python3.8" -> "python3")
    name_match = re.search(r'[^\W\n]+', cmd_splited[0].split('/')[-1])
    if name_match is None:
        return cmd
    bash_function = name_match.group(0)

    # if the program is not known (or only the program is wanted), return it
    spezial_parameters = spezial_parameters_of_all.get(bash_function)
    if coarsest or spezial_parameters is None:
        return bash_function

    # search the script/program name within the parameters and return it without path
    skip = False
    for parameter in cmd_splited[1:]:
        if skip:
            # this field is the argument of the preceding special option
            skip = False
            continue
        if parameter in spezial_parameters:
            skip = True
            continue
        # "bash -c ..." / "sh -c ..." carry inline code instead of a script name.
        # The membership test keeps the '-c' check applied to both shells.
        if bash_function in ('bash', 'sh') and parameter == '-c':
            return bash_function + ' -c'
        if parameter.startswith('-'):
            continue
        return parameter.split('/')[-1]
    return cmd


def parse_file(path, collectl, shorten_cmds, coarsest, config):
    """Start collectl as subprocess, then parse and merge its output.
       After that build the dicts that are used for plotting and filtering.

    Arguments:
        path {Path} -- path to collectl file to parse
        collectl {str} -- collectl command
        shorten_cmds {bool} -- if True cmd will be shortened by get_cmdname
        coarsest {bool} -- parameter of get_cmdname
        config {dict} -- plots and merge config

    Returns:
        (dict, dict) --  1. plot_data = {commands : {metrics: [values, ...],  ...}, ...}
                         2. data for filter function :
                         {'number_of_values': X, 'commands':{ cmd:{'number_of_values': X, metrics:SUM, ...}}, metrics:SUM, ...}

    Raises:
        subprocess.CalledProcessError -- if collectl exits with an error
    """
    collectl_starttime = time.time()
    # run collectl in playback mode and read its output;
    # str() so the argument list never contains a path object
    process = subprocess.run(
        [collectl, '-P', '-p', str(path), '-sZ'], stdout=subprocess.PIPE, check=True,
    )
    print('collectl make table took {:.1f}s'.format(
        time.time() - collectl_starttime))
    parsing_starttime = time.time()

    # output contains all data! Command lines may hold bytes that are not
    # valid UTF-8, so those are replaced instead of aborting the whole run.
    output = process.stdout.decode(errors='replace').splitlines()

    # get table head: the first line is the head, every directly following
    # line that starts with '#' replaces it (the last header line wins)
    head_line = output[0] if output else ''
    first_entry = 1
    while first_entry < len(output) and output[first_entry].startswith('#'):
        head_line = output[first_entry]
        first_entry += 1
    entries = output[first_entry:]
    head = head_line.split(' ')
    # drop the leading '#' of the first column title
    head[0] = head[0][1:]
    head_indexes_dict = {
        head_title: index for index, head_title in enumerate(head)}
    field_count = len(head_indexes_dict)

    # get template of an entry from the config
    needed_values = config['NEEDED_VALUES']
    value_defaults = config['NEEDED_VALUES_DEFAULTS']
    empty_dict_with_value_titles = {
        value_title: copy.deepcopy(
            value_title_settings.get('base_value', value_defaults['default_base_value'])
        ) for value_title, value_title_settings in needed_values.items()
    }

    # parse all output lines
    entrys_data = {}
    cmd_cmdshort_dict = {}
    merger_lookup_dict = {}

    for entry in entries:
        if not entry:
            # a blank line carries no fields and cannot be parsed
            continue
        # split by ' ' (the command is the last column and is excluded from splitting)
        splited_entry = entry.split(' ', field_count - 1)
        # get command string and shorten; results are cached per full command
        cmd = splited_entry[-1]
        if shorten_cmds or coarsest:
            short_cmd = cmd_cmdshort_dict.get(cmd)
            if short_cmd is None:
                short_cmd = get_cmdname(cmd, config['NAME_SPEZIAL_PARAMETER'], coarsest=coarsest)
                cmd_cmdshort_dict[cmd] = short_cmd
            cmd = short_cmd
        if cmd in config['COMAND_BLACKLIST']:
            continue
        # create dict for each command
        cmd_entries = entrys_data.setdefault(cmd, {})
        # get datetime obj for current entry
        tmp_datetime = datetime.datetime.combine(
            datestr2date(splited_entry[head_indexes_dict['Date']]),
            datetime.time(*[int(n) for n in splited_entry[head_indexes_dict['Time']].split(':')]),
        )
        # if datetime not yet existing, add new entry from template
        # (pickle round trip = deep copy of the template)
        entry_values = cmd_entries.get(tmp_datetime)
        if entry_values is None:
            entry_values = pickle.loads(pickle.dumps(empty_dict_with_value_titles, -1))
            cmd_entries[tmp_datetime] = entry_values
        # get values from data as given in NEEDED_VALUES and run specified merger function
        # to merge multiple values with same timestamp or rescale (e.g. to GB)
        for value_title, value_title_settings in needed_values.items():
            merger, merger_kwargs = value_title_settings.get('merger', value_defaults['default_merger'])
            merger_function = merger_lookup_dict.get(merger)
            if merger_function is None:
                merger_function = getattr(value_merger, merger)
                merger_lookup_dict[merger] = merger_function
            entry_values[value_title] = merger_function(
                entry_values[value_title],
                *[splited_entry[head_indexes_dict[key]] for key in value_title_settings['keys']],
                **merger_kwargs
            )
    print('parsing/merge took {:.1f}s'.format(time.time() - parsing_starttime))
    dictbuild_starttime = time.time()

    # create lists entry_data_plotfriendly[cmd][metric] = [ values, ... ] with the actual data to plot
    # and sum up all metrics for each command to enable filtering of non-interesting commands later on
    entry_data_plotfriendly = {}
    plot_filter_data = pickle.loads(pickle.dumps(empty_dict_with_value_titles, -1))
    plot_filter_data['number_of_values'] = 0
    plot_filter_data['commands'] = {}
    for cmd, cmd_data in entrys_data.items():
        cmd_filter_data = pickle.loads(pickle.dumps(empty_dict_with_value_titles, -1))
        cmd_filter_data['number_of_values'] = 0
        plot_filter_data['commands'][cmd] = cmd_filter_data

        cmd_plot_data = {key: [] for key in needed_values}
        cmd_plot_data['datetime'] = []
        entry_data_plotfriendly[cmd] = cmd_plot_data

        for cmd_data_time, cmd_data_values in cmd_data.items():
            cmd_plot_data['datetime'].append(cmd_data_time)
            for cmd_data_key, cmd_data_value in cmd_data_values.items():
                cmd_plot_data[cmd_data_key].append(cmd_data_value)

                # sums per command and over all commands; the counters count
                # every single value (one per metric and timestamp)
                cmd_filter_data[cmd_data_key] += cmd_data_value
                cmd_filter_data['number_of_values'] += 1
                plot_filter_data[cmd_data_key] += cmd_data_value
                plot_filter_data['number_of_values'] += 1
    print(
        'data dict/ filter_data_dict build took {:.1f}s'.format(
            time.time() - dictbuild_starttime),
    )
    return entry_data_plotfriendly, plot_filter_data


def make_relative_xaxi(all_values, axis_intervals=None, guideline_tickcounts=10):
    """Build the tick lists for a relative x axis (runtime since the first value).

    The unit is chosen by walking axis_intervals from the smallest unit
    upwards until the covered time span does not exceed the unit's 'max'.
    Within that unit the interval whose tick count is closest to
    guideline_tickcounts is used.

    Arguments:
        all_values {[datetime, ..]} -- list of all datetime objects for plot

    Keyword Arguments:
        axis_intervals {list} -- unit descriptions in the format of
            AXIS_INTERVAL_REL (default: AXIS_INTERVAL_REL)
        guideline_tickcounts {int} -- number of ticks to aim for (default: {10})

    Returns:
        (list, list, str) -- 1.: list with the relative values that are displayed on the xaxis
                             2.: list with the datetime as str of the absolute values of xaxis
                             3.: name of the unit of the relative values

    Raises:
        ValueError -- if all_values or axis_intervals is empty
    """
    if axis_intervals is None:
        axis_intervals = AXIS_INTERVAL_REL
    all_values = list(all_values)
    if not all_values:
        raise ValueError('make_relative_xaxi needs at least one datetime value')
    if not axis_intervals:
        raise ValueError('make_relative_xaxi needs at least one axis unit')

    min_value = min(all_values)
    end_value = (max(all_values) - min_value).total_seconds()
    # factor that converts a value of the chosen unit back to seconds
    conversion_factor = 1

    last_index = len(axis_intervals) - 1
    for index, current_unit in enumerate(axis_intervals):
        unit_max = current_unit.get('max')
        # move on to the next greater unit while the span is too large for
        # this one; the greatest unit is always accepted so that a result
        # is returned even if it declares a 'max'
        if unit_max is not None and end_value > unit_max and index < last_index:
            end_value /= unit_max
            conversion_factor *= unit_max
            continue
        break

    # interval whose number of ticks is closest to the guideline
    # (the first one wins on a tie)
    interval = min(
        current_unit['intervals'],
        key=lambda candidate: abs(end_value // candidate - guideline_tickcounts),
    )
    xaxis_ticks = [interval * i for i in range(int(end_value / interval) + 1)]
    if len(xaxis_ticks) == 1:
        # span shorter than one interval: add a second tick so the axis has a scale
        xaxis_ticks.append(interval)
    xaxis_ticks_values = [
        str(min_value + datetime.timedelta(seconds=tick * conversion_factor))
        for tick in xaxis_ticks
    ]
    return xaxis_ticks, xaxis_ticks_values, current_unit['unit']


def _format_title(container, format_vars):
    """Fill the placeholders of container['title'] in place and return the resulting text.

    The title may be a plain str or a dict with the key 'text'.
    Returns '' if there is no title text to format.
    """
    title = container.get('title')
    if isinstance(title, str):
        container['title'] = title.format(**format_vars)
        return container['title']
    if isinstance(title, dict) and 'text' in title:
        title['text'] = title['text'].format(**format_vars)
        return title['text']
    return ''


def make_plot(cmds_data, filter_info, cmd_color, plot_settings, plotly_format_vars, needed_key, relative_xaxis):
    """Build a list of dicts in Plotly style for the different traces with the data from cmds_data of needed_key,
        then make the plot dict in Plotly style and replace placeholders like '{host}' in titles.

    Arguments:
        cmds_data {dict} -- is the data for the plot example: {'Command': {'datetime':[...], 'cpu': [...]}}
        filter_info {dict} -- has the information which cmds will be shown example: {'cpu': [cmds]}
        cmd_color {dict} -- assign a fixed plot color to each cmd example:  {'Command': 'rgb(0, 0, 128)'}
        plot_settings {dict} -- settings for Plotly; the optional keys 'data' (merged into every trace),
                                'layout' and 'config' are used
        plotly_format_vars {dict} -- values for the placeholders in the titles example: {'host': 'node1'}
        needed_key {str} -- key of cmds_data for the values to be used
        relative_xaxis {bool} -- if true add buttons to change xaxis to relative

    Returns:
        dict -- with plotly jsons: {'data': str, 'layout': str, 'config': str}
    """
    shown_cmds = filter_info[needed_key]
    # 'data' is optional like 'layout' and 'config'
    trace_settings = plot_settings.get('data', {})
    plot_data = [
        {
            'x': [str(date) for date in cmds_data[cmd]['datetime']],
            'y': cmds_data[cmd][needed_key],
            'name': cmd,
            'marker': {
                'color': cmd_color[cmd],
            },
            **trace_settings,
        }
        for cmd in shown_cmds
    ]

    # work on a copy: the same plot_settings are reused for other plots
    layout = copy.deepcopy(plot_settings.get('layout', {}))

    # replace {host} in titles
    _format_title(layout, plotly_format_vars)
    _format_title(layout.get('yaxis', {}), plotly_format_vars)
    xtitle = _format_title(layout.get('xaxis', {}), plotly_format_vars)

    # add buttons for switching xaxis if 'relative-absolute_xaxis' in PLOT_CONF is set.
    # Without any data point there is nothing a relative axis could be built from.
    all_dates = []
    if relative_xaxis:
        all_dates = [date for cmd in shown_cmds for date in cmds_data[cmd]['datetime']]
    if all_dates:
        ticks_name, ticks_value, unit = make_relative_xaxi(all_dates)
        layout.setdefault('xaxis', {})
        layout.setdefault('updatemenus', []).append({
            'type': 'buttons',
            'x': 0.6,
            'y': 1.15,
            'direction': 'left',
            'buttons': [
                {
                    'args': [{'xaxis': {'title': xtitle, 'tickvals': None}}],
                    'label': 'absolute xaxis',
                    'method': 'relayout',
                },
                {
                    # ticks stay at their absolute positions (tickvals) and
                    # are only labelled with the relative values (ticktext)
                    'args': [{'xaxis': {'title': 'runtime in {}'.format(unit), 'tickvals': ticks_value, 'ticktext': ticks_name}}],
                    'label': 'relative xaxis',
                    'method': 'relayout',
                },
            ],
            'showactive': True,
        })

    return {
        'data': json.dumps(plot_data),
        'layout': json.dumps(layout),
        'config': json.dumps(plot_settings.get('config', {})),
    }


def build_html(plots_dict):
    """Build the html site with the plots in plots_dict.

    Arguments:
        plots_dict {dict} -- key: name of plot value: [plot, ...]
                             every plot is a dict with the JSON strings 'data', 'layout' and 'config'

    Returns:
        str -- html website
    """
    def js_string(value):
        # JS string literal for value. json.dumps escapes quotes and
        # backslashes; '<' is escaped as well so that text like '</script>'
        # inside the data cannot end the script element early.
        return json.dumps(value).replace('<', '\\u003c')

    doc, tag, text = Doc().tagtext()
    doc.asis('<!DOCTYPE html>')
    with tag('html'):
        with tag('head'):
            with tag('script', src='plotly.js'):
                pass
            with tag('script'):
                # draw all plots as soon as the document is completely loaded
                doc.asis("document.onreadystatechange = () => {if (document.readyState === 'complete') {")
                for name, plots in plots_dict.items():
                    for i, plot in enumerate(plots):
                        doc.asis(
                            'Plotly.newPlot(document.getElementById({element_id}), JSON.parse({data}), JSON.parse({layout}), JSON.parse({config}));'.format(
                                element_id=js_string('{}-plot-{}'.format(name, i)),
                                data=js_string(plot['data']),
                                layout=js_string(plot['layout']),
                                config=js_string(plot['config']),
                            )
                        )
                doc.asis('}}')
        with tag('body'):
            # index with a link to every plot group
            with tag('div', id='index'):
                with tag('h2'):
                    text('Index:')
                with tag('ul'):
                    for name in plots_dict:
                        with tag('li'):
                            with tag('a', href='#'+name):
                                text(name)
            # one empty div per plot; they are filled by the script in the head
            for name, plots in plots_dict.items():
                with tag('div', id=name):
                    for i, _plot in enumerate(plots):
                        with tag('div', id='{}-plot-{}'.format(name, i)):
                            pass

    return doc.getvalue()


def get_sources(sources):
    """Collect all sources (.raw.gz files).

    A directory contributes the .raw.gz files directly inside it (sorted by
    path, not recursive); a file is taken if its name ends with .raw.gz.
    Everything else is ignored.

    Arguments:
        sources {tuple} -- tuple of files/directories (str or Path)

    Returns:
        list -- list of Path objects of all .raw.gz files which were found
    """
    source_paths = []
    for source in sources:
        source_path = Path(source)
        if source_path.is_dir():
            # sorted: the order of glob depends on the file system
            source_paths.extend(sorted(source_path.glob('*.raw.gz')))
        elif source_path.is_file() and source_path.name.endswith('.raw.gz'):
            source_paths.append(source_path)
    return source_paths


def data_from_file(arguments):
    """Make data and filter infos of one collectl file.

    Takes a single tuple so that it can be used with Pool.map.

    Arguments:
        arguments {tuple} -- is a tuple of arguments for the function:
                    path {Path} -- collectldata file path
                    collectl {str} -- collectl command
                    shorten_cmds {bool} -- para for parse_file
                    coarsest {bool} -- para for parse_file
                    filtercmds {bool} -- if True cmds will be filtered else not
                    filtervalue {int} -- para for filter
                    filtertype {str} -- name of the function in filter_func which will be called
                    config {dict} -- plots and merge Config
    Returns:
        (str, dict, dict) -- 1. hostname
                             2. plot_data = {commands : {metrics: [values, ...],  ...}, ...}
                             3. filter_infos = {metrics: [cmds, ...]
    """
    path, collectl, shorten_cmds, coarsest, filtercmds, filtervalue, filtertype, config = arguments

    # read the host name from the '# Host:' line of the file
    host = ''
    with gzip.open(path, 'rb') as f:
        for line in f:
            if line.startswith(b'# Host:'):
                host_match = re.search(r'# Host: *(\S+)', line.decode(errors='replace'))
                if host_match:
                    host = host_match.group(1)
                    # found: do not decompress the rest of the file
                    break
    if not host:
        # no usable host line: fall back to the file name so the result
        # still gets a distinguishable name
        host = Path(path).name
        if host.endswith('.raw.gz'):
            host = host[:-len('.raw.gz')]

    data, filter_data = parse_file(path, collectl, shorten_cmds, coarsest, config)
    if filtercmds:
        filter_infos = getattr(filter_func, filtertype)(filter_data, filtervalue)
    else:
        # no filtering: every command is shown in every plot
        filter_infos = {key: list(data.keys()) for key in config['NEEDED_VALUES']}
    return host, data, filter_infos


@click.command(help='Generate htmlfiles with Plotlyplots with data from collectlfiles(".raw.gz")')
@click.option('--source', '-s', multiple=True, default=['.'], show_default=True, type=click.Path(exists=True), help='source for the plots. (.raw.gz file or directory with .raw.gz) multiple useable')
@click.option('--collectl', '-c', default='collectl', show_default=True, help='collectl command')
@click.option('--plotlypath', '-p', type=click.Path(exists=True), help='path to plotly.js')
@click.option('--destination', '-d', default='.', type=click.Path(exists=True), show_default=True, help='path to directory where directory with plots will be created')
@click.option('--configpath', default=None, type=click.Path(exists=True), help='python file with plot and merge infos see doku for detail')
@click.option('--shorten/--notshorten', default=True, help='commands will be shorted only to name')
@click.option('--coarsest', is_flag=True, help='commands will be shorted only to type (bash, perl, ...)')
@click.option('--filtercmds/--notfiltercmds', default=True, help='filtering or not')
@click.option('--filtervalue', help='Parameter which is given to the filter.')
@click.option('--filtertype',
              type=click.Choice(list(FILTER_FUNCTIONS), case_sensitive=False),
              default=DEFAULT_FILTER, show_default=True,
              help='Filter which is to be used.',
              )
@click.option('--force', is_flag=True, help='override existing plot directory if exist')
def main(source, collectl, plotlypath, destination, configpath,
         shorten, coarsest, filtercmds, filtervalue, filtertype, force):
    """Parse all collectl sources and write one html file with plots per host.

    The files are written to the directory "collectlplots" inside destination.
    The process exits with status 1 if the arguments or the config are not usable.
    The arguments are the command line options declared above.
    """
    source_paths = get_sources(source)

    if not source_paths:
        print('no valid source found')
        sys.exit(1)

    if not Path(destination).is_dir():
        print('destination is no valid directory')
        sys.exit(1)

    plots_dir = Path(destination, 'collectlplots')
    if plots_dir.is_dir() and not force:
        print('in destination "collectlplots" already exist')
        sys.exit(1)

    config_module = default_plot_conf
    if configpath:
        if not configpath.endswith('.py'):
            print('given config isn`t a ".py" Python file')
            sys.exit(1)
        # load the config file as module named 'config'
        config_spec = importlib.util.spec_from_file_location('config', configpath)
        config_module = importlib.util.module_from_spec(config_spec)
        config_spec.loader.exec_module(config_module)

    # validate config and make dict from values
    config = {}
    for config_var_name in CONFIG_VARIABLES:
        try:
            config[config_var_name] = getattr(config_module, config_var_name)
        except AttributeError:
            print('{} missing in config'.format(config_var_name))
            sys.exit(1)

    # one argument tuple for data_from_file per source file
    data_colllect_functions = [
        (
            source_path,
            collectl,
            shorten,
            coarsest,
            filtercmds,
            filtervalue,
            FILTER_FUNCTIONS[filtertype],
            config,
        )
        for source_path in source_paths
    ]

    # use multiprocessing to parse all collectl output files independently from each other in parallel;
    # the with block also cleans up the worker processes if parsing fails
    with mp.Pool(min(len(data_colllect_functions), mp.cpu_count())) as pool:
        results = pool.map(data_from_file, data_colllect_functions)

    hosts_data = {}
    cmd_all = set()
    for host, data, filter_infos in results:
        if host in hosts_data:
            # results are stored per host, so a later file replaces an earlier one
            print('warning: more than one source for host "{}", only the last one is plotted'.format(host))
        hosts_data[host] = {'data': data, 'filter_infos': filter_infos}
        cmd_all.update(cmd for cmds in filter_infos.values() for cmd in cmds)

    # every shown command gets a color (cycling through PLOTLY_COLORS in
    # sorted command order); PLOTLY_STATIC_COLORS take precedence
    plotly_colors = config['PLOTLY_COLORS']
    cmd_colors = {cmd: plotly_colors[i % len(plotly_colors)] for i, cmd in enumerate(sorted(cmd_all))}
    cmd_colors.update(config['PLOTLY_STATIC_COLORS'])

    # for each host and each plot (as given in config) call make_plot to create the plotly jsons
    start_plots_build = time.time()
    plots_dict = {}
    for host, host_data in hosts_data.items():
        host_plots = {plot_config['name']: [] for plot_config in config['PLOTS_CONFIG']}
        for plot_config in config['PLOTS_CONFIG']:
            host_plots[plot_config['name']].append(make_plot(
                host_data['data'],
                host_data['filter_infos'],
                cmd_colors,
                plot_config['plotly_settings'],
                {'host': host},
                plot_config['needed_key'],
                plot_config.get('relative-absolute_xaxis', False),
            ))
        plots_dict[host] = host_plots
    print("plots build in {:.1f}s".format(time.time() - start_plots_build))

    # create output directory and copy plotly.js; an existing directory
    # (only possible with --force) is removed only now, after all plots
    # were built, so a failed run does not destroy the old output
    if plots_dir.is_dir():
        shutil.rmtree(plots_dir)
    plots_dir.mkdir()
    if plotlypath is None:
        plotlypath = Path(__file__).with_name('plotly-latest.min.js')
    try:
        shutil.copy(str(plotlypath), str(Path(plots_dir, 'plotly.js')))
    except shutil.SameFileError:
        # source and target are the same file: nothing to copy
        pass

    # for each host create html file with plots
    time_sites = time.time()
    for host, site_plots in plots_dict.items():
        with Path(plots_dir, host+'-plots.html').open(mode='w', encoding='utf-8') as plots_file:
            plots_file.write(build_html(site_plots))
    print("write/build of websites took {:.1f}s".format(time.time() - time_sites))


if __name__ == '__main__':
    main()