From b7f75ccb55fde9f20e3cd48cb5ca88cd27d6dfcc Mon Sep 17 00:00:00 2001 From: Rod Schultz <Rod.Schultz@Bull.com> Date: Wed, 8 May 2013 15:39:23 -0700 Subject: [PATCH] initial check-in for hdf5 profiling --- auxdir/x_ac_hdf5.m4 | 67 + configure.ac | 5 + doc/html/Makefile.am | 1 + doc/html/acct_gather_profile_plugins.shtml | 412 ++++ doc/html/documentation.shtml | 1 + doc/html/man_index.shtml | 4 +- doc/man/man1/Makefile.am | 2 + doc/man/man1/salloc.1 | 38 + doc/man/man1/sbatch.1 | 39 + doc/man/man1/sh5util.1 | 101 + doc/man/man1/srun.1 | 36 + doc/man/man5/acct_gather.conf.5 | 43 + doc/man/man5/slurm.conf.5 | 24 + slurm/slurm.h.in | 10 +- src/api/config_info.c | 5 + src/api/step_launch.c | 2 + src/common/Makefile.am | 1 + src/common/read_config.c | 15 + src/common/read_config.h | 1 + src/common/slurm_acct_gather.c | 9 +- src/common/slurm_acct_gather.h | 4 + src/common/slurm_acct_gather_profile.c | 319 +++ src/common/slurm_acct_gather_profile.h | 231 +++ src/common/slurm_jobacct_gather.c | 65 + src/common/slurm_protocol_api.c | 19 + src/common/slurm_protocol_api.h | 6 + src/common/slurm_protocol_defs.c | 3 + src/common/slurm_protocol_defs.h | 1 + src/common/slurm_protocol_pack.c | 222 ++- src/plugins/Makefile.am | 1 + src/plugins/acct_gather_profile/Makefile.am | 3 + .../acct_gather_profile/hdf5/Makefile.am | 16 + .../hdf5/acct_gather_profile_hdf5.c | 496 +++++ .../hdf5/acct_gather_profile_hdf5.h | 67 + .../acct_gather_profile/hdf5/hdf5_api.c | 1744 +++++++++++++++++ .../acct_gather_profile/hdf5/hdf5_api.h | 938 +++++++++ .../hdf5/sh5util/Makefile.am | 20 + .../hdf5/sh5util/sh5util.c | 1029 ++++++++++ .../hdf5/sh5util/sh5util.h | 180 ++ .../acct_gather_profile/none/Makefile.am | 15 + .../none/acct_gather_profile_none.c | 169 ++ src/plugins/launch/slurm/launch_slurm.c | 1 + src/salloc/opt.c | 15 +- src/salloc/opt.h | 1 + src/salloc/salloc.c | 1 + src/sbatch/opt.c | 16 +- src/sbatch/opt.h | 1 + src/sbatch/sbatch.c | 1 + src/slurmctld/controller.c | 5 + 
src/slurmctld/job_mgr.c | 11 + src/slurmctld/job_scheduler.c | 11 + src/slurmctld/proc_req.c | 3 + src/slurmctld/slurmctld.h | 1 + src/slurmd/slurmd/slurmd.c | 6 + src/slurmd/slurmd/slurmd.h | 1 + src/slurmd/slurmstepd/mgr.c | 5 + src/slurmd/slurmstepd/slurmstepd_job.c | 2 + src/slurmd/slurmstepd/slurmstepd_job.h | 1 + src/srun/libsrun/allocate.c | 2 + src/srun/libsrun/opt.c | 11 + src/srun/libsrun/opt.h | 1 + 61 files changed, 6449 insertions(+), 11 deletions(-) create mode 100644 auxdir/x_ac_hdf5.m4 create mode 100644 doc/html/acct_gather_profile_plugins.shtml create mode 100644 doc/man/man1/sh5util.1 create mode 100644 src/common/slurm_acct_gather_profile.c create mode 100644 src/common/slurm_acct_gather_profile.h create mode 100644 src/plugins/acct_gather_profile/Makefile.am create mode 100644 src/plugins/acct_gather_profile/hdf5/Makefile.am create mode 100644 src/plugins/acct_gather_profile/hdf5/acct_gather_profile_hdf5.c create mode 100644 src/plugins/acct_gather_profile/hdf5/acct_gather_profile_hdf5.h create mode 100644 src/plugins/acct_gather_profile/hdf5/hdf5_api.c create mode 100644 src/plugins/acct_gather_profile/hdf5/hdf5_api.h create mode 100644 src/plugins/acct_gather_profile/hdf5/sh5util/Makefile.am create mode 100644 src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.c create mode 100644 src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.h create mode 100644 src/plugins/acct_gather_profile/none/Makefile.am create mode 100644 src/plugins/acct_gather_profile/none/acct_gather_profile_none.c diff --git a/auxdir/x_ac_hdf5.m4 b/auxdir/x_ac_hdf5.m4 new file mode 100644 index 00000000000..fd7dc899e34 --- /dev/null +++ b/auxdir/x_ac_hdf5.m4 @@ -0,0 +1,67 @@ +##***************************************************************************** +# COPIER: +# Rod Schultz <rod.schultz@bull.com> +# from example writtey by +# Morris Jette <jette1@llnl.gov> +# +# SYNOPSIS: +# X_AC_HDF5 +# +# DESCRIPTION: +# Determine if the HDF5 libraries exists and if they support 
PCI data. +##***************************************************************************** + +AC_DEFUN([X_AC_HDF5], +[ + _x_ac_hdf5_dirs="/usr /usr/local" + _x_ac_hdf5_libs="lib64 lib" + x_ac_cv_hdf5_pci="no" + + AC_ARG_WITH( + [hdf5], + AS_HELP_STRING(--with-hdf5=PATH,Specify path to hdf5 installation), + [_x_ac_hdf5_dirs="$withval $_x_ac_hdf5_dirs"]) + + AC_CACHE_CHECK( + [for hdf5 installation], + [x_ac_cv_hdf5_dir], + [ + for d in $_x_ac_hdf5_dirs; do + test -d "$d" || continue + test -d "$d/include" || continue + test -f "$d/include/hdf5.h" || continue + for bit in $_x_ac_hdf5_libs; do + test -d "$d/$bit" || continue + _x_ac_hdf5_cppflags_save="$CPPFLAGS" + CPPFLAGS="-I$d/include $CPPFLAGS" + _x_ac_hdf5_libs_save="$LIBS" + LIBS="-L$d/$bit -lhdf5 $LIBS" + AC_LINK_IFELSE( + [AC_LANG_CALL([], H5close)], + AS_VAR_SET(x_ac_cv_hdf5_dir, $d)) + AC_TRY_LINK([#include <hdf5.h>], + [int i = HDF5_OBJ_PCI_DEVICE;], + [x_ac_cv_hdf5_pci="yes"], []) + CPPFLAGS="$_x_ac_hdf5_cppflags_save" + LIBS="$_x_ac_hdf5_libs_save" + test -n "$x_ac_cv_hdf5_dir" && break + done + test -n "$x_ac_cv_hdf5_dir" && break + done + ]) + if test -z "$x_ac_cv_hdf5_dir"; then + AC_MSG_WARN([unable to locate hdf5 installation]) + else + HDF5_CPPFLAGS="-I$x_ac_cv_hdf5_dir/include" + HDF5_LDFLAGS="-Wl,-rpath -Wl,$x_ac_cv_hdf5_dir/$bit -L$x_ac_cv_hdf5_dir/$bit" + HDF5_LIBS="-lhdf5" + AC_DEFINE(HAVE_HDF5, 1, [Define to 1 if hdf5 library found]) + if test "$x_ac_cv_hdf5_pci" = "yes"; then + AC_DEFINE(HAVE_HDF5_PCI, 1, [Define to 1 if hdf5 library supports PCI devices]) + fi + fi + + AC_SUBST(HDF5_LIBS) + AC_SUBST(HDF5_CPPFLAGS) + AC_SUBST(HDF5_LDFLAGS) +]) diff --git a/configure.ac b/configure.ac index 6278bfc8965..9f9486edcbf 100644 --- a/configure.ac +++ b/configure.ac @@ -221,6 +221,7 @@ X_AC_SUN_CONST X_AC_DIMENSIONS X_AC_CFLAGS +X_AC_HDF5 X_AC_HWLOC X_AC_FREEIPMI X_AC_XCPU @@ -502,6 +503,10 @@ AC_CONFIG_FILES([Makefile src/plugins/acct_gather_energy/rapl/Makefile 
src/plugins/acct_gather_energy/ipmi/Makefile src/plugins/acct_gather_energy/none/Makefile + src/plugins/acct_gather_profile/Makefile + src/plugins/acct_gather_profile/io_energy/Makefile + src/plugins/acct_gather_profile/none/Makefile + src/plugins/acct_gather_profile/sprfmrgh5/Makefile src/plugins/jobcomp/Makefile src/plugins/jobcomp/filetxt/Makefile src/plugins/jobcomp/none/Makefile diff --git a/doc/html/Makefile.am b/doc/html/Makefile.am index 24ea2e36c5b..db67904f203 100644 --- a/doc/html/Makefile.am +++ b/doc/html/Makefile.am @@ -5,6 +5,7 @@ generated_html = \ accounting.html \ accounting_storageplugins.html \ acct_gather_energy_plugins.html \ + acct_gather_profile_plugins.html \ add.html \ api.html \ authplugins.html \ diff --git a/doc/html/acct_gather_profile_plugins.shtml b/doc/html/acct_gather_profile_plugins.shtml new file mode 100644 index 00000000000..29e4663691d --- /dev/null +++ b/doc/html/acct_gather_profile_plugins.shtml @@ -0,0 +1,412 @@ +<!--#include virtual="header.txt"--> + +<h1><a name="top">SLURM Profile Accounting Plugin API (AcctGatherProfileType) +</a></h1> + +<h2> Overview</h2> +<p> This document describes SLURM profile accounting plugins and the API that +defines them. It is intended as a resource to programmers wishing to write +their own SLURM profile accounting plugins. + +<p>A profiling plugin allows more detailed information on the execution of jobs +than can reasonably be kept in the accounting database. (All jobs may also not +be profiled.) + +<p>The plugin provides an API for making calls to store data at various +points in a step's lifecycle. It collects data for <b>nodes</b>, +<b>tasks</b> and periodic <b>samples</b>. The periodic samples are eventually +consolidated into one <i>time series</i> dataset for each node of a job. + +<p>The plugin's primary work is done within slurmstepd on the compute nodes. +It assumes a shared file system, presumably on the management network. 
This +avoids having to transfer files back to the controller at step end. Data is +typically gathered at job_acct_gather interval or acct_gather_energy interval +and the volume is not expected to be burdensome. + +<p>The reference implementation <i>(io_energy)</i> records I/O counts from the +network interface (Infiniband), I/O counts from the node from the Lustre +parallel file system, disk I/O counts, cpu and memory utilization +for each task, and a record of energy use. + +<p>The reference implementation stores this data in a HDF5 file for each step +on each node for the jobs. A separate program +(<a href="sprfmrgh5.html">sprfmrgh5</a>) is provided to +consolidate all the node-step files in one container for the job. +HDF5 is a well known structured data set that allows different types of +related data to be stored in one file. Its internal structure resembles a +file system with <i>groups</i> being similar to <i>directories</i> and +<i>data sets</i> being similar to <i>files</i>. There are commodity programs, +notably <b>HDF5View</b> for viewing and manipulating these files. +<b>sprfmrgh5</b> also provides some capability for extracting subsets of data +for import into other analysis tools like spreadsheets. + +<p>This plugin is incompatible with --enable-front-end. If you need to +simulate a large configuration, please use --enable-multiple-slurmd. +<p>SLURM profile accounting plugins must conform to the SLURM Plugin API with +the following specifications: +<p><span class="commandline">const char +plugin_name[]="<i>full text name</i>"</span> +<p style="margin-left:.2in"> +A free-formatted ASCII text string that identifies the plugin. + +<p><span class="commandline">const char +plugin_type[]="<i>major/minor</i>"</span><br> +<p style="margin-left:.2in"> +The major type must be "acct_gather_profile." +The minor type can be any suitable name +for the type of profile accounting. We currently use +<ul> +<li><b>none</b>— No profile data is gathered. 
+<li><b>io_energy</b>—Gets profile data about energy use and various i/o +sources (local disk, Lustre, network). CPU and memory usage is also gathered. +</ul> +<p>The programmer is urged to study +<span class="commandline">src/plugins/acct_gather_profile/io_energy.c</span> +and +<span class="commandline">src/common/slurm_acct_gather_profile.c</span> +for a sample implementation of a SLURM profile accounting plugin. +<p class="footer"><a href="#top">top</a> + +<h2>API Functions</h2> +<p>All of the following functions are required. Functions which are not +implemented must be stubbed. + +<p class="commandline">int acct_gather_profile_controller_start() +<p style="margin-left:.2in"><b>Description</b>:<br> +Called during slurmctld's initialization. +<br /> +Provides an opportunity to create files and do other system wide +initialization. +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. + + +<p class="commandline"> +void acct_gather_profile_g_conf_options(s_p_options_t **full_options, +int *full_options_cnt) +<p style="margin-left:.2in"><b>Description</b>:<br> +Defines configuration options in acct_gather.conf<br /> +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline">full(out) option definitions.</span> +<span class="commandline">full_options_cnt(out) number in full.</span> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. 
+ +<p class="commandline"> +void acct_gather_profile_g_conf_set(s_p_hashtbl_t *tbl) +<p style="margin-left:.2in"><b>Description</b>:<br> +Set configuration options from acct_gather.conf<br /> +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline">tbl -- hash table of options.</span> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. + +<p class="commandline"> +void acct_gather_profile_g_conf_get(s_p_hashtbl_t *tbl) +<p style="margin-left:.2in"><b>Description</b>:<br> +Gets configuration options from acct_gather.conf<br /> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">void* pointer to slurm_acct_gather_conf_t</span> on success, or<br> +<span class="commandline">NULL</span> on failure. + +<p class="commandline"> +int acct_gather_profile_p_node_step_start(slurmd_job_t* job) +<p style="margin-left:.2in"><b>Description</b>:<br> +Called once per step on each node from slurmstepd, before launching tasks. +<br /> +Provides an opportunity to create files and other node-step level +initialization. +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline">job -- slurmd_job_t structure containing information +about the step. </span> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. + +<p class="commandline"> +int acct_gather_profile_p_node_step_end(slurmd_job_t* job) +<p style="margin-left:.2in"><b>Description</b>:<br> +Called once per step on each node from slurmstepd, after all tasks end. +<br /> +Provides an opportunity to close files, etc. +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline">job -- slurmd_job_t structure containing information +about the step. 
</span> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. + +<p class="commandline"> +int acct_gather_profile_p_task_start(slurmd_job_t* job, uint32_t taskid) +<p style="margin-left:.2in"><b>Description</b>:<br> +Called once per task from slurmstepd, BEFORE node step start is called. +<br /> +Provides an opportunity to gather beginning values from node counters +(bytes_read ...) +<br /> +At this point in the life cycle, the value of the --profile option isn't +known and files are not open so calls to any of the 'add_*_data' +functions cannot be made. +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline">job -- slumd_job_t structure containing information +about the step. </span> +<br /><span class="commandline">taskid -- SLURM taskid. </span> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. + +<p class="commandline"> +int acct_gather_profile_p_task_end(slurmd_job_t* job, pid_t taskpid) +<p style="margin-left:.2in"><b>Description</b>:<br> +Called once per task from slurmstepd. +<br /> +Provides an opportunity to put final data for a task. +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline">job -- slumd_job_t structure containing information +about the step. </span> +<br /><span class="commandline">pid -- task process id (pid_t). </span> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. + +<p class="commandline">int acct_gather_profile_p_job_sample() +<p style="margin-left:.2in"><b>Description</b>:<br> +Called from the job_acct_gather poll_data routine. 
+<br /> +Provides an opportunity to put data at job_accnt_gather frequency, +from the job step info structure. +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline"> None</span> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. + +<p class="commandline"> +int acct_gather_profile_p_add_node_data(slurmd_job_t* job, char* group, +char* type, void* data); +<p style="margin-left:.2in"><b>Description</b>:<br> +Put data at the Node Totals level. Typically called when the step ends. +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline">job -- slumd_job_t structure containing information +about the step. </span> +<br /><span class="commandline">group -- identifies the data stream +(source of data). </span> +<br /><span class="commandline">type -- identifies the type of data. </span> +<br /><span class="commandline">data -- data structure to be put to the file. +</span> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. + + +<p class="commandline"> +int acct_gather_profile_p_add_sample_data(char* group, +char* type, void* data); +<p style="margin-left:.2in"><b>Description</b>:<br> +Put data at the Node Samples level. Typically called from something called +at either job_acct_gather interval or acct_gather_energy interval. +<br /> +All samples in the same group will eventually be consolidated in one +time series. +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<br /><span class="commandline">group -- identifies the data stream +(source of data). </span> +<br /><span class="commandline">type -- identifies the type of data. </span> +<br /><span class="commandline">data -- data structure to be put to the file. 
+</span> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. + + +<p class="commandline"> +int acct_gather_profile_p_add_task_data(slurmd_job_t* job, uint32_t taskid, +char* group, char* type, void* data); +<p style="margin-left:.2in"><b>Description</b>:<br> +Put data at the Task Totals level. Typically called at task end. +<p style="margin-left:.2in"><b>Arguments</b>: <br> +<span class="commandline">job -- slumd_job_t structure containing information +about the step. </span> +<br /><span class="commandline">taskid -- slurm taskid </span> +<br /><span class="commandline">group -- identifies the data stream +(source of data). </span> +<br /><span class="commandline">type -- identifies the type of data. </span> +<br /><span class="commandline">data -- data structure to be put to the file. +</span> +<p style="margin-left:.2in"><b>Returns</b>: <br> +<span class="commandline">SLURM_SUCCESS</span> on success, or<br> +<span class="commandline">SLURM_ERROR</span> on failure. + +<p>Note that the io_energy plugin only uses +<i>acct_gather_profile_p_add_sample_data</i>. The job merge program has +capability for summarizing a time series and inserting grand totals for the +node. The <i>add_node_data</i> and <i>add_task_data</i> functions were defined +in the intial design and may become depracated. + +<h2>Parameters</h2> + +<p>These parameters can be used in the slurm.conf to configure the +plugin and the frequency at which to gather node profile data.</p> +<dl> +<dt><span class="commandline">AcctGatherProfileType</span> +<dd>Specifies which plugin should be used. +</dl> + +<p>The <a href="acct_gather.conf.html">acct_gather.conf</a> provides profile +configuration options. +<dl> +<dt><span class="commandline">ProfileDir</span> +<dd>Path to location in a shared file system in which to write profile data. 
+There is no default as there is no standard location for a shared file system. +If this parameter is not specified, no profiling will occur. +<dt><span class="commandline">ProfileDefaultProfile</span> +<dd>Default setting for --profile command line option for srun, salloc, sbatch. +</dl> +The default profile value is <b>none</b> which means no profiling will be done +for jobs. The io_energy plugin also includes: +<ul> +<li> +<b>energy</b> sample energy use for the node. +</li> +<li> +<b>lustre</b> sample i/o to the Lustre file system for the node. +</li> +<li> +<b>network</b> sample i/o through the network (infiniband) interface +for the node. +</li> +<li> +<b>task</b> sample local disk I/O, cpu and memory use for each task. +</li> +<li> +<b>all</b> all of the above. +</li> +</ul> +Use caution when setting the default to values other than none as a file for +each job will be created. This option is provided for test systems. +<p>Most of the sources of profile data are associated with various +acct_gather plugins. The acct_gather.conf file has settings for various +sampling mechanisms that can be used to change the frequency at which +samples occur. + +<h2>Data Types</h2> +A plugin-like structure is implemented to generalize HDF5 data operations from +various sources. A <i>C</i> <b>typedef</b> is defined for each datatype. These +declarations are in /common/slurm_acct_gather_profile.h so the datatypes are +common to all profile plugins. +<p> +The operations are defined via structures of function pointers, and they are +defined in /plugins/acct_gather_profile/common/profile_hdf5.h and should work +on any HDF5 implementation, not only io_energy. +<p> +Functions must be implemented to perform various operations for the datatype. +The api for the plugin includes an argument for the datatype so that the +implementation of that api can call the specific operation for that datatype. 
+<p>Groups in the HDF5 file containing a dataset will include an attribute for +the datatype so that the program that merges step files into the job can +discover the type of the group and do the right thing. +<p> +For example, the typedef for the energy sample datatype; +<pre> +typedef struct profile_energy { + char tod[TOD_LEN]; // Not used in node-step + time_t time; + uint64_t watts; + uint64_t cpu_freq; +} profile_energy_t; +</pre> +<p> +A <i>factory</i> method is implemented for each type to construct a structure +with functions implementing various operations for the type. +The following structure of functions is required for each type. +<pre> +/* + * Structure of function pointers of common operations on a + * profile data type. (Some may be stubs, particularly if the data type + * does not represent a time series. + * dataset_size -- size of one dataset (structure size). + * create_memory_datatype -- creates hdf5 memory datatype + * corresponding to the datatype structure. + * create_file_datatype -- creates hdf5 file datatype + * corresponding to the datatype structure. + * create_s_memory_datatype -- creates hdf5 memory datatype + * corresponding to the summary datatype structure. + * create_s_file_datatype -- creates hdf5 file datatype + * corresponding to the summary datatype structure. + * init_job_series -- allocates a buffer for a complete time + * series (in job merge) and initializes each member + * merge_step_series -- merges all the individual time samples + * into a single data set with one item per sample. + * Data items can be scaled (e.g. subtracting beginning time) + * differenced (to show counts in interval) or other things + * appropriate for the series. + * series_total -- accumulate or average members in the entire + * series to be added to the file as totals for the node or + * task. 
+ * extract_series -- format members of a structure for putting + * to a file data extracted from a time series to be imported into + * another analysis tool. (e.g. format as comma separated value.) + * extract_totals -- format members of a structure for putting + * to a file data extracted from a time series total to be imported + * into another analysis tool. (e.g. format as comma,separated value.) + */ +typedef struct profile_hdf5_ops { + int (*dataset_size) (); + hid_t (*create_memory_datatype) (); + hid_t (*create_file_datatype) (); + hid_t (*create_s_memory_datatype) (); + hid_t (*create_s_file_datatype) (); + void* (*init_job_series) (int, int); + void (*merge_step_series) (hid_t, void*, void*, void*); + void* (*series_total) (int, void*); + void (*extract_series) (FILE*, bool, int, int, char*, + char*, void*); + void (*extract_totals) (FILE*, bool, int, int, char*, + char*, void*); +} profile_hdf5_ops_t; +</pre> + +Note there are two different data types for supporting time series.<br> +1) A primary type is defined for gathering data in the node step file. +It is typically named profile_{series_name}_t.<br> +2) Another type is defined for summarizing series totals. +It is typically named profile_{series_name}_s_t. It does not have a 'factory'. +It is only used in the functions of the primary data type and the +primaries structure has operations to create appropriate hdf5 objects. + +<p>When adding a new type, the <b>profile_factory</b> function has to be +modified to return an <i>ops</i> for the type. + +<p>Interaction between type and hdf5. +<ul> +<li> +The profile_{type}_t structure is used by callers of the <b>add_*_data</b> +functions. +</li> +<li> +HDF5 needs a <b>memory</b>_datatype to transform this structure into its +dataset object in memory. The <i>create_memory_datatype</i> function creates +the appropriate object. 
+</li> +<li> +HDF5 needs a <b>file</b>_datatype to transform the dataset into how it will be +written to the HDF5 file (or to transform what it reads from a file into a +dataset.) The <i>create_file_datatype</i> function creates +the appropriate object. +</li> +</ul> +<h2>Versioning</h2> +<p>This document describes version 1 of the SLURM Profile Accounting API. +Future releases of SLURM may revise this API. A profile accounting plugin +conveys its ability to implement a particular API version using the mechanism +outlined for SLURM plugins.</p> + +<p class="footer"><a href="#top">top</a> + +<p style="text-align:center;">Last modified 1 April 2013</p> + +<!--#include virtual="footer.txt"--> + diff --git a/doc/html/documentation.shtml b/doc/html/documentation.shtml index 46d9f68d06a..4aeee4cd6c4 100644 --- a/doc/html/documentation.shtml +++ b/doc/html/documentation.shtml @@ -104,6 +104,7 @@ Documenation for other versions of Slurm is distributed with the code</b></p> <li><a href="preemption_plugins.html">Preemption Plugin Programmer Guide</a></li> <li><a href="priority_plugins.html">Priority Plugin Programmer Guide</a></li> <li><a href="proctrack_plugins.html">Process Tracking Plugin Programmer Guide</a></li> +<li><a href="acct_gather_profile_plugins.html">Profile Accounting Plugin Programmer Guide</a></li> <li><a href="schedplugins.html">Scheduler Plugin Programmer Guide</a></li> <li><a href="selectplugins.html">Resource Selection Plugin Programmer Guide</a></li> <li><a href="slurmctld_plugstack.html">Slurmctld Generic Plugin Programmer Guide</a></li> diff --git a/doc/html/man_index.shtml b/doc/html/man_index.shtml index f531c459f09..83acef10b01 100644 --- a/doc/html/man_index.shtml +++ b/doc/html/man_index.shtml @@ -18,6 +18,7 @@ Documentation for other versions of Slurm is distributed with the code</b></p> <tr><td><a href="slurm.html">slurm</a></td><td>SLURM system overview.</td></tr> <tr><td><a href="smap.html">smap</a></td><td>graphically view information about 
SLURM jobs, partitions, and set configurations parameters.</td></tr> <tr><td><a href="sprio.html">sprio</a></td><td>view the factors that comprise a job's scheduling priority</td></tr> +<tr><td><a href="sprfmrgh5.html">sprfmrgh5</a></td><td>merge utility for acct_gather_profile plugin.</td></tr> <tr><td><a href="squeue.html">squeue</a></td><td>view information about jobs located in the SLURM scheduling queue.</td></tr> <tr><td><a href="sreport.html">sreport</a></td><td>Generate reports from the slurm accounting data.</td></tr> <tr><td><a href="srun_cr.html">srun_cr</a></td><td>run parallel jobs with checkpoint/restart support</td></tr> @@ -29,6 +30,7 @@ Documentation for other versions of Slurm is distributed with the code</b></p> <tr><td><a href="bluegene.conf.html">bluegene.conf</a></td><td>Slurm configuration file for BlueGene systems</td></tr> <tr><td><a href="cgroup.conf.html">cgroup.conf</a></td><td>Slurm configuration file for the cgroup support</td></tr> <tr><td><a href="gres.conf.html">gres.conf</a></td><td>Slurm configuration file for generic resource management.</td></tr> +<tr><td><a href="acct_gather.conf.html">acct_gather.conf</a></td><td>Configuration file for all acct_gather plugins</td></tr> <tr><td><a href="slurm.conf.html">slurm.conf</a></td><td>Slurm configuration file</td></tr> <tr><td><a href="slurmdbd.conf.html">slurmdbd.conf</a></td><td>Slurm Database Daemon (SlurmDBD) configuration file</td></tr> <tr><td><a href="topology.conf.html">topology.conf</a></td><td>Slurm configuration file for defining the network topology</td></tr> @@ -41,6 +43,6 @@ Documentation for other versions of Slurm is distributed with the code</b></p> </table> -<p style="text-align:center;">Last modified 29 November 2012</p> +<p style="text-align:center;">Last modified 1 April 2013</p> <!--#include virtual="footer.txt"--> diff --git a/doc/man/man1/Makefile.am b/doc/man/man1/Makefile.am index 7c54dfb8a1d..a669ecd30fb 100644 --- a/doc/man/man1/Makefile.am +++ 
b/doc/man/man1/Makefile.am @@ -14,6 +14,7 @@ man1_MANS = \ slurm.1 \ smap.1 \ sprio.1 \ + sprfmrgh5.1 \ squeue.1 \ sreport.1 \ srun.1 \ @@ -40,6 +41,7 @@ html_DATA = \ sinfo.html \ smap.html \ sprio.html \ + sprfmrgh5.html \ squeue.html \ sreport.html \ srun.html \ diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index 3960740e2f6..b074368dc54 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -925,6 +925,38 @@ per processor. By specifying \fB\-\-overcommit\fR you are explicitly allowing more than one task per processor. However no more than \fBMAX_TASKS_PER_NODE\fR tasks are permitted to execute per node. +.TP +\fB\-\-profile\fR=<all|none|[energy[,|task[,|lustre[,|network]]]]> +enables detailed data collection by the acct_gather_profile plugin. +Detailed data are typically time-series that are stored in an HDF5 file for +the job. + +.RS +.TP 10 +\fBAll\fR +All data types are collected. (Cannot be combined with other values.) + +.TP +\fBNone\fR +No data types are collected. This is the default. + (Cannot be combined with other values.) + +.TP +\fBEnergy\fR +Energy data is collected. + +.TP +\fBTask\fR +Task (I/O, Memory, ...) data is collected. + +.TP +\fBLustre\fR +Lustre data is collected. + +.TP +\fBNetwork\fR +Network (InfiniBand) data is collected. +.RE .TP \fB\-p\fR, \fB\-\-partition\fR=<\fIpartition_names\fR> Request a specific partition for the resource allocation. If not specified, @@ -1233,6 +1265,9 @@ Same as \fB\-O, \-\-overcommit\fR \fBSALLOC_PARTITION\fR Same as \fB\-p, \-\-partition\fR .TP +\fBSALLOC_PROFILE\fR +Same as \fB\-\-profile\fR +.TP \fBSALLOC_QOS\fR Same as \fB\-\-qos\fR .TP @@ -1317,6 +1352,9 @@ Same as \fB\-n, \-\-ntasks\fR \fBSLURM_NTASKS_PER_NODE\fR Set to value of the \-\-ntasks\-per\-node\fR option, if specified. .TP +\fBSLURM_PROFILE\fR +Same as \fB\-\-profile\fR +.TP \fBSLURM_TASKS_PER_NODE\fR Number of tasks to be initiated on each node. Values are comma separated and in the same order as SLURM_NODELIST. 
diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1 index 2c597845a66..5c85198f72c 100644 --- a/doc/man/man1/sbatch.1 +++ b/doc/man/man1/sbatch.1 @@ -1021,6 +1021,39 @@ partition as designated by the system administrator. If the job can use more than one partition, specify their names in a comma separate list and the one offering earliest initiation will be used. +.TP +\fB\-\-profile\fR=<all|none|[energy[,|task[,|lustre[,|network]]]]> +enables detailed data collection by the acct_gather_profile plugin. +Detailed data are typically time-series that are stored in an HDF5 file for +the job. + +.RS +.TP 10 +\fBAll\fR +All data types are collected. (Cannot be combined with other values.) + +.TP +\fBNone\fR +No data types are collected. This is the default. + (Cannot be combined with other values.) + +.TP +\fBEnergy\fR +Energy data is collected. + +.TP +\fBTask\fR +Task (I/O, Memory, ...) data is collected. + +.TP +\fBLustre\fR +Lustre data is collected. + +.TP +\fBNetwork\fR +Network (InfiniBand) data is collected. +.RE + .TP \fB\-\-propagate\fR[=\fIrlimitfR] Allows users to specify which of the modifiable (soft) resource limits @@ -1411,6 +1444,9 @@ Same as \fB\-O, \-\-overcommit\fR \fBSBATCH_PARTITION\fR Same as \fB\-p, \-\-partition\fR .TP +\fBSBATCH_PROFILE\fR +Same as \fB\-\-profile\fR +.TP \fBSBATCH_QOS\fR Same as \fB\-\-qos\fR .TP @@ -1551,6 +1587,9 @@ This value is propagated to the spawned processes. 
\fBSLURM_PROCID\fR The MPI rank (or relative process ID) of the current process .TP +\fBSLURM_PROFILE\fR +Same as \fB\-\-profile\fR +.TP \fBSLURM_RESTART_COUNT\fR If the job has been restarted due to system failure or has been explicitly requeued, this will be sent to the number of times diff --git a/doc/man/man1/sh5util.1 b/doc/man/man1/sh5util.1 new file mode 100644 index 00000000000..9a821de5ff7 --- /dev/null +++ b/doc/man/man1/sh5util.1 @@ -0,0 +1,101 @@ +.TH "sh5util" "1" "SLURM 2.6" "March 2013" "SLURM Commands" +.SH "NAME" +.LP +sh5util \- Tool for merging HDF5 files from the acct_gather_profile +plugin that gathers detailed data for jobs running under SLURM + +.SH "SYNOPSIS" +.LP +sh5util + +.SH "DESCRIPTION" +.LP +sh5util merges HDF5 files produced on each node for each step of a job into +one HDF5 file for the job. The resulting file can be viewed and manipulated +by common HDF5 tools such as HDF5View, h5dump, h5edit, or h5ls. +.LP +sh5util has two execution modes. The first mode merges all the node-step +files for a job into one job file. The second mode extracts a limited set of +data for specific nodes, steps, and data series. +The extract mode is set with the \fB\-\-extract\fR command line option. +.LP +The merge mode of sh5util is expected to be launched during the controller +epilog script using the SLURM_JOB_ID environment variable. +However, it can be launched on any system with access to the shared file +system. + +.SH "OPTIONS" +.LP + +.TP +\fB\-\-jobid=<number>\fR +supplies the slurm jobid of the node-step files to be merged. + +.TP +\fB\-\-profiledir=<path>\fR +supplies the path containing the node-step files to be merged. +This is a required argument. + +.TP +\fB\-\-savefiles\fR +retains the merge-step files after the merge. Normally, they are deleted. 
+ + +.TP +\fB\-\-extract\fR +sets extract mode to extract limited subsets of data from the +HDF5 file into a comma separated value file suitable for import +into a spreadsheet or other analysis tool. +The following options apply to extract mode. + +.TP +\fB\-\-stepid=<n | *>\fR +specifies the step for which data is to be extracted. It is either +a step number, or \fB*\fR which specifies all steps. +\fB*\fR is the default. + +.TP +\fB\-\-node=<name | *>\fR +specifies the name of the node for which data is to be extracted. +It is either nodename, or \fB*\fR which specifies all nodes. +\fB*\fR is the default. + +.TP +\fB\-\-level=[Node:Totals|Node:TimeSeries|Task:Totals]\fR +specifies the level in the job structure from which data +is to be extracted. + +.RS +.TP +\fBNode:Totals\fR means the extracted data will come from the Totals +group of a node. + +.TP +\fBNode:TimeSeries\fR means the extracted data will come from the Time Series +group of a node. + +.TP +\fB\-\-series=<name | *>\fR +specifies the name of the data series which is to be extracted. +It is either series name, or \fBTasks\fR which is all tasks, +or \fB*\fR which specifies all series. +\fB*\fR is the default. + +.TP +\fB\-\-output=<path>\fR +specifies the path of the csv file containing the extracted data. +By default it is 'profile_data.csv' in the current directory. + +.SH "COPYING" +SLURM is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2 of the License, or (at your option) +any later version. +.LP +SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. 
+ +.SH "SEE ALSO" +.LP diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index 412f398c0e6..3b2ea2523ef 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -1095,6 +1095,39 @@ partition as designated by the system administrator. If the job can use more than one partition, specify their names in a comma separate list and the one offering earliest initiation will be used. +.TP +\fB\-\-profile\fR=<all|none|[energy[,|task[,|lustre[,|network]]]]> +enables detailed data collection by the acct_gather_profile plugin. +Detailed data are typically time-series that are stored in an HDF5 file for +the job. + +.RS +.TP 10 +\fBAll\fR +All data types are collected. (Cannot be combined with other values.) + +.TP +\fBNone\fR +No data types are collected. This is the default. + (Cannot be combined with other values.) + +.TP +\fBEnergy\fR +Energy data is collected. + +.TP +\fBTask\fR +Task (I/O, Memory, ...) data is collected. + +.TP +\fBLustre\fR +Lustre data is collected. + +.TP +\fBNetwork\fR +Network (InfiniBand) data is collected. +.RE + .TP \fB\-\-prolog\fR=<\fIexecutable\fR> \fBsrun\fR will run \fIexecutable\fR just before launching the job step. @@ -1842,6 +1875,9 @@ If set, then PMI key\-pairs will contain no duplicate keys. This is the case for MPICH2 and reduces overhead in testing for duplicates for improved performance .TP +\fBSLURM_PROFILE\fR +Same as \fB\-\-profile\fR +.TP \fBSLURM_PROLOG\fR Same as \fB\-\-prolog\fR .TP diff --git a/doc/man/man5/acct_gather.conf.5 b/doc/man/man5/acct_gather.conf.5 index 1ada0c9ce97..992b8c9da0f 100644 --- a/doc/man/man5/acct_gather.conf.5 +++ b/doc/man/man5/acct_gather.conf.5 @@ -58,6 +58,44 @@ Specify BMC Username. Specify BMC Password. .RE +.TP +\fBProfileIO_Energy\fR options used for acct_gather_profile/io_energy are as follows. + +.RS +.TP 20 +\fBProfileDir\fR=<path> +This parameter is the path to the shared folder into which the +acct_gather_profile plugin will write detailed data (usually as an HDF5 file). 
+The directory is assumed to be on a file system shared by the controller and +all compute nodes. This is a required parameter. + +.TP +\fBProfileDefaultProfile\fR=opt{,opt{,opt}} +Default --Profile value for data types collected for each job submission. +It is a comma separated list of data streams. Allowed values are: + +.RS +.TP +\fBAll\fR All data types are collected. (Cannot be combined with other values.) + +.TP +\fBNone\fR No data types are collected. This is the default. + (Cannot be combined with other values.) + +.TP +\fBEnergy\fR Energy data is collected. + +.TP +\fBTask\fR Task (I/O, Memory, ...) data is collected. + +.TP +\fBLustre\fR Lustre data is collected. + +.TP +\fBNetwork\fR Network (InfiniBand) data is collected. +.RE +.RE + .SH "EXAMPLE" .LP .br @@ -72,6 +110,11 @@ EnergyIPMIFrequency=10 EnergyIPMICalcAdjustment=yes .br # +.br +# Parameters for AcctGatherProfile +.br +ProfileDir=/app/slurm/profile_data +.br .SH "COPYING" Copyright (C) 2012-2013 Bull. diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 54f2d0ac730..6623d37a189 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -195,6 +195,30 @@ Average Power Limit (RAPL) mechanism. Note that enabling RAPL may require the execution of the command "sudo modprobe msr". .RE +.TP +\fBAcctGatherProfileType\fR +Identifies the plugin to be used for detailed job profiling. +The jobacct_gather plugin and slurmd daemon call this plugin to collect +detailed data such as I/O counts, memory usage, or energy consumption for jobs +and nodes. There are interfaces in this plugin to collect data at step start +and completion, task start and completion, and at the account gather +frequency. The data collected at the node level is related to jobs only in +case of exclusive job allocation. + +Configurable values at present are: +.RS +.TP 20 +\fBacct_gather_profile/none\fR +No profile data is collected. 
+.TP +\fBacct_gather_profile/io_energy\fR +Lustre I/O counts and I/O counts from infiniband network adaptors are +collected at the node level. Local disk I/O counts and memory usage are sampled +for tasks at jobacct_gather frequency. Energy consumption at the node level +is gathered at jobacct_gather_frequency. Data from all the steps on all +nodes use for the job are consolidated into and HDF5 structured file. +.RE + .TP \fBAuthType\fR The authentication method for communications between SLURM diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index c2930a98f5c..2abc4ea875b 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -1008,6 +1008,7 @@ typedef struct job_descriptor { /* For submit, allocate, and update requests */ uint32_t priority; /* relative priority of the job, * explicitly set only for user root, * 0 == held (don't initiate) */ + char *profile; /* Level of acct_gather_profile {all | none} */ char *qos; /* Quality of Service */ char *resp_host; /* NOTE: Set by slurmctld */ char *req_nodes; /* comma separated list of required nodes @@ -1150,6 +1151,7 @@ typedef struct job_info { time_t pre_sus_time; /* time job ran prior to last suspend */ uint32_t priority; /* relative priority of the job, * 0=held, 1=required nodes DOWN/DRAINED */ + char *profile; /* Level of acct_gather_profile {all | none} */ char *qos; /* Quality of Service */ char *req_nodes; /* comma separated list of required nodes */ int *req_node_inx; /* required list index pairs into node_table: @@ -1317,6 +1319,7 @@ typedef struct { char *gres; /* generic resources needed */ char *name; /* name of the job step */ char *network; /* network use spec */ + char *profile; /* Level of acct_gather_profile {all | none} */ uint8_t no_kill; /* 1 if no kill on node failure */ uint32_t min_nodes; /* minimum number of nodes required by job, * default=0 */ @@ -1363,6 +1366,7 @@ typedef struct { bool multi_prog; uint32_t slurmd_debug; /* remote slurmd debug level */ bool parallel_debug; + char *profile; 
/* Level of acct_gather_profile {all | none} */ char *task_prolog; char *task_epilog; uint16_t cpu_bind_type; /* use cpu_bind_type_t */ @@ -1431,6 +1435,7 @@ typedef struct { uint32_t cpu_freq; /* requested cpu frequency */ uint32_t num_tasks; /* number of tasks */ char *partition; /* name of assigned partition */ + char *profile; /* Level of acct_gather_profile {all | none} */ char *resv_ports; /* ports allocated for MPI */ time_t run_time; /* net run time (factor out time suspended) */ dynamic_plugin_data_t *select_jobinfo; /* opaque data type, @@ -1863,7 +1868,9 @@ typedef struct reservation_name_msg { #define DEBUG_FLAG_SWITCH 0x00020000 /* SwitchType plugin */ #define DEBUG_FLAG_ENERGY 0x00040000 /* AcctGatherEnergy plugin */ #define DEBUG_FLAG_EXT_SENSORS 0x00080000 /* ExtSensorsType plugin */ -#define DEBUG_FLAG_THREADID 0x00100000 /* Print out the thread id */ +#define DEBUG_FLAG_THREADID 0x00100000 /* Print out the thread id */ +#define DEBUG_FLAG_PROFILE 0x00200000 /* AcctGatherProfile plugin */ + #define GROUP_FORCE 0x8000 /* if set, update group membership * info even if no updates to * /etc/group file */ @@ -1905,6 +1912,7 @@ typedef struct slurm_ctl_conf { char *accounting_storage_user; /* accounting storage user */ uint16_t acctng_store_job_comment; /* send job comment to accounting */ char *acct_gather_energy_type; /* energy accounting type */ + char *acct_gather_profile_type; /* profile accounting type */ uint16_t acct_gather_node_freq; /* secs between node acct request */ char *authtype; /* authentication type */ char *backup_addr; /* comm path of slurmctld secondary server */ diff --git a/src/api/config_info.c b/src/api/config_info.c index 4527883c4ee..73146e289d5 100644 --- a/src/api/config_info.c +++ b/src/api/config_info.c @@ -197,6 +197,11 @@ extern void *slurm_ctl_conf_2_key_pairs (slurm_ctl_conf_t* slurm_ctl_conf_ptr) key_pair->value = xstrdup(slurm_ctl_conf_ptr->acct_gather_energy_type); list_append(ret_list, key_pair); + key_pair = 
xmalloc(sizeof(config_key_pair_t)); + key_pair->name = xstrdup("AcctGatherProfileType"); + key_pair->value = xstrdup(slurm_ctl_conf_ptr->acct_gather_profile_type); + list_append(ret_list, key_pair); + snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->acct_gather_node_freq); key_pair = xmalloc(sizeof(config_key_pair_t)); diff --git a/src/api/step_launch.c b/src/api/step_launch.c index cfb8cacfdfb..bf6406c1089 100644 --- a/src/api/step_launch.c +++ b/src/api/step_launch.c @@ -247,6 +247,7 @@ int slurm_step_launch (slurm_step_ctx_t *ctx, launch.ntasks = ctx->step_resp->step_layout->task_cnt; launch.slurmd_debug = params->slurmd_debug; launch.switch_job = ctx->step_resp->switch_job; + launch.profile = params->profile; launch.task_prolog = params->task_prolog; launch.task_epilog = params->task_epilog; launch.cpu_bind_type = params->cpu_bind_type; @@ -420,6 +421,7 @@ int slurm_step_launch_add (slurm_step_ctx_t *ctx, launch.ntasks = ctx->step_resp->step_layout->task_cnt; launch.slurmd_debug = params->slurmd_debug; launch.switch_job = ctx->step_resp->switch_job; + launch.profile = params->profile; launch.task_prolog = params->task_prolog; launch.task_epilog = params->task_epilog; launch.cpu_bind_type = params->cpu_bind_type; diff --git a/src/common/Makefile.am b/src/common/Makefile.am index acbe7f79d6f..8671e190b08 100644 --- a/src/common/Makefile.am +++ b/src/common/Makefile.am @@ -93,6 +93,7 @@ libcommon_la_SOURCES = \ slurm_accounting_storage.c slurm_accounting_storage.h \ slurm_jobacct_gather.c slurm_jobacct_gather.h \ slurm_acct_gather_energy.c slurm_acct_gather_energy.h \ + slurm_acct_gather_profile.c slurm_acct_gather_profile.h \ slurm_jobcomp.c slurm_jobcomp.h \ slurm_topology.c slurm_topology.h \ switch.c switch.h \ diff --git a/src/common/read_config.c b/src/common/read_config.c index 3d4adef5f80..790959579a3 100644 --- a/src/common/read_config.c +++ b/src/common/read_config.c @@ -164,6 +164,7 @@ s_p_options_t slurm_conf_options[] = { 
{"AccountingStoreJobComment", S_P_BOOLEAN}, {"AcctGatherEnergyType", S_P_STRING}, {"AcctGatherNodeFreq", S_P_UINT16}, + {"AcctGatherProfileType", S_P_STRING}, {"AuthType", S_P_STRING}, {"BackupAddr", S_P_STRING}, {"BackupController", S_P_STRING}, @@ -2057,6 +2058,7 @@ free_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr, bool purge_node_hash) xfree (ctl_conf_ptr->control_machine); xfree (ctl_conf_ptr->crypto_type); xfree (ctl_conf_ptr->acct_gather_energy_type); + xfree (ctl_conf_ptr->acct_gather_profile_type); xfree (ctl_conf_ptr->epilog); xfree (ctl_conf_ptr->epilog_slurmctld); xfree (ctl_conf_ptr->ext_sensors_type); @@ -2161,6 +2163,7 @@ init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr) ctl_conf_ptr->disable_root_jobs = 0; ctl_conf_ptr->acct_gather_node_freq = 0; xfree (ctl_conf_ptr->acct_gather_energy_type); + xfree (ctl_conf_ptr->acct_gather_profile_type); ctl_conf_ptr->ext_sensors_freq = 0; xfree (ctl_conf_ptr->ext_sensors_type); ctl_conf_ptr->dynalloc_port = (uint16_t) NO_VAL; @@ -2654,6 +2657,11 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) conf->acct_gather_energy_type = xstrdup(DEFAULT_ACCT_GATHER_ENERGY_TYPE); + if (!s_p_get_string(&conf->acct_gather_profile_type, + "AcctGatherProfileType", hashtbl)) + conf->acct_gather_profile_type = + xstrdup(DEFAULT_ACCT_GATHER_PROFILE_TYPE); + if (!s_p_get_uint16(&conf->acct_gather_node_freq, "AcctGatherNodeFreq", hashtbl)) conf->acct_gather_node_freq = 0; @@ -3763,6 +3771,11 @@ extern char * debug_flags2str(uint32_t debug_flags) xstrcat(rc, ","); xstrcat(rc, "Priority"); } + if (debug_flags & DEBUG_FLAG_PROFILE) { + if (rc) + xstrcat(rc, ","); + xstrcat(rc, "Profile"); + } if (debug_flags & DEBUG_FLAG_RESERVATION) { if (rc) xstrcat(rc, ","); @@ -3845,6 +3858,8 @@ extern uint32_t debug_str2flags(char *debug_flags) rc |= DEBUG_FLAG_NO_REALTIME; else if (strcasecmp(tok, "Priority") == 0) rc |= DEBUG_FLAG_PRIO; + else if (strcasecmp(tok, "Profile") == 0) + rc |= DEBUG_FLAG_PROFILE; else if 
(strcasecmp(tok, "Reservation") == 0) rc |= DEBUG_FLAG_RESERVATION; else if (strcasecmp(tok, "SelectType") == 0) diff --git a/src/common/read_config.h b/src/common/read_config.h index 049ff6d1a57..45f6f3f8b7b 100644 --- a/src/common/read_config.h +++ b/src/common/read_config.h @@ -80,6 +80,7 @@ extern char *default_plugstack; #define JOB_ACCT_GATHER_TYPE_NONE "jobacct_gather/none" #define DEFAULT_JOB_ACCT_GATHER_FREQ 30 #define DEFAULT_ACCT_GATHER_ENERGY_TYPE "acct_gather_energy/none" +#define DEFAULT_ACCT_GATHER_PROFILE_TYPE "acct_gather_profile/none" #define ACCOUNTING_STORAGE_TYPE_NONE "accounting_storage/none" #define DEFAULT_DISABLE_ROOT_JOBS 0 #define DEFAULT_ENFORCE_PART_LIMITS 0 diff --git a/src/common/slurm_acct_gather.c b/src/common/slurm_acct_gather.c index e515355df62..374d2f1e527 100644 --- a/src/common/slurm_acct_gather.c +++ b/src/common/slurm_acct_gather.c @@ -1,5 +1,5 @@ /*****************************************************************************\ - * slurm_acct_gather.h - generic interface needed for some + * slurm_acct_gather.c - generic interface needed for some * acct_gather plugins. ***************************************************************************** * Copyright (C) 2013 SchedMD LLC. 
@@ -49,13 +49,13 @@ extern int acct_gather_conf_init(void) s_p_options_t *full_options = NULL; int full_options_cnt = 0; struct stat buf; - if (inited) return SLURM_SUCCESS; /* get options from plugins using acct_gather.conf */ - acct_gather_energy_g_conf_options(&full_options, &full_options_cnt); +// acct_gather_energy_g_conf_options(&full_options, &full_options_cnt); + acct_gather_profile_g_conf_options(&full_options, &full_options_cnt); /* ADD MORE HERE */ /* for the NULL at the end */ @@ -84,7 +84,8 @@ extern int acct_gather_conf_init(void) xfree(conf_path); /* handle acct_gather.conf in each plugin */ - acct_gather_energy_g_conf_set(tbl); +// acct_gather_energy_g_conf_set(tbl); + acct_gather_profile_g_conf_set(tbl); /* ADD MORE HERE */ /******************************************/ diff --git a/src/common/slurm_acct_gather.h b/src/common/slurm_acct_gather.h index 7e980e4a7b9..272723ae7cf 100644 --- a/src/common/slurm_acct_gather.h +++ b/src/common/slurm_acct_gather.h @@ -53,9 +53,13 @@ #include "read_config.h" #include "slurm_acct_gather_energy.h" +#include "slurm_acct_gather_profile.h" typedef struct { void *energy_ipmi; + // Options for acct_gather_profile plugin + char *profile_dir; + char *profile_DefaultProfile; } slurm_acct_gather_conf_t; extern int acct_gather_conf_init(void); diff --git a/src/common/slurm_acct_gather_profile.c b/src/common/slurm_acct_gather_profile.c new file mode 100644 index 00000000000..0a43c6c36d4 --- /dev/null +++ b/src/common/slurm_acct_gather_profile.c @@ -0,0 +1,319 @@ +/*****************************************************************************\ + * slurm_acct_gather_profile.c - implementation-independent job profile + * accounting plugin definitions + ***************************************************************************** + * Copyright (C) 2013 Bull S. A. S. + * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. 
+ * + * Written by Rod Schultz <rod.schultz@bull.com> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+\*****************************************************************************/ + +#include <pthread.h> +#include <stdlib.h> +#include <string.h> + +#include "src/common/macros.h" +#include "src/common/plugin.h" +#include "src/common/plugrack.h" +#include "src/common/read_config.h" +#include "src/common/slurm_protocol_api.h" +#include "src/common/xmalloc.h" +#include "src/common/xstring.h" +#include "src/common/slurm_acct_gather_profile.h" + +typedef struct profile_task_info { + uint32_t gtid; /* global task id */ + pid_t pid; /* slurm taskid */ +} profile_task_pid_t; + +static int nodetasks = 0; +static profile_task_pid_t* pid2task = NULL; + +typedef struct slurm_acct_gather_profile_ops { + int (*controller_start) (); + int (*node_step_start) (slurmd_job_t*); + int (*node_step_end) (slurmd_job_t*); + int (*task_start) (slurmd_job_t*, uint32_t); + int (*task_end) (slurmd_job_t*, pid_t); + int (*job_sample) (); + int (*add_node_data) (slurmd_job_t*, char*, char*, void*); + int (*add_sample_data) (char*, char*, void*); + int (*add_task_data) (slurmd_job_t*, uint32_t, char*, char*, void*); + void (*conf_options) (s_p_options_t **full_options, + int *full_options_cnt); + void (*conf_set) (s_p_hashtbl_t *tbl); + void* (*conf_get) (); +} slurm_acct_gather_profile_ops_t; + +/* + * These strings must be kept in the same order as the fields + * declared for slurm_acct_gather_profile_ops_t. 
+ */ +static const char *syms[] = { + "acct_gather_profile_p_controller_start", + "acct_gather_profile_p_node_step_start", + "acct_gather_profile_p_node_step_end", + "acct_gather_profile_p_task_start", + "acct_gather_profile_p_task_end", + "acct_gather_profile_p_job_sample", + "acct_gather_profile_p_add_node_data", + "acct_gather_profile_p_add_sample_data", + "acct_gather_profile_p_add_task_data", + "acct_gather_profile_p_conf_options", + "acct_gather_profile_p_conf_set", + "acct_gather_profile_p_conf_get" +}; + +static slurm_acct_gather_profile_ops_t ops; +static plugin_context_t *g_context = NULL; +static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_mutex_t profile_mutex = PTHREAD_MUTEX_INITIALIZER; +static bool init_run = false; + +extern int slurm_acct_gather_profile_init(void) +{ + int retval = SLURM_SUCCESS; + char *plugin_type = "acct_gather_profile"; + char *type = NULL; + if (init_run && g_context) + return retval; + + slurm_mutex_lock(&g_context_lock); + + if (g_context) + goto done; + + type = slurm_get_acct_gather_profile_type(); + + g_context = plugin_context_create( + plugin_type, type, (void **)&ops, syms, sizeof(syms)); + + if (!g_context) { + error("cannot create %s context for %s", plugin_type, type); + retval = SLURM_ERROR; + goto done; + } + init_run = true; + +done: + slurm_mutex_unlock(&g_context_lock); + xfree(type); + if (retval == SLURM_SUCCESS) + retval = acct_gather_conf_init(); + return retval; +} + +extern int acct_gather_profile_fini(void) +{ + int rc; + if (!g_context) + return SLURM_SUCCESS; + + init_run = false; + rc = plugin_context_destroy(g_context); + g_context = NULL; + return rc; +} + +extern void acct_gather_profile_g_conf_options(s_p_options_t **full_options, + int *full_options_cnt) +{ + if (slurm_acct_gather_profile_init() < 0) + return; + (*(ops.conf_options))(full_options, full_options_cnt); + return; +} + +extern void acct_gather_profile_g_conf_set(s_p_hashtbl_t *tbl) +{ + if 
(slurm_acct_gather_profile_init() < 0) + return; + + (*(ops.conf_set))(tbl); + return; +} + +extern void* acct_gather_profile_g_conf_get() { + + if (!g_context) + return NULL; + + return (*(ops.conf_get))(); +} + + +extern int acct_gather_profile_g_controller_start() +{ + int retval = SLURM_ERROR; + + if (slurm_acct_gather_profile_init() < 0) + return retval; + + retval = (*(ops.controller_start))(); + return retval; +} + +extern int acct_gather_profile_g_node_step_start(slurmd_job_t* job) +{ + int tx; + int retval = SLURM_ERROR; + + if (job->stepid == NO_VAL) { + return retval; + } + if (job->profile) { + debug3("PROFILE: option --profile=%s",job->profile); + } + nodetasks = job->node_tasks; + pid2task = xmalloc(sizeof(profile_task_pid_t)*nodetasks); + if (!pid2task) { + nodetasks = 0; + return retval; + } + for (tx=0;tx<nodetasks;tx++) { + pid2task[tx].gtid = job->task[tx]->gtid; + pid2task[tx].pid = job->task[tx]->pid; + } + if (slurm_acct_gather_profile_init() < 0) + return retval; + + retval = (*(ops.node_step_start))(job); + return retval; +} + +extern int acct_gather_profile_g_node_step_end(slurmd_job_t* job) +{ + int retval = SLURM_ERROR; + if (job->stepid == NO_VAL) { + return retval; + } + if (!g_context) { + xfree(pid2task); + return retval; + } + + retval = (*(ops.node_step_end))(job); + xfree(pid2task); + nodetasks = 0; + return retval; +} + +extern int acct_gather_profile_g_task_start(slurmd_job_t* job, uint32_t taskid) +{ + int retval = SLURM_ERROR; + if (job->stepid == NO_VAL) { + return retval; + } + // task start occurs before node_step_start. 
+ if (slurm_acct_gather_profile_init() < 0) + return retval; + slurm_mutex_lock(&profile_mutex); + retval = (*(ops.task_start))(job, taskid); + slurm_mutex_unlock(&profile_mutex); + return retval; +} + +extern int acct_gather_profile_g_task_end(slurmd_job_t* job, pid_t taskpid) +{ + int retval = SLURM_ERROR; + if (!g_context) { + return retval; + } + slurm_mutex_lock(&profile_mutex); + retval = (*(ops.task_end))(job, taskpid); + slurm_mutex_unlock(&profile_mutex); + return retval; +} + +extern int acct_gather_profile_g_job_sample() +{ + int retval = SLURM_ERROR; + if (!g_context) { + return retval; + } + slurm_mutex_lock(&profile_mutex); + retval = (*(ops.job_sample))(); + slurm_mutex_unlock(&profile_mutex); + return retval; +} + +extern int acct_gather_profile_g_add_node_data(slurmd_job_t* job, char* group, + char* type, void* data) +{ + int retval = SLURM_ERROR; + if (!g_context) { + return retval; + } + slurm_mutex_lock(&profile_mutex); + retval = (*(ops.add_node_data))(job,group,type,data); + slurm_mutex_unlock(&profile_mutex); + return retval; +} + +extern int acct_gather_profile_g_add_sample_data(char* group, char* type, + void* data) +{ + int retval = SLURM_ERROR; + if (!g_context) { + return retval; + } + slurm_mutex_lock(&profile_mutex); + retval = (*(ops.add_sample_data))(group,type,data); + slurm_mutex_unlock(&profile_mutex); + return retval; +} + +extern int acct_gather_profile_g_add_task_data(slurmd_job_t* job, + uint32_t taskid, char* group, char* type, void* data) +{ + int retval = SLURM_ERROR; + if (!g_context) { + return retval; + } + slurm_mutex_lock(&profile_mutex); + retval = (*(ops.add_task_data))(job,taskid,group,type,data); + slurm_mutex_unlock(&profile_mutex); + return retval; +} + +extern int get_taskid_from_pid(pid_t pid, uint32_t *gtid) { + int tx; + if (pid2task == NULL) + return SLURM_ERROR; + for (tx=0;tx<nodetasks;tx++) { + if (pid2task[tx].pid == pid) { + *gtid = pid2task[tx].gtid; + return SLURM_SUCCESS; + } + } + return 
SLURM_ERROR; +} diff --git a/src/common/slurm_acct_gather_profile.h b/src/common/slurm_acct_gather_profile.h new file mode 100644 index 00000000000..24aae19e5a6 --- /dev/null +++ b/src/common/slurm_acct_gather_profile.h @@ -0,0 +1,231 @@ +/*****************************************************************************\ + * slurm_acct_gather_profile.h - implementation-independent job profile + * accounting plugin definitions + * Copyright (C) 2013 Bull S. A. S. + * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. + * + * Written by Rod Schultz <rod.schultz@bull.com> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. 
+ * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#ifndef __SLURM_ACCT_GATHER_PROFILE_H__ +#define __SLURM_ACCT_GATHER_PROFILE_H__ + +#if HAVE_CONFIG_H +# include "config.h" +# if HAVE_INTTYPES_H +# include <inttypes.h> +# else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +# endif /* HAVE_INTTYPES_H */ +#else /* !HAVE_CONFIG_H */ +# include <inttypes.h> +#endif /* HAVE_CONFIG_H */ + +#include <sys/resource.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include "slurm/slurm.h" +#include "slurm/slurmdb.h" +#include "src/common/macros.h" +#include "src/common/pack.h" +#include "src/common/list.h" +#include "src/common/xmalloc.h" +#include "src/common/slurm_acct_gather.h" +#include "src/slurmd/slurmstepd/slurmstepd_job.h" + +/* + * Load the plugin + */ +extern int acct_gather_profile_init(void); + +/* + * Unload the plugin + */ +extern int acct_gather_profile_fini(void); +/* + * Define plugin local conf for acct_gather.conf + * + * Parameters + * full_options -- pointer that will receive list of plugin local + * definitions + * full_options_cnt -- count of plugin local definitions + */ +extern void acct_gather_profile_g_conf_options(s_p_options_t **full_options, + int *full_options_cnt); +/* + * set plugin local conf from acct_gather.conf into its structure + * + * Parameters + * tbl - hash table of acct_gather.conf key-values. + */ +extern void acct_gather_profile_g_conf_set(s_p_hashtbl_t *tbl); + +/* + * get acct_gather.conf parameters + * + * returns - pointer to static slurm_acct_gather_conf_t + */ +extern void* acct_gather_profile_g_conf_get(void); + +/* + * Called from slurmctld, when it starts. + * Provide an opportunity to make necessary directories and other global + * initialization. 
+ * + * Returns -- SLURM_SUCCESS or SLURM_ERROR + */ +extern int acct_gather_profile_g_controller_start(); + +/* + * Called once per step on each node from slurmstepd, before launching tasks. + * Provides an opportunity to create files and other node-step level + * initialization. + * + * Parameters + * job -- structure defining a slurm job + * + * Returns -- SLURM_SUCCESS or SLURM_ERROR + */ +extern int acct_gather_profile_g_node_step_start(slurmd_job_t* job); + +/* + * Called once per step on each node from slurmstepd, after all tasks end. + * Provides an opportunity to close files, etc. + * + * Parameters + * job -- structure defining a slurm job + * + * Returns -- SLURM_SUCCESS or SLURM_ERROR + */ +extern int acct_gather_profile_g_node_step_end(slurmd_job_t* job); + +/* + * Called once per task from slurmstepd, BEFORE node step start is called. + * Provides an opportunity to gather beginning values from node counters + * (bytes_read ...) + * At this point in the life cycle, the value of the --profile option isn't + * known and and files are not open so calls to the 'add_*_data' + * functions cannot be made. + * + * Parameters + * job -- structure defining a slurm job + * taskid -- slurm taskid + * + * Returns -- SLURM_SUCCESS or SLURM_ERROR + */ +extern int acct_gather_profile_g_task_start(slurmd_job_t* job, + uint32_t taskid); + +/* + * Called once per task from slurmstepd. + * Provides an opportunity to put final data for a task. + * + * Parameters + * job -- structure defining a slurm job + * taskpid -- linux process id of task + * + * Returns -- SLURM_SUCCESS or SLURM_ERROR + */ +extern int acct_gather_profile_g_task_end(slurmd_job_t* job, pid_t taskpid); + +/* + * Called from the job_acct_gather poll_data routine. + * Provides an opportunity to put data from the job step info structure. + * + * Returns -- SLURM_SUCCESS or SLURM_ERROR + */ +extern int acct_gather_profile_g_job_sample(); + +/* + * Put data at the Node Totals level. 
Typically called when the step ends. + * + * Parameters + * job -- structure defining a slurm job + * group -- identifies the data stream (source of data). + * type -- identifies the type of data. + * data -- data structure to be put to the file. + * + * Returns -- SLURM_SUCCESS or SLURM_ERROR + */ +extern int acct_gather_profile_g_add_node_data(slurmd_job_t* job, char* group, + char* type, void* data); + +/* + * Put data at the Node Samples level. Typically called from something called + * at either job_acct_gather interval or acct_gather_energy interval. + * All samples in the same group will eventually be consolidated in one + * dataset + * + * Parameters + * group -- identifies the data stream (source of data). + * type -- identifies the type of data. + * data -- data structure to be put to the file. + * + * Returns -- SLURM_SUCCESS or SLURM_ERROR + */ +extern int acct_gather_profile_g_add_sample_data(char* group, char* type, + void* data); + +/* + * Put data at the Task Totals level. Typically called at task end. + * + * Parameters + * job -- structure defining a slurm job + * taskid -- slurm taskid + * group -- identifies the data stream (source of data). + * type -- identifies the type of data. + * data -- data structure to be put to the file. + * + * Returns -- SLURM_SUCCESS or SLURM_ERROR + */ +extern int acct_gather_profile_g_add_task_data(slurmd_job_t* job, + uint32_t taskid, char* group, char* type, void* data); + +/* + * get the slurm taskid from a pid. 
+ * + * Parameters + * pid - a linux process id + * gtid - (out) pointer to variable to receive slurm taskid + * + * Returns + * corresponding slurm taskid (or -1) + * Returns -- SLURM_SUCCESS or SLURM_ERROR + * + */ +extern int get_taskid_from_pid(pid_t pid, uint32_t *gtid); + +#endif /*__SLURM_ACCT_GATHER_PROFILE_H__*/ diff --git a/src/common/slurm_jobacct_gather.c b/src/common/slurm_jobacct_gather.c index 2f80e650344..ac746ac5b95 100644 --- a/src/common/slurm_jobacct_gather.c +++ b/src/common/slurm_jobacct_gather.c @@ -58,6 +58,7 @@ #include "src/common/plugin.h" #include "src/common/plugrack.h" #include "src/common/read_config.h" +#include "src/common/slurm_acct_gather_profile.h" #include "src/common/slurm_jobacct_gather.h" #include "src/common/slurmdbd_defs.h" #include "src/common/xmalloc.h" @@ -175,6 +176,7 @@ static void _poll_data(void) /* Update the data */ slurm_mutex_lock(&task_list_lock); (*(ops.poll_data))(task_list, pgid_plugin, cont_id); + acct_gather_profile_g_job_sample(); slurm_mutex_unlock(&task_list_lock); } @@ -417,6 +419,7 @@ extern int jobacct_gather_add_task(pid_t pid, jobacct_id_t *jobacct_id, (*(ops.add_task))(pid, jobacct_id); + if (poll == 1) _poll_data(); @@ -923,6 +926,34 @@ extern void jobacctinfo_pack(jobacctinfo_t *jobacct, pack32((uint32_t)jobacct->act_cpufreq, buffer); pack32((uint32_t)jobacct->energy.consumed_energy, buffer); + _pack_jobacct_id(&jobacct->max_vsize_id, rpc_version, buffer); + _pack_jobacct_id(&jobacct->max_rss_id, rpc_version, buffer); + _pack_jobacct_id(&jobacct->max_pages_id, rpc_version, buffer); + _pack_jobacct_id(&jobacct->min_cpu_id, rpc_version, buffer); + } else if(rpc_version >= SLURM_2_3_PROTOCOL_VERSION) { + // RBS: replicate 2_3 + if (!jobacct) { + for (i = 0; i < 13; i++) + pack32((uint32_t) 0, buffer); + for (i = 0; i < 4; i++) + _pack_jobacct_id(NULL, rpc_version, buffer); + return; + } + + pack32((uint32_t)jobacct->user_cpu_sec, buffer); + pack32((uint32_t)jobacct->user_cpu_usec, buffer); + 
pack32((uint32_t)jobacct->sys_cpu_sec, buffer); + pack32((uint32_t)jobacct->sys_cpu_usec, buffer); + pack32((uint32_t)jobacct->max_vsize, buffer); + pack32((uint32_t)jobacct->tot_vsize, buffer); + pack32((uint32_t)jobacct->max_rss, buffer); + pack32((uint32_t)jobacct->tot_rss, buffer); + pack32((uint32_t)jobacct->max_pages, buffer); + pack32((uint32_t)jobacct->tot_pages, buffer); + pack32((uint32_t)jobacct->min_cpu, buffer); + pack32((uint32_t)jobacct->tot_cpu, buffer); + pack32((uint32_t)jobacct->act_cpufreq, buffer); + _pack_jobacct_id(&jobacct->max_vsize_id, rpc_version, buffer); _pack_jobacct_id(&jobacct->max_rss_id, rpc_version, buffer); _pack_jobacct_id(&jobacct->max_pages_id, rpc_version, buffer); @@ -1056,6 +1087,40 @@ extern int jobacctinfo_unpack(jobacctinfo_t **jobacct, if (_unpack_jobacct_id(&(*jobacct)->min_cpu_id, rpc_version, buffer) != SLURM_SUCCESS) goto unpack_error; + } else if(rpc_version >= SLURM_2_3_PROTOCOL_VERSION) { + // RBS replicate 1_3 + *jobacct = xmalloc(sizeof(struct jobacctinfo)); + safe_unpack32(&uint32_tmp, buffer); + (*jobacct)->user_cpu_sec = uint32_tmp; + safe_unpack32(&uint32_tmp, buffer); + (*jobacct)->user_cpu_usec = uint32_tmp; + safe_unpack32(&uint32_tmp, buffer); + (*jobacct)->sys_cpu_sec = uint32_tmp; + safe_unpack32(&uint32_tmp, buffer); + (*jobacct)->sys_cpu_usec = uint32_tmp; + safe_unpack32(&(*jobacct)->max_vsize, buffer); + safe_unpack32(&(*jobacct)->tot_vsize, buffer); + safe_unpack32(&(*jobacct)->max_rss, buffer); + safe_unpack32(&(*jobacct)->tot_rss, buffer); + safe_unpack32(&(*jobacct)->max_pages, buffer); + safe_unpack32(&(*jobacct)->tot_pages, buffer); + safe_unpack32(&(*jobacct)->min_cpu, buffer); + safe_unpack32(&(*jobacct)->tot_cpu, buffer); + safe_unpack32(&(*jobacct)->act_cpufreq, buffer); + + if (_unpack_jobacct_id(&(*jobacct)->max_vsize_id, rpc_version, buffer) + != SLURM_SUCCESS) + goto unpack_error; + if (_unpack_jobacct_id(&(*jobacct)->max_rss_id, rpc_version, buffer) + + != SLURM_SUCCESS) + goto 
unpack_error; + if (_unpack_jobacct_id(&(*jobacct)->max_pages_id, rpc_version, buffer) + != SLURM_SUCCESS) + goto unpack_error; + if (_unpack_jobacct_id(&(*jobacct)->min_cpu_id, rpc_version, buffer) + != SLURM_SUCCESS) + goto unpack_error; } else { *jobacct = xmalloc(sizeof(struct jobacctinfo)); safe_unpack32(&uint32_tmp, buffer); diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 4d59a0fcfb7..20711677209 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -1410,6 +1410,25 @@ char *slurm_get_acct_gather_energy_type(void) return acct_gather_energy_type; } +/* slurm_get_profile_accounting_type + * get ProfileAccountingType from slurmctld_conf object + * RET char * - profile_accounting type, MUST be xfreed by caller + */ +char *slurm_get_acct_gather_profile_type(void) +{ + char *acct_gather_profile_type = NULL; + slurm_ctl_conf_t *conf; + + if (slurmdbd_conf) { + } else { + conf = slurm_conf_lock(); + acct_gather_profile_type = + xstrdup(conf->acct_gather_profile_type); + slurm_conf_unlock(); + } + return acct_gather_profile_type; +} + extern uint16_t slurm_get_acct_gather_node_freq(void) { uint16_t freq = 0; diff --git a/src/common/slurm_protocol_api.h b/src/common/slurm_protocol_api.h index 182f54e8603..b9d6f3d85d6 100644 --- a/src/common/slurm_protocol_api.h +++ b/src/common/slurm_protocol_api.h @@ -557,6 +557,12 @@ char *slurm_get_proctrack_type(void); */ char *slurm_get_acct_gather_energy_type(void); +/* slurm_get_acct_gather_profile_type + * get ProfileAccountingType from slurmctld_conf object + * RET char * - acct_gather_profile_type, MUST be xfreed by caller + */ +char *slurm_get_acct_gather_profile_type(void); + /* slurm_get_acct_gather_node_freq * returns the accounting poll frequency for requesting info from a * node from the slurmctld_conf object diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index 39b523b044e..e57db262ef6 100644 --- 
a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -415,6 +415,7 @@ extern void slurm_free_job_desc_msg(job_desc_msg_t * msg) xfree(msg->network); xfree(msg->std_out); xfree(msg->partition); + xfree(msg->profile); xfree(msg->ramdiskimage); xfree(msg->req_nodes); xfree(msg->reservation); @@ -500,6 +501,7 @@ extern void slurm_free_job_info_members(job_info_t * job) xfree(job->node_inx); xfree(job->nodes); xfree(job->partition); + xfree(job->profile); xfree(job->qos); xfree(job->req_node_inx); xfree(job->req_nodes); @@ -728,6 +730,7 @@ extern void slurm_free_launch_tasks_request_msg(launch_tasks_request_msg_t * msg xfree(msg->ofname); xfree(msg->efname); + xfree(msg->profile); xfree(msg->task_prolog); xfree(msg->task_epilog); xfree(msg->complete_nodelist); diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index e071f74dcd4..1178fd5cdca 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -706,6 +706,7 @@ typedef struct launch_tasks_request_msg { uint16_t *io_port; /* array of available client IO listen ports */ /********** END "normal" IO only options **********/ + char *profile; char *task_prolog; char *task_epilog; diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index b3312ca1a5b..9e518af7ab1 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -4134,6 +4134,7 @@ _unpack_reserve_info_members(reserve_info_t * resv, Buf buffer, char *node_inx_str = NULL; uint32_t uint32_tmp; +//RBS: this is another SchedMD change and should be good. 
if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { safe_unpackstr_xmalloc(&resv->accounts, &uint32_tmp, buffer); safe_unpack32(&resv->core_cnt, buffer); @@ -4266,6 +4267,36 @@ _unpack_job_step_info_members(job_step_info_t * step, Buf buffer, buffer, protocol_version)) goto unpack_error; } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { + safe_unpack32(&step->job_id, buffer); + safe_unpack32(&step->step_id, buffer); + safe_unpack16(&step->ckpt_interval, buffer); + safe_unpack32(&step->user_id, buffer); + safe_unpack32(&step->num_cpus, buffer); + safe_unpack32(&step->cpu_freq, buffer); //NLK Don Power okay + safe_unpack32(&step->num_tasks, buffer); + safe_unpack32(&step->time_limit, buffer); + + safe_unpack_time(&step->start_time, buffer); + safe_unpack_time(&step->run_time, buffer); + + safe_unpackstr_xmalloc(&step->partition, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&step->resv_ports, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&step->nodes, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&step->name, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&step->network, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&node_inx_str, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&step->ckpt_dir, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&step->gres, &uint32_tmp, buffer); + if (node_inx_str == NULL) + step->node_inx = bitfmt2int(""); + else { + step->node_inx = bitfmt2int(node_inx_str); + xfree(node_inx_str); + } + if (select_g_select_jobinfo_unpack(&step->select_jobinfo, + buffer, protocol_version)) + goto unpack_error; + } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { safe_unpack32(&step->job_id, buffer); safe_unpack32(&step->step_id, buffer); safe_unpack16(&step->ckpt_interval, buffer); @@ -4404,6 +4435,7 @@ _unpack_job_info_members(job_info_t * job, Buf buffer, job->ntasks_per_node = (uint16_t)NO_VAL; +//<<<<<<< slurm_protocol_pack.c nlk temp remove if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { 
safe_unpack32(&job->array_job_id, buffer); safe_unpack16(&job->array_task_id, buffer); @@ -4441,7 +4473,7 @@ _unpack_job_info_members(job_info_t * job, Buf buffer, safe_unpackstr_xmalloc(&job->gres, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job->batch_host, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job->batch_script, &uint32_tmp, buffer); - + safe_unpackstr_xmalloc(&job->profile, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job->qos, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job->licenses, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job->state_desc, &uint32_tmp, buffer); @@ -6280,6 +6312,8 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer, { /* load the data values */ if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { +//SMD if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { +//RBS: other half of job_desc_msg. I don't think we have to replicate, but we do have to use 2_4 pack16(job_desc_ptr->contiguous, buffer); pack16(job_desc_ptr->task_dist, buffer); pack16(job_desc_ptr->kill_on_node_fail, buffer); @@ -6301,6 +6335,7 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer, packstr(job_desc_ptr->account, buffer); packstr(job_desc_ptr->comment, buffer); pack16(job_desc_ptr->nice, buffer); + packstr(job_desc_ptr->profile, buffer); packstr(job_desc_ptr->qos, buffer); pack8(job_desc_ptr->open_mode, buffer); @@ -6447,6 +6482,7 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer, packstr(job_desc_ptr->account, buffer); packstr(job_desc_ptr->comment, buffer); pack16(job_desc_ptr->nice, buffer); + packstr(job_desc_ptr->profile, buffer); packstr(job_desc_ptr->qos, buffer); pack8(job_desc_ptr->open_mode, buffer); @@ -6595,6 +6631,7 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer, pack16(job_desc_ptr->nice, buffer); packstr(job_desc_ptr->qos, buffer); +///<<<<<<< slurm_protocol_pack.c nlk temp.... 
why is this here pack8(job_desc_ptr->open_mode, buffer); pack8(job_desc_ptr->overcommit, buffer); pack16(job_desc_ptr->acctg_freq, buffer); @@ -6769,6 +6806,8 @@ _unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer, safe_unpackstr_xmalloc(&job_desc_ptr->comment, &uint32_tmp, buffer); safe_unpack16(&job_desc_ptr->nice, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->profile, &uint32_tmp, + buffer); safe_unpackstr_xmalloc(&job_desc_ptr->qos, &uint32_tmp, buffer); @@ -6904,6 +6943,8 @@ _unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer, safe_unpackstr_xmalloc(&job_desc_ptr->comment, &uint32_tmp, buffer); safe_unpack16(&job_desc_ptr->nice, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->profile, &uint32_tmp, + buffer); safe_unpackstr_xmalloc(&job_desc_ptr->qos, &uint32_tmp, buffer); @@ -7437,7 +7478,82 @@ _pack_launch_tasks_request_msg(launch_tasks_request_msg_t * msg, Buf buffer, int i = 0; xassert(msg != NULL); - if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + pack32(msg->job_id, buffer); + pack32(msg->job_step_id, buffer); + pack32(msg->ntasks, buffer); + pack32(msg->uid, buffer); + pack32(msg->gid, buffer); + pack32(msg->job_mem_lim, buffer); + pack32(msg->step_mem_lim, buffer); + + pack32(msg->nnodes, buffer); + pack16(msg->cpus_per_task, buffer); + pack16(msg->task_dist, buffer); + + slurm_cred_pack(msg->cred, buffer); + for (i = 0; i < msg->nnodes; i++) { + pack16(msg->tasks_to_launch[i], buffer); + pack16(msg->cpus_allocated[i], buffer); + pack32_array(msg->global_task_ids[i], + (uint32_t) msg->tasks_to_launch[i], + buffer); + } + pack16(msg->num_resp_port, buffer); + for (i = 0; i < msg->num_resp_port; i++) + pack16(msg->resp_port[i], buffer); + slurm_pack_slurm_addr(&msg->orig_addr, buffer); + packstr_array(msg->env, msg->envc, buffer); + packstr_array(msg->spank_job_env, msg->spank_job_env_size, + buffer); + packstr(msg->cwd, buffer); + 
pack16(msg->cpu_bind_type, buffer);
+ packstr(msg->cpu_bind, buffer);
+ pack16(msg->mem_bind_type, buffer);
+ packstr(msg->mem_bind, buffer);
+ packstr_array(msg->argv, msg->argc, buffer);
+ pack16(msg->task_flags, buffer);
+ pack16(msg->multi_prog, buffer);
+ pack16(msg->user_managed_io, buffer);
+ if (msg->user_managed_io == 0) {
+ packstr(msg->ofname, buffer);
+ packstr(msg->efname, buffer);
+ packstr(msg->ifname, buffer);
+ pack8(msg->buffered_stdio, buffer);
+ pack8(msg->labelio, buffer);
+ pack16(msg->num_io_port, buffer);
+ for (i = 0; i < msg->num_io_port; i++)
+ pack16(msg->io_port[i], buffer);
+ }
+ packstr(msg->profile, buffer);
+ packstr(msg->task_prolog, buffer);
+ packstr(msg->task_epilog, buffer);
+ pack16(msg->slurmd_debug, buffer);
+ switch_pack_jobinfo(msg->switch_job, buffer);
+ job_options_pack(msg->options, buffer);
+ packstr(msg->alias_list, buffer);
+ packstr(msg->complete_nodelist, buffer);
+
+ pack8(msg->open_mode, buffer);
+ pack8(msg->pty, buffer);
+ pack16(msg->acctg_freq, buffer);
+ pack32(msg->cpu_freq, buffer);
+ packstr(msg->ckpt_dir, buffer);
+ packstr(msg->restart_dir, buffer);
+ if (!(cluster_flags & CLUSTER_FLAG_BG)) {
+ /* If on a Blue Gene cluster do not send this to the
+ * slurmstepd, it will overwrite the environment that
+ * is already set up correctly for both the job and the
+ * step. The slurmstepd treats this select_jobinfo as if it
+ * were for the job instead of for the step.
+ */ + select_g_select_jobinfo_pack(msg->select_jobinfo, + buffer, + protocol_version); + } + } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + //SMD if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + //RBS: I replicated as Don's stuff is in our 2_4, but we will be addint to 2_5 pack32(msg->job_id, buffer); pack32(msg->job_step_id, buffer); pack32(msg->ntasks, buffer); @@ -7600,7 +7716,106 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** msg = xmalloc(sizeof(launch_tasks_request_msg_t)); *msg_ptr = msg; - if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { + if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { + safe_unpack32(&msg->job_id, buffer); + safe_unpack32(&msg->job_step_id, buffer); + safe_unpack32(&msg->ntasks, buffer); + safe_unpack32(&msg->uid, buffer); + safe_unpack32(&msg->gid, buffer); + safe_unpack32(&msg->job_mem_lim, buffer); + safe_unpack32(&msg->step_mem_lim, buffer); + + safe_unpack32(&msg->nnodes, buffer); + safe_unpack16(&msg->cpus_per_task, buffer); + safe_unpack16(&msg->task_dist, buffer); + + if (!(msg->cred = slurm_cred_unpack(buffer, protocol_version))) + goto unpack_error; + msg->tasks_to_launch = xmalloc(sizeof(uint16_t) * msg->nnodes); + msg->cpus_allocated = xmalloc(sizeof(uint16_t) * msg->nnodes); + msg->global_task_ids = xmalloc(sizeof(uint32_t *) * + msg->nnodes); + for (i = 0; i < msg->nnodes; i++) { + safe_unpack16(&msg->tasks_to_launch[i], buffer); + safe_unpack16(&msg->cpus_allocated[i], buffer); + safe_unpack32_array(&msg->global_task_ids[i], + &uint32_tmp, + buffer); + if (msg->tasks_to_launch[i] != (uint16_t) uint32_tmp) + goto unpack_error; + } + safe_unpack16(&msg->num_resp_port, buffer); + if (msg->num_resp_port > 0) { + msg->resp_port = xmalloc(sizeof(uint16_t) * + msg->num_resp_port); + for (i = 0; i < msg->num_resp_port; i++) + safe_unpack16(&msg->resp_port[i], buffer); + } + slurm_unpack_slurm_addr_no_alloc(&msg->orig_addr, buffer); + safe_unpackstr_array(&msg->env, &msg->envc, 
buffer); + safe_unpackstr_array(&msg->spank_job_env, + &msg->spank_job_env_size, buffer); + safe_unpackstr_xmalloc(&msg->cwd, &uint32_tmp, buffer); + safe_unpack16(&msg->cpu_bind_type, buffer); + safe_unpackstr_xmalloc(&msg->cpu_bind, &uint32_tmp, buffer); + safe_unpack16(&msg->mem_bind_type, buffer); + safe_unpackstr_xmalloc(&msg->mem_bind, &uint32_tmp, buffer); + safe_unpackstr_array(&msg->argv, &msg->argc, buffer); + safe_unpack16(&msg->task_flags, buffer); + safe_unpack16(&msg->multi_prog, buffer); + safe_unpack16(&msg->user_managed_io, buffer); + if (msg->user_managed_io == 0) { + safe_unpackstr_xmalloc(&msg->ofname, &uint32_tmp, + buffer); + safe_unpackstr_xmalloc(&msg->efname, &uint32_tmp, + buffer); + safe_unpackstr_xmalloc(&msg->ifname, &uint32_tmp, + buffer); + safe_unpack8(&msg->buffered_stdio, buffer); + safe_unpack8(&msg->labelio, buffer); + safe_unpack16(&msg->num_io_port, buffer); + if (msg->num_io_port > 0) { + msg->io_port = xmalloc(sizeof(uint16_t) * + msg->num_io_port); + for (i = 0; i < msg->num_io_port; i++) + safe_unpack16(&msg->io_port[i], + buffer); + } + } + safe_unpackstr_xmalloc(&msg->profile, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&msg->task_prolog, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&msg->task_epilog, &uint32_tmp, buffer); + safe_unpack16(&msg->slurmd_debug, buffer); + + switch_alloc_jobinfo(&msg->switch_job); + if (switch_unpack_jobinfo(msg->switch_job, buffer) < 0) { + error("switch_unpack_jobinfo: %m"); + switch_free_jobinfo(msg->switch_job); + goto unpack_error; + } + msg->options = job_options_create(); + if (job_options_unpack(msg->options, buffer) < 0) { + error("Unable to unpack extra job options: %m"); + goto unpack_error; + } + safe_unpackstr_xmalloc(&msg->alias_list, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&msg->complete_nodelist, &uint32_tmp, + buffer); + + safe_unpack8(&msg->open_mode, buffer); + safe_unpack8(&msg->pty, buffer); + safe_unpack16(&msg->acctg_freq, buffer); + 
safe_unpack32(&msg->cpu_freq, buffer); + safe_unpackstr_xmalloc(&msg->ckpt_dir, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&msg->restart_dir, &uint32_tmp, buffer); + if (!(cluster_flags & CLUSTER_FLAG_BG)) { + select_g_select_jobinfo_unpack(&msg->select_jobinfo, + buffer, + protocol_version); + } +//SMD if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { +//RBS: another replication with Don's stuff + } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { safe_unpack32(&msg->job_id, buffer); safe_unpack32(&msg->job_step_id, buffer); safe_unpack32(&msg->ntasks, buffer); @@ -7784,6 +7999,7 @@ _unpack_launch_tasks_request_msg(launch_tasks_request_msg_t ** safe_unpack8(&msg->open_mode, buffer); safe_unpack8(&msg->pty, buffer); safe_unpack16(&msg->acctg_freq, buffer); + safe_unpack32(&msg->cpu_freq, buffer); safe_unpackstr_xmalloc(&msg->ckpt_dir, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&msg->restart_dir, &uint32_tmp, buffer); if (!(cluster_flags & CLUSTER_FLAG_BG)) { diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am index 5da26695bd7..b05834528c0 100644 --- a/src/plugins/Makefile.am +++ b/src/plugins/Makefile.am @@ -1,6 +1,7 @@ SUBDIRS = \ accounting_storage \ acct_gather_energy \ + acct_gather_profile \ auth \ checkpoint \ crypto \ diff --git a/src/plugins/acct_gather_profile/Makefile.am b/src/plugins/acct_gather_profile/Makefile.am new file mode 100644 index 00000000000..f00c7aaeda9 --- /dev/null +++ b/src/plugins/acct_gather_profile/Makefile.am @@ -0,0 +1,3 @@ +# Makefile for accounting gather profile plugins + +SUBDIRS = io_energy sprfmrgh5 none diff --git a/src/plugins/acct_gather_profile/hdf5/Makefile.am b/src/plugins/acct_gather_profile/hdf5/Makefile.am new file mode 100644 index 00000000000..3e620453dfd --- /dev/null +++ b/src/plugins/acct_gather_profile/hdf5/Makefile.am @@ -0,0 +1,16 @@ +# Makefile for acct_gather_profile/io_energy plugin + +AUTOMAKE_OPTIONS = foreign + +PLUGIN_FLAGS = -module -avoid-version --export-dynamic + 
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common -I$(top_srcdir)/src/plugins/acct_gather_profile/common + +pkglib_LTLIBRARIES = acct_gather_profile_io_energy.la + +# cpu/core energy accounting plugin. +acct_gather_profile_io_energy_la_SOURCES = io_energy.c io_energy.h \ + ../common/profile_hdf5.c ../common/profile_hdf5.h + +acct_gather_profile_io_energy_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) \ + $(HDF5_LDFLAGS) $(HDF5_LIBS) diff --git a/src/plugins/acct_gather_profile/hdf5/acct_gather_profile_hdf5.c b/src/plugins/acct_gather_profile/hdf5/acct_gather_profile_hdf5.c new file mode 100644 index 00000000000..c30b8fed4f8 --- /dev/null +++ b/src/plugins/acct_gather_profile/hdf5/acct_gather_profile_hdf5.c @@ -0,0 +1,496 @@ +/*****************************************************************************\ + * io_energy.c - slurm energy accounting plugin for io and energy using hdf5. + ***************************************************************************** + * Copyright (C) 2013 Bull S. A. S. + * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. + * + * Written by Rod Schultz <rod.schultz@bull.com> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. 
If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * This file is patterned after jobcomp_linux.c, written by Morris Jette and + * Copyright (C) 2002 The Regents of the University of California. +\*****************************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/un.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <inttypes.h> +#include <unistd.h> +#include <math.h> + +#include "src/common/fd.h" +#include "src/common/slurm_xlator.h" +#include "src/common/slurm_acct_gather_profile.h" +#include "src/common/slurm_protocol_api.h" +#include "src/common/slurm_protocol_defs.h" +#include "src/plugins/acct_gather_profile/common/profile_hdf5.h" +#include "src/slurmd/common/proctrack.h" + +#include "io_energy.h" + +/* + * These variables are required by the generic plugin interface. If they + * are not found in the plugin, the plugin loader will ignore it. + * + * plugin_name - a string giving a human-readable description of the + * plugin. There is no maximum length, but the symbol must refer to + * a valid string. 
+ * + * plugin_type - a string suggesting the type of the plugin or its + * applicability to a particular form of data or method of data handling. + * If the low-level plugin API is used, the contents of this string are + * unimportant and may be anything. SLURM uses the higher-level plugin + * interface which requires this string to be of the form + * + * <application>/<method> + * + * where <application> is a description of the intended application of + * the plugin (e.g., "jobacct" for SLURM job completion logging) and <method> + * is a description of how this plugin satisfies that application. SLURM will + * only load job completion logging plugins if the plugin_type string has a + * prefix of "jobacct/". + * + * plugin_version - an unsigned 32-bit integer giving the version number + * of the plugin. If major and minor revisions are desired, the major + * version number may be multiplied by a suitable magnitude constant such + * as 100 or 1000. Various SLURM versions will likely require a certain + * minimum version for their plugins as the job accounting API + * matures. + */ +const char plugin_name[] = "AcctGatherProfile io_energy plugin"; +const char plugin_type[] = "acct_gather_profile/io_energy"; +const uint32_t plugin_version = 100; + +static uint32_t debug_flags = 0; + + +// Global HDF5 Variables +// The HDF5 file and base objects will remain open for the duration of the +// step. This avoids reconstruction on every acct_gather_sample and +// flushing the buffers on every put. +// Static variables ok as add function are inside a lock. 
+static uint32_t jobid; +static uint32_t stepid; +static uint32_t nodetasks; +static uint32_t sampleNo = 0; +static char* stepd_nodename = NULL; +static char* profileFileName; +static hid_t file_id = -1; // File +static hid_t gidNode = -1; +static hid_t gidTasks = -1; +static hid_t gidSamples = -1; +static hid_t gidTotals = -1; +static char groupNode[MAX_GROUP_NAME+1]; +static int nOpts = 0; +static char** profileOpts = NULL; +static slurm_acct_gather_conf_t acct_gather_conf; + +/* + * init() is called when the plugin is loaded, before any other functions + * are called. Put global initialization here. + */ +extern int init(void) +{ + debug_flags = slurm_get_debug_flags(); + verbose("%s loaded", plugin_name); + return SLURM_SUCCESS; +} + +extern int fini(void) +{ + xfree(profileFileName); + return SLURM_SUCCESS; +} + +extern void reset_slurm_profile_conf() +{ + xfree(acct_gather_conf.profile_dir); + xfree(acct_gather_conf.profile_DefaultProfile); +} + +extern void acct_gather_profile_p_conf_set(s_p_hashtbl_t *tbl) +{ + reset_slurm_profile_conf(); + if (!tbl) + return; + + if (!s_p_get_string(&acct_gather_conf.profile_dir, + "ProfileDir", tbl)) { + acct_gather_conf.profile_dir = NULL; + } + if (!s_p_get_string(&acct_gather_conf.profile_DefaultProfile, + "ProfileDefaultProfile", tbl)) { + acct_gather_conf.profile_DefaultProfile = + xstrdup(PROFILE_DEFAULT_PROFILE); + } + + ValidSeriesList(acct_gather_conf.profile_DefaultProfile); +} + +extern void* acct_gather_profile_p_conf_get() +{ + return &acct_gather_conf; +} + +extern void acct_gather_profile_p_conf_options(s_p_options_t **full_options, + int *full_options_cnt) +{ + s_p_options_t options[] = { + {"ProfileDir", S_P_STRING}, + {"ProfileDefaultProfile", S_P_STRING}, + {NULL} }; + + transfer_s_p_options(full_options, options, full_options_cnt); + return; +} + +extern int acct_gather_profile_p_controller_start() +{ +#ifdef HAVE_HDF5 + int rc; + struct stat st; + const char* profdir; + char 
tmpdir[MAX_PROFILE_PATH+1]; + if (acct_gather_conf.profile_dir == NULL) { + fatal("PROFILE: ProfileDir is required in acct_gather.conf" + "with AcctGatherPluginType=io_energy"); + } + profdir = xstrdup(acct_gather_conf.profile_dir); + /* + * If profile director does not exist, try to create it. + * Otherwise, ensure path is a directory as expected, and that + * we have permission to write to it. + * also make sure the subdirectory tmp exists. + */ + + if (((rc = stat(profdir, &st)) < 0) && (errno == ENOENT)) { + if (mkdir(profdir, 0777) < 0) + fatal("mkdir(%s): %m", profdir); + } + else if (rc < 0) + fatal("Unable to stat acct_gather_profile_dir: %s: %m", + profdir); + else if (!S_ISDIR(st.st_mode)) + fatal("acct_gather_profile_dir: %s: Not a directory!",profdir); + else if (access(profdir, R_OK|W_OK|X_OK) < 0) + fatal("Incorrect permissions on acct_gather_profile_dir: %s", + profdir); + chmod(profdir,0777); + if ((strlen(profdir)+4) > MAX_PROFILE_PATH) + fatal("Length of profile director is too long"); + sprintf(tmpdir,"%s/tmp",profdir); + if (((rc = stat(tmpdir, &st)) < 0) && (errno == ENOENT)) { + if (mkdir(tmpdir, 0777) < 0) + fatal("mkdir(%s): %m", tmpdir); + chmod(tmpdir,0777); + } + xfree(profdir); +#endif + return SLURM_SUCCESS; +} + +extern int acct_gather_profile_p_node_step_start(slurmd_job_t* job) +{ + int rc = SLURM_SUCCESS; +#ifdef HAVE_HDF5 + time_t startTime; + char* slurmDataRoot; + char* optString; + jobid = job->jobid; + stepid = job->stepid; + if (stepid == NO_VAL) + return rc; + + if (job->profile) + optString = job->profile; + else + optString = acct_gather_conf.profile_DefaultProfile; + + profileOpts = GetStringList(optString, &nOpts); + + if (strcasecmp(profileOpts[0],"none") == 0) + return rc; + + if (acct_gather_conf.profile_dir == NULL) { + fatal("PROFILE: ProfileDir is required in acct_gather.conf" + "with AcctGatherPluginType=io_energy"); + } + slurmDataRoot = xstrdup(acct_gather_conf.profile_dir); + + stepd_nodename = 
xstrdup(job->node_name); + nodetasks = job->node_tasks; + + profileFileName = make_node_step_profile_path(slurmDataRoot, + job->node_name, jobid, stepid); + xfree(slurmDataRoot); + if (profileFileName == NULL) { + info("PROFILE: failed create profileFileName job=%d step=%d", + jobid,stepid); + } + if (debug_flags & DEBUG_FLAG_PROFILE) + info("PROFILE: node_step_start, opt=%s file=%s", + optString, profileFileName); + + // Create a new file using the default properties. + ProfileInit(); + file_id = H5Fcreate(profileFileName, H5F_ACC_TRUNC, H5P_DEFAULT, + H5P_DEFAULT); + if (file_id < 1) { + info("PROFILE: Failed to create Node group"); + return SLURM_FAILURE; + } + + sprintf(groupNode,"/%s~%s",GRP_NODE,stepd_nodename); + gidNode = H5Gcreate(file_id, groupNode, H5P_DEFAULT, + H5P_DEFAULT, H5P_DEFAULT); + if (gidNode < 1) { + H5Fclose(file_id); + file_id = -1; + info("PROFILE: Failed to create Node group"); + return SLURM_FAILURE; + } + put_string_attribute(gidNode, ATTR_NODENAME, stepd_nodename); + put_int_attribute(gidNode, ATTR_NTASKS, nodetasks); + startTime = time(NULL); + put_string_attribute(gidNode,ATTR_STARTTIME,ctime(&startTime)); + +#endif + return rc; +} + +extern int acct_gather_profile_p_node_step_end(slurmd_job_t* job) +{ + int rc = SLURM_SUCCESS; + // No check for --profile as we always want to close the HDF5 file + // if it has been opened. 
+#ifdef HAVE_HDF5 + if (job->stepid == NO_VAL) { + return rc; + } + + if (debug_flags & DEBUG_FLAG_PROFILE) + info("PROFILE: node_step_end (shutdown)"); + + xfree(stepd_nodename); + if (gidTotals > 0) + H5Gclose(gidTotals); + if (gidSamples > 0) + H5Gclose(gidSamples); + if (gidTasks > 0) + H5Gclose(gidTasks); + if (gidNode > 0) + H5Gclose(gidNode); + if (file_id > 0) + H5Fclose(file_id); + ProfileFinish(); + file_id = -1; +#endif + return rc; +} + +extern int acct_gather_profile_p_task_start(slurmd_job_t* job, uint32_t taskid) +{ + int rc = SLURM_SUCCESS; + if (debug_flags & DEBUG_FLAG_PROFILE) + info("PROFILE: task_start"); + + return rc; +} + +extern int acct_gather_profile_p_task_end(slurmd_job_t* job, pid_t taskpid) +{ + hid_t gidTask; + char groupTask[MAX_GROUP_NAME+1]; + uint64_t taskId; + int rc = SLURM_SUCCESS; + if (!DoSeries(NULL, profileOpts, nOpts)) + return rc; + + if (get_taskid_from_pid(taskpid, &taskId) != SLURM_SUCCESS) + return SLURM_FAILURE; + if (file_id == -1) { + info("PROFILE: add_task_data, HDF5 file is not open"); + return SLURM_FAILURE; + } + if (gidTasks < 0) { + gidTasks = make_group(gidNode, GRP_TASKS); + if (gidTasks < 1) { + info("PROFILE: Failed to create Tasks group"); + return SLURM_FAILURE; + } + } + sprintf(groupTask,"%s~%d", GRP_TASK,taskId); + gidTask = get_group(gidTasks, groupTask); + if (gidTask == -1) { + gidTask = make_group(gidTasks, groupTask); + if (gidTask < 0) { + info("Failed to open tasks %s",groupTask); + return SLURM_FAILURE; + } + put_int_attribute(gidTask,ATTR_TASKID,taskId); + } + put_int_attribute(gidTask,ATTR_CPUPERTASK,job->cpus_per_task); + + if (debug_flags & DEBUG_FLAG_PROFILE) + info("PROFILE: task_end"); + return rc; +} + +extern int acct_gather_profile_p_job_sample(void) +{ + int rc = SLURM_SUCCESS; + if (!DoSeries(NULL, profileOpts, nOpts)) + return rc; +#ifdef HAVE_HDF5 +#endif + return rc; +} + +extern int acct_gather_profile_p_add_node_data(slurmd_job_t* job, char* group, + char* type, void* data) 
+{ + if (!DoSeries(group, profileOpts, nOpts)) + return SLURM_SUCCESS; + if (debug_flags & DEBUG_FLAG_PROFILE) + info("PROFILE: add_node_data Group-%s Type=%s", group, type); + +#ifdef HAVE_HDF5 + if (file_id == -1) { + info("PROFILE: add_node_data, HDF5 file is not open"); + return SLURM_FAILURE; + } + if (gidTotals < 0) { + gidTotals = make_group(gidNode, GRP_TOTALS); + if (gidTotals < 1) { + info("PROFILE: failed to create Totals group"); + return SLURM_FAILURE; + } + } + put_hdf5_data(gidTotals, type, SUBDATA_NODE, group, data, 1); +#endif + + return SLURM_SUCCESS; +} + +extern int acct_gather_profile_p_add_sample_data(char* group, char* type, + void* data) +{ + hid_t gSampleGrp; + char groupSample[MAX_GROUP_NAME+1]; + + if (!DoSeries(group, profileOpts, nOpts)) + return SLURM_SUCCESS; + if (debug_flags & DEBUG_FLAG_PROFILE) + info("PROFILE: add_sample_data Group-%s Type=%s", group, type); + sampleNo++; +#ifdef HAVE_HDF5 + if (file_id == -1) { + if (debug_flags & DEBUG_FLAG_PROFILE) { + // This can happen from samples from the gather threads + // before the step actually starts. 
+ info("PROFILE: add_sample_data, HDF5 file not open"); + } + return SLURM_FAILURE; + } + if (gidSamples < 0) { + gidSamples = make_group(gidNode, GRP_SAMPLES); + if (gidSamples < 1) { + info("PROFILE: failed to create TimeSeries group"); + return SLURM_FAILURE; + } + } + gSampleGrp = get_group(gidSamples, group); + if (gSampleGrp < 0) { + gSampleGrp = make_group(gidSamples, group); + if (gSampleGrp < 0) { + info("PROFILE: failed to open TimeSeries %s", group); + return SLURM_FAILURE; + } + put_string_attribute(gSampleGrp, ATTR_DATATYPE, type); + } + sprintf(groupSample,"%s~%10.10d",group,sampleNo); + put_hdf5_data(gSampleGrp, type, SUBDATA_SAMPLE, groupSample, data, 1); + H5Gclose(gSampleGrp); +#endif + return SLURM_SUCCESS; +} + +extern int acct_gather_profile_p_add_task_data(slurmd_job_t* job, + uint32_t taskid, char* group, char* type, void* data) +{ + hid_t gidTask, gidTotals; + char groupTask[MAX_GROUP_NAME+1]; + + if (!DoSeries(group, profileOpts, nOpts)) + return SLURM_SUCCESS; + if (debug_flags & DEBUG_FLAG_PROFILE) + info("PROFILE: add_task_data Group-%s Type=%s", group, type); +#ifdef HAVE_HDF5 + if (file_id == -1) { + info("PROFILE: add_task_data, HDF5 file is not open"); + return SLURM_FAILURE; + } + if (gidTasks < 0) { + gidTasks = make_group(gidNode, GRP_TASKS); + if (gidTasks < 1) { + info("PROFILE: Failed to create Tasks group"); + return SLURM_FAILURE; + } + } + + sprintf(groupTask,"%s~%d", GRP_TASK,taskid); + gidTask = get_group(gidTasks, groupTask); + if (gidTask == -1) { + gidTask = make_group(gidTasks, groupTask); + if (gidTask < 0) { + info("Failed to open tasks %s",groupTask); + return SLURM_FAILURE; + } + put_int_attribute(gidTask,ATTR_TASKID,taskid); + put_int_attribute(gidTask,ATTR_CPUPERTASK,taskid); + gidTotals = make_group(gidTask, GRP_TOTALS); + if (gidTotals < 0) { + info("Failed to open %s/%s",groupTask,GRP_TOTALS); + return SLURM_FAILURE; + } + } + + put_hdf5_data(gidTotals, SUBDATA_TOTAL, type, group, data, 1); + + 
H5Gclose(gidTask); +#endif + return SLURM_SUCCESS; +} + diff --git a/src/plugins/acct_gather_profile/hdf5/acct_gather_profile_hdf5.h b/src/plugins/acct_gather_profile/hdf5/acct_gather_profile_hdf5.h new file mode 100644 index 00000000000..9dfed9b3f9d --- /dev/null +++ b/src/plugins/acct_gather_profile/hdf5/acct_gather_profile_hdf5.h @@ -0,0 +1,67 @@ +/*****************************************************************************\ + * io_energy.h - slurm energy accounting plugin for io and energy using hdf5. + ***************************************************************************** + * Copyright (C) 2013 Bull S. A. S. + * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. + * + * Written by Rod Schultz <rod.schultz@bull.com> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. 
+ * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#ifndef _GATHER_PROFILE_IO_ENERGY_H_ +#define _GATHER_PROFILE_IO_ENERGY_H_ + +#include "src/common/slurm_acct_gather_profile.h" + +#define PROFILE_DEFAULT_PROFILE "none" + +// See /common/slurm_acct_gather.h for details on function signatures +extern int acct_gather_profile_p_controller_start(); +extern int acct_gather_profile_p_node_step_start(slurmd_job_t* job); +extern int acct_gather_profile_p_node_step_end(slurmd_job_t* job); +extern int acct_gather_profile_p_task_start(slurmd_job_t* job,uint32_t taskno); +extern int acct_gather_profile_p_task_end(slurmd_job_t* job, pid_t taskpid); +extern int acct_gather_profile_p_job_sample(); +extern int acct_gather_profile_p_add_node_data(slurmd_job_t* job, char* group, + char* type, void* data); +extern int acct_gather_profile_p_add_sample_data(char* group, char* type, + void* data); +extern int acct_gather_profile_p_add_task_data(slurmd_job_t* job, + uint32_t taskid, char* group, char* type, void* data); + +extern int init ( void ); +extern int fini ( void ); +extern void acct_gather_profile_p_conf_options(s_p_options_t **full_options, + int *full_options_cnt); +extern void acct_gather_profile_p_conf_set(s_p_hashtbl_t *tbl); +extern void* acct_gather_profile_p_conf_get(); + +#endif diff --git a/src/plugins/acct_gather_profile/hdf5/hdf5_api.c b/src/plugins/acct_gather_profile/hdf5/hdf5_api.c new file mode 100644 index 00000000000..fff702d5fb2 --- /dev/null +++ 
b/src/plugins/acct_gather_profile/hdf5/hdf5_api.c @@ -0,0 +1,1744 @@ +/****************************************************************************\ + * profile_hdf5.c + ***************************************************************************** + * Copyright (C) 2013 Bull S. A. S. + * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. + * + * Written by Rod Schultz <rod.schultz@bull.com> + * + * Provide support for acct_gather_profile plugins based on HDF5 files. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. 
+ * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\****************************************************************************/ + +#include "src/plugins/acct_gather_profile/common/profile_hdf5.h" +#include "src/common/macros.h" +#include "src/common/xassert.h" +#include "src/common/xstring.h" + + +// Static variables ok as add function are inside a lock. +char* availSeries[] = {GRP_ENERGY, GRP_LUSTRE, GRP_NETWORK, GRP_TASK}; +int numAvailSeries = 4; // Has to match # in availSeries +static char profileFilePath[MAX_PROFILE_PATH+1]; +static char dsetName[MAX_DATASET_NAME+1]; +static time_t seriesStart; +hid_t typTOD; +static int i; // General index used in some macros. +static int moffset; // General variable used by insert macros +#ifdef HAVE_HDF5 +/* + * Macro to insert a date string type into a compound memory type + * + * Parameters + * p parent (group) memory type + * label description of item + * type profile struct type + * item data item in type + */ +#define MemAddDateTime(p, label, type, item) \ + if(H5Tinsert(p, label, HOFFSET(type, item), typTOD) < 0) { \ + debug3("PROFILE: failed insert into memory datatype"); \ + H5Tclose(p); \ + return -1; \ + } +/* + * Macro to insert a date string type into a compound file type + * + * Parameters + * p parent (group) file type + * label description of item + * offset offset into record + */ +#define FileAddDateTime(p, label, offset) \ + if(H5Tinsert(p, label, offset, typTOD) < 0) { \ + debug3("PROFILE: failed insert into file datatype"); \ + H5Tclose(p); \ + return -1; \ + } + +/* + * Macro to insert an uint64 into a compound memory type + * + * Parameters + * p parent (group) memory type + * label description of item + * type profile struct type + * item data item in type + */ +#define MemAddUint64(p, label, type, item) \ + if(H5Tinsert(p, label, 
HOFFSET(type, item), H5T_NATIVE_UINT64) < 0) { \ + debug3("PROFILE: failed insert64 into memory datatype"); \ + H5Tclose(p); \ + return -1; \ + } +/* + * Macro to insert a uint64 into a compound file type + * + * Parameters + * p parent (group) file type + * label description of item + */ +#define FileAddUint64(p, label) \ + if(H5Tinsert(p, label, moffset, H5T_NATIVE_UINT64) < 0) { \ + debug3("PROFILE: failed insert64 into file datatype"); \ + H5Tclose(p); \ + return -1; \ + } \ + moffset += 8; + +/* + * Macro to insert a double into a compound memory type + * + * Parameters + * p parent (group) memory type + * label description of item + * type profile struct type + * item data item in type + */ +#define MemAddDbl(p, label, type, item) \ + if(H5Tinsert(p, label, HOFFSET(type, item), H5T_NATIVE_DOUBLE) < 0) { \ + debug3("PROFILE: failed insertdbl into memory datatype"); \ + H5Tclose(p); \ + return -1; \ + } +/* + * Macro to insert a double into a compound file type + * + * Parameters + * p parent (group) file type + * label description of item + */ +#define FileAddDbl(p, label) \ + if(H5Tinsert(p, label, moffset, H5T_NATIVE_DOUBLE) < 0) { \ + debug3("PROFILE: failed insertdbl into file datatype"); \ + H5Tclose(p); \ + return -1; \ + } \ + moffset += 8; +#else +#define MemAddDateTime(p, label, type, item) \ + debug3("PROFILE: No HDF5 Library"); +#define FileAddDateTime(p, label, offset) \ + debug3("PROFILE: No HDF5 Library"); +#define MemAddUint64(p, label, type, item) \ + debug3("PROFILE: No HDF5 Library"); +#define FileAddUint64(p, label, offset) \ + debug3("PROFILE: No HDF5 Library"); +#define MemAddDbl(p, label, type, item) \ + debug3("PROFILE: No HDF5 Library"); +#define FileAddDbl(p, label, offset) \ + debug3("PROFILE: No HDF5 Library"); +#endif + +/* + * Macro to increment a sample in a difference series + * -- Difference means each sample represents counts for only that interval + * (assumes consistent naming convention) + * + * + * Parameters + * tot total 
pointer
+ *	smp	sample pointer
+ *	var	variable name in sample
+ *	count	number of items in series
+ */
+#define IncrDifSample(tot, smp, var, count) \
+	for (i=0; i<count; i++) { \
+		if (i == 0) { \
+			tot->var.min = smp[i].var; \
+		} \
+		tot->var.total += smp[i].var; \
+		tot->var.min = MIN(smp[i].var,tot->var.min); \
+		tot->var.max = MAX(smp[i].var,tot->var.max); \
+	} \
+	tot->var.ave = tot->var.total / count;
+
+/*
+ * Macro to increment a sample in a running total
+ * -- Running total means first sample is initial conditions
+ *    (assumes consistent naming convention)
+ *
+ *
+ * Parameters
+ *	tot	total pointer
+ *	smp	sample pointer
+ *	var	variable name in sample
+ *	count	number of items in series
+ */
+#define IncrRTSample(tot, smp, var, count) \
+	for (i=1; i<count; i++) { \
+		if (i == 1) { \
+			tot->var.min = smp[i].var; \
+		} \
+		tot->var.total += smp[i].var; \
+		tot->var.min = MIN(smp[i].var,tot->var.min); \
+		tot->var.max = MAX(smp[i].var,tot->var.max); \
+	} \
+	tot->var.ave = tot->var.total / count;
+
+/* Macro to put an int min,ave,max,total for a variable to extract file
+ *
+ * Parameters
+ *	fOt	file descriptor
+ *	var	variable name
+ *	prf	prefix for series (usually ','
+ */
+#define PutUintSum(fOt, var, prfx) \
+	fprintf(fOt,"%s%ld,%ld,%ld,%ld",prfx, \
+		var.min,var.ave,var.max,var.total);
+/* Macro to put an int min,ave,max,total for a variable to extract file
+ *
+ * Parameters
+ *	fOt	file descriptor
+ *	var	variable name
+ *	prf	prefix for series (usually ','
+ */
+#define PutDblSum(fOt, var, prfx) \
+	fprintf(fOt,"%s%.3f,%.3f,%.3f,%.3f",prfx, \
+		var.min,var.ave,var.max,var.total);
+
+
+/* ============================================================================
+ * Common support functions
+ ===========================================================================*/
+
+ profile_hdf5_ops_t* profile_factory(char* type) {
+	if (strcmp(type, PROFILE_ENERGY_DATA) == 0) {
+		return energy_profile_factory();
+	} else if (strcmp(type,
PROFILE_IO_DATA) == 0) { + return io_profile_factory(); + } else if (strcmp(type, PROFILE_NETWORK_DATA) == 0) { + return network_profile_factory(); + } else if (strcmp(type, PROFILE_TASK_DATA) == 0) { + return task_profile_factory(); + } else { + error("PROFILE: PROFILE: %s is an invalid data type", type); + return NULL; + } +} + +void ProfileInit() { + typTOD = H5Tcopy (H5T_C_S1); + H5Tset_size (typTOD, TOD_LEN); /* create string of length TOD_LEN */ + return; +} + +void ProfileFinish() { + H5Tclose(typTOD); + H5close(); // make sure all H5 Objects are closed + return; +} + + +bool DoSeries(char* series, char** seriesList, int numSeries) { + int ix; + if (numSeries == 0) + return false; + if (strcasecmp(seriesList[0],"none") == 0) + return false; + if (series == NULL) + return true; + if (strcasecmp(seriesList[0], "all") == 0) + return true; + for (ix=0; ix< numSeries; ix++) { + if (strcasecmp(series, seriesList[ix]) == 0) + return true; + } + return false; +} + +void ValidSeriesList(char* listStr) { + char** list = NULL; + int ix, iy, listLen = 0; + if (strcasecmp(listStr,"all") == 0) + return; + if (strcasecmp(listStr,"none") == 0) + return; + list = GetStringList(listStr, &listLen); + for (ix=0; ix< listLen; ix++) { + for (iy=0; iy<numAvailSeries; iy++) { + if (strcasecmp(list[ix],availSeries[iy]) == 0) + break; + + } + if (iy == numAvailSeries) + info("PROFILE: %s is not a known series", list[ix]); + } + delete_string_list(list, listLen); + return; +} + +char** GetStringList(char* list, int* listLen) { + char *tmp = NULL, *tmp1= NULL, **listOut = NULL; + int ix, nStr = 0, lenStr = 1; + if (list == NULL) { + *listLen = 0; + return NULL; + } + tmp = list; + tmp1 = list; + while (tmp1){ + nStr++; + tmp1 = strchr(tmp, ','); + tmp = tmp1+1; + } + *listLen = nStr; + listOut = (char**) xmalloc(sizeof(char*)*nStr); + tmp = list; + for(ix=1; ix<nStr; ix++) { + tmp1 = strchr(tmp, ','); + lenStr = ((int) (tmp1-tmp)); + if (lenStr < 1) + continue; + listOut[ix-1] = 
xmalloc(sizeof(char)*(lenStr+1)); + strncpy(listOut[ix-1],tmp,lenStr); + tmp = tmp1+1; + } + listOut[ix-1] = xstrdup(tmp); + return listOut; +} + +void delete_string_list(char** list, int listLen) { + int ix; + if (list == NULL) + return; + for (ix=0; ix<listLen; ix++) { + xfree(list[ix]); + } + xfree(list); + return; +} + +char* DataSetName(char* type) { + sprintf(dsetName, "%s Data", type); + return dsetName; +} + +char* make_job_profile_path(char* rootDir, int jobid) { + int len; + len = snprintf(profileFilePath,MAX_PROFILE_PATH,"%s/job~%d.h5", + rootDir, jobid); + if (len >= MAX_PROFILE_PATH) { + error("PROFILE: path is too big"); + return NULL; + } + return profileFilePath; +} + +char* make_node_step_profile_path(char* rootDir, char* nodename, + int jobid, int stepid) { + int len; + len = snprintf(profileFilePath,MAX_PROFILE_PATH, + "%s/tmp/job~%d~%d~%s.h5", + rootDir, jobid, stepid, nodename); + if (len >= MAX_PROFILE_PATH) { + error("PROFILE: path is too big"); + return NULL; + } + return xstrdup(profileFilePath); +} + +void hdf5_obj_info(hid_t group, char* namGroup) { + +#ifdef HAVE_HDF5 + char* hdf5TypNam[] = {"H5G_LINK ", + "H5G_GROUP ", + "H5G_DATASET", + "H5G_TYPE "}; + + char buf[MAX_GROUP_NAME+1]; + hsize_t nobj, nattr; + hid_t aid; + int i, len, typ; + + if (group < 0) { + info("PROFILE: Group is not HDF5 object"); + return; + } + H5Gget_num_objs(group, &nobj); + nattr = H5Aget_num_attrs(group); + info("PROFILE group: %s NumObject=%d NumAttributes=%d", + namGroup, (int) nobj, (int) nattr); + for (i = 0; (nobj>0) && (i<nobj); i++) { + typ = H5Gget_objtype_by_idx(group, i); + len = H5Gget_objname_by_idx(group, i, buf, MAX_GROUP_NAME); + if ((len > 0) && (len < MAX_GROUP_NAME)) { + info("PROFILE: Obj=%d Type=%s Name=%s", + i,hdf5TypNam[typ], buf); + } else { + info("PROFILE: Obj=%d Type=%s Name=%s (is truncated)", + i,hdf5TypNam[typ], buf); + } + } + for (i = 0; (nattr>0) && (i<nattr); i++) { + aid = H5Aopen_idx(group, (unsigned int)i ); + // Get the 
name of the attribute. + len = H5Aget_name(aid, MAX_ATTR_NAME, buf); + if (len < MAX_ATTR_NAME) { + info("PROFILE: Attr=%d Name=%s", i,buf); + } else { + info("PROFILE: Attr=%d Name=%s (is truncated)", i,buf); + } + H5Aclose(aid); + } +#endif + return; +} + +hid_t get_attribute_handle(hid_t parent, char* name) { +#ifdef HAVE_HDF5 + char buf[MAX_ATTR_NAME+1]; + int nattr, i, len; + hid_t aid; + + if (parent < 0) { + debug3("PROFILE: parent is not HDF5 object"); + return -1; + } + nattr = H5Aget_num_attrs(parent); + for (i = 0; (nattr>0) && (i<nattr); i++) { + aid = H5Aopen_idx(parent, (unsigned int)i ); + + // Get the name of the attribute. + len = H5Aget_name(aid, MAX_ATTR_NAME, buf); + if (len < MAX_ATTR_NAME) { + if (strcmp(buf,name) == 0) { + return aid; + } + } + H5Aclose(aid); + } + debug3("PROFILE: failed to find HDF5 attribute=%s\n", name); +#endif + return -1; +} + +hid_t get_group(hid_t parent, char* name) { + +#ifdef HAVE_HDF5 + char buf[MAX_GROUP_NAME]; + hsize_t nobj; + hid_t gid; + int i, len; + + if (parent < 0) { + debug3("PROFILE: parent is not HDF5 object"); + return -1; + } + H5Gget_num_objs(parent, &nobj); + for (i = 0; (nobj>0) && (i<nobj); i++) { + // Get the name of the group. 
+ len = H5Gget_objname_by_idx(parent, i, buf, MAX_GROUP_NAME); + if ((len > 0) && (len < MAX_GROUP_NAME)) { + if (strcmp(buf,name) == 0) { + gid = H5Gopen(parent, name, H5P_DEFAULT); + if (gid < 0) + error("PROFILE: Failed to open %s", + name); + return gid; + } + } + } +#endif + return -1; +} + +hid_t make_group(hid_t parent, char* name) { + hid_t gid = -1; +#ifdef HAVE_HDF5 + + char buf[MAX_GROUP_NAME]; + hsize_t nobj; + int i, len; + + if (parent < 0) { + debug3("PROFILE: parent is not HDF5 object"); + return -1; + } + gid = get_group(parent, name); + if (gid > 0) + return gid; + gid = H5Gcreate(parent, name, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (gid < 0) { + debug3("PROFILE: failed to create HDF5 group=%s", name); + return -1; + } +#endif + return gid; +} + +void put_string_attribute(hid_t parent, char* name, char* value) { +#ifdef HAVE_HDF5 + hid_t attr, spaceAttr, typAttr; + hsize_t dimAttr[1] = {1}; // Single dimension array of values + + typAttr = H5Tcopy(H5T_C_S1); + if (typAttr < 0) { + debug3("PROFILE: failed to copy type for attribute %s", name); + return; + } + H5Tset_size(typAttr,strlen(value)); + H5Tset_strpad(typAttr,H5T_STR_NULLTERM); + spaceAttr = H5Screate_simple(1, dimAttr, NULL); + if (spaceAttr < 0) { + H5Tclose(typAttr); + debug3("PROFILE: failed to create space for attribute %s", + name); + return; + } + attr = H5Acreate(parent, name, typAttr, spaceAttr, + H5P_DEFAULT, H5P_DEFAULT); + if (attr < 0) { + H5Tclose(typAttr); + H5Sclose(spaceAttr); + debug3("PROFILE: failed to create attribute %s", name); + return; + } + if (H5Awrite(attr, typAttr, value) < 0) { + debug3("PROFILE: failed to write attribute %s", name); + // Fall through to release resources + } + H5Sclose(spaceAttr); + H5Tclose(typAttr); + H5Aclose(attr); +#endif + return; +} + +char* get_string_attribute(hid_t parent, char* name) { + char* value = NULL; +#ifdef HAVE_HDF5 + hid_t attr, type; + size_t size; + + attr = get_attribute_handle(parent, name); + if (attr < 0) { + 
debug3("PROFILE: Attribute=%s does not exist",name); + return NULL; + } + type = H5Aget_type(attr); + if (H5Tget_class(type) != H5T_STRING) { + H5Aclose(attr); + debug3("PROFILE: Attribute=%s is not a string",name); + return NULL; + } + size = H5Tget_size(type); + value = xmalloc(size+1); + if (value == NULL) { + H5Tclose(type); + H5Aclose(attr); + debug3("PROFILE: failed to malloc %d bytes for attribute=%s", + (int) size, + name); + return NULL; + } + if (H5Aread(attr, type, value) < 0) { + xfree(value); + H5Tclose(type); + H5Aclose(attr); + debug3("PROFILE: failed to read attribute=%s",name); + return NULL; + } + H5Tclose(type); + H5Aclose(attr); +#endif + return value; +} + +void put_int_attribute(hid_t parent, char* name, int value) { +#ifdef HAVE_HDF5 + hid_t attr, spaceAttr; + hsize_t dimAttr[1] = {1}; // Single dimension array of values + spaceAttr = H5Screate_simple(1, dimAttr, NULL); + if (spaceAttr < 0) { + debug3("PROFILE: failed to create space for attribute %s", + name); + return; + } + attr = H5Acreate(parent, name, H5T_NATIVE_INT, spaceAttr, + H5P_DEFAULT, H5P_DEFAULT); + if (attr < 0) { + H5Sclose(spaceAttr); + debug3("PROFILE: failed to create attribute %s", name); + return; + } + if (H5Awrite(attr, H5T_NATIVE_INT, &value) < 0) { + debug3("PROFILE: failed to write attribute %s", name); + // Fall through to release resources + } + H5Sclose(spaceAttr); + H5Aclose(attr); +#endif + return; +} + +int get_int_attribute(hid_t parent, char* name) { + int value = 0; +#ifdef HAVE_HDF5 + hid_t attr; + attr = get_attribute_handle(parent, name); + if (attr < 0) { + debug3("PROFILE: Attribute=%s does not exist, returning",name); + return value; + } + if (H5Aread(attr, H5T_NATIVE_INT, &value) < 0) { + debug3("PROFILE: failed to read attribute=%s, returning",name); + } + H5Aclose(attr); +#endif + return value; +} + +void* get_hdf5_data(hid_t parent, char* type, char* namGroup, int* sizeData) { + void* data = NULL; +#ifdef HAVE_HDF5 + hid_t idDataSet, dtypMemory; + 
hsize_t szDset;
+	herr_t ec;
+	char* subtype = NULL;
+
+	profile_hdf5_ops_t* ops;
+	ops = profile_factory(type);
+	if (ops == NULL) {
+		debug3("PROFILE: failed to create %s operations", type);
+		return NULL;
+	}
+	subtype = get_string_attribute(parent, ATTR_SUBDATATYPE);
+	if (subtype == NULL) {
+		xfree(ops);
+		debug3("PROFILE: failed to get %s attribute",
+		       ATTR_SUBDATATYPE);
+		return NULL;
+	}
+	idDataSet = H5Dopen(parent, DataSetName(namGroup), H5P_DEFAULT);
+	if (idDataSet < 0) {
+		xfree(subtype);
+		xfree(ops);
+		debug3("PROFILE: failed to open %s Data Set", type);
+		return NULL;
+	}
+	if (strcmp(subtype,SUBDATA_SUMMARY) != 0)
+		dtypMemory = (*(ops->create_memory_datatype))();
+	else
+		dtypMemory = (*(ops->create_s_memory_datatype))();
+	xfree(subtype);
+	if (dtypMemory < 0) {
+		H5Dclose(idDataSet);
+		xfree(ops);
+		debug3("PROFILE: failed to create %s memory datatype", type);
+		return NULL;
+	}
+	szDset = H5Dget_storage_size(idDataSet);
+	*sizeData = (int) szDset;
+	if (szDset == 0) {
+		H5Tclose(dtypMemory);
+		H5Dclose(idDataSet);
+		xfree(ops);
+		debug3("PROFILE: %s data set is empty", type);
+		return NULL;
+	}
+	data = xmalloc(szDset);
+	if (data == NULL) {
+		H5Tclose(dtypMemory);
+		H5Dclose(idDataSet);
+		xfree(ops);
+		debug3("PROFILE: failed to get memory for %s data set", type);
+		return NULL;
+	}
+	ec = H5Dread(idDataSet, dtypMemory, H5S_ALL, H5S_ALL, H5P_DEFAULT,
+		     data);
+	if (ec < 0) {
+		H5Tclose(dtypMemory);
+		H5Dclose(idDataSet);
+		xfree(data);
+		xfree(ops);
+		debug3("PROFILE: failed to read %s data", type);
+		return NULL;
+	}
+	H5Tclose(dtypMemory);
+	H5Dclose(idDataSet);
+	xfree(ops);
+#endif
+	return data;
+}
+
+void put_hdf5_data(hid_t parent, char* type, char* subtype,
+		char* group, void* data, int nItem) {
+#ifdef HAVE_HDF5
+
+	hid_t idGroup, dtypMemory, dtypFile, idDataSpace, idDataSet;
+	hsize_t dims[1];
+	herr_t ec;
+	profile_hdf5_ops_t* ops;
+	ops = profile_factory(type);
+	if (ops == NULL) {
+		debug3("PROFILE: failed to create %s operations", type);
+
return; + } + // Create the datatypes. + if (strcmp(subtype,SUBDATA_SUMMARY) != 0) + dtypMemory = (*(ops->create_memory_datatype))(); + else + dtypMemory = (*(ops->create_s_memory_datatype))(); + if (dtypMemory < 0) { + xfree(ops); + debug3("PROFILE: failed to create %s memory datatype", type); + return; + } + if (strcmp(subtype,SUBDATA_SUMMARY) != 0) + dtypFile = (*(ops->create_file_datatype))(); + else + dtypFile = (*(ops->create_s_file_datatype))(); + if (dtypFile < 0) { + H5Tclose(dtypMemory); + xfree(ops); + debug3("PROFILE: failed to create %s file datatype", type); + return; + } + + dims[0] = nItem; + idDataSpace = H5Screate_simple(1, dims, NULL); + if (idDataSpace < 0) { + H5Tclose(dtypFile); + H5Tclose(dtypMemory); + xfree(ops); + debug3("PROFILE: failed to create %s space descriptor", type); + return; + } + + idGroup = H5Gcreate(parent, group, H5P_DEFAULT, + H5P_DEFAULT, H5P_DEFAULT); + if (idGroup < 0) { + H5Sclose(idDataSpace); + H5Tclose(dtypFile); + H5Tclose(dtypMemory); + xfree(ops); + debug3("PROFILE: failed to create %s group", group); + return; + } + + put_string_attribute(idGroup, ATTR_DATATYPE, type); + put_string_attribute(idGroup, ATTR_SUBDATATYPE, subtype); + + idDataSet = H5Dcreate(idGroup, DataSetName(group), dtypFile, + idDataSpace, H5P_DEFAULT,H5P_DEFAULT,H5P_DEFAULT); + if (idDataSet < 0) { + H5Gclose(idGroup); + H5Sclose(idDataSpace); + H5Tclose(dtypFile); + H5Tclose(dtypMemory); + xfree(ops); + debug3("PROFILE: failed to create %s dataset", group); + return; + } + + ec = H5Dwrite(idDataSet, dtypMemory, H5S_ALL, H5S_ALL, H5P_DEFAULT, + data); + if (ec < 0) { + debug3("PROFILE: failed to create write task data"); + // Fall through to release resources + } + H5Dclose(idDataSet); + H5Gclose(idGroup); + H5Sclose(idDataSpace); + H5Tclose(dtypFile); + H5Tclose(dtypMemory); + xfree(ops); + +#endif + return; +} + + +// ============================================================================ +// Routines supporting Energy Data type +// 
============================================================================ + +int energy_dataset_size() { + return sizeof(profile_energy_t); +} + +hid_t energy_create_memory_datatype() { + hid_t mtypEnergy = -1; +#ifdef HAVE_HDF5 + mtypEnergy = H5Tcreate(H5T_COMPOUND, sizeof(profile_energy_t)); + if (mtypEnergy < 0) { + debug3("PROFILE: failed to create Energy memory datatype"); + return -1; + } + MemAddDateTime(mtypEnergy,"Date Time", profile_energy_t,tod) + MemAddUint64(mtypEnergy, "Time", profile_energy_t, time) + MemAddUint64(mtypEnergy, "Power", profile_energy_t, power) + MemAddUint64(mtypEnergy, "CPU Frequency", profile_energy_t, cpu_freq) +#endif + return mtypEnergy; +} + +hid_t energy_create_file_datatype() { + hid_t ftypEnergy = -1; +#ifdef HAVE_HDF5 + ftypEnergy = H5Tcreate(H5T_COMPOUND,(TOD_LEN+3*8)); + if (ftypEnergy < 0) { + debug3("PROFILE: failed to create Energy file datatype"); + return -1; + } + moffset = TOD_LEN; + FileAddDateTime(ftypEnergy, "Date Time", 0) + FileAddUint64(ftypEnergy, "Time") + FileAddUint64(ftypEnergy, "Power") + FileAddUint64(ftypEnergy, "CPU Frequency") +#endif + return ftypEnergy; +} + +hid_t energy_s_create_memory_datatype() { + hid_t mtypEnergy = -1; +#ifdef HAVE_HDF5 + mtypEnergy = H5Tcreate(H5T_COMPOUND, sizeof(profile_energy_s_t)); + if (mtypEnergy < 0) { + debug3("PROFILE: failed to create Energy_s memory datatype"); + return -1; + } + MemAddDateTime(mtypEnergy,"Start Time", profile_energy_s_t, start_time) + MemAddUint64(mtypEnergy,"Elapsed Time",profile_energy_s_t,elapsed_time) + MemAddUint64(mtypEnergy,"Min Power", profile_energy_s_t, power.min) + MemAddUint64(mtypEnergy,"Ave Power", profile_energy_s_t, power.ave) + MemAddUint64(mtypEnergy,"Max Power", profile_energy_s_t, power.max) + MemAddUint64(mtypEnergy,"Total Power", profile_energy_s_t, power.total) + MemAddUint64(mtypEnergy,"Min CPU Frequency", profile_energy_s_t, + cpu_freq.min) + MemAddUint64(mtypEnergy,"Ave CPU Frequency", profile_energy_s_t, + 
cpu_freq.ave) + MemAddUint64(mtypEnergy,"Max CPU Frequency", profile_energy_s_t, + cpu_freq.max) + MemAddUint64(mtypEnergy,"Total CPU Frequency", profile_energy_s_t, + cpu_freq.total) +#endif + return mtypEnergy; +} + +hid_t energy_s_create_file_datatype() { + hid_t ftypEnergy = -1; +#ifdef HAVE_HDF5 + ftypEnergy = H5Tcreate(H5T_COMPOUND,(TOD_LEN+9*8)); + if (ftypEnergy < 0) { + debug3("PROFILE: failed to create Energy_s file datatype"); + return -1; + } + moffset = TOD_LEN; + FileAddDateTime(ftypEnergy, "Start Time", 0) + FileAddUint64(ftypEnergy, "Elapsed Time") + FileAddUint64(ftypEnergy, "Min Power") + FileAddUint64(ftypEnergy, "Ave Power") + FileAddUint64(ftypEnergy, "Max Power") + FileAddUint64(ftypEnergy, "Total Power") + FileAddUint64(ftypEnergy, "Min CPU Frequency") + FileAddUint64(ftypEnergy, "Ave CPU Frequency") + FileAddUint64(ftypEnergy, "Max CPU Frequency") + FileAddUint64(ftypEnergy, "Total CPU Frequency") +#endif + return ftypEnergy; +} + +void* energy_init_job_series(int nSamples) { + + int ix; + profile_energy_t* energyData; + + energyData = xmalloc(nSamples * sizeof(profile_energy_t)); + if (energyData == NULL) { + debug3("PROFILE: failed to get memory for energy data"); + return NULL; + } + return (void*) energyData; +} + +void energy_merge_step_series(hid_t group, void* prior, void* cur, void* buf) { +// This is a difference series + profile_energy_t* prfCur = (profile_energy_t*) cur; + profile_energy_t* prfBuf = (profile_energy_t*) buf; + struct tm *ts; + ts = localtime(&prfCur->time); + strftime(prfBuf->tod, TOD_LEN, TOD_FMT, ts); + if (prior == NULL) { + // First sample. 
+ seriesStart = prfCur->time; + prfBuf->time = 0; + + } else { + prfBuf->time = prfCur->time - seriesStart; + } + prfBuf->power = prfCur->power; + prfBuf->cpu_freq = prfCur->cpu_freq; + return; +} + +void* energy_series_total(int nSamples, void* data) +{ + int ix; + profile_energy_t* energyData; + profile_energy_s_t* total; + if (nSamples < 1) + return NULL; + energyData = (profile_energy_t*) data; + total = xmalloc(sizeof(profile_energy_s_t)); + if (total == NULL) { + error("PROFILE: Out of memory getting energy total"); + return NULL; + } + // Assuming energy series are a difference series + strcpy(total->start_time, energyData[0].tod); + total->elapsed_time = energyData[nSamples-1].time; + IncrDifSample(total, energyData, power, nSamples) + IncrDifSample(total, energyData, cpu_freq, nSamples) + return total; +} + +void energy_extract_series(FILE* fOt, bool putHeader, int job, int step, + char* node, char* series, void* data, int sizeData) { + + int nItems, ix; + profile_energy_t* energyData = (profile_energy_t*) data; + if (putHeader) { + fprintf(fOt, "Job,Step,Node,Series,Date_Time,Elapsed_Time," + "Power,CPU_Frequency\n"); + } + nItems = sizeData / sizeof(profile_energy_t); + for (ix=0; ix < nItems; ix++) { + fprintf(fOt,"%d,%d,%s,%s,%s,%ld,%ld,%ld\n",job,step,node, + series,energyData[ix].tod,energyData[ix].time, + energyData[ix].power,energyData[ix].cpu_freq); + } + return; +} + +void energy_extract_total(FILE* fOt, bool putHeader, int job, int step, + char* node, char* series, void* data, int sizeData) { + + int nItems, ix; + profile_energy_s_t* energyData = (profile_energy_s_t*) data; + if (putHeader) { + fprintf(fOt,"Job,Step,Node,Series,Start_Time,Elapsed_Time," + "Min_Power,Ave_Power,Max_Power,Total_Power," + "Min_CPU Frequency,Ave_CPU Frequency," + "Max_CPU Frequency,Total_CPU Frequency\n"); + } + fprintf(fOt,"%d,%d,%s,%s,%s,%ld", job, step, node, series, + energyData->start_time, energyData->elapsed_time); + PutUintSum(fOt, energyData->power,",") + 
PutUintSum(fOt, energyData->cpu_freq,",") + fprintf(fOt,"\n"); + return; +} + +profile_hdf5_ops_t* energy_profile_factory() { + profile_hdf5_ops_t* ops = xmalloc(sizeof(profile_hdf5_ops_t)); + ops->dataset_size = &energy_dataset_size; + ops->create_memory_datatype = &energy_create_memory_datatype; + ops->create_file_datatype = &energy_create_file_datatype; + ops->create_s_memory_datatype = &energy_s_create_memory_datatype; + ops->create_s_file_datatype = &energy_s_create_file_datatype; + ops->init_job_series = &energy_init_job_series; + ops->merge_step_series = &energy_merge_step_series; + ops->series_total = &energy_series_total; + ops->extract_series = &energy_extract_series; + ops->extract_total = &energy_extract_total; + return ops; +} + + +// ============================================================================ +// Routines supporting I/O Data type +// ============================================================================ + +int io_dataset_size() { + return sizeof(profile_io_t); +} + +hid_t io_create_memory_datatype(void) { + hid_t mtypIO = -1; +#ifdef HAVE_HDF5 + mtypIO = H5Tcreate(H5T_COMPOUND, sizeof(profile_io_t)); + if (mtypIO < 0) { + debug3("PROFILE: failed to create IO memory datatype"); + return -1; + } + MemAddDateTime(mtypIO,"Date Time", profile_io_t,tod) + MemAddUint64(mtypIO, "Time", profile_io_t, time) + MemAddUint64(mtypIO, "Reads", profile_io_t, reads) + MemAddDbl(mtypIO, "Megabytes Read", profile_io_t, read_size) + MemAddUint64(mtypIO, "Writes", profile_io_t, writes) + MemAddDbl(mtypIO, "Megabytes Write", profile_io_t,write_size) +#endif + return mtypIO; +} + +hid_t io_create_file_datatype(void) { + hid_t ftypIO = -1; +#ifdef HAVE_HDF5 + ftypIO = H5Tcreate(H5T_COMPOUND,TOD_LEN+5*8); + if (ftypIO < 0) { + debug3("PROFILE: failed to create IO file datatype"); + return -1; + } + moffset = TOD_LEN; + FileAddDateTime(ftypIO, "Date Time", 0) + FileAddUint64(ftypIO, "Time") + FileAddUint64(ftypIO, "Reads") + FileAddDbl(ftypIO, "Megabytes 
Read") + FileAddUint64(ftypIO, "Writes") + FileAddDbl(ftypIO, "Megabytes Write") +#endif + return ftypIO; +} + +hid_t io_s_create_memory_datatype(void) { + hid_t mtypIO = -1; +#ifdef HAVE_HDF5 + mtypIO = H5Tcreate(H5T_COMPOUND, sizeof(profile_io_s_t)); + if (mtypIO < 0) { + debug3("PROFILE: failed to create IO memory datatype"); + return -1; + } + MemAddDateTime(mtypIO,"Start Time", profile_io_s_t, start_time) + MemAddUint64(mtypIO, "Elapsed Time", profile_io_s_t, elapsed_time) + MemAddUint64(mtypIO, "Min Reads", profile_io_s_t, reads.min) + MemAddUint64(mtypIO, "Ave Reads", profile_io_s_t, reads.ave) + MemAddUint64(mtypIO, "Max Reads", profile_io_s_t, reads.max) + MemAddUint64(mtypIO, "Total Reads", profile_io_s_t, reads.total) + MemAddDbl(mtypIO, "Min Read Megabytes", profile_io_s_t, + read_size.min) + MemAddDbl(mtypIO, "Ave Read Megabytes", profile_io_s_t, + read_size.ave) + MemAddDbl(mtypIO, "Max Read Megabytes", profile_io_s_t, + read_size.max) + MemAddDbl(mtypIO, "Total Read Megabytes", profile_io_s_t, + read_size.total) + MemAddUint64(mtypIO, "Min Writes", profile_io_s_t, writes.min) + MemAddUint64(mtypIO, "Ave Writes", profile_io_s_t, writes.ave) + MemAddUint64(mtypIO, "Max Writes", profile_io_s_t, writes.max) + MemAddUint64(mtypIO, "Total Writes", profile_io_s_t, writes.total) + MemAddDbl(mtypIO, "Min Write Megabytes", profile_io_s_t, + write_size.min) + MemAddDbl(mtypIO, "Ave Write Megabytes", profile_io_s_t, + write_size.ave) + MemAddDbl(mtypIO, "Max Write Megabytes", profile_io_s_t, + write_size.max) + MemAddDbl(mtypIO, "Total Write Megabytes",profile_io_s_t, + write_size.total) +#endif + return mtypIO; +} + +hid_t io_s_create_file_datatype(void) { + hid_t ftypIO = -1; +#ifdef HAVE_HDF5 + ftypIO = H5Tcreate(H5T_COMPOUND,TOD_LEN+17*8); + if (ftypIO < 0) { + debug3("PROFILE: failed to create IO file datatype"); + return -1; + } + moffset = TOD_LEN; + FileAddDateTime(ftypIO,"Start Time", 0) + FileAddUint64(ftypIO, "Elapsed Time") + FileAddUint64(ftypIO, 
"Min Reads") + FileAddUint64(ftypIO, "Ave Reads") + FileAddUint64(ftypIO, "Max Reads") + FileAddUint64(ftypIO, "Total Reads") + FileAddDbl(ftypIO, "Min Read Megabytes") + FileAddDbl(ftypIO, "Ave Read Megabytes") + FileAddDbl(ftypIO, "Max Read Megabytes") + FileAddDbl(ftypIO, "Total Read Megabytes") + FileAddUint64(ftypIO, "Min Writes") + FileAddUint64(ftypIO, "Ave Writes") + FileAddUint64(ftypIO, "Max Writes") + FileAddUint64(ftypIO, "Total Writes") + FileAddDbl(ftypIO, "Min Write Megabytes") + FileAddDbl(ftypIO, "Ave Write Megabytes") + FileAddDbl(ftypIO, "Max Write Megabytes") + FileAddDbl(ftypIO, "Total Write Megabytes") +#endif + return ftypIO; +} + +void* io_init_job_series(int nSamples) { + profile_io_t* ioData; + ioData = xmalloc(nSamples * sizeof(profile_io_t)); + if (ioData == NULL) { + debug3("PROFILE: failed to get memory for combined io data"); + return NULL; + } + return (void*) ioData; +} + +void io_merge_step_series(hid_t group, void* prior, void* cur, void* buf) { +// This is a difference series + profile_io_t* prfCur = (profile_io_t*) cur; + profile_io_t* prfBuf = (profile_io_t*) buf; + struct tm *ts; + ts = localtime(&prfCur->time); + strftime(prfBuf->tod, TOD_LEN, TOD_FMT, ts); + if (prior == NULL) { + // First sample. 
+ seriesStart = prfCur->time; + prfBuf->time = 0; + } else { + prfBuf->time = prfCur->time - seriesStart; + } + prfBuf->reads = prfCur->reads; + prfBuf->writes = prfCur->writes; + prfBuf->read_size = prfCur->read_size; + prfBuf->write_size = prfCur->write_size; + return; +} + +void* io_series_total(int nSamples, void* data) +{ + profile_io_t* ioData; + profile_io_s_t* total; + if (nSamples < 1) + return NULL; + ioData = (profile_io_t*) data; + total = xmalloc(sizeof(profile_io_s_t)); + if (total == NULL) { + error("PROFILE: Out of memory getting I/O total"); + return NULL; + } + // Assuming io series are a running total, and the first + // sample just sets the initial conditions + strcpy(total->start_time, ioData[0].tod); + total->elapsed_time = ioData[nSamples-1].time; + IncrDifSample(total, ioData, reads, nSamples) + IncrDifSample(total, ioData, read_size, nSamples) + IncrDifSample(total, ioData, writes, nSamples) + IncrDifSample(total, ioData, write_size, nSamples) + return total; +} + +void io_extract_series(FILE* fOt, bool putHeader, int job, int step, + char* node, char* series, void* data, int sizeData) { + + int nItems, ix; + profile_io_t* ioData = (profile_io_t*) data; + if (putHeader) { + fprintf(fOt,"Job,Step,Node,Series,Date_Time,Elapsed_time," + "Reads,Read Megabytes,Writes,Write Megabytes\n"); + } + nItems = sizeData / sizeof(profile_io_t); + for (ix=0; ix < nItems; ix++) { + fprintf(fOt,"%d,%d,%s,%s,%s,%ld,%ld,%.3f,%ld,%.3f\n", + job,step,node,series, + ioData[ix].tod, ioData[ix].time, + ioData[ix].reads, ioData[ix].read_size, + ioData[ix].writes, ioData[ix].write_size); + } + return; +} + +void io_extract_total(FILE* fOt, bool putHeader, int job, int step, + char* node, char* series, void* data, int sizeData) { + + profile_io_s_t* ioData = (profile_io_s_t*) data; + if (putHeader) { + fprintf(fOt,"Job,Step,Node,Series,Start_Time,Elapsed_time," + "Min_Reads,Ave_Reads,Max_Reads,Total_Reads," + "Min_Read_Megabytes,Ave_Read_Megabytes," + 
"Max_Read_Megabytes,Total_Read_Megabytes," + "Min_Writes,Ave_Writes,Max_Writes,Total_Writes," + "Min_Write_Megabytes,Ave_Write_Megabytes," + "Max_Write_Megabytes,Total_Write_Megabytes\n"); + } + fprintf(fOt,"%d,%d,%s,%s,%s,%ld", job,step,node,series, + ioData->start_time, ioData->elapsed_time); + PutUintSum(fOt, ioData->reads,",") + PutDblSum(fOt, ioData->read_size,",") + PutUintSum(fOt, ioData->writes,",") + PutDblSum(fOt, ioData->write_size,",") + fprintf(fOt,"\n"); + return; +} + +profile_hdf5_ops_t* io_profile_factory() { + profile_hdf5_ops_t* ops = xmalloc(sizeof(profile_hdf5_ops_t)); + ops->dataset_size = &io_dataset_size; + ops->create_memory_datatype = &io_create_memory_datatype; + ops->create_file_datatype = &io_create_file_datatype; + ops->create_s_memory_datatype = &io_s_create_memory_datatype; + ops->create_s_file_datatype = &io_s_create_file_datatype; + ops->init_job_series = &io_init_job_series; + ops->merge_step_series = &io_merge_step_series; + ops->series_total = &io_series_total; + ops->extract_series = &io_extract_series; + ops->extract_total = &io_extract_total; + return ops; +} + + +// ============================================================================ +// Routines supporting Network Data type +// ============================================================================ + +int network_dataset_size() { + return sizeof(profile_network_t); +} + +hid_t network_create_memory_datatype(void) { + hid_t mtypNetwork = -1; +#ifdef HAVE_HDF5 + mtypNetwork = H5Tcreate(H5T_COMPOUND, sizeof(profile_network_t)); + if (mtypNetwork < 0) { + debug3("PROFILE: failed to create Network memory datatype"); + return -1; + } + MemAddDateTime(mtypNetwork,"Date Time", profile_network_t,tod) + MemAddUint64(mtypNetwork, "Time", profile_network_t, time) + MemAddUint64(mtypNetwork, "Packets In", profile_network_t, packets_in) + MemAddDbl(mtypNetwork, "Megabytes In", profile_network_t, size_in) + MemAddUint64(mtypNetwork, "Packets Out", 
profile_network_t,packets_out) + MemAddDbl(mtypNetwork, "Megabytes Out", profile_network_t,size_out) +#endif + return mtypNetwork; +} + +hid_t network_create_file_datatype(void) { + hid_t ftypNetwork = -1; +#ifdef HAVE_HDF5 + ftypNetwork = H5Tcreate(H5T_COMPOUND,TOD_LEN+5*8); + if (ftypNetwork < 0) { + debug3("PROFILE: failed to create Network file datatype"); + return -1; + } + moffset = TOD_LEN; + FileAddDateTime(ftypNetwork, "Date Time", 0) + FileAddUint64(ftypNetwork, "Time") + FileAddUint64(ftypNetwork, "Packets In") + FileAddDbl(ftypNetwork, "Megabytes In") + FileAddUint64(ftypNetwork, "Packets Out") + FileAddDbl(ftypNetwork, "Megabytes Out") +#endif + return ftypNetwork; +} + +hid_t network_s_create_memory_datatype(void) { + hid_t mtypNetwork = -1; +#ifdef HAVE_HDF5 + mtypNetwork = H5Tcreate(H5T_COMPOUND, sizeof(profile_network_s_t)); + if (mtypNetwork < 0) { + debug3("PROFILE: failed to create Network memory datatype"); + return -1; + } + MemAddDateTime(mtypNetwork,"Start Time", profile_network_s_t, + start_time) + MemAddUint64(mtypNetwork, "Elapsed Time", profile_network_s_t, + elapsed_time) + MemAddUint64(mtypNetwork, "Min Packets In", profile_network_s_t, + packets_in.min) + MemAddUint64(mtypNetwork, "Ave Packets In", profile_network_s_t, + packets_in.ave) + MemAddUint64(mtypNetwork, "Max Packets In", profile_network_s_t, + packets_in.max) + MemAddUint64(mtypNetwork, "Total Packets In", profile_network_s_t, + packets_in.total) + MemAddDbl(mtypNetwork, "Min Megabytes In", profile_network_s_t, + size_in.min) + MemAddDbl(mtypNetwork, "Ave Megabytes In", profile_network_s_t, + size_in.ave) + MemAddDbl(mtypNetwork, "Max Megabytes In", profile_network_s_t, + size_in.max) + MemAddDbl(mtypNetwork, "Total Megabytes In", profile_network_s_t, + size_in.total) + MemAddUint64(mtypNetwork, "Min Packets Out", profile_network_s_t, + packets_out.min) + MemAddUint64(mtypNetwork, "Ave Packets Out", profile_network_s_t, + packets_out.ave) + MemAddUint64(mtypNetwork, "Max 
Packets Out", profile_network_s_t, + packets_out.max) + MemAddUint64(mtypNetwork, "Total Packets Out", profile_network_s_t, + packets_out.total) + MemAddDbl(mtypNetwork, "Min Megabytes Out", profile_network_s_t, + size_out.min) + MemAddDbl(mtypNetwork, "Ave Megabytes Out", profile_network_s_t, + size_out.ave) + MemAddDbl(mtypNetwork, "Max Megabytes Out", profile_network_s_t, + size_out.max) + MemAddDbl(mtypNetwork, "Total Megabytes Out",profile_network_s_t, + size_out.total) +#endif + return mtypNetwork; +} + +hid_t network_s_create_file_datatype(void) { + hid_t ftypNetwork = -1; +#ifdef HAVE_HDF5 + ftypNetwork = H5Tcreate(H5T_COMPOUND,TOD_LEN+17*8); + if (ftypNetwork < 0) { + debug3("PROFILE: failed to create Network file datatype"); + return -1; + } + moffset = TOD_LEN; + FileAddDateTime(ftypNetwork,"Start Time", 0) + FileAddUint64(ftypNetwork, "Elapsed Time") + FileAddUint64(ftypNetwork, "Min Packets In") + FileAddUint64(ftypNetwork, "Ave Packets In") + FileAddUint64(ftypNetwork, "Max Packets In") + FileAddUint64(ftypNetwork, "Total Packets In") + FileAddDbl(ftypNetwork, "Min Megabytes In") + FileAddDbl(ftypNetwork, "Ave Megabytes In") + FileAddDbl(ftypNetwork, "Max Megabytes In") + FileAddDbl(ftypNetwork, "Total Megabytes In") + FileAddUint64(ftypNetwork, "Min Packets Out") + FileAddUint64(ftypNetwork, "Ave Packets Out") + FileAddUint64(ftypNetwork, "Max Packets Out") + FileAddUint64(ftypNetwork, "Total Packets Out") + FileAddDbl(ftypNetwork, "Min Megabytes Out") + FileAddDbl(ftypNetwork, "Ave Megabytes Out") + FileAddDbl(ftypNetwork, "Max Megabytes Out") + FileAddDbl(ftypNetwork, "Total Megabytes Out") +#endif + return ftypNetwork; +} + +void* network_init_job_series(int nSamples) { + profile_network_t* networkData; + + networkData = xmalloc(nSamples * sizeof(profile_network_t)); + if (networkData == NULL) { + debug3("PROFILE: failed to get memory for network data"); + return NULL; + } + return (void*) networkData; +} + +void network_merge_step_series(hid_t 
group, void* prior,void* cur,void* buf) { +// This is a difference series + profile_network_t* prfCur = (profile_network_t*) cur; + profile_network_t* prfBuf = (profile_network_t*) buf; + struct tm *ts; + ts = localtime(&prfCur->time); + strftime(prfBuf->tod, TOD_LEN, TOD_FMT, ts); + if (prior == NULL) { + // First sample. + seriesStart = prfCur->time; + prfBuf->time = 0; + } else { + prfBuf->time = prfCur->time - seriesStart; + } + prfBuf->packets_in = prfCur->packets_in; + prfBuf->packets_out = prfCur->packets_out; + prfBuf->size_in = prfCur->size_in; + prfBuf->size_out = prfCur->size_out; + return; +} + +void* network_series_total(int nSamples, void* data) { + profile_network_t* networkData; + profile_network_s_t* total; + if (nSamples < 1) + return NULL; + networkData = (profile_network_t*) data; + total = xmalloc(sizeof(profile_network_s_t)); + if (total == NULL) { + error("PROFILE: Out of memory getting network total"); + return NULL; + } + // Assuming network series are a running total, and the first + // sample just sets the initial conditions + strcpy(total->start_time, networkData[0].tod); + total->elapsed_time = networkData[nSamples-1].time; + IncrDifSample(total, networkData, packets_in, nSamples) + IncrDifSample(total, networkData, size_in, nSamples) + IncrDifSample(total, networkData, packets_out, nSamples) + IncrDifSample(total, networkData, size_out, nSamples) + return total; +} + +void network_extract_series(FILE* fOt, bool putHeader, int job, int step, + char* node, char* series, void* data, int sizeData) { + + int nItems, ix; + profile_network_t* networkData = (profile_network_t*) data; + + if (putHeader) { + fprintf(fOt,"Job,Step,Node,Series,Date_Time,Elapsed_time," + "Packets_In,MegaBytes_In,Packets_Out,MegaBytes_Out\n"); + } + nItems = sizeData / sizeof(profile_network_t); + for (ix=0; ix < nItems; ix++) { + fprintf(fOt,"%d,%d,%s,%s,%s,%ld,%ld,%.3f,%ld,%.3f\n", + job,step,node,series, + networkData[ix].tod, networkData[ix].time, + 
networkData[ix].packets_in, networkData[ix].size_in, + networkData[ix].packets_out, networkData[ix].size_out); + } + return; +} + +void network_extract_total(FILE* fOt, bool putHeader, int job, int step, + char* node, char* series, void* data, int sizeData) { + profile_network_s_t* networkData = (profile_network_s_t*) data; + if (putHeader) { + fprintf(fOt,"Job,Step,Node,Series,Start_Time,Elapsed_time," + "Min_Packets_In,Ave_Packets_In," + "Max_Packets_In,Total_Packets_In," + "Min_Megabytes_In,Ave_Megabytes_In," + "Max_Megabytes_In,Total_Megabytes_In," + "Min_Packets_Out,Ave_Packets_Out," + "Max_Packets_Out,Total_Packets_Out," + "Min_Megabytes_Out,Ave_Megabytes_Out," + "Max_Megabytes_Out,Total_Megabytes_Out\n"); + } + fprintf(fOt,"%d,%d,%s,%s,%s,%ld", job,step,node,series, + networkData->start_time, networkData->elapsed_time); + PutUintSum(fOt, networkData->packets_in,",") + PutDblSum(fOt, networkData->size_in,",") + PutUintSum(fOt, networkData->packets_out,",") + PutDblSum(fOt, networkData->size_out,",") + fprintf(fOt,"\n"); + return; +} + +profile_hdf5_ops_t* network_profile_factory() { + profile_hdf5_ops_t* ops = xmalloc(sizeof(profile_hdf5_ops_t)); + ops->dataset_size = &network_dataset_size; + ops->create_memory_datatype = &network_create_memory_datatype; + ops->create_file_datatype = &network_create_file_datatype; + ops->create_s_memory_datatype = &network_s_create_memory_datatype; + ops->create_s_file_datatype = &network_s_create_file_datatype; + ops->init_job_series = &network_init_job_series; + ops->merge_step_series = &network_merge_step_series; + ops->series_total = &network_series_total; + ops->extract_series = &network_extract_series; + ops->extract_total = &network_extract_total; + return ops; +} + +// ============================================================================ +// Routines supporting Task Data type +// ============================================================================ + +int task_dataset_size() { + return 
sizeof(profile_task_t); +} + +hid_t task_create_memory_datatype() { + hid_t mtypTask = -1; +#ifdef HAVE_HDF5 + mtypTask = H5Tcreate(H5T_COMPOUND, sizeof(profile_task_t)); + if (mtypTask < 0) { + debug3("PROFILE: failed to create Task memory datatype"); + return -1; + } + MemAddDateTime(mtypTask,"Date Time", profile_task_t,tod) + MemAddUint64(mtypTask,"Time", profile_task_t, time) + MemAddUint64(mtypTask,"CPU Frequency", profile_task_t, cpu_freq) + MemAddUint64(mtypTask,"CPU Time", profile_task_t, cpu_time) + MemAddDbl(mtypTask,"CPU Utilization", profile_task_t, cpu_utilization) + MemAddUint64(mtypTask,"RSS", profile_task_t, rss) + MemAddUint64(mtypTask,"VM Size", profile_task_t, vm_size) + MemAddUint64(mtypTask,"Pages", profile_task_t, pages) + MemAddDbl(mtypTask,"Read Megabytes", profile_task_t, read_size) + MemAddDbl(mtypTask,"Write Megabytes", profile_task_t, write_size) +#endif + return mtypTask; +} + +hid_t task_create_file_datatype() { + hid_t ftypTask = -1; +#ifdef HAVE_HDF5 + ftypTask = H5Tcreate(H5T_COMPOUND,TOD_LEN+9*8); + if (ftypTask < 0) { + debug3("PROFILE: failed to create Task file datatype"); + return -1; + } + moffset = TOD_LEN; + FileAddDateTime(ftypTask, "Date Time", 0) + FileAddUint64(ftypTask, "Time") + FileAddUint64(ftypTask, "CPU Frequency") + FileAddUint64(ftypTask, "CPU Time") + FileAddDbl(ftypTask, "CPU Utilization") + FileAddUint64(ftypTask, "RSS") + FileAddUint64(ftypTask, "VM Size") + FileAddUint64(ftypTask, "Pages") + FileAddDbl(ftypTask, "Read Megabytes") + FileAddDbl(ftypTask, "Write Megabytes") +#endif + return ftypTask; +} + +hid_t task_s_create_memory_datatype() { + hid_t mtypTask = -1; +#ifdef HAVE_HDF5 + mtypTask = H5Tcreate(H5T_COMPOUND, sizeof(profile_task_s_t)); + if (mtypTask < 0) { + debug3("PROFILE: failed to create Task memory datatype"); + return -1; + } + MemAddDateTime(mtypTask,"Start Time", profile_task_s_t,start_time) + MemAddUint64(mtypTask,"Elapsed Time", profile_task_s_t, elapsed_time) + 
MemAddUint64(mtypTask,"Min CPU Frequency", profile_task_s_t, + cpu_freq.min) + MemAddUint64(mtypTask,"Ave CPU Frequency", profile_task_s_t, + cpu_freq.ave) + MemAddUint64(mtypTask,"Max CPU Frequency", profile_task_s_t, + cpu_freq.max) + MemAddUint64(mtypTask,"Total CPU Frequency", profile_task_s_t, + cpu_freq.total) + MemAddUint64(mtypTask,"Min CPU Time", profile_task_s_t, cpu_time.min) + MemAddUint64(mtypTask,"Ave CPU Time", profile_task_s_t, cpu_time.ave) + MemAddUint64(mtypTask,"Max CPU Time", profile_task_s_t, cpu_time.max) + MemAddUint64(mtypTask,"Total CPU Time", profile_task_s_t, + cpu_time.total) + MemAddDbl(mtypTask,"Min CPU Utilization", profile_task_s_t, + cpu_utilization.min) + MemAddDbl(mtypTask,"Ave CPU Utilization", profile_task_s_t, + cpu_utilization.ave) + MemAddDbl(mtypTask,"Max CPU Utilization", profile_task_s_t, + cpu_utilization.max) + MemAddDbl(mtypTask,"Total CPU Utilization", profile_task_s_t, + cpu_utilization.total) + MemAddUint64(mtypTask,"Min RSS", profile_task_s_t, rss.min) + MemAddUint64(mtypTask,"Ave RSS", profile_task_s_t, rss.ave) + MemAddUint64(mtypTask,"Max RSS", profile_task_s_t, rss.max) + MemAddUint64(mtypTask,"Total RSS", profile_task_s_t, rss.total) + MemAddUint64(mtypTask,"Min VM Size", profile_task_s_t, vm_size.min) + MemAddUint64(mtypTask,"Ave VM Size", profile_task_s_t, vm_size.ave) + MemAddUint64(mtypTask,"Max VM Size", profile_task_s_t, vm_size.max) + MemAddUint64(mtypTask,"Total VM Size", profile_task_s_t, vm_size.total) + MemAddUint64(mtypTask,"Min Pages", profile_task_s_t, pages.min) + MemAddUint64(mtypTask,"Ave Pages", profile_task_s_t, pages.ave) + MemAddUint64(mtypTask, "Max Pages", profile_task_s_t, pages.max) + MemAddUint64(mtypTask, "Total Pages", profile_task_s_t, pages.total) + MemAddDbl(mtypTask, "Min Read Megabytes", profile_task_s_t, + read_size.min) + MemAddDbl(mtypTask, "Ave Read Megabytes", profile_task_s_t, + read_size.ave) + MemAddDbl(mtypTask, "Max Read Megabytes", profile_task_s_t, + read_size.max) 
+ MemAddDbl(mtypTask, "Total Read Megabytes", profile_task_s_t, + read_size.total) + MemAddDbl(mtypTask, "Min Write Megabytes", profile_task_s_t, + write_size.min) + MemAddDbl(mtypTask, "Ave Write Megabytes", profile_task_s_t, + write_size.ave) + MemAddDbl(mtypTask, "Max Write Megabytes", profile_task_s_t, + write_size.max) + MemAddDbl(mtypTask, "Total Write Megabytes", profile_task_s_t, + write_size.total) +#endif + return mtypTask; +} + +hid_t task_s_create_file_datatype() { + hid_t ftypTask = -1; +#ifdef HAVE_HDF5 + ftypTask = H5Tcreate(H5T_COMPOUND,TOD_LEN+33*8); + if (ftypTask < 0) { + debug3("PROFILE: failed to create Task file datatype"); + return -1; + } + moffset = TOD_LEN; + FileAddDateTime(ftypTask, "Start Time", 0) + FileAddUint64(ftypTask, "Elapsed Time") + FileAddUint64(ftypTask, "Min CPU Frequency") + FileAddUint64(ftypTask, "Ave CPU Frequency") + FileAddUint64(ftypTask, "Max CPU Frequency") + FileAddUint64(ftypTask, "Total CPU Frequency") + FileAddUint64(ftypTask, "Min CPU Time") + FileAddUint64(ftypTask, "Ave CPU Time") + FileAddUint64(ftypTask, "Max CPU Time") + FileAddUint64(ftypTask, "Total CPU Time") + FileAddDbl(ftypTask, "Min CPU Utilization") + FileAddDbl(ftypTask, "Ave CPU Utilization") + FileAddDbl(ftypTask, "Max CPU Utilization") + FileAddDbl(ftypTask, "Total CPU Utilization") + FileAddUint64(ftypTask, "Min RSS") + FileAddUint64(ftypTask, "Ave RSS") + FileAddUint64(ftypTask, "Max RSS") + FileAddUint64(ftypTask, "Total RSS") + FileAddUint64(ftypTask, "Min VM Size") + FileAddUint64(ftypTask, "Ave VM Size") + FileAddUint64(ftypTask, "Max VM Size") + FileAddUint64(ftypTask, "Total VM Size") + FileAddUint64(ftypTask, "Min Pages") + FileAddUint64(ftypTask, "Ave Pages") + FileAddUint64(ftypTask, "Max Pages") + FileAddUint64(ftypTask, "Total Pages") + FileAddDbl(ftypTask, "Min Read Megabytes") + FileAddDbl(ftypTask, "Ave Read Megabytes") + FileAddDbl(ftypTask, "Max Read Megabytes") + FileAddDbl(ftypTask, "Total Read Megabytes") + 
FileAddDbl(ftypTask, "Min Write Megabytes") + FileAddDbl(ftypTask, "Ave Write Megabytes") + FileAddDbl(ftypTask, "Max Write Megabytes") + FileAddDbl(ftypTask, "Total Write Megabytes") +#endif + return ftypTask; +} + +void* task_init_job_series(int nSamples) { + profile_task_t* taskData; + taskData = xmalloc(nSamples * sizeof(profile_task_t)); + if (taskData == NULL) { + debug3("PROFILE: failed to get memory for combined task data"); + return NULL; + } + return (void*) taskData; +} + +void task_merge_step_series(hid_t group, void* prior, void* cur, void* buf) { +// This is a running total series + profile_task_t* prfPrior = (profile_task_t*) prior; + profile_task_t* prfCur = (profile_task_t*) cur; + profile_task_t* prfBuf = (profile_task_t*) buf; + + struct tm *ts; + ts = localtime(&prfCur->time); + strftime(prfBuf->tod, TOD_LEN, TOD_FMT, ts); + if (prfPrior == NULL) { + // First sample. + seriesStart = prfCur->time; + prfBuf->time = 0; + prfBuf->cpu_time = 0; + prfBuf->read_size = 0.0; + prfBuf->write_size = 0.0; + } else { + prfBuf->time = prfCur->time - seriesStart; + prfBuf->cpu_time = prfCur->cpu_time - prfPrior->cpu_time; + prfBuf->read_size = + prfCur->read_size - prfPrior->read_size; + prfBuf->write_size = + prfCur->write_size - prfPrior->write_size; + } + prfBuf->cpu_freq = prfCur->cpu_freq; + prfBuf->cpu_utilization = prfCur->cpu_utilization; + prfBuf->rss = prfCur->rss; + prfBuf->vm_size = prfCur->vm_size; + prfBuf->pages = prfCur->pages; + return; +} + +void* task_series_total(int nSamples, void* data) +{ + int ix; + profile_task_t* taskData; + profile_task_s_t* total; + taskData = (profile_task_t*) data; + total = xmalloc(sizeof(profile_task_s_t)); + if (total == NULL) { + error("PROFILE: Out of memory getting task total"); + return NULL; + } + strcpy(total->start_time, taskData[0].tod); + total->elapsed_time = taskData[nSamples-1].time; + IncrDifSample(total, taskData, cpu_freq, nSamples) + IncrRTSample(total, taskData, cpu_time, nSamples) + 
IncrDifSample(total, taskData, cpu_utilization, nSamples) + IncrDifSample(total, taskData, rss, nSamples) + IncrDifSample(total, taskData, vm_size , nSamples) + IncrDifSample(total, taskData, pages, nSamples) + IncrRTSample(total, taskData, read_size, nSamples) + IncrRTSample(total, taskData, write_size, nSamples) + return total; +} + +void task_extract_series(FILE* fOt, bool putHeader, int job, int step, + char* node, char* series, void* data, int sizeData) { + + int nItems, ix; + profile_task_t* taskData = (profile_task_t*) data; + if (putHeader) { + fprintf(fOt,"Job,Step,Node,Series,Date Time,ElapsedTime," + "CPU Frequency,CPU Time," + "CPU Utilization,rss,VM Size,Pages," + "Read_bytes,Write_bytes\n"); + } + nItems = sizeData / sizeof(profile_task_t); + for (ix=0; ix < nItems; ix++) { + fprintf(fOt,"%d,%d,%s,%s,%s,%ld,%ld,%ld,%.3f", + job, step, node, series, + taskData[ix].tod, taskData[ix].time, + taskData[ix].cpu_freq, + taskData[ix].cpu_time, taskData[ix].cpu_utilization); + fprintf(fOt,",%ld,%ld,%ld,%.3f,%.3f\n", taskData[ix].rss, + taskData[ix].vm_size, taskData[ix].pages, + taskData[ix].read_size, taskData[ix].write_size); + } + return; +} + +void task_extract_total(FILE* fOt, bool putHeader, int job, int step, + char* node, char* series, void* data, int sizeData) { + + profile_task_s_t* taskData = (profile_task_s_t*) data; + if (putHeader) { + fprintf(fOt,"Job,Step,Node,Series,Start_Time,Elapsed_time," + "Min CPU Frequency,Ave CPU Frequency," + "Max CPU Frequency,Total CPU Frequency," + "Min_CPU_Time,Ave_CPU_Time," + "Max_CPU_Time,Total_CPU_Time," + "Min_CPU_Utilization,Ave_CPU_Utilization," + "Max_CPU_Utilization,Total_CPU_Utilization," + "Min_RSS,Ave_RSS,Max_RSS,Total_RSS," + "Min_VMSize,Ave_VMSize,Max_VMSize,Total_VMSize," + "Min_Pages,Ave_Pages,Max_Pages,Total_Pages," + "Min_Read_Megabytes,Ave_Read_Megabytes," + "Max_Read_Megabytes,Total_Read_Megabytes," + "Min_Write_Megabytes,Ave_Write_Megabytes," + "Max_Write_Megabytes,Total_Write_Megabytes\n"); + 
} + fprintf(fOt,"%d,%d,%s,%s,%s,%ld",job,step,node,series, + taskData->start_time, taskData->elapsed_time); + PutUintSum(fOt, taskData->cpu_freq,",") + PutUintSum(fOt, taskData->cpu_time,",") + PutDblSum(fOt, taskData->cpu_utilization,",") + PutUintSum(fOt, taskData->rss,",") + PutUintSum(fOt, taskData->vm_size,",") + PutUintSum(fOt, taskData->pages,",") + PutDblSum(fOt, taskData->read_size,",") + PutDblSum(fOt, taskData->write_size,",") + fprintf(fOt,"\n"); + return; +} + +profile_hdf5_ops_t* task_profile_factory() { + profile_hdf5_ops_t* ops = xmalloc(sizeof(profile_hdf5_ops_t)); + ops->dataset_size = &task_dataset_size; + ops->create_memory_datatype = &task_create_memory_datatype; + ops->create_file_datatype = &task_create_file_datatype; + ops->create_s_memory_datatype = &task_s_create_memory_datatype; + ops->create_s_file_datatype = &task_s_create_file_datatype; + ops->init_job_series = &task_init_job_series; + ops->merge_step_series = &task_merge_step_series; + ops->series_total = &task_series_total; + ops->extract_series = &task_extract_series; + ops->extract_total = &task_extract_total; + return ops; +} diff --git a/src/plugins/acct_gather_profile/hdf5/hdf5_api.h b/src/plugins/acct_gather_profile/hdf5/hdf5_api.h new file mode 100644 index 00000000000..4bcfea548d1 --- /dev/null +++ b/src/plugins/acct_gather_profile/hdf5/hdf5_api.h @@ -0,0 +1,938 @@ +/****************************************************************************\ + * profile_hdf5.h + ***************************************************************************** + * Copyright (C) 2013 Bull S. A. S. + * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. + * + * Written by Rod Schultz <rod.schultz@bull.com> + * + * Provide support for acct_gather_profile plugins based on HDF5 files. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. 
+ * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+\****************************************************************************/ +#ifndef __ACCT_GATHER_PROFILE_HDF5_H__ +#define __ACCT_GATHER_PROFILE_HDF5_H__ + +#if HAVE_CONFIG_H +# include "config.h" +# if HAVE_INTTYPES_H +# include <inttypes.h> +# else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +# endif /* HAVE_INTTYPES_H */ +#else /* !HAVE_CONFIG_H */ +# include <inttypes.h> +#endif /* HAVE_CONFIG_H */ + +#include <stdlib.h> + +#ifdef HAVE_HDF5 +#include <hdf5.h> +#else +// Needed in some function signatures +typedef int hid_t; +#endif +#include "src/common/slurm_acct_gather_profile.h" + +#define MAX_PROFILE_PATH 1024 +#define MAX_ATTR_NAME 64 +#define MAX_GROUP_NAME 64 +#define MAX_DATASET_NAME 64 + +#define ATTR_NODENAME "Node Name" +#define ATTR_STARTTIME "Start Time" +#define ATTR_NSTEPS "Number of Steps" +#define ATTR_NNODES "Number of Nodes" +#define ATTR_NTASKS "Number of Tasks" +#define ATTR_TASKID "Task Id" +#define ATTR_CPUPERTASK "CPUs per Task" +#define ATTR_DATATYPE "Data Type" +#define ATTR_SUBDATATYPE "Subdata Type" +#define ATTR_STARTTIME "Start Time" +#define ATTR_STARTSEC "Start Second" +#define SUBDATA_DATA "Data" +#define SUBDATA_NODE "Node" +#define SUBDATA_SAMPLE "Sample" +#define SUBDATA_SERIES "Series" +#define SUBDATA_TOTAL "Total" +#define SUBDATA_SUMMARY "Summary" + +#define GRP_STEP "Step" +#define GRP_NODES "Nodes" +#define GRP_NODE "Node" +#define GRP_SAMPLES "Time Series" +#define GRP_SAMPLE "Sample" +#define GRP_TASKS "Tasks" +#define GRP_TOTALS "Totals" + +// Data types supported by all HDF5 plugins of this type + +#define TOD_LEN 24 +#define TOD_FMT "%F %T" + +/* + * prof_uint_sum is a low level structure intended to hold the + * minimum, average, maximum, and total values of a data item. + * It is usually used in a summary data structure for an item + * that occurs in a time series. 
+ */ +typedef struct prof_uint_sum { + uint64_t min; // Minumum value + uint64_t ave; // Average value + uint64_t max; // Maximum value + uint64_t total; // Accumlated value +} prof_uint_sum_t; + +// Save as prof_uint_sum, but for double precision items +typedef struct prof_dbl_sum { + double min; // Minumum value + double ave; // Average value + double max; // Maximum value + double total; // Accumlated value +} prof_dbl_sum_t; + +#define PROFILE_ENERGY_DATA "Energy" +// energy data structures +// node_step file +typedef struct profile_energy { + char tod[TOD_LEN]; // Not used in node-step + time_t time; + uint64_t power; + uint64_t cpu_freq; +} profile_energy_t; +// summary data in job-node-totals +typedef struct profile_energy_s { + char start_time[TOD_LEN]; + uint64_t elapsed_time; + prof_uint_sum_t power; + prof_uint_sum_t cpu_freq; +} profile_energy_s_t; // series summary + +#define PROFILE_IO_DATA "I/O" +// io data structure +// node_step file +typedef struct profile_io { + char tod[TOD_LEN]; // Not used in node-step + time_t time; + uint64_t reads; + double read_size; // currently in megabytes + uint64_t writes; + double write_size; // currently in megabytes +} profile_io_t; +// summary data in job-node-totals +typedef struct profile_io_s { + char start_time[TOD_LEN]; + uint64_t elapsed_time; + prof_uint_sum_t reads; + prof_dbl_sum_t read_size; // currently in megabytes + prof_uint_sum_t writes; + prof_dbl_sum_t write_size; // currently in megabytes +} profile_io_s_t; + +#define PROFILE_NETWORK_DATA "Network" +// Network data structure +// node_step file +typedef struct profile_network { + char tod[TOD_LEN]; // Not used in node-step + time_t time; + uint64_t packets_in; + double size_in; // currently in megabytes + uint64_t packets_out; + double size_out; // currently in megabytes +} profile_network_t; +// summary data in job-node-totals +typedef struct profile_network_s { + char start_time[TOD_LEN]; + uint64_t elapsed_time; + prof_uint_sum_t packets_in; + 
prof_dbl_sum_t size_in; // currently in megabytes + prof_uint_sum_t packets_out; + prof_dbl_sum_t size_out; // currently in megabytes +} profile_network_s_t; + +#define PROFILE_TASK_DATA "Task" +// task data structure +// node_step file +typedef struct profile_task { + char tod[TOD_LEN]; // Not used in node-step + time_t time; + uint64_t cpu_freq; + uint64_t cpu_time; + double cpu_utilization; + uint64_t rss; + uint64_t vm_size; + uint64_t pages; + double read_size; // currently in megabytes + double write_size; // currently in megabytes +} profile_task_t; +// summary data in job-node-totals +typedef struct profile_task_s { + char start_time[TOD_LEN]; + uint64_t elapsed_time; + prof_uint_sum_t cpu_freq; + prof_uint_sum_t cpu_time; + prof_dbl_sum_t cpu_utilization; + prof_uint_sum_t rss; + prof_uint_sum_t vm_size; + prof_uint_sum_t pages; + prof_dbl_sum_t read_size; // currently in megabytes + prof_dbl_sum_t write_size; // currently in megabytes +} profile_task_s_t; + +// EnergyData structure +#define GRP_ENERGY "Energy" + +// Luster structure +#define GRP_LUSTRE "Lustre" + +// NetIO structure +#define GRP_NETWORK "Network" + +// Disk I/O and Memory per task +#define GRP_TASK "Task" + +extern int numAvailSeries; +extern char* availSeries[]; + +/* + * Structure of function pointers of common operations on a profile data type. + * dataset_size -- size of one dataset (structure size) + * create_memory_datatype -- creates hdf5 memory datatype corresponding + * to the datatype structure. + * create_file_datatype -- creates hdf5 file datatype corresponding + * to the datatype structure. + * create_s_memory_datatype -- creates hdf5 memory datatype corresponding + * to the summary datatype structure. + * create_s_file_datatype -- creates hdf5 file datatype corresponding + * to the summary datatype structure. 
+ * init_job_series -- allocates a buffer for a complete time series + * (in job merge) and initializes each member + * merge_step_series -- merges all the individual time samples into a + * single data set with one item per sample. + * Data items can be scaled (e.g. subtracting beginning time) + * differenced (to show counts in interval) or other things + * appropriate for the series. + * series_total -- accumulate or average members in the entire series to + * be added to the file as totals for the node or task. + * extract_series -- format members of a structure for putting to + * to a file data extracted from a time series to be imported into + * another analysis tool. (e.g. format as comma separated value.) + * extract_totals -- format members of a structure for putting to + * to a file data extracted from a time series total to be + * imported into another analysis tool. + * (format as comma,separated value, for example.) + */ +typedef struct profile_hdf5_ops { + int (*dataset_size) (); + hid_t (*create_memory_datatype) (); + hid_t (*create_file_datatype) (); + hid_t (*create_s_memory_datatype) (); + hid_t (*create_s_file_datatype) (); + void* (*init_job_series) (int); + void (*merge_step_series) (hid_t, void*, void*, void*); + void* (*series_total) (int, void*); + void (*extract_series) (FILE*, bool, int, int, char*, char*, void*, + int); + void (*extract_total) (FILE*, bool, int, int, char*, char*, void*, + int); +} profile_hdf5_ops_t; + +/* ============================================================================ + * Common support functions + ==========================================================================*/ + +/* + * Create a opts group from type + */ +profile_hdf5_ops_t* profile_factory(char* type); + +/* + * Initialize profile (initialize static memory) + */ +void ProfileInit(); + +/* + * Finish profile (free objects in static memory) + */ +void ProfileFinish(); + + +/* + * Checks if series should be collected + * + * Parameters + * series 
- name of series to validate + * seriesList - lists of series to be collected (all & none also checked) + * numSeries - number of items in seriesList + */ +bool DoSeries(char* series, char** seriesList, int numSeries); + +/* + * Validate list contains items in availSeries (or 'all' or 'none') + * + * Parameters + * listStr - list in string from + */ +void ValidSeriesList(char* listStr); + +/* + * Parse a list of strings. + * + * Parameter + * list - comma separated list of data series + * listLen - (out) length of list + * + * Returns + * Validated Array of series. Caller must free with delete_string_list + */ +char** GetStringList(char* list, int* listLen); + +/* + * delete list of strings + * + * Parameters + * list - xmalloc'd list of pointers of xmalloc'd strings. + * listlen - number of strings in the list + */ +void delete_string_list(char** list, int listLen); + +/* + * Make a dataset name + * + * Parameters + * type - series name + * + * Returns + * common data set name based on type in static memory + */ +char* DataSetName(char* type); + +/* + * Make path to a job profile hdf5 file. + * + * Parameters + * rootDir - path to directory on shared file system into which profile + * data is written. Typically a parameter specified in slurm.conf + * jobid - id of the job + * + * Returns - fully qualified path name + * (in static memory as the merge is a standalone program) + */ +char* make_job_profile_path(char* rootDir, int jobid); + +/* + * Make path to a node-step profile hdf5 file. + * + * Parameters + * rootDir - path to directory on shared file system into which profile + * data is written. 
Typically a parameter specified in + * slurm.conf + * nodename - name of the node + * jobid - id of the job + * stepid - id of the step + * + * Returns - fully qualified path name + * (in static memory as each step on each node has its own stepd) + */ +char* make_node_step_profile_path(char* rootDir, char* nodename, + int jobid, int stepid); + +/* + * print info on an object for debugging + * + * Parameters + * group - handle to group. + * namGroup - name of the group + */ +void hdf5_obj_info(hid_t group, char* namGroup); + +/* + * get attribute handle by name. + * + * Parameters + * parent - handle to parent group. + * name - name of the attribute + * + * Returns - handle for attribute (or -1 when not found), caller must close + */ +hid_t get_attribute_handle(hid_t parent, char* name); + +/* + * get group by name. + * + * Parameters + * parent - handle to parent group. + * name - name of the group + * + * Returns - handle for group (or -1 when not found), caller must close + */ +hid_t get_group(hid_t parent, char* name); + +/* + * make group by name. + * + * Parameters + * parent - handle to parent group. + * name - name of the group + * + * Returns - handle for group (or -1 on error), caller must close + */ +hid_t make_group(hid_t parent, char* name); + +/* + * Put string attribute + * + * Parameters + * parent - handle to parent group. + * name - name of the attribute + * value - value of the attribute + */ +void put_string_attribute(hid_t parent, char* name, char* value); + +/* + * get string attribute + * + * Parameters + * parent - handle to parent group. + * name - name of the attribute + * + * Return: pointer to value. Caller responsibility to free!!! + */ +char* get_string_attribute(hid_t parent, char* name); + +/* + * Put integer attribute + * + * Parameters + * parent - handle to parent group. 
+ * name - name of the attribute + * value - value of the attribute + */ +void put_int_attribute(hid_t parent, char* name, int value); + +/* + * get int attribute + * + * Parameters + * parent - handle to parent group. + * name - name of the attribute + * + * Return: value + */ +int get_int_attribute(hid_t parent, char* name); + +/* + * Get data from a group of a HDF5 file + * + * Parameters + * parent - handle to parent. + * type - type of data (PROFILE_*_DATA in slurm_acct_gather_profile.h) + * namGroup - name of group + * sizeData - pointer to variable into which to put size of dataset + * + * Returns -- data set of type (or null), caller must free. + */ +void* get_hdf5_data(hid_t parent, char* type, char* namGroup, int* sizeData); + +/* + * Put one data sample into a new group in an HDF5 file + * + * Parameters + * parent - handle to parent group. + * type - type of data (PROFILE_*_DATA from slurm_acct_gather_profile.h) + * subtype - generally source (node, series, ...) or summary + * group - name of new group + * data - data for the sample + * nItems - number of items of type in the data + */ +void put_hdf5_data(hid_t parent, char* type, char* subtype, char* group, + void* data, int nItems); + +// ============================================================================ +// Routines supporting Energy Data type +// ============================================================================ + +/* + * returns size of one dataset + */ +int energy_dataset_size(); + +/* + * Create the energy memory datatype. + * + * Returns - memory datatype for energy (caller must close) + * + */ +hid_t energy_create_memory_datatype(); + +/* + * Create the summary energy memory datatype. + * + * Returns - memory datatype for energy summary (caller must close) + * + */ +hid_t energy_s_create_memory_datatype(); + +/* + * Create the energy file datatype. 
+ * + * Returns - file datatype for energy (caller must close) + * + */ +hid_t energy_create_file_datatype(); + +/* + * Create the summary energy file datatype. + * + * Returns - file datatype for energy summary (caller must close) + * + */ +hid_t energy_s_create_file_datatype(); + +/* + * Initialize data array for all energy samples + * + * Parameters + * nSamples - number of samples in data series + * + * Returns + * initialized data set; Caller must free + */ +void* energy_init_job_series(int nSamples); + +/* + * Add current sample to collection of all samples. + * Opportunity to do difference calculations + * + * Parameters + * group - group containing dataset + * prior - previous sample + * cur - current sample + * buf - loc in collection for current sample + * + */ +void energy_merge_step_series(hid_t group, void* prior, void* cur, void* buf); + +/* + * create total for series + * + * Parameters + * nSamples - number of samples in the series + * data - data for series + * + * Returns + * series total. 
caller must free + * + */ +void* energy_series_total(int nSamples, void* data); + +/* + * Write energy time series data to a csv file + * + * Parameters + * fOt - file descriptor for output file + * putHeader - first data item, put the header line + * job - jobid + * step - stepid + * node - node name + * series - series name + * data - data of type + * sizeData - size of data set + */ +void energy_extract_series(FILE* fOt, bool putHeader, int job, int step, + char* node, char* series, void* data, int sizeData); + +/* + * Write energy totals data to a csv file + * + * Parameters + * fOt - file descriptor for output file + * putHeader - first data item, put the header line + * job - jobid + * step - stepid + * node - node name + * series - series name + * data - data of type + * sizeData - size of data set + */ +void energy_extract_totals(FILE* fOt, bool putHeader, int job, int step, + char* node, char* series, void* data, int sizeData); + +/* + * Create array of function pointers for common HDF5 operations + */ +profile_hdf5_ops_t* energy_profile_factory(); + +// ============================================================================ +// Routines supporting I/O Data type +// ============================================================================ + +/* + * returns size of one dataset + */ +int io_dataset_size(); + +/* + * Create the IO memory datatype. + * + * Returns - memory datatype for IO (caller must close) + * + */ +hid_t io_create_memory_datatype(void); + +/* + * Create the IO Summary memory datatype. + * + * Returns - memory datatype for IO summary (caller must close) + * + */ +hid_t io_s_create_memory_datatype(void); + +/* + * Create the IO file datatype. + * + * Returns - file datatype for IO (caller must close) + * + */ +hid_t io_create_file_datatype(void); + +/* + * Create the IO Summary file datatype. 
+ * + * Returns - file datatype for IO Summary (caller must close) + * + */ +hid_t io_s_create_file_datatype(void); + +/* + * Initialize data array for all io samples + * + * Parameters + * nSamples - number of samples in data series + * + * Returns + * initialized data set; Caller must free + */ +void* io_init_job_series(int nSamples); + +/* + * Add current sample to collection of all samples. + * Opportunity to do difference calculations + * + * Parameters + * group - group containing dataset + * prior - previous sample + * cur - current sample + * buf - loc in collection for current sample + */ +void io_merge_step_series(hid_t group, void* prior, void* cur, void* buf); + +/* + * create total for series + * + * Parameters + * nSamples - number of samples in the series + * data - data for series + * + * Returns + * series total. caller must free + * + */ +void* io_series_total(int nSamples, void* data); + +/* + * Write IO series data to a csv file + * + * Parameters + * fOt - file descriptor for output file + * putHeader - first data item, put the header line + * job - jobid + * step - stepid + * node - node name + * series - series name + * data - data of type + * sizeData - size of data set + */ +void io_extract_series(FILE* fOt, bool putHeader, int job, int step, + char* node, char* series, void* data, int sizeData); + +/* + * Write IO Totals data to a csv file + * + * Parameters + * fOt - file descriptor for output file + * putHeader - first data item, put the header line + * job - jobid + * step - stepid + * node - node name + * series - series name + * data - data of type + * sizeData - size of data set + */ +void io_extract_total(FILE* fOt, bool putHeader, int job, int step, + char* node, char* series, void* data, int sizeData); + +/* + * Create array of function pointers for common HDF5 operations + */ +profile_hdf5_ops_t* io_profile_factory(); + +// ============================================================================ +// Routines supporting 
Network Data type +// ============================================================================ + +/* + * returns size of one dataset + */ +int network_dataset_size(); + +/* + * Create the Network memory datatype. + * + * Returns - memory datatype for Network (caller must close) + * + */ +hid_t network_create_memory_datatype(void); + +/* + * Create the Network Summary memory datatype. + * + * Returns - memory datatype for Network summary (caller must close) + * + */ +hid_t network_s_create_memory_datatype(void); + +/* + * Create the Network file datatype. + * + * Returns - file datatype for Network (caller must close) + * + */ +hid_t network_create_file_datatype(void); + +/* + * Create the Network summary file datatype. + * + * Returns - file datatype for Network summary (caller must close) + * + */ +hid_t network_s_create_file_datatype(void); + +/* + * Initialize data array for all Network samples + * + * Parameters + * nSamples - number of samples in data series + * + * Returns + * initialized data set; Caller must free + */ +void* network_init_job_series(int nSamples); + +/* + * Add current sample to collection of all samples. + * Opportunity to do difference calculations + * + * Parameters + * group - group containing dataset + * prior - previous sample + * cur - current sample + * buf - loc in collection for current sample + * + */ +void network_merge_step_series(hid_t group, void* prior, void* cur, void* buf); + +/* + * create total for series + * + * Parameters + * nSamples - number of samples in the series + * data - data for series + * + * Returns + * series total. 
caller must free + * + */ +void* network_series_totals(int nSamples, void* data); + +/* + * Write Network series data to a csv file + * + * Parameters + * fOt - file descriptor for output file + * putHeader - first data item, put the header line + * job - jobid + * step - stepid + * node - node name + * series - series name + * data - data of type + * sizeData - size of data set + */ +void network_extract_series(FILE* fOt, bool putHeader, int job, int step, + char* node, char* series, void* data, int sizeData); + +/* + * Write Network totals data to a csv file + * + * Parameters + * fOt - file descriptor for output file + * putHeader - first data item, put the header line + * job - jobid + * step - stepid + * node - node name + * series - series name + * data - data of type + * sizeData - size of data set + */ +void network_extract_total(FILE* fOt, bool putHeader, int job, int step, + char* node, char* series, void* data, int sizeData); + +/* + * Create array of function pointers for common HDF5 operations + */ +profile_hdf5_ops_t* network_profile_factory(); + + +// ============================================================================ +// Routines supporting Task Data type +// ============================================================================ + +/* + * returns size of one dataset + */ +int task_dataset_size(); + +/* + * Create the task memory datatype. + * + * Returns - memory datatype for task (caller must close) + * + */ +hid_t task_create_memory_datatype(); + +/* + * Create the task summary memory datatype. + * + * Returns - memory datatype for task summary (caller must close) + * + */ +hid_t task_s_create_memory_datatype(); + +/* + * Create the task file datatype. + * + * Returns - file datatype for task (caller must close) + * + */ +hid_t task_create_file_datatype(); + +/* + * Create the task summary file datatype. 
+ * + * Returns - file datatype for task summary (caller must close) + * + */ +hid_t task_s_create_file_datatype(); + +/* + * Initialize data array for all task samples + * + * Parameters + * nSamples - number of samples in data series + * + * Returns + * initialized data set; Caller must free + */ +void* task_init_job_series(int nSamples); + +/* + * Add current sample to collection of all samples. + * Opportunity to do difference calculations + * + * Parameters + * group - group containing dataset + * prior - previous sample + * cur - current sample + * buf - loc in collection for current sample + */ +void task_merge_step_series(hid_t group, void* prior, void* cur, void* buf); + +/* + * create total for series + * + * Parameters + * nSamples - number of samples in the series + * data - data for series + * + * Returns + * series total. caller must free + */ +void* task_series_total(int nSamples, void* data); + +/* + * Write task series data to a csv file + * + * Parameters + * fOt - file descriptor for output file + * putHeader - first data item, put the header line + * job - jobid + * step - stepid + * node - node name + * series - series name + * data - data of type + * sizeData - size of data set + */ +void task_extract_series(FILE* fOt, bool putHeader, int job, int step, + char* node, char* series, void* data, int sizeData); + +/* + * Write task totals data to a csv file + * + * Parameters + * fOt - file descriptor for output file + * putHeader - first data item, put the header line + * job - jobid + * step - stepid + * node - node name + * series - series name + * data - data of type + * sizeData - size of data set + */ +void task_extract_total(FILE* fOt, bool putHeader, int job, int step, + char* node, char* series, void* data, int sizeData); + +/* + * Create array of function pointers for common HDF5 operations + */ +profile_hdf5_ops_t* task_profile_factory(); + +#endif /*__ACCT_GATHER_PROFILE_H__*/ diff --git 
a/src/plugins/acct_gather_profile/hdf5/sh5util/Makefile.am b/src/plugins/acct_gather_profile/hdf5/sh5util/Makefile.am new file mode 100644 index 00000000000..c48dab48a7a --- /dev/null +++ b/src/plugins/acct_gather_profile/hdf5/sh5util/Makefile.am @@ -0,0 +1,20 @@ +# +# Makefile for sprfmrgh5 + +AUTOMAKE_OPTIONS = foreign + +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/plugins/acct_gather_profile/common +bin_PROGRAMS = sprfmrgh5 + +sprfmrgh5_LDADD = $(top_builddir)/src/api/libslurm.o $(DL_LIBS) + +sprfmrgh5_SOURCES = sprfmrgh5.c sprfmrg5.h \ + ../common/profile_hdf5.c ../common/profile_hdf5.h + +force: +$(sprfmrgh5_LDADD) : force + @cd `dirname $@` && $(MAKE) `basename $@` + +sprfmrgh5_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) \ + $(HWLOC_LDFLAGS) $(HWLOC_LIBS) $(HDF5_LDFLAGS) $(HDF5_LIBS) + diff --git a/src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.c b/src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.c new file mode 100644 index 00000000000..be951408828 --- /dev/null +++ b/src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.c @@ -0,0 +1,1029 @@ +/*****************************************************************************\ + * sprfmrgh5.c - slurm profile accounting plugin for io and energy using hdf5. + * - Utility to merge node-step files into a job file + * - or extract data from an job file + ***************************************************************************** + * Copyright (C) 2013 Bull S. A. S. + * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. + * + * Written by Rod Schultz <rod.schultz@bull.com> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + ***************************************************************************** + * + * This program is expected to be launched by the SLURM epilog script for a + * job on the controller node to merge node-step files into a job file. 
+ * +\*****************************************************************************/ + +#include <dirent.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> + +#include "src/common/xstring.h" +#include "src/plugins/acct_gather_profile/common/profile_hdf5.h" + +// ============================================================================ + +// Options +static int jobid = -1; // SLURM jobid +static int xstepid = -1; // SLURM step id to be extracted +static bool mergeMode = true; +static bool header = true; +static bool keepfiles = false; +static char levelName[MAX_GROUP_NAME+1]; +static char seriesName[MAX_GROUP_NAME+1]; +static char outputFile[MAX_PROFILE_PATH+1]; +static char** seriesNames = NULL; +static int numSeries = 0; +static char* xnode; +static char* slurmDataRoot = NULL; + +void usage() { + printf("\n\n\nsprfmrgh5 --jobid=n ...\n"); + printf(" Merges node-step HDF5 files for a SLURM job.\n"); + printf(" Required command line arguments are:\n"); + printf(" --jobid=n jobid of SLURM job\n"); + printf(" Optional command line arguments are:\n"); + printf(" --profiledir=path path to directory holding" + " profile files\n"); + printf(" --savefiles save node/step files\n"); + printf(" --extract extract data series from job file\n"); + printf(" default mode is merge node-step files\n"); + printf(" Extract mode options (all imply --extract)\n"); + printf(" --stepid={n|*) id step to extract (*=all,default)\n"); + printf(" --node={name|*} Node name to extract (*=all,default)\n"); + printf(" --level=[Node:Totals|Node:TimeSeries\n"); + printf(" Level to which series is attached\n"); + printf(" --series=[name|Tasks|*] Name of series\n"); + printf(" name=Specific name, Tasks=all tasks, (*=all)\n"); + printf(" --output=path " + "path to a file into which to write the extract\n"); + printf(" --help prints this message\n"); + printf(" Note all option values are case sensitive\n\n\n"); +} + +void 
opts(int argc, char **argv) { + int errors = 0; + int iax; + char *posValue,*posDash; + // Establish some defaults + strcpy(outputFile,"profile_data.csv"); + xnode = xstrdup("*"); + for (iax=1; iax<argc; iax++) { + if (strncasecmp(argv[iax],"--help=",6)==0) { + usage(); + exit(0); + } else if (strncasecmp(argv[iax],"--jobid=",8)==0) { + posValue = &argv[iax][8]; + jobid = (int) strtol(posValue,NULL,10); + if (jobid < 1) { + printf("Jobid (%d) must be positive\n",jobid); + errors++; + } + } else if (strncasecmp(argv[iax],"--profiledir=",13)==0) { + xfree(slurmDataRoot); + posValue = &argv[iax][13]; + slurmDataRoot = xstrdup(posValue); + } else if (strncasecmp(argv[iax],"--savefiles",11)==0) { + keepfiles = true; + } else if (strncasecmp(argv[iax],"--stepid=",9)==0) { + posValue = &argv[iax][9]; + if (strcmp(posValue,"*") != 0) { + xstepid = (int) strtol(posValue, NULL, 10); + if (xstepid < 0) { + printf("stepid (%d) must be > 0\n", + xstepid); + errors++; + } + } + mergeMode = false; + } else if (strncasecmp(argv[iax],"--extract",9)==0) { + mergeMode = false; + } else if (strncasecmp(argv[iax],"--level=",8)==0) { + posValue = &argv[iax][8]; + if (strlen(posValue) > MAX_GROUP_NAME) { + printf("--level is too long\n"); + errors++; + } else { + strcpy(levelName,posValue); + mergeMode = false; + } + } else if (strncasecmp(argv[iax],"--node=",7)==0) { + xfree(xnode); + posValue = &argv[iax][7]; + xnode = xstrdup(posValue); + mergeMode = false; + } else if (strncasecmp(argv[iax],"--output=",9)==0) { + posValue = &argv[iax][9]; + if (strlen(posValue) > MAX_PROFILE_PATH) { + printf("--output is too long\n"); + errors++; + } else { + strcpy(outputFile,posValue); + mergeMode = false; + } + } else if (strncasecmp(argv[iax],"--series=",9)==0) { + posValue = &argv[iax][9]; + if (strlen(posValue) > MAX_GROUP_NAME) { + printf("--series is too long\n"); + errors++; + } else { + strcpy(seriesName,posValue); + if (strstr(seriesName,GRP_TASK)) { + posDash = strchr(seriesName,'-'); + 
// Task name have '~' not '-', + // user may not recognize this + if (posDash > 0) { + posDash[0] = '~'; + } + } + } + mergeMode = false; + } else { + printf("%s is an unknown option",argv[iax]); + errors++; + } + } + if (errors) { + printf("Too many errors\n\n"); + usage(); + exit(1); + } +} + +/* ============================================================================ + * ============================================================================ + * Functions for merging samples from node step files into a job file + * ============================================================================ + * ========================================================================= */ + +void* get_all_samples(hid_t gidSeries, char* namSeries, char* type, + int nSamples) { + void* data = NULL; +#ifdef HAVE_HDF5 + hid_t idDataSet, dtypMemory, gSample, szDset; + herr_t ec; + int smpx ,len; + void *dataPrior = NULL, *dataCur = NULL; + char namSample[MAX_GROUP_NAME+1]; + profile_hdf5_ops_t* ops; + + ops = profile_factory(type); + if (ops == NULL) { + info("Failed to create operations for %s", type); + return NULL; + } + data = (*(ops->init_job_series))(nSamples); + if (data == NULL) { + xfree(ops); + info("Failed to get memory for combined data"); + return NULL; + } + dtypMemory = (*(ops->create_memory_datatype))(); + if (dtypMemory < 0) { + xfree(ops); + xfree(data); + info("Failed to create %s memory datatype", type); + return NULL; + } + for (smpx=0; smpx<nSamples; smpx++) { + len = H5Gget_objname_by_idx(gidSeries, smpx, namSample, + MAX_GROUP_NAME); + if (len<1 || len>MAX_GROUP_NAME) { + info("Invalid group name %s", namSample); + continue; + } + gSample = H5Gopen(gidSeries, namSample, H5P_DEFAULT); + if (gSample < 0) { + info("Failed to open %s", namSample); + } + idDataSet = H5Dopen(gSample, DataSetName(namSample), + H5P_DEFAULT); + if (idDataSet < 0) { + H5Gclose(gSample); + info("Failed to open %s dataset", type); + continue; + } + szDset = 
(*(ops->dataset_size))(); + dataCur = xmalloc(szDset); + if (dataCur == NULL) { + H5Dclose(idDataSet); + H5Gclose(gSample); + info("Failed to get memory for prior data"); + continue; + } + ec = H5Dread(idDataSet, dtypMemory, H5S_ALL, H5S_ALL, + H5P_DEFAULT, dataCur); + if (ec < 0) { + xfree(dataCur); + H5Dclose(idDataSet); + H5Gclose(gSample); + info("Failed to read %s data", type); + continue; + } + (*(ops->merge_step_series))(gSample, dataPrior, dataCur, + data+(smpx)*szDset); + + xfree(dataPrior); + dataPrior = dataCur; + H5Dclose(idDataSet); + H5Gclose(gSample); + } + xfree(dataCur); + H5Tclose(dtypMemory); + xfree(ops); +#endif + return data; +} + +void merge_series_data(hid_t jgidTasks, + hid_t jgNode, hid_t nsgNode) +{ +#ifdef HAVE_HDF5 + + hid_t jgSamples, nsgSamples; + hid_t gSeries, objType, gSeriesTotal = -1; + hsize_t numSamples, nSeries; + int idsx, len; + void *data = NULL, *seriesTotal = NULL; + char *dataType = NULL; + char namSeries[MAX_GROUP_NAME+1]; + char namSample1[MAX_GROUP_NAME+1]; + profile_hdf5_ops_t* ops = NULL; + + if (jgNode < 0) { + info("Job Node is not HDF5 object"); + return; + } + if (nsgNode < 0) { + info("Node-Step is not HDF5 object"); + return; + } + + jgSamples = H5Gcreate(jgNode, GRP_SAMPLES, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (jgSamples < 0) { + info("Failed to create job node Samples"); + return; + } + nsgSamples = get_group(nsgNode, GRP_SAMPLES); + if (nsgSamples < 0) { + H5Gclose(jgSamples); +#ifdef PROFILE_HDF5_DEBUG + info("Failed to get node-step Samples"); +#endif + return; + } + H5Gget_num_objs(nsgSamples, &nSeries); + if (nSeries < 1) { + // No series? 
+ H5Gclose(jgSamples); + H5Gclose(nsgSamples); + info("No Samples"); + return; + } + for (idsx = 0; idsx < nSeries; idsx++) { + objType = H5Gget_objtype_by_idx(nsgSamples, idsx); + if (objType != H5G_GROUP) + continue; + len = H5Gget_objname_by_idx(nsgSamples, idsx, namSeries, + MAX_GROUP_NAME); + if (len<1 || len>MAX_GROUP_NAME) { + info("Invalid group name %s", namSeries); + continue; + } + gSeries = H5Gopen(nsgSamples, namSeries, H5P_DEFAULT); + if (gSeries < 0) { + info("Failed to open %s", namSeries); + continue; + } + H5Gget_num_objs(gSeries, &numSamples); + if (numSamples <= 0) { + H5Gclose(gSeries); + info("Series %s has no samples", namSeries); + continue; + } + // Get first sample in series to find out how big the data is. + dataType = get_string_attribute(gSeries, ATTR_DATATYPE); + if (dataType == NULL) { + H5Gclose(gSeries); + info("Failed to get datatype for Time Series Dataset"); + continue; + } + data = get_all_samples(gSeries, namSeries, dataType, + numSamples); + if (data == NULL) { + xfree(dataType); + H5Gclose(gSeries); + info("Failed to get memory for Time Series Dataset"); + continue; + } + put_hdf5_data(jgSamples, dataType, SUBDATA_SERIES, namSeries, + data, numSamples); + ops = profile_factory(dataType); + if (ops == NULL) { + xfree(data); + xfree(dataType); + H5Gclose(gSeries); + info("Failed to create operations for %s", dataType); + continue; + } + seriesTotal = (*(ops->series_total))(numSamples, data); + if (seriesTotal != NULL) { + // Totals for series attaches to node + gSeriesTotal = make_group(jgNode, GRP_TOTALS); + if (gSeriesTotal < 0) { + H5Gclose(gSeries); + xfree(seriesTotal); + xfree(data); + xfree(dataType); + xfree(ops); + info("Failed to make Totals for Node"); + continue; + } + put_hdf5_data(gSeriesTotal, dataType, SUBDATA_SUMMARY, + namSeries, seriesTotal, 1); + H5Gclose(gSeriesTotal); + } + xfree(seriesTotal); + xfree(ops); + xfree(data); + xfree(dataType); + H5Gclose(gSeries); + } +#endif + return; +} + +/* 
============================================================================ + * Functions for merging tasks data into a job file + ==========================================================================*/ + +void merge_task_totals(hid_t jgTasks, hid_t nsgNode, char* nodeName) { +#ifdef HAVE_HDF5 + + hid_t jgTask, jgTotals, nsgTotals, gTotal, nsgTasks, nsgTask = -1; + hsize_t nobj, ntasks = -1; + int i, len, taskx, taskid, taskcpus, sizeData; + void *data; + char *type; + char buf[MAX_GROUP_NAME+1]; + char groupName[MAX_GROUP_NAME+1]; + + if (jgTasks < 0) { + info("Job Tasks is not HDF5 object"); + return; + } + if (nsgNode < 0) { + info("Node-Step is not HDF5 object"); + return; + } + + nsgTasks = get_group(nsgNode, GRP_TASKS); + if (nsgTasks < 0) { +#ifdef PROFILE_HDF5_DEBUG + info("No Tasks group in node-step file"); +#endif + return; + } + H5Gget_num_objs(nsgTasks, &ntasks); + for (taskx = 0; ((int)ntasks>0) && (taskx<((int)ntasks)); taskx++) { + // Get the name of the group. + len = H5Gget_objname_by_idx(nsgTasks, taskx, buf, + MAX_GROUP_NAME); + if (len<1 || len>MAX_GROUP_NAME) { + info("Invalid group name %s", buf); + continue; + } + nsgTask = H5Gopen(nsgTasks, buf, H5P_DEFAULT); + if (nsgTask < 0) { +#ifdef PROFILE_HDF5_DEBUG + info("Failed to open %s", buf); +#endif + continue; + } + taskid = get_int_attribute(nsgTask, ATTR_TASKID); + sprintf(groupName,"%s~%d", GRP_TASK, taskid); + jgTask = H5Gcreate(jgTasks, groupName, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (jgTask < 0) { + H5Gclose(nsgTask); + info("Failed to create job task group"); + continue; + } + put_string_attribute(jgTask, ATTR_NODENAME, nodeName); + put_int_attribute(jgTask, ATTR_TASKID, taskid); + taskcpus = get_int_attribute(nsgTask, ATTR_CPUPERTASK); + put_int_attribute(jgTask, ATTR_CPUPERTASK, taskcpus); + nsgTotals = get_group(nsgTask, GRP_TOTALS); + if (nsgTotals < 0) { + H5Gclose(jgTask); + H5Gclose(nsgTask); + continue; + } + jgTotals = H5Gcreate(jgTask, GRP_TOTALS, + 
H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (jgTotals < 0) { + H5Gclose(jgTask); + H5Gclose(nsgTask); + info("Failed to create job task totals"); + continue; + } + H5Gget_num_objs(nsgTotals, &nobj); + for (i = 0; (nobj>0) && (i<nobj); i++) { + // Get the name of the group. + len = H5Gget_objname_by_idx(nsgTotals, i, buf, + MAX_GROUP_NAME); + + if (len<1 || len>MAX_GROUP_NAME) { + info("Invalid group name %s", buf); + continue; + } + gTotal = H5Gopen(nsgTotals, buf, H5P_DEFAULT); + if (gTotal < 0) { + info("Failed to open %s", buf); + continue; + } + type = get_string_attribute(gTotal, ATTR_DATATYPE); + if (type == NULL) { + H5Gclose(gTotal); + info("No %s attribute", ATTR_DATATYPE); + continue; + } + data = get_hdf5_data(gTotal, type, buf, &sizeData); + if (data == NULL) { + xfree(type); + H5Gclose(gTotal); + info("Failed to get group %d type %d data", buf, + type); + continue; + } + put_hdf5_data(jgTotals,type,SUBDATA_DATA,buf,data,1); + xfree(data); + xfree(type); + H5Gclose(gTotal); + } + H5Gclose(nsgTotals); + H5Gclose(nsgTask); + H5Gclose(jgTotals); + H5Gclose(jgTask); + } + H5Gclose(nsgTasks); +#endif +} + +/* ============================================================================ + * Functions for merging node totals into a job file + ==========================================================================*/ + +void merge_node_totals(hid_t jgNode, hid_t nsgNode) { +#ifdef HAVE_HDF5 + + hid_t jgTotals, nsgTotals, gTotal; + hsize_t nobj; + int i, len, sizeData; + void *data; + char *type; + char buf[MAX_GROUP_NAME+1]; + + if (jgNode < 0) { + info("Job Node is not HDF5 object"); + return; + } + if (nsgNode < 0) { + info("Node-Step is not HDF5 object"); + return; + } + jgTotals = H5Gcreate(jgNode, GRP_TOTALS, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (jgTotals < 0) { + info("Failed to create job node totals"); + return; + } + nsgTotals = get_group(nsgNode, GRP_TOTALS); + if (nsgTotals < 0) { + H5Gclose(jgTotals); + return; + } + 
H5Gget_num_objs(nsgTotals, &nobj); + for (i = 0; (nobj>0) && (i<nobj); i++) { + // Get the name of the group. + len = H5Gget_objname_by_idx(nsgTotals, i, buf, + MAX_GROUP_NAME); + + if (len<1 || len>MAX_GROUP_NAME) { + info("invalid group name %s", buf); + continue; + } + gTotal = H5Gopen(nsgTotals, buf, H5P_DEFAULT); + if (gTotal < 0) { + info("Failed to open %s", buf); + continue; + } + type = get_string_attribute(gTotal, ATTR_DATATYPE); + if (type == NULL) { + H5Gclose(gTotal); + info("No %s attribute", ATTR_DATATYPE); + continue; + } + data = get_hdf5_data(gTotal, type, buf, &sizeData); + if (data == NULL) { + xfree(type); + H5Gclose(gTotal); + info("Failed to get group %d type %d data", buf, type); + continue; + + } + put_hdf5_data(jgTotals, type, SUBDATA_DATA, buf, data, 1); + xfree(data); + xfree(type); + H5Gclose(gTotal); + } + H5Gclose(nsgTotals); + H5Gclose(jgTotals); +#endif + return; +} + +/* ============================================================================ + * Functions for merging step data into a job file + ==========================================================================*/ + +void merge_node_step_data(hid_t fid_job, char* fileName, int nodeIndex, + char* nodeName, hid_t jgidNodes, hid_t jgidTasks) { +#ifdef HAVE_HDF5 + + hid_t fid_nodestep, jgidNode, nsgidRoot, nsgidNode; + char *startTime; + char groupName[MAX_GROUP_NAME+1]; + + jgidNode = H5Gcreate(jgidNodes, nodeName, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (jgidNode < 0) { + error("Failed to create group %s",nodeName); + return; + } + put_string_attribute(jgidNode, ATTR_NODENAME, nodeName); + // Process node step file + // Open the file and the node group. 
+ fid_nodestep = H5Fopen(fileName, H5F_ACC_RDONLY, H5P_DEFAULT); + if (fid_nodestep < 0) { + H5Gclose(jgidNode); + error("Failed to open %s",fileName); + return; + } + nsgidRoot = H5Gopen(fid_nodestep,"/", H5P_DEFAULT); + sprintf(groupName,"/%s~%s", GRP_NODE, nodeName); + nsgidNode = H5Gopen(nsgidRoot, groupName, H5P_DEFAULT); + if (nsgidNode < 0) { + H5Gclose(fid_nodestep); + H5Gclose(jgidNode); + error("Failed to open node group"); + return;; + } + startTime = get_string_attribute(nsgidNode,ATTR_STARTTIME); + if (startTime == NULL) { + info("No %s attribute", ATTR_STARTTIME); + } else { + put_string_attribute(jgidNode,ATTR_STARTTIME,startTime); + xfree(startTime); + } + merge_node_totals(jgidNode, nsgidNode); + merge_task_totals(jgidTasks, nsgidNode, nodeName); + merge_series_data(jgidTasks, jgidNode, nsgidNode); + H5Gclose(nsgidNode); + H5Fclose(fid_nodestep); + H5Gclose(jgidNode); + if (!keepfiles) + remove(fileName); + return; +#endif +} + +void merge_step_files() { +#ifdef HAVE_HDF5 + + hid_t fid_job = -1, jgidStep = -1, jgidNodes = -1, jgidTasks = -1; + DIR *dir; + struct dirent *de; + char jobprefix[MAX_PROFILE_PATH+1]; + char stepnode[MAX_PROFILE_PATH+1]; + char profileStepDir[MAX_PROFILE_PATH+1]; + char profileStepPath[MAX_PROFILE_PATH+1]; + char jgrpStepName[MAX_GROUP_NAME+1]; + char jgrpNodesName[MAX_GROUP_NAME+1]; + char jgrpTasksName[MAX_GROUP_NAME+1]; + char *possquiggle, *posdot,*profileJobFileName; + int stepx = 0, numSteps = 0, nodex = -1; + bool foundFiles = false; + + sprintf(profileStepDir,"%s/tmp",slurmDataRoot); + profileJobFileName = make_job_profile_path(slurmDataRoot, jobid); + while (nodex != 0) { + if ((dir = opendir(profileStepDir)) == NULL) { + error("opendir for job profile directory): %m"); + exit(1); + } + sprintf(jobprefix,"job~%d~%d~",jobid,stepx); + nodex = 0; + while ((de = readdir(dir)) != NULL) { + if (strncmp(jobprefix,de->d_name,strlen(jobprefix)) + == 0) { + // Found a node step file for this job + if (!foundFiles) { + // 
Need to create the job file + fid_job = H5Fcreate(profileJobFileName, + H5F_ACC_TRUNC, + H5P_DEFAULT, + H5P_DEFAULT); + if (fid_job < 0) { + fatal("Failed to %s %s", + "create HDF5 file:", + profileJobFileName); + } + foundFiles = true; + } + possquiggle = de->d_name+strlen(jobprefix); + strcpy(stepnode,possquiggle); + posdot = strchr(stepnode,'.'); + posdot[0] = '\0'; // remove extension + if (nodex == 0) { + numSteps++; + sprintf(jgrpStepName,"/%s~%d",GRP_STEP, + stepx); + jgidStep = make_group(fid_job, + jgrpStepName); + if (jgidStep < 0) { + error("Failed to create %s", + jgrpStepName); + continue; + } + sprintf(jgrpNodesName,"%s/%s", + jgrpStepName, + GRP_NODES); + jgidNodes = make_group(jgidStep, + jgrpNodesName); + if (jgidNodes < 0) { + error("Failed to create %s", + jgrpNodesName); + continue; + } + sprintf(jgrpTasksName,"%s/%s", + jgrpStepName, + GRP_TASKS); + jgidTasks = make_group(jgidStep, + jgrpTasksName); + if (jgidTasks < 0) { + error("Failed to create %s", + jgrpTasksName); + continue; + } + } + sprintf(profileStepPath,"%s/%s",profileStepDir, + de->d_name); +#ifdef PROFILE_HDF5_DEBUG + printf("Adding %s to the job file\n", + profileStepPath); +#endif + + merge_node_step_data(fid_job, profileStepPath, + nodex, stepnode, + jgidNodes, jgidTasks); + + nodex++; + } + } + closedir(dir); + if (nodex > 0) { + put_int_attribute(jgidStep, ATTR_NNODES, nodex); + H5Gclose(jgidTasks); + H5Gclose(jgidNodes); + H5Gclose(jgidStep); + } + stepx++; + } + put_int_attribute(fid_job, ATTR_NSTEPS, numSteps); + if (!foundFiles) + info("No node step files found for jobid=%d",jobid); + if (fid_job != -1) + H5Fclose(fid_job); +#endif +} + +/* ============================================================================ + * ============================================================================ + * Functions for data extraction + * ============================================================================ + * 
========================================================================= */ + +hid_t get_series_parent(hid_t group) { + hid_t gidLevel = -1; +#ifdef HAVE_HDF5 + if (strcasecmp(levelName,"Node:Totals") == 0) { + gidLevel = get_group(group, GRP_TOTALS); + if (gidLevel < 0) { + info("Failed to open group %s", GRP_TOTALS); + } + } else if (strcasecmp(levelName,"Node:TimeSeries") == 0) { + gidLevel = get_group(group, GRP_SAMPLES); + if (gidLevel < 0) { + info("Failed to open group %s", GRP_SAMPLES); + } + } else { + info("%s is an illegal level", levelName); + return -1; + + } +#endif + return gidLevel; +} + + +void get_series_names(hid_t group) { +#ifdef HAVE_HDF5 + int i, len; + hsize_t nobj; + char buf[MAX_GROUP_NAME+1]; + H5Gget_num_objs(group, &nobj); + numSeries = (int) nobj; + if (numSeries < 0) { +#ifdef PROFILE_HDF5_DEBUG + info("No Data Series in group"); + hdf5_obj_info(group, "???"); +#endif + return; + } + seriesNames = xmalloc(sizeof(char*)*numSeries); + for (i = 0; (numSeries>0) && (i<numSeries); i++) { + len = H5Gget_objname_by_idx(group, i, buf, MAX_GROUP_NAME); + if ((len < 0) || (len > MAX_GROUP_NAME)) { + info("Invalid series name=%s", buf); + // put into list anyway so list doesn't have a null. 
+ } + seriesNames[i] = xstrdup(buf); + } +#endif +} + + +void extract_node_level(FILE* fOt, int stepx, hid_t jgidNodes, int nnodes, + bool header, char* dataSetName) { +#ifdef HAVE_HDF5 + hid_t jgidNode, gidLevel, gidSeries; + int nodex, len, sizeData; + void *data; + char *dataType, *subtype; + char jgrpNodeName[MAX_GROUP_NAME+1]; + profile_hdf5_ops_t* ops; + + for (nodex=0;nodex<nnodes;nodex++) { + len = H5Gget_objname_by_idx(jgidNodes, nodex, + jgrpNodeName, MAX_GROUP_NAME); + if ((len < 0) || (len > MAX_GROUP_NAME)) { + info("Invalid node name=%s", jgrpNodeName); + continue; + } + jgidNode = get_group(jgidNodes, jgrpNodeName); + if (jgidNode < 0) { + info("Failed to open group %s", jgrpNodeName); + continue; + } + if (strcmp(xnode,"*")!=0 && strcmp(xnode,jgrpNodeName)!=0) + continue; + gidLevel = get_series_parent(jgidNode); + if (gidLevel == -1) { + H5Gclose(jgidNode); + continue; + } + gidSeries = get_group(gidLevel, dataSetName); + if (gidSeries < 0) { + // This is okay, may not have ran long enough for + // a sample (hostname????) 
+ H5Gclose(gidLevel); + H5Gclose(jgidNode); + continue; + } + dataType = get_string_attribute(gidSeries, ATTR_DATATYPE); + if (dataType == NULL) { + H5Gclose(gidSeries); + H5Gclose(gidLevel); + H5Gclose(jgidNode); + info("No datatype in %s", dataSetName); + continue; + } + subtype = get_string_attribute(gidSeries, ATTR_SUBDATATYPE); + if (subtype == NULL) { + xfree(dataType); + H5Gclose(gidSeries); + H5Gclose(gidLevel); + H5Gclose(jgidNode); + info("No %s attribute", ATTR_SUBDATATYPE); + continue; + } + ops = profile_factory(dataType); + if (ops == NULL) { + xfree(subtype); + xfree(dataType); + H5Gclose(gidSeries); + H5Gclose(gidLevel); + H5Gclose(jgidNode); + info("Failed to create operations for %s", dataType); + continue; + } + data = get_hdf5_data(gidSeries,dataType,dataSetName,&sizeData); + if (data != NULL) { + if (strcmp(subtype,SUBDATA_SUMMARY) != 0) + (*(ops->extract_series)) + (fOt, header, jobid, stepx, + jgrpNodeName,dataSetName, + data,sizeData); + else + (*(ops->extract_total)) + (fOt, header, jobid, stepx, + jgrpNodeName,dataSetName, + data,sizeData); + + header = false; + xfree(data); + } else { + fprintf(fOt,"%d,%d,%s,No %s Data\n", + jobid,stepx,jgrpNodeName,dataSetName); + } + xfree(ops); + xfree(dataType); + H5Gclose(gidSeries); + H5Gclose(gidLevel); + H5Gclose(jgidNode); + } +#endif +} + + +void extract_data() { +#ifdef HAVE_HDF5 + hid_t fid_job, jgidRoot, jgidStep, jgidNodes, jgidNode, jgidLevel; + int nsteps, nnodes, stepx, isx, len; + char jgrpStepName[MAX_GROUP_NAME+1]; + char jgrpNodeName[MAX_GROUP_NAME+1]; + char fileName[MAX_PROFILE_PATH+1]; + bool header; + + FILE* fOt = fopen(outputFile,"w"); + if (fOt == NULL) { + error("Failed to create output file %s -- %m",outputFile); + } + len = snprintf(fileName,MAX_PROFILE_PATH,"%s/job~%d.h5", + slurmDataRoot, jobid); + if (len >= MAX_PROFILE_PATH) { + error("path is too big"); + exit(1); + } + fid_job = H5Fopen(fileName, H5F_ACC_RDONLY, H5P_DEFAULT); + if (fid_job < 0) { + error("Failed to 
open %s", fileName); + return; + } + jgidRoot = H5Gopen(fid_job,"/", H5P_DEFAULT); + if (jgidRoot < 0) { + H5Fclose(fid_job); + error("Failed to open root"); + return; + } + nsteps = get_int_attribute(jgidRoot,ATTR_NSTEPS); + for (stepx=0;stepx<nsteps;stepx++) { + if ((xstepid!=-1) && (stepx!=xstepid)) + continue; + sprintf(jgrpStepName,"%s~%d",GRP_STEP,stepx); + jgidStep = get_group(jgidRoot, jgrpStepName); + if (jgidStep < 0) { + error("Failed to open group %s", jgrpStepName); + continue; + } + if (strncasecmp(levelName,"Node:",5)== 0) { + nnodes = get_int_attribute(jgidStep,ATTR_NNODES); + jgidNodes = get_group(jgidStep, GRP_NODES); + if (jgidNodes < 0) { + H5Gclose(jgidStep); + error("Failed to open group %s",GRP_NODES); + continue; + } + len = H5Gget_objname_by_idx(jgidNodes, 0, jgrpNodeName, + MAX_GROUP_NAME); + if ((len < 0) || (len > MAX_GROUP_NAME)) { + H5Gclose(jgidNodes); + H5Gclose(jgidStep); + error("Invalid node name %s",jgrpNodeName); + continue; + } + jgidNode = get_group(jgidNodes, jgrpNodeName); + if (jgidNode < 0) { + H5Gclose(jgidNodes); + H5Gclose(jgidStep); + info("Failed to open group %s", jgrpNodeName); + continue; + } + jgidLevel = get_series_parent(jgidNode); + if (jgidLevel == -1) { + H5Gclose(jgidNode); + H5Gclose(jgidNodes); + H5Gclose(jgidStep); + continue; + } + get_series_names(jgidLevel); + H5Gclose(jgidLevel); + H5Gclose(jgidNode); + if (strcmp(seriesName,"*") == 0) { + for (isx=0; isx<numSeries; isx++) { + extract_node_level(fOt,stepx,jgidNodes, + nnodes,true,seriesNames[isx]); + } + } else if (strcmp(seriesName,GRP_TASKS) == 0) { + header = true; + for (isx=0; isx<numSeries; isx++) { + if (strstr(seriesNames[isx],GRP_TASK)) + { + extract_node_level(fOt,stepx, + jgidNodes, nnodes, + header, + seriesNames[isx]); + header = false; + } + } + } else { + extract_node_level(fOt, stepx, jgidNodes, + nnodes, true, seriesName); + } + delete_string_list(seriesNames, numSeries); + seriesNames = NULL; + numSeries = 0; + H5Gclose(jgidNodes); 
+// } else if (strncasecmp(levelName,"Task:",5)== 0) { + // TODO: do task (currently no task data + } else { + info("%s is an illegal level", levelName); + } + H5Gclose(jgidStep); + } + H5Gclose(jgidRoot); + H5Fclose(fid_job); + fclose(fOt); +#endif +} + + +int main (int argc, char **argv) +{ + if (argc <= 1) { + usage(); + exit(0); + } + opts(argc, argv); + + ProfileInit(); + if (mergeMode) { + printf("Merging node-step files into %s\n", + make_job_profile_path(slurmDataRoot, jobid)); + merge_step_files(); + } else { + printf("Extracting job data from %s into %s\n", + make_job_profile_path(slurmDataRoot, jobid), + outputFile); + extract_data(); + } + ProfileFinish(); + xfree(slurmDataRoot); + xfree(xnode); + return 0; +} diff --git a/src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.h b/src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.h new file mode 100644 index 00000000000..f57aae80933 --- /dev/null +++ b/src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.h @@ -0,0 +1,180 @@ +/*****************************************************************************\ + * sprfmrgh5.h - slurm profile accounting plugin for io and energy using hdf5. + ***************************************************************************** + * Copyright (C) 2013 Bull S. A. S. + * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. + * + * Written by Rod Schultz <rod.schultz@bull.com> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#ifndef _GATHER_PROFILE_SPRFMRGH5_H_ +#define _GATHER_PROFILE_SPRFMRGH5_H_ + +/* ============================================================================ + * ============================================================================ + * Functions for merging samples from node step files into a job file + * ============================================================================ + * ========================================================================+ */ + +/* + * get all samples for a series for a node-step + * + * Parameters + * gidSamples - handle to samples group. 
+ * namSeries - name of data series + * type - data type in sample (PROFILE_*_DATA_ + * nSamples - number of samples in series + * + * Returns -- data, caller must free. + */ +void* get_all_samples(hid_t gidSamples, char* namSeries, + char* type, int nSamples); + +/* + * Add data from the time-series section from the node-step HDF5 file + * (if it exists) to corresponding node and step in job HDF5 file + * + * Parameters + * jgidTasks - Tasks nodes in job file + * jgNode - group for node of job + * nsgNode - group for node in node-step file + */ +void merge_series_data(hid_t jgidTasks, hid_t jgNode, hid_t nsgNode); + + +/* ============================================================================ + * Functions for merging tasks data into a job file + ==========================================================================*/ + +/* + * Add data from the tasks section from node-step HDF5 file (if it exists) + * to corresponding node and step in job HDF5 file + * + * Parameters + * jgTasks - group for tasks of step + * nsgNode - group for node in node-step file + * nodeName - name of node + */ +void merge_task_totals(hid_t jgTasks, hid_t nsgNode, char* nodeName); + +/* ============================================================================ + * Functions for merging node totals into a job file + ==========================================================================*/ + +/* + * Add data from the nodes section from node-step HDF5 file (if it exists) + * to corresponding node and step in job HDF5 file + * + * Parameters + * jgNode - group for node of step + * nsdNode - group for node in node-step file + */ +void merge_node_totals(hid_t jgNode, hid_t nsgNode); + +/* ============================================================================ + * Functions for merging step data into a job file + ==========================================================================*/ + +/* + * add node-step data to job file + * + * Parameters + * fid_job - hdf5 file 
descriptor for job + * filename - name of node-step file + * nodeIndex - index of node within step + * nodeName - hostname of node + * jgidNodes - Nodes group in job file + * jgidTasks - Tasks group in job file + */ +void merge_node_step_data(hid_t fid_job, char* fileName, int nodeIndex, + char* nodeName, hid_t jgidNodes, hid_t jgidTasks); + +/* + * Merge of the (node) step files into one file for the job. + */ +void merge_step_files(); + + +/* ============================================================================ + * ============================================================================ + * Functions for data extraction + * ============================================================================ + * ========================================================================= */ + +/* + * Get the parent group of a specified series + * + * Parameters + * group - id of node containing series + * + * Returns + * id of parent of series level (caller must close) + * + */ +hid_t get_series_parent(hid_t group); + +/* + * Get names of all series on the node + * + * Parameters + * group - id of node + * + * Returns + * Creates static seriesNames with pointers to string names; + * Caller must delete with 'delete_string_list' + */ +void get_series_names(hid_t group); + +/* + * extract a data set from a node(s) + * + * Parameters + * fOt - File def for output file + * stepx - stepid + * jgidNodes - nodes group in job (and step) + * nnodes - number of nodes + * header - put heading in ouput + * dataset - name of dataset + */ +void extract_node_level(FILE* fOt, int stepx, hid_t jgidNodes, int nnodes, + bool header, char* dataSet); + +/* + * extract data from job file. 
+ * + * Parameters + * command line options are static data + * + */ +void extract_data(); + +#endif diff --git a/src/plugins/acct_gather_profile/none/Makefile.am b/src/plugins/acct_gather_profile/none/Makefile.am new file mode 100644 index 00000000000..cc5d507772b --- /dev/null +++ b/src/plugins/acct_gather_profile/none/Makefile.am @@ -0,0 +1,15 @@ +# Makefile for acct_gather_profile/none plugin + +AUTOMAKE_OPTIONS = foreign + +PLUGIN_FLAGS = -module -avoid-version --export-dynamic + +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common + +pkglib_LTLIBRARIES = acct_gather_profile_none.la + +# Null job completion logging plugin. +acct_gather_profile_none_la_SOURCES = acct_gather_profile_none.c + +acct_gather_profile_none_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) + diff --git a/src/plugins/acct_gather_profile/none/acct_gather_profile_none.c b/src/plugins/acct_gather_profile/none/acct_gather_profile_none.c new file mode 100644 index 00000000000..09f138dcbec --- /dev/null +++ b/src/plugins/acct_gather_profile/none/acct_gather_profile_none.c @@ -0,0 +1,169 @@ +/*****************************************************************************\ + * acct_gather_profile_none.c - slurm profile accounting plugin for none. + ***************************************************************************** + * Copyright (C) 2013 Bull S. A. S. + * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. + * + * Written by Rod Schultz <rod.schultz@bull.com> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * +\*****************************************************************************/ + +/* acct_gather_profile_none + * This plugin does not initiate a node-level thread. + * It is the acct_gather_profile stub. + */ + +#include "src/common/slurm_xlator.h" +#include "src/common/slurm_jobacct_gather.h" +#include "src/common/slurm_protocol_api.h" +#include "src/common/slurm_protocol_defs.h" +#include "src/slurmd/common/proctrack.h" + +#include <fcntl.h> +#include <signal.h> + +#define _DEBUG 1 +#define _DEBUG_STATS 1 + +/* + * These variables are required by the generic plugin interface. If they + * are not found in the plugin, the plugin loader will ignore it. + * + * plugin_name - a string giving a human-readable description of the + * plugin. 
There is no maximum length, but the symbol must refer to + * a valid string. + * + * plugin_type - a string suggesting the type of the plugin or its + * applicability to a particular form of data or method of data handling. + * If the low-level plugin API is used, the contents of this string are + * unimportant and may be anything. SLURM uses the higher-level plugin + * interface which requires this string to be of the form + * + * <application>/<method> + * + * where <application> is a description of the intended application of + * the plugin (e.g., "acct_gather_profile" for SLURM profile accounting) and <method> + * is a description of how this plugin satisfies that application. SLURM will + * only load profile accounting plugins if the plugin_type string has a + * prefix of "acct_gather_profile/". + * + * plugin_version - an unsigned 32-bit integer giving the version number + * of the plugin. If major and minor revisions are desired, the major + * version number may be multiplied by a suitable magnitude constant such + * as 100 or 1000. Various SLURM versions will likely require a certain + * minimum version for their plugins as the job accounting API + * matures. + */ +const char plugin_name[] = "AcctGatherProfile NONE plugin"; +const char plugin_type[] = "acct_gather_profile/none"; +const uint32_t plugin_version = 100; + +/* + * init() is called when the plugin is loaded, before any other functions + * are called. Put global initialization here.
+ */ +extern int init(void) +{ + verbose("%s loaded", plugin_name); + return SLURM_SUCCESS; +} + +extern int fini(void) +{ + return SLURM_SUCCESS; +} + +extern void acct_gather_profile_p_conf_options(s_p_options_t **full_options, + int *full_options_cnt) +{ + return; +} + +extern void acct_gather_profile_p_conf_set(s_p_hashtbl_t *tbl) +{ + return; +} + +extern void* acct_gather_profile_p_conf_get() +{ + return NULL; +} + + +extern int acct_gather_profile_p_controller_start() +{ + return SLURM_SUCCESS; +} + +extern int acct_gather_profile_p_node_step_start(slurmd_job_t* job) +{ + return SLURM_SUCCESS; +} + +extern int acct_gather_profile_p_node_step_end(slurmd_job_t* job) +{ + return SLURM_SUCCESS; +} + +extern int acct_gather_profile_p_task_start(slurmd_job_t* job, uint32_t taskid) +{ + return SLURM_SUCCESS; +} + +extern int acct_gather_profile_p_task_end(slurmd_job_t* job, pid_t taskpid) +{ + return SLURM_SUCCESS; +} + +extern int acct_gather_profile_p_job_sample() +{ + return SLURM_SUCCESS; +} + +extern int acct_gather_profile_p_add_node_data(slurmd_job_t* job, char* group, + char* type, void* data) +{ + return SLURM_SUCCESS; +} + +extern int acct_gather_profile_p_add_sample_data(char* group, char* type, + void* data) +{ + return SLURM_SUCCESS; +} + +extern int acct_gather_profile_p_add_task_data(slurmd_job_t* job, + uint32_t taskid, char* group, char* type, void* data) +{ + return SLURM_SUCCESS; +} + diff --git a/src/plugins/launch/slurm/launch_slurm.c b/src/plugins/launch/slurm/launch_slurm.c index 353da7135fd..a42b8392d13 100644 --- a/src/plugins/launch/slurm/launch_slurm.c +++ b/src/plugins/launch/slurm/launch_slurm.c @@ -505,6 +505,7 @@ extern int launch_p_step_launch( launch_params.remote_output_filename =fname_remote_string(job->ofname); launch_params.remote_input_filename = fname_remote_string(job->ifname); launch_params.remote_error_filename = fname_remote_string(job->efname); + launch_params.profile = opt.profile; launch_params.task_prolog = 
opt.task_prolog; launch_params.task_epilog = opt.task_epilog; launch_params.cpu_bind = opt.cpu_bind; diff --git a/src/salloc/opt.c b/src/salloc/opt.c index 1db86c427c7..286d74a2311 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -163,6 +163,7 @@ #define LONG_OPT_GRES 0x141 #define LONG_OPT_WAIT_ALL_NODES 0x142 #define LONG_OPT_REQ_SWITCH 0x143 +#define LONG_OPT_PROFILE 0x144 /*---- global variables, defined in opt.h ----*/ opt_t opt; @@ -304,6 +305,7 @@ static void _opt_default() opt.time_min = NO_VAL; opt.time_min_str = NULL; opt.partition = NULL; + opt.profile = NULL; opt.job_name = NULL; opt.jobid = NO_VAL; @@ -399,6 +401,7 @@ env_vars_t env_vars[] = { {"SALLOC_NO_ROTATE", OPT_NO_ROTATE, NULL, NULL }, {"SALLOC_OVERCOMMIT", OPT_OVERCOMMIT, NULL, NULL }, {"SALLOC_PARTITION", OPT_STRING, &opt.partition, NULL }, + {"SALLOC_PROFILE", OPT_STRING, &opt.profile, NULL }, {"SALLOC_QOS", OPT_STRING, &opt.qos, NULL }, {"SALLOC_RESERVATION", OPT_STRING, &opt.reservation, NULL }, {"SALLOC_SIGNAL", OPT_SIGNAL, NULL, NULL }, @@ -667,6 +670,7 @@ void set_options(const int argc, char **argv) {"ntasks-per-node", required_argument, 0, LONG_OPT_NTASKSPERNODE}, {"ntasks-per-socket",required_argument, 0, LONG_OPT_NTASKSPERSOCKET}, {"qos", required_argument, 0, LONG_OPT_QOS}, + {"profile", optional_argument, 0, LONG_OPT_PROFILE}, {"ramdisk-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE}, {"reboot", no_argument, 0, LONG_OPT_REBOOT}, {"reservation", required_argument, 0, LONG_OPT_RESERVATION}, @@ -1008,6 +1012,10 @@ void set_options(const int argc, char **argv) case LONG_OPT_JOBID: opt.jobid = _get_int(optarg, "jobid"); break; + case LONG_OPT_PROFILE: + xfree(opt.profile); + opt.profile = xstrdup(optarg); + break; case LONG_OPT_COMMENT: xfree(opt.comment); opt.comment = xstrdup(optarg); @@ -1570,6 +1578,9 @@ static bool _opt_verify(void) opt.ntasks_per_node); } + if (opt.profile) + setenvfs("SLURM_PROFILE=%s", opt.profile); + return verified; } @@ -1753,6 +1764,7 @@ static 
void _opt_list(void) if (opt.gres != NULL) info("gres : %s", opt.gres); info("network : %s", opt.network); + info("profile : `%s'", opt.profile); info("qos : %s", opt.qos); str = print_constraints(); info("constraints : %s", str); @@ -1842,7 +1854,7 @@ static void _usage(void) " [--nodefile=file] [--nodelist=hosts] [--exclude=hosts]\n" " [--network=type] [--mem-per-cpu=MB] [--qos=qos]\n" " [--cpu_bind=...] [--mem_bind=...] [--reservation=name]\n" -" [--time-min=minutes] [--gres=list]\n" +" [--time-min=minutes] [--gres=list] [--profile=all]\n" " [--switches=max-switches[@max-time-to-wait]]\n" " [executable [args...]]\n"); } @@ -1883,6 +1895,7 @@ static void _help(void) " --ntasks-per-node=n number of tasks to invoke on each node\n" " -N, --nodes=N number of nodes on which to run (N = min[-max])\n" " -O, --overcommit overcommit resources\n" +" --profile=value enable acct_gather_profile for detailed data\n" " -p, --partition=partition partition requested\n" " --qos=qos quality of service\n" " -Q, --quiet quiet mode (suppress informational messages)\n" diff --git a/src/salloc/opt.h b/src/salloc/opt.h index 2039ca8fe9c..1780b86fe8e 100644 --- a/src/salloc/opt.h +++ b/src/salloc/opt.h @@ -94,6 +94,7 @@ typedef struct salloc_options { int time_min; /* --min-time (int minutes) */ char *time_min_str; /* --min-time (string) */ char *partition; /* --partition=n, -p n */ + char *profile; /* --profile=[all | none} */ enum task_dist_states distribution; /* --distribution=, -m dist */ uint32_t plane_size; /* lllp distribution -> plane_size for diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index 5e2226df285..3059402cc60 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -614,6 +614,7 @@ static int _fill_job_desc_from_opts(job_desc_msg_t *desc) desc->immediate = 1; desc->name = xstrdup(opt.job_name); desc->reservation = xstrdup(opt.reservation); + desc->profile = xstrdup(opt.profile); desc->wckey = xstrdup(opt.wckey); if (opt.req_switch >= 0) desc->req_switch = 
opt.req_switch; diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index 1974c3dc426..c3b29def74f 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -171,6 +171,7 @@ #define LONG_OPT_EXPORT 0x151 #define LONG_OPT_REQ_SWITCH 0x152 #define LONG_OPT_EXPORT_FILE 0x153 +#define LONG_OPT_PROFILE 0x154 /*---- global variables, defined in opt.h ----*/ opt_t opt; @@ -361,6 +362,7 @@ static void _opt_default() opt.euid = (uid_t) -1; opt.egid = (gid_t) -1; + opt.profile = NULL; /* acct_gather_profile selection */ opt.propagate = NULL; /* propagate specific rlimits */ opt.ifname = xstrdup("/dev/null"); @@ -472,6 +474,7 @@ env_vars_t env_vars[] = { {"SBATCH_OPEN_MODE", OPT_OPEN_MODE, NULL, NULL }, {"SBATCH_OVERCOMMIT", OPT_OVERCOMMIT, NULL, NULL }, {"SBATCH_PARTITION", OPT_STRING, &opt.partition, NULL }, + {"SBATCH_PROFILE", OPT_STRING, &opt.profile, NULL }, {"SBATCH_QOS", OPT_STRING, &opt.qos, NULL }, {"SBATCH_RAMDISK_IMAGE", OPT_STRING, &opt.ramdiskimage, NULL }, {"SBATCH_REQUEUE", OPT_REQUEUE, NULL, NULL }, @@ -743,6 +746,7 @@ static struct option long_options[] = { {"ntasks-per-socket",required_argument, 0, LONG_OPT_NTASKSPERSOCKET}, {"open-mode", required_argument, 0, LONG_OPT_OPEN_MODE}, {"propagate", optional_argument, 0, LONG_OPT_PROPAGATE}, + {"profile", optional_argument, 0, LONG_OPT_PROFILE}, {"qos", required_argument, 0, LONG_OPT_QOS}, {"ramdisk-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE}, {"reboot", no_argument, 0, LONG_OPT_REBOOT}, @@ -1482,6 +1486,10 @@ static void _set_options(int argc, char **argv) case LONG_OPT_REQUEUE: opt.requeue = 1; break; + case LONG_OPT_PROFILE: + xfree(opt.profile); + opt.profile = xstrdup(optarg); + break; case LONG_OPT_COMMENT: xfree(opt.comment); opt.comment = xstrdup(optarg); @@ -2436,6 +2444,10 @@ static bool _opt_verify(void) if (opt.dependency) setenvfs("SLURM_JOB_DEPENDENCY=%s", opt.dependency); + if (opt.profile) + setenvfs("SLURM_PROFILE=%s", opt.profile); + + if (opt.acctg_freq >= 0) setenvf(NULL, 
"SLURM_ACCTG_FREQ", "%d", opt.acctg_freq); @@ -2710,6 +2722,7 @@ static void _opt_list(void) opt.jobid_set ? "(set)" : "(default)"); info("partition : %s", opt.partition == NULL ? "default" : opt.partition); + info("profile : `%s'", opt.profile); info("job name : `%s'", opt.job_name); info("reservation : `%s'", opt.reservation); info("wckey : `%s'", opt.wckey); @@ -2832,7 +2845,7 @@ static void _usage(void) " [--network=type] [--mem-per-cpu=MB] [--qos=qos] [--gres=list]\n" " [--cpu_bind=...] [--mem_bind=...] [--reservation=name]\n" " [--switches=max-switches{@max-time-to-wait}]\n" -" [--array=index_values]\n" +" [--array=index_values] [--profile=all]\n" " [--export[=names]] [--export-file=file|fd] executable [args...]\n"); } @@ -2880,6 +2893,7 @@ static void _help(void) " -o, --output=out file for batch script's standard output\n" " -O, --overcommit overcommit resources\n" " -p, --partition=partition partition requested\n" +" --profile=value enable acct_gather_profile for detailed data\n" " --propagate[=rlimits] propagate all [or specific list of] rlimits\n" " --qos=qos quality of service\n" " -Q, --quiet quiet mode (suppress informational messages)\n" diff --git a/src/sbatch/opt.h b/src/sbatch/opt.h index d8324a58df9..796e42af4a6 100644 --- a/src/sbatch/opt.h +++ b/src/sbatch/opt.h @@ -96,6 +96,7 @@ typedef struct sbatch_options { int time_min; /* --min-time (int minutes) */ char *time_min_str; /* --min-time (string) */ char *partition; /* --partition=n, -p n */ + char *profile; /* --profile=[all | none} */ enum task_dist_states distribution; /* --distribution=, -m dist */ uint32_t plane_size; /* lllp distribution -> plane_size for diff --git a/src/sbatch/sbatch.c b/src/sbatch/sbatch.c index b7a9c6d73a6..16834765e7f 100644 --- a/src/sbatch/sbatch.c +++ b/src/sbatch/sbatch.c @@ -287,6 +287,7 @@ static int _fill_job_desc_from_opts(job_desc_msg_t *desc) desc->req_nodes = opt.nodelist; desc->exc_nodes = opt.exc_nodes; desc->partition = opt.partition; + desc->profile = 
opt.profile; if (opt.licenses) desc->licenses = xstrdup(opt.licenses); if (opt.nodes_set) { diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index 29cbbb99fcb..674052f8143 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -73,6 +73,7 @@ #include "src/common/pack.h" #include "src/common/proc_args.h" #include "src/common/read_config.h" +#include "src/common/slurm_acct_gather_profile.h" #include "src/common/slurm_jobacct_gather.h" #include "src/common/slurm_accounting_storage.h" #include "src/common/slurm_auth.h" @@ -447,6 +448,9 @@ int main(int argc, char *argv[]) fatal( "failed to initialize job_submit plugin"); if (ext_sensors_init() != SLURM_SUCCESS ) fatal( "failed to initialize ext_sensors plugin"); + if (slurm_acct_gather_profile_init() != SLURM_SUCCESS ) + fatal( "failed to initialize profile plugin"); + acct_gather_profile_g_controller_start(); while (1) { /* initialization for each primary<->backup switch */ diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index b237978eba9..4f4fafa932f 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -844,6 +844,7 @@ static void _dump_job_state(struct job_record *dump_job_ptr, Buf buffer) packstr(dump_job_ptr->network, buffer); packstr(dump_job_ptr->licenses, buffer); packstr(dump_job_ptr->mail_user, buffer); + packstr(dump_job_ptr->profile, buffer); packstr(dump_job_ptr->resv_name, buffer); packstr(dump_job_ptr->batch_host, buffer); @@ -907,6 +908,7 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) uint16_t limit_set_time = 0, limit_set_qos = 0; char *nodes = NULL, *partition = NULL, *name = NULL, *resp_host = NULL; char *account = NULL, *network = NULL, *mail_user = NULL; + char *profile = NULL; char *comment = NULL, *nodes_completing = NULL, *alloc_node = NULL; char *licenses = NULL, *state_desc = NULL, *wckey = NULL; char *resv_name = NULL, *gres = NULL, *batch_host = NULL; @@ -1034,6 +1036,7 @@ static int
_load_job_state(Buf buffer, uint16_t protocol_version) safe_unpackstr_xmalloc(&network, &name_len, buffer); safe_unpackstr_xmalloc(&licenses, &name_len, buffer); safe_unpackstr_xmalloc(&mail_user, &name_len, buffer); + safe_unpackstr_xmalloc(&profile, &name_len, buffer); safe_unpackstr_xmalloc(&resv_name, &name_len, buffer); safe_unpackstr_xmalloc(&batch_host, &name_len, buffer); @@ -1463,6 +1466,9 @@ static int _load_job_state(Buf buffer, uint16_t protocol_version) xfree(job_ptr->mail_user); job_ptr->mail_user = mail_user; mail_user = NULL; /* reused, nothing left to free */ + xfree(job_ptr->profile); + job_ptr->profile = profile; + profile = NULL; /* reused, nothing left to free */ xfree(job_ptr->name); /* in case duplicate record */ job_ptr->name = name; name = NULL; /* reused, nothing left to free */ @@ -2767,6 +2773,7 @@ struct job_record *_job_rec_copy(struct job_record *job_ptr) job_ptr_new->node_bitmap_cg = bit_copy(job_ptr->node_bitmap_cg); job_ptr_new->nodes_completing = xstrdup(job_ptr->nodes_completing); job_ptr_new->partition = xstrdup(job_ptr->partition); + job_ptr_new->profile = xstrdup(job_ptr->profile); job_ptr_new->part_ptr_list = part_list_copy(job_ptr->part_ptr_list); if (job_ptr->prio_factors) { i = sizeof(priority_factors_object_t); @@ -4982,6 +4989,8 @@ _copy_job_desc_to_job_record(job_desc_msg_t * job_desc, return error_code; job_ptr->partition = xstrdup(job_desc->partition); + if (job_desc->profile) + job_ptr->profile = xstrdup(job_desc->profile); if (job_desc->job_id != NO_VAL) { /* already confirmed unique */ job_ptr->job_id = job_desc->job_id; @@ -5554,6 +5563,7 @@ static void _list_delete_job(void *job_entry) xfree(job_ptr->nodes); xfree(job_ptr->nodes_completing); xfree(job_ptr->partition); + xfree(job_ptr->profile); FREE_NULL_LIST(job_ptr->part_ptr_list); xfree(job_ptr->priority_array); slurm_destroy_priority_factors_object(job_ptr->prio_factors); @@ -5876,6 +5886,7 @@ void pack_job(struct job_record *dump_job_ptr, uint16_t 
show_flags, Buf buffer, } else { packnull(buffer); } + packstr(dump_job_ptr->profile, buffer); assoc_mgr_lock(&locks); if (assoc_mgr_qos_list) { diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index da7c84cf830..0dc8b28ce85 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -62,6 +62,7 @@ #include "src/common/macros.h" #include "src/common/node_select.h" #include "src/common/slurm_accounting_storage.h" +#include "src/common/slurm_acct_gather.h" #include "src/common/timers.h" #include "src/common/uid.h" #include "src/common/xassert.h" @@ -1960,6 +1961,7 @@ extern int epilog_slurmctld(struct job_record *job_ptr) static char **_build_env(struct job_record *job_ptr) { char **my_env, *name; + slurm_acct_gather_conf_t* acct_gathter_conf; my_env = xmalloc(sizeof(char *)); my_env[0] = NULL; @@ -2016,6 +2018,15 @@ static char **_build_env(struct job_record *job_ptr) name = uid_to_string((uid_t) job_ptr->user_id); setenvf(&my_env, "SLURM_JOB_USER", "%s", name); xfree(name); + if (job_ptr->profile) { + // Profile plugin is used. 
+ acct_gathter_conf = + (slurm_acct_gather_conf_t*) acct_gather_profile_g_conf_get(); + if (acct_gathter_conf && acct_gathter_conf->profile_dir) { + setenvf(&my_env, "SLURM_PROFILE_DIR", "%s", + acct_gathter_conf->profile_dir); + } + } return my_env; } diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 1e3a5e48cd0..fc8efd9b4c1 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -485,6 +485,8 @@ void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr) conf_ptr->acct_gather_energy_type = xstrdup(conf->acct_gather_energy_type); + conf_ptr->acct_gather_profile_type = + xstrdup(conf->acct_gather_profile_type); conf_ptr->acct_gather_node_freq = conf->acct_gather_node_freq; conf_ptr->authtype = xstrdup(conf->authtype); @@ -1963,6 +1965,7 @@ static void _slurm_rpc_job_will_run(slurm_msg_t * msg) uint16_t port; /* dummy value */ slurm_addr_t resp_addr; will_run_response_msg_t *resp = NULL; + char* account = ""; START_TIMER; debug2("Processing RPC: REQUEST_JOB_WILL_RUN from uid=%d", uid); diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 8fa60b0e289..3020dab6224 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -583,6 +583,7 @@ struct job_record { uint32_t *priority_array; /* partition based priority */ priority_factors_object_t *prio_factors; /* cached value used * by sprio command */ + char *profile; /* Acct_gather_profile option */ uint32_t qos_id; /* quality of service id */ void *qos_ptr; /* pointer to the quality of * service record used for diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c index 994a80a28d8..211dfc70c0b 100644 --- a/src/slurmd/slurmd/slurmd.c +++ b/src/slurmd/slurmd/slurmd.c @@ -85,6 +85,7 @@ #include "src/common/slurm_auth.h" #include "src/common/slurm_cred.h" #include "src/common/slurm_acct_gather_energy.h" +#include "src/common/slurm_acct_gather_profile.h" #include "src/common/slurm_jobacct_gather.h" #include "src/common/slurm_protocol_api.h" #include 
"src/common/slurm_topology.h" @@ -891,6 +892,8 @@ _read_config(void) _free_and_set(&conf->acct_gather_energy_type, xstrdup(cf->acct_gather_energy_type)); + _free_and_set(&conf->acct_gather_profile_type, + xstrdup(cf->acct_gather_profile_type)); _free_and_set(&conf->job_acct_gather_type, xstrdup(cf->job_acct_gather_type)); @@ -1106,6 +1109,7 @@ _destroy_conf(void) { if (conf) { xfree(conf->acct_gather_energy_type); + xfree(conf->acct_gather_profile_type); xfree(conf->block_map); xfree(conf->block_map_inv); xfree(conf->conffile); @@ -1553,11 +1557,13 @@ _slurmd_fini(void) node_fini2(); gres_plugin_fini(); slurm_topo_fini(); + acct_gather_energy_fini(); slurmd_req(NULL); /* purge memory allocated by slurmd_req() */ fini_setproctitle(); slurm_select_fini(); jobacct_gather_fini(); acct_gather_energy_fini(); + acct_gather_profile_fini(); spank_slurmd_exit(); cpu_freq_fini(); diff --git a/src/slurmd/slurmd/slurmd.h b/src/slurmd/slurmd/slurmd.h index 8c4fe74a76a..cd3bb4c9a74 100644 --- a/src/slurmd/slurmd/slurmd.h +++ b/src/slurmd/slurmd/slurmd.h @@ -136,6 +136,7 @@ typedef struct slurmd_config { uint16_t job_acct_gather_freq; char *job_acct_gather_type; /* job accounting gather type */ char *acct_gather_energy_type; /* */ + char *acct_gather_profile_type; /* */ uint16_t use_pam; uint16_t task_plugin_param; /* TaskPluginParams, expressed * using cpu_bind_type_t flags */ diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index 7cb50be36e4..8ee09f4b334 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -96,6 +96,7 @@ #include "src/common/plugstack.h" #include "src/common/safeopen.h" #include "src/common/slurm_jobacct_gather.h" +#include "src/common/slurm_acct_gather_profile.h" #include "src/common/switch.h" #include "src/common/util-net.h" #include "src/common/xmalloc.h" @@ -1019,6 +1020,7 @@ job_manager(slurmd_job_t *job) */ if (!conf->job_acct_gather_freq) jobacct_gather_stat_task(0); + 
acct_gather_profile_g_node_step_start(job); /* Send job launch response with list of pids */ _send_launch_resp(job, 0); @@ -1398,6 +1400,7 @@ _fork_all_tasks(slurmd_job_t *job, bool *io_initialized) pid_t pid; struct exec_wait_info *ei; + acct_gather_profile_g_task_start(job, i); if ((ei = fork_child_with_wait_info (i)) == NULL) { error("child fork: %m"); exec_wait_kill_children (exec_wait_list); @@ -1691,6 +1694,7 @@ _wait_for_any_task(slurmd_job_t *job, bool waitflag) jobacctinfo_aggregate(job->jobacct, jobacct); jobacctinfo_destroy(jobacct); } + acct_gather_profile_g_task_end(job, pid); /*********************************************/ if ((t = job_task_info_by_pid(job, pid))) { @@ -1771,6 +1775,7 @@ _wait_for_all_tasks(slurmd_job_t *job) while (_send_pending_exit_msgs(job)) {;} } + acct_gather_profile_g_node_step_end(job); } static void *_kill_thr(void *args) diff --git a/src/slurmd/slurmstepd/slurmstepd_job.c b/src/slurmd/slurmstepd/slurmstepd_job.c index 8c5fb969601..84985646874 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.c +++ b/src/slurmd/slurmstepd/slurmstepd_job.c @@ -281,6 +281,7 @@ job_create(launch_tasks_request_msg_t *msg) job->buffered_stdio = msg->buffered_stdio; job->labelio = msg->labelio; + job->profile = xstrdup(msg->profile); job->task_prolog = xstrdup(msg->task_prolog); job->task_epilog = xstrdup(msg->task_epilog); @@ -578,6 +579,7 @@ job_destroy(slurmd_job_t *job) list_destroy(job->sruns); xfree(job->envtp); xfree(job->node_name); + xfree(job->profile); xfree(job->task_prolog); xfree(job->task_epilog); xfree(job->job_alloc_cores); diff --git a/src/slurmd/slurmstepd/slurmstepd_job.h b/src/slurmd/slurmstepd/slurmstepd_job.h index e7b9dfbdf3f..0a754592db3 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.h +++ b/src/slurmd/slurmstepd/slurmstepd_job.h @@ -147,6 +147,7 @@ typedef struct slurmd_job { bool run_prolog; /* true if need to run prolog */ bool user_managed_io; time_t timelimit; /* time at which job must stop */ + char *profile; /* 
Level of acct_gather_profile */ char *task_prolog; /* per-task prolog */ char *task_epilog; /* per-task epilog */ struct passwd *pwd; /* saved passwd struct for user job */ diff --git a/src/srun/libsrun/allocate.c b/src/srun/libsrun/allocate.c index e51e0a1f98e..b90b96f9b39 100644 --- a/src/srun/libsrun/allocate.c +++ b/src/srun/libsrun/allocate.c @@ -663,6 +663,8 @@ job_desc_msg_create_from_opts (void) j->licenses = opt.licenses; if (opt.network) j->network = opt.network; + if (opt.profile) + j->profile = opt.profile; if (opt.account) j->account = opt.account; if (opt.comment) diff --git a/src/srun/libsrun/opt.c b/src/srun/libsrun/opt.c index 2aededbae98..db2beaed183 100644 --- a/src/srun/libsrun/opt.c +++ b/src/srun/libsrun/opt.c @@ -188,6 +188,7 @@ #define LONG_OPT_LAUNCHER_OPTS 0x154 #define LONG_OPT_CPU_FREQ 0x155 #define LONG_OPT_LAUNCH_CMD 0x156 +#define LONG_OPT_PROFILE 0x157 extern char **environ; @@ -487,6 +488,7 @@ static void _opt_default() opt.egid = (gid_t) -1; opt.propagate = NULL; /* propagate specific rlimits */ + opt.profile = NULL; /* acct_gather_profile selection */ opt.prolog = slurm_get_srun_prolog(); opt.epilog = slurm_get_srun_epilog(); @@ -581,6 +583,7 @@ env_vars_t env_vars[] = { {"SLURM_OPEN_MODE", OPT_OPEN_MODE, NULL, NULL }, {"SLURM_OVERCOMMIT", OPT_OVERCOMMIT, NULL, NULL }, {"SLURM_PARTITION", OPT_STRING, &opt.partition, NULL }, +{"SLURM_PROFILE", OPT_STRING, &opt.profile, NULL }, {"SLURM_PROLOG", OPT_STRING, &opt.prolog, NULL }, {"SLURM_QOS", OPT_STRING, &opt.qos, NULL }, {"SLURM_RAMDISK_IMAGE", OPT_STRING, &opt.ramdiskimage, NULL }, @@ -885,6 +888,7 @@ static void _set_options(const int argc, char **argv) {"ntasks-per-node", required_argument, 0, LONG_OPT_NTASKSPERNODE}, {"ntasks-per-socket",required_argument, 0, LONG_OPT_NTASKSPERSOCKET}, {"open-mode", required_argument, 0, LONG_OPT_OPEN_MODE}, + {"profile", optional_argument, 0, LONG_OPT_PROFILE}, {"prolog", required_argument, 0, LONG_OPT_PROLOG}, {"propagate", optional_argument, 
0, LONG_OPT_PROPAGATE}, {"pty", no_argument, 0, LONG_OPT_PTY}, @@ -1513,6 +1517,10 @@ static void _set_options(const int argc, char **argv) xfree(opt.wckey); opt.wckey = xstrdup(optarg); break; + case LONG_OPT_PROFILE: + xfree(opt.profile); + opt.profile = xstrdup(optarg); + break; case LONG_OPT_RESERVATION: xfree(opt.reservation); opt.reservation = xstrdup(optarg); @@ -2268,6 +2276,7 @@ static void _opt_list(void) opt.jobid_set ? "(set)" : "(default)"); info("partition : %s", opt.partition == NULL ? "default" : opt.partition); + info("profile : `%s'", opt.profile); info("job name : `%s'", opt.job_name); info("reservation : `%s'", opt.reservation); info("wckey : `%s'", opt.wckey); @@ -2410,6 +2419,7 @@ static void _usage(void) " [--cpu_bind=...] [--mem_bind=...] [--network=type]\n" " [--ntasks-per-node=n] [--ntasks-per-socket=n] [reservation=name]\n" " [--ntasks-per-core=n] [--mem-per-cpu=MB] [--preserve-env]\n" +" [--profile=all]\n" #ifdef HAVE_BG /* Blue gene specific options */ #ifdef HAVE_BG_L_P " [--geometry=XxYxZ] " @@ -2484,6 +2494,7 @@ static void _help(void) " -O, --overcommit overcommit resources\n" " -p, --partition=partition partition requested\n" " --prolog=program run \"program\" before launching job step\n" +" --profile=value enable acct_gather_profile for detailed data\n" " --propagate[=rlimits] propagate all [or specific list of] rlimits\n" #ifdef HAVE_PTY_H " --pty run task zero in pseudo terminal\n" diff --git a/src/srun/libsrun/opt.h b/src/srun/libsrun/opt.h index 339aa30a4b3..862d29561ff 100644 --- a/src/srun/libsrun/opt.h +++ b/src/srun/libsrun/opt.h @@ -173,6 +173,7 @@ typedef struct srun_options { bool parallel_debug; /* srun controlled by debugger */ bool debugger_test; /* --debugger-test */ bool test_only; /* --test-only */ + char *profile; /* --profile=[all | none} */ char *propagate; /* --propagate[=RLIMIT_CORE,...]*/ char *task_epilog; /* --task-epilog= */ char *task_prolog; /* --task-prolog= */ -- GitLab