diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 9a5aae02ca221ebec13ca41175232ffc7e7416df..c074bf8058dc2f36f5a772011bc9b9a162ab25d4 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -86,6 +86,10 @@ HIGHLIGHTS NO RECORDS WILL BE LOST, BUT THE SLURMDBD MAY NOT BE RESPONSIVE DURING THE UPDATE. IT WILL ALSO NOT BE POSSIBLE TO AUTOMATICALLY REVERT THE DATABASE TO THE FORMAT FOR AN EARLIER VERSION OF SLURM. PLAN ACCORDINGLY. + -- The performance of profiling with HDF5 is improved. In addition, internal + structures are changed to make it easier to add new profile types, + particularly energy sensors. This has introduced an operational issue. See + OTHER CHANGES. RPMBUILD CHANGES ================ @@ -177,6 +181,15 @@ OTHER CHANGES the Prolog and Epilog scripts. -- job_submit/lua: Enable reading and writing job environment variables. For example: if (job_desc.environment.LANGUAGE == "en_US") then ... + -- The format of HDF5 node-step files has changed, so the sh5util program that + merges them into job files has changed as well. The command line options to + sh5util have not changed. The old sh5util program is available in the + contribs section. It may be used to extract data from HDF5 profiles + collected with earlier versions of Slurm. + A batch job that invokes sh5util as its last step and is launched on 15.08 + requires no change. One that started before 15.08 but finished on 15.08 + will not correctly merge the node-step files; the merge will have to be + done manually with the sh5util in contribs. API CHANGES =========== diff --git a/auxdir/ltmain.sh b/auxdir/ltmain.sh index c29db3631ea16ddcec7af3ba6ce9d2f348dc73ee..bffda54187af4d101157c43c6c6d26dd735e51d3 100644 --- a/auxdir/ltmain.sh +++ b/auxdir/ltmain.sh @@ -70,7 +70,7 @@ # compiler: $LTCC # compiler flags: $LTCFLAGS # linker: $LD (gnu?
$with_gnu_ld) -# $progname: (GNU libtool) 2.4.2 Debian-2.4.2-1.10ubuntu1 +# $progname: (GNU libtool) 2.4.2 Debian-2.4.2-1.11 # automake: $automake_version # autoconf: $autoconf_version # @@ -80,7 +80,7 @@ PROGRAM=libtool PACKAGE=libtool -VERSION="2.4.2 Debian-2.4.2-1.10ubuntu1" +VERSION="2.4.2 Debian-2.4.2-1.11" TIMESTAMP="" package_revision=1.3337 diff --git a/configure b/configure index ad645d833b38046b32ed47e463bdb625506d301e..09e8cb08b38d03dc2360b4ae0e230e49b5b31852 100755 --- a/configure +++ b/configure @@ -24593,7 +24593,7 @@ fi -ac_config_files="$ac_config_files Makefile config.xml auxdir/Makefile contribs/Makefile contribs/cray/Makefile contribs/lua/Makefile contribs/mic/Makefile contribs/pam/Makefile contribs/pam_slurm_adopt/Makefile contribs/perlapi/Makefile contribs/perlapi/libslurm/Makefile contribs/perlapi/libslurm/perl/Makefile.PL contribs/perlapi/libslurmdb/Makefile contribs/perlapi/libslurmdb/perl/Makefile.PL contribs/torque/Makefile contribs/phpext/Makefile contribs/phpext/slurm_php/config.m4 contribs/sgather/Makefile contribs/sgi/Makefile contribs/sjobexit/Makefile contribs/slurmdb-direct/Makefile contribs/pmi2/Makefile doc/Makefile doc/man/Makefile doc/man/man1/Makefile doc/man/man3/Makefile doc/man/man5/Makefile doc/man/man8/Makefile doc/html/Makefile doc/html/configurator.html doc/html/configurator.easy.html etc/cgroup.release_common.example etc/init.d.slurm etc/init.d.slurmdbd etc/slurmctld.service etc/slurmd.service etc/slurmdbd.service src/Makefile src/api/Makefile src/common/Makefile src/db_api/Makefile src/layouts/Makefile src/layouts/unit/Makefile src/layouts/power/Makefile src/database/Makefile src/sacct/Makefile src/sacctmgr/Makefile src/sreport/Makefile src/salloc/Makefile src/sbatch/Makefile src/sbcast/Makefile src/sattach/Makefile src/scancel/Makefile src/scontrol/Makefile src/sdiag/Makefile src/sinfo/Makefile src/slurmctld/Makefile src/slurmd/Makefile src/slurmd/common/Makefile src/slurmd/slurmd/Makefile src/slurmd/slurmstepd/Makefile src/slurmdbd/Makefile src/smap/Makefile src/smd/Makefile src/sprio/Makefile src/squeue/Makefile src/srun/Makefile src/srun/libsrun/Makefile src/srun_cr/Makefile src/sshare/Makefile src/sstat/Makefile src/strigger/Makefile src/sview/Makefile src/plugins/Makefile src/plugins/accounting_storage/Makefile src/plugins/accounting_storage/common/Makefile src/plugins/accounting_storage/filetxt/Makefile src/plugins/accounting_storage/mysql/Makefile src/plugins/accounting_storage/none/Makefile src/plugins/accounting_storage/slurmdbd/Makefile src/plugins/acct_gather_energy/Makefile src/plugins/acct_gather_energy/cray/Makefile src/plugins/acct_gather_energy/rapl/Makefile src/plugins/acct_gather_energy/ipmi/Makefile src/plugins/acct_gather_energy/none/Makefile src/plugins/acct_gather_infiniband/Makefile src/plugins/acct_gather_infiniband/ofed/Makefile src/plugins/acct_gather_infiniband/none/Makefile src/plugins/acct_gather_filesystem/Makefile src/plugins/acct_gather_filesystem/lustre/Makefile src/plugins/acct_gather_filesystem/none/Makefile src/plugins/acct_gather_profile/Makefile src/plugins/acct_gather_profile/hdf5/Makefile src/plugins/acct_gather_profile/hdf5/sh5util/Makefile src/plugins/acct_gather_profile/none/Makefile src/plugins/auth/Makefile src/plugins/auth/authd/Makefile src/plugins/auth/munge/Makefile src/plugins/auth/none/Makefile src/plugins/burst_buffer/Makefile src/plugins/burst_buffer/common/Makefile src/plugins/burst_buffer/cray/Makefile src/plugins/burst_buffer/generic/Makefile src/plugins/checkpoint/Makefile 
src/plugins/checkpoint/aix/Makefile src/plugins/checkpoint/blcr/Makefile src/plugins/checkpoint/blcr/cr_checkpoint.sh src/plugins/checkpoint/blcr/cr_restart.sh src/plugins/checkpoint/none/Makefile src/plugins/checkpoint/ompi/Makefile src/plugins/checkpoint/poe/Makefile src/plugins/core_spec/Makefile src/plugins/core_spec/cray/Makefile src/plugins/core_spec/none/Makefile src/plugins/crypto/Makefile src/plugins/crypto/munge/Makefile src/plugins/crypto/openssl/Makefile src/plugins/ext_sensors/Makefile src/plugins/ext_sensors/rrd/Makefile src/plugins/ext_sensors/none/Makefile src/plugins/gres/Makefile src/plugins/gres/gpu/Makefile src/plugins/gres/nic/Makefile src/plugins/gres/mic/Makefile src/plugins/jobacct_gather/Makefile src/plugins/jobacct_gather/common/Makefile src/plugins/jobacct_gather/linux/Makefile src/plugins/jobacct_gather/aix/Makefile src/plugins/jobacct_gather/cgroup/Makefile src/plugins/jobacct_gather/none/Makefile src/plugins/jobcomp/Makefile src/plugins/jobcomp/elasticsearch/Makefile src/plugins/jobcomp/filetxt/Makefile src/plugins/jobcomp/none/Makefile src/plugins/jobcomp/script/Makefile src/plugins/jobcomp/mysql/Makefile src/plugins/job_container/Makefile src/plugins/job_container/cncu/Makefile src/plugins/job_container/none/Makefile src/plugins/job_submit/Makefile src/plugins/job_submit/all_partitions/Makefile src/plugins/job_submit/cnode/Makefile src/plugins/job_submit/cray/Makefile src/plugins/job_submit/defaults/Makefile src/plugins/job_submit/logging/Makefile src/plugins/job_submit/lua/Makefile src/plugins/job_submit/partition/Makefile src/plugins/job_submit/pbs/Makefile src/plugins/job_submit/require_timelimit/Makefile src/plugins/job_submit/throttle/Makefile src/plugins/launch/Makefile src/plugins/launch/aprun/Makefile src/plugins/launch/poe/Makefile src/plugins/launch/runjob/Makefile src/plugins/launch/slurm/Makefile src/plugins/power/Makefile src/plugins/power/common/Makefile src/plugins/power/cray/Makefile src/plugins/power/none/Makefile src/plugins/preempt/Makefile src/plugins/preempt/job_prio/Makefile src/plugins/preempt/none/Makefile src/plugins/preempt/partition_prio/Makefile src/plugins/preempt/qos/Makefile src/plugins/priority/Makefile src/plugins/priority/basic/Makefile src/plugins/priority/multifactor/Makefile src/plugins/proctrack/Makefile src/plugins/proctrack/aix/Makefile src/plugins/proctrack/cray/Makefile src/plugins/proctrack/cgroup/Makefile src/plugins/proctrack/pgid/Makefile src/plugins/proctrack/linuxproc/Makefile src/plugins/proctrack/sgi_job/Makefile src/plugins/proctrack/lua/Makefile src/plugins/route/Makefile src/plugins/route/default/Makefile src/plugins/route/topology/Makefile src/plugins/sched/Makefile src/plugins/sched/backfill/Makefile src/plugins/sched/builtin/Makefile src/plugins/sched/hold/Makefile src/plugins/sched/wiki/Makefile src/plugins/sched/wiki2/Makefile src/plugins/select/Makefile src/plugins/select/alps/Makefile src/plugins/select/alps/libalps/Makefile src/plugins/select/alps/libemulate/Makefile src/plugins/select/bluegene/Makefile src/plugins/select/bluegene/ba/Makefile src/plugins/select/bluegene/ba_bgq/Makefile src/plugins/select/bluegene/bl/Makefile src/plugins/select/bluegene/bl_bgq/Makefile src/plugins/select/bluegene/sfree/Makefile src/plugins/select/cons_res/Makefile src/plugins/select/cray/Makefile src/plugins/select/linear/Makefile src/plugins/select/other/Makefile src/plugins/select/serial/Makefile src/plugins/slurmctld/Makefile src/plugins/slurmctld/nonstop/Makefile src/plugins/slurmd/Makefile 
src/plugins/switch/Makefile src/plugins/switch/cray/Makefile src/plugins/switch/generic/Makefile src/plugins/switch/none/Makefile src/plugins/switch/nrt/Makefile src/plugins/switch/nrt/libpermapi/Makefile src/plugins/mpi/Makefile src/plugins/mpi/mpich1_p4/Makefile src/plugins/mpi/mpich1_shmem/Makefile src/plugins/mpi/mpichgm/Makefile src/plugins/mpi/mpichmx/Makefile src/plugins/mpi/mvapich/Makefile src/plugins/mpi/lam/Makefile src/plugins/mpi/none/Makefile src/plugins/mpi/openmpi/Makefile src/plugins/mpi/pmi2/Makefile src/plugins/task/Makefile src/plugins/task/affinity/Makefile src/plugins/task/cgroup/Makefile src/plugins/task/cray/Makefile src/plugins/task/none/Makefile src/plugins/topology/Makefile src/plugins/topology/3d_torus/Makefile src/plugins/topology/hypercube/Makefile src/plugins/topology/node_rank/Makefile src/plugins/topology/none/Makefile src/plugins/topology/tree/Makefile testsuite/Makefile testsuite/expect/Makefile testsuite/slurm_unit/Makefile testsuite/slurm_unit/api/Makefile testsuite/slurm_unit/api/manual/Makefile testsuite/slurm_unit/common/Makefile" +ac_config_files="$ac_config_files Makefile config.xml auxdir/Makefile contribs/Makefile contribs/cray/Makefile contribs/lua/Makefile contribs/mic/Makefile contribs/pam/Makefile contribs/pam_slurm_adopt/Makefile contribs/perlapi/Makefile contribs/perlapi/libslurm/Makefile contribs/perlapi/libslurm/perl/Makefile.PL contribs/perlapi/libslurmdb/Makefile contribs/perlapi/libslurmdb/perl/Makefile.PL contribs/torque/Makefile contribs/phpext/Makefile contribs/phpext/slurm_php/config.m4 contribs/sgather/Makefile contribs/sgi/Makefile contribs/sjobexit/Makefile contribs/slurmdb-direct/Makefile contribs/pmi2/Makefile doc/Makefile doc/man/Makefile doc/man/man1/Makefile doc/man/man3/Makefile doc/man/man5/Makefile doc/man/man8/Makefile doc/html/Makefile doc/html/configurator.html doc/html/configurator.easy.html etc/cgroup.release_common.example etc/init.d.slurm etc/init.d.slurmdbd etc/slurmctld.service etc/slurmd.service etc/slurmdbd.service src/Makefile src/api/Makefile src/common/Makefile src/db_api/Makefile src/layouts/Makefile src/layouts/unit/Makefile src/layouts/power/Makefile src/database/Makefile src/sacct/Makefile src/sacctmgr/Makefile src/sreport/Makefile src/salloc/Makefile src/sbatch/Makefile src/sbcast/Makefile src/sattach/Makefile src/scancel/Makefile src/scontrol/Makefile src/sdiag/Makefile src/sinfo/Makefile src/slurmctld/Makefile src/slurmd/Makefile src/slurmd/common/Makefile src/slurmd/slurmd/Makefile src/slurmd/slurmstepd/Makefile src/slurmdbd/Makefile src/smap/Makefile src/smd/Makefile src/sprio/Makefile src/squeue/Makefile src/srun/Makefile src/srun/libsrun/Makefile src/srun_cr/Makefile src/sshare/Makefile src/sstat/Makefile src/strigger/Makefile src/sview/Makefile src/plugins/Makefile src/plugins/accounting_storage/Makefile src/plugins/accounting_storage/common/Makefile src/plugins/accounting_storage/filetxt/Makefile src/plugins/accounting_storage/mysql/Makefile src/plugins/accounting_storage/none/Makefile src/plugins/accounting_storage/slurmdbd/Makefile src/plugins/acct_gather_energy/Makefile src/plugins/acct_gather_energy/cray/Makefile src/plugins/acct_gather_energy/rapl/Makefile src/plugins/acct_gather_energy/ipmi/Makefile src/plugins/acct_gather_energy/none/Makefile src/plugins/acct_gather_infiniband/Makefile src/plugins/acct_gather_infiniband/ofed/Makefile src/plugins/acct_gather_infiniband/none/Makefile src/plugins/acct_gather_filesystem/Makefile src/plugins/acct_gather_filesystem/lustre/Makefile 
src/plugins/acct_gather_filesystem/none/Makefile src/plugins/acct_gather_profile/Makefile src/plugins/acct_gather_profile/hdf5/Makefile src/plugins/acct_gather_profile/hdf5/sh5util/Makefile src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/Makefile src/plugins/acct_gather_profile/none/Makefile src/plugins/auth/Makefile src/plugins/auth/authd/Makefile src/plugins/auth/munge/Makefile src/plugins/auth/none/Makefile src/plugins/burst_buffer/Makefile src/plugins/burst_buffer/common/Makefile src/plugins/burst_buffer/cray/Makefile src/plugins/burst_buffer/generic/Makefile src/plugins/checkpoint/Makefile src/plugins/checkpoint/aix/Makefile src/plugins/checkpoint/blcr/Makefile src/plugins/checkpoint/blcr/cr_checkpoint.sh src/plugins/checkpoint/blcr/cr_restart.sh src/plugins/checkpoint/none/Makefile src/plugins/checkpoint/ompi/Makefile src/plugins/checkpoint/poe/Makefile src/plugins/core_spec/Makefile src/plugins/core_spec/cray/Makefile src/plugins/core_spec/none/Makefile src/plugins/crypto/Makefile src/plugins/crypto/munge/Makefile src/plugins/crypto/openssl/Makefile src/plugins/ext_sensors/Makefile src/plugins/ext_sensors/rrd/Makefile src/plugins/ext_sensors/none/Makefile src/plugins/gres/Makefile src/plugins/gres/gpu/Makefile src/plugins/gres/nic/Makefile src/plugins/gres/mic/Makefile src/plugins/jobacct_gather/Makefile src/plugins/jobacct_gather/common/Makefile src/plugins/jobacct_gather/linux/Makefile src/plugins/jobacct_gather/aix/Makefile src/plugins/jobacct_gather/cgroup/Makefile src/plugins/jobacct_gather/none/Makefile src/plugins/jobcomp/Makefile src/plugins/jobcomp/elasticsearch/Makefile src/plugins/jobcomp/filetxt/Makefile src/plugins/jobcomp/none/Makefile src/plugins/jobcomp/script/Makefile src/plugins/jobcomp/mysql/Makefile src/plugins/job_container/Makefile src/plugins/job_container/cncu/Makefile src/plugins/job_container/none/Makefile src/plugins/job_submit/Makefile src/plugins/job_submit/all_partitions/Makefile src/plugins/job_submit/cnode/Makefile src/plugins/job_submit/cray/Makefile src/plugins/job_submit/defaults/Makefile src/plugins/job_submit/logging/Makefile src/plugins/job_submit/lua/Makefile src/plugins/job_submit/partition/Makefile src/plugins/job_submit/pbs/Makefile src/plugins/job_submit/require_timelimit/Makefile src/plugins/job_submit/throttle/Makefile src/plugins/launch/Makefile src/plugins/launch/aprun/Makefile src/plugins/launch/poe/Makefile src/plugins/launch/runjob/Makefile src/plugins/launch/slurm/Makefile src/plugins/power/Makefile src/plugins/power/common/Makefile src/plugins/power/cray/Makefile src/plugins/power/none/Makefile src/plugins/preempt/Makefile src/plugins/preempt/job_prio/Makefile src/plugins/preempt/none/Makefile src/plugins/preempt/partition_prio/Makefile src/plugins/preempt/qos/Makefile src/plugins/priority/Makefile src/plugins/priority/basic/Makefile src/plugins/priority/multifactor/Makefile src/plugins/proctrack/Makefile src/plugins/proctrack/aix/Makefile src/plugins/proctrack/cray/Makefile src/plugins/proctrack/cgroup/Makefile src/plugins/proctrack/pgid/Makefile src/plugins/proctrack/linuxproc/Makefile src/plugins/proctrack/sgi_job/Makefile src/plugins/proctrack/lua/Makefile src/plugins/route/Makefile src/plugins/route/default/Makefile src/plugins/route/topology/Makefile src/plugins/sched/Makefile src/plugins/sched/backfill/Makefile src/plugins/sched/builtin/Makefile src/plugins/sched/hold/Makefile src/plugins/sched/wiki/Makefile src/plugins/sched/wiki2/Makefile src/plugins/select/Makefile src/plugins/select/alps/Makefile 
src/plugins/select/alps/libalps/Makefile src/plugins/select/alps/libemulate/Makefile src/plugins/select/bluegene/Makefile src/plugins/select/bluegene/ba/Makefile src/plugins/select/bluegene/ba_bgq/Makefile src/plugins/select/bluegene/bl/Makefile src/plugins/select/bluegene/bl_bgq/Makefile src/plugins/select/bluegene/sfree/Makefile src/plugins/select/cons_res/Makefile src/plugins/select/cray/Makefile src/plugins/select/linear/Makefile src/plugins/select/other/Makefile src/plugins/select/serial/Makefile src/plugins/slurmctld/Makefile src/plugins/slurmctld/nonstop/Makefile src/plugins/slurmd/Makefile src/plugins/switch/Makefile src/plugins/switch/cray/Makefile src/plugins/switch/generic/Makefile src/plugins/switch/none/Makefile src/plugins/switch/nrt/Makefile src/plugins/switch/nrt/libpermapi/Makefile src/plugins/mpi/Makefile src/plugins/mpi/mpich1_p4/Makefile src/plugins/mpi/mpich1_shmem/Makefile src/plugins/mpi/mpichgm/Makefile src/plugins/mpi/mpichmx/Makefile src/plugins/mpi/mvapich/Makefile src/plugins/mpi/lam/Makefile src/plugins/mpi/none/Makefile src/plugins/mpi/openmpi/Makefile src/plugins/mpi/pmi2/Makefile src/plugins/task/Makefile src/plugins/task/affinity/Makefile src/plugins/task/cgroup/Makefile src/plugins/task/cray/Makefile src/plugins/task/none/Makefile src/plugins/topology/Makefile src/plugins/topology/3d_torus/Makefile src/plugins/topology/hypercube/Makefile src/plugins/topology/node_rank/Makefile src/plugins/topology/none/Makefile src/plugins/topology/tree/Makefile testsuite/Makefile testsuite/expect/Makefile testsuite/slurm_unit/Makefile testsuite/slurm_unit/api/Makefile testsuite/slurm_unit/api/manual/Makefile testsuite/slurm_unit/common/Makefile" cat >confcache <<\_ACEOF @@ -25988,6 +25988,7 @@ do "src/plugins/acct_gather_profile/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/acct_gather_profile/Makefile" ;; "src/plugins/acct_gather_profile/hdf5/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/acct_gather_profile/hdf5/Makefile" ;; "src/plugins/acct_gather_profile/hdf5/sh5util/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/acct_gather_profile/hdf5/sh5util/Makefile" ;; + "src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/Makefile" ;; "src/plugins/acct_gather_profile/none/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/acct_gather_profile/none/Makefile" ;; "src/plugins/auth/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/auth/Makefile" ;; "src/plugins/auth/authd/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/auth/authd/Makefile" ;; diff --git a/configure.ac b/configure.ac index d43020cb622853070455f6a7e3d00b2c1a85aaf3..b946b5b4ee01d7e488000679b70efdb7870d4e64 100644 --- a/configure.ac +++ b/configure.ac @@ -529,6 +529,7 @@ AC_CONFIG_FILES([Makefile src/plugins/acct_gather_profile/Makefile src/plugins/acct_gather_profile/hdf5/Makefile src/plugins/acct_gather_profile/hdf5/sh5util/Makefile + src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/Makefile src/plugins/acct_gather_profile/none/Makefile src/plugins/auth/Makefile src/plugins/auth/authd/Makefile diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 8ffb92521187c30e514c72916c58dc5818c0b949..d341d7d4be298ed00ce502ba6852b63fe1b925a4 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -562,6 +562,8 @@ enum acct_energy_type { ENERGY_DATA_RECONFIG, ENERGY_DATA_PROFILE, ENERGY_DATA_LAST_POLL, + ENERGY_DATA_SENSOR_CNT, + ENERGY_DATA_NODE_ENERGY, }; /* @@ -1930,8 +1932,9 @@ typedef struct 
powercap_info_msg { typedef struct powercap_info_msg update_powercap_msg_t; typedef struct acct_gather_node_resp_msg { - char *node_name; /* node name */ acct_gather_energy_t *energy; + char *node_name; /* node name */ + uint16_t sensor_cnt; } acct_gather_node_resp_msg_t; typedef struct acct_gather_energy_req_msg { @@ -3758,16 +3761,20 @@ extern int slurm_load_node_single PARAMS((node_info_msg_t **resp, char *node_name, uint16_t show_flags)); /* - * slurm_node_energy - issue RPC to get the energy data on this machine + * slurm_get_node_energy_n - issue RPC to get the energy data of all + * configured sensors on the target machine * IN host - name of node to query, NULL if localhost * IN delta - Use cache if data is newer than this in seconds - * OUT acct_gather_energy_t structure on success or NULL other wise - * RET 0 or a slurm error code - * NOTE: free the response using slurm_acct_gather_energy_destroy + * OUT sensor_cnt - number of sensors + * OUT energy - array of acct_gather_energy_t structures on success or + * NULL other wise + * RET 0 on success or a slurm error code + * NOTE: free the response using xfree */ extern int slurm_get_node_energy PARAMS( (char *host, uint16_t delta, - acct_gather_energy_t **acct_gather_energy)); + uint16_t *sensors_cnt, + acct_gather_energy_t **energy)); /* * slurm_free_node_info_msg - free the node information response message diff --git a/src/api/node_info.c b/src/api/node_info.c index 3ea4e8a1f02092151d9b7a88fea4495f69683ec9..9e6c9d905667396f5c18784c0b453d3ddb52672b 100644 --- a/src/api/node_info.c +++ b/src/api/node_info.c @@ -585,15 +585,19 @@ extern int slurm_load_node_single (node_info_msg_t **resp, } /* - * slurm_node_energy - issue RPC to get the energy data on this machine + * slurm_get_node_energy_n - issue RPC to get the energy data of all + * configured sensors on the target machine * IN host - name of node to query, NULL if localhost * IN delta - Use cache if data is newer than this in seconds - * OUT acct_gather_energy_t structure on success or NULL other wise - * RET 0 or a slurm error code - * NOTE: free the response using slurm_acct_gather_energy_destroy + * OUT nb_sensors - number of sensors + * OUT energy - array of acct_gather_energy_t structures on success or + * NULL other wise + * RET 0 on success or a slurm error code + * NOTE: free the response using xfree */ extern int slurm_get_node_energy(char *host, uint16_t delta, - acct_gather_energy_t **acct_gather_energy) + uint16_t *sensor_cnt, + acct_gather_energy_t **energy) { int rc; slurm_msg_t req_msg; @@ -647,8 +651,10 @@ extern int slurm_get_node_energy(char *host, uint16_t delta, g_slurm_auth_destroy(resp_msg.auth_cred); switch (resp_msg.msg_type) { case RESPONSE_ACCT_GATHER_ENERGY: - *acct_gather_energy = ((acct_gather_node_resp_msg_t *) - resp_msg.data)->energy; + *sensor_cnt = ((acct_gather_node_resp_msg_t *) + resp_msg.data)->sensor_cnt; + *energy = ((acct_gather_node_resp_msg_t *) + resp_msg.data)->energy; ((acct_gather_node_resp_msg_t *) resp_msg.data)->energy = NULL; slurm_free_acct_gather_node_resp_msg(resp_msg.data); break; diff --git a/src/common/node_conf.c b/src/common/node_conf.c index c51b1fad4ac8c7a761f6c3568f468e7737cea31e..37413dbd90705f4cf3707902abb031ee00095917 100644 --- a/src/common/node_conf.c +++ b/src/common/node_conf.c @@ -804,7 +804,7 @@ extern struct node_record *create_node_record ( node_ptr->node_spec_bitmap = NULL; node_ptr->tmp_disk = config_ptr->tmp_disk; node_ptr->select_nodeinfo = select_g_select_nodeinfo_alloc(); - node_ptr->energy = 
acct_gather_energy_alloc(); + node_ptr->energy = acct_gather_energy_alloc(1); node_ptr->ext_sensors = ext_sensors_alloc(); node_ptr->owner = NO_VAL; xassert (node_ptr->magic = NODE_MAGIC) /* set value */; diff --git a/src/common/slurm_acct_gather_energy.c b/src/common/slurm_acct_gather_energy.c index 33a1013ae12f3f6551ce419c3f735faec78f2232..ee22e2ba73b947d6652698e236515aaaec104956 100644 --- a/src/common/slurm_acct_gather_energy.c +++ b/src/common/slurm_acct_gather_energy.c @@ -152,10 +152,10 @@ extern int acct_gather_energy_fini(void) return rc; } -extern acct_gather_energy_t *acct_gather_energy_alloc(void) +extern acct_gather_energy_t *acct_gather_energy_alloc(uint16_t cnt) { acct_gather_energy_t *energy = - xmalloc(sizeof(struct acct_gather_energy)); + xmalloc(sizeof(struct acct_gather_energy) * cnt); return energy; } @@ -187,10 +187,16 @@ extern void acct_gather_energy_pack(acct_gather_energy_t *energy, Buf buffer, } extern int acct_gather_energy_unpack(acct_gather_energy_t **energy, Buf buffer, - uint16_t protocol_version) + uint16_t protocol_version, bool need_alloc) { - acct_gather_energy_t *energy_ptr = acct_gather_energy_alloc(); - *energy = energy_ptr; + acct_gather_energy_t *energy_ptr; + + if (need_alloc) { + energy_ptr = acct_gather_energy_alloc(1); + *energy = energy_ptr; + } else { + energy_ptr = *energy; + } if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) { safe_unpack32(&energy_ptr->base_consumed_energy, buffer); @@ -204,8 +210,12 @@ extern int acct_gather_energy_unpack(acct_gather_energy_t **energy, Buf buffer, return SLURM_SUCCESS; unpack_error: - acct_gather_energy_destroy(energy_ptr); - *energy = NULL; + if (need_alloc) { + acct_gather_energy_destroy(energy_ptr); + *energy = NULL; + } else + memset(energy_ptr, 0, sizeof(acct_gather_energy_t)); + return SLURM_ERROR; } diff --git a/src/common/slurm_acct_gather_energy.h b/src/common/slurm_acct_gather_energy.h index 9f9b0c2fac32ec1b315acfd428665d0518d7db4b..3e6e6cfc7d1536432f06cf35884aaf0b61731266 100644 --- a/src/common/slurm_acct_gather_energy.h +++ b/src/common/slurm_acct_gather_energy.h @@ -75,12 +75,13 @@ typedef struct acct_energy_data { extern int acct_gather_energy_init(void); /* load the plugin */ extern int acct_gather_energy_fini(void); /* unload the plugin */ -extern acct_gather_energy_t *acct_gather_energy_alloc(void); +extern acct_gather_energy_t *acct_gather_energy_alloc(uint16_t cnt); extern void acct_gather_energy_destroy(acct_gather_energy_t *energy); extern void acct_gather_energy_pack(acct_gather_energy_t *energy, Buf buffer, uint16_t protocol_version); extern int acct_gather_energy_unpack(acct_gather_energy_t **energy, Buf buffer, - uint16_t protocol_version); + uint16_t protocol_version, + bool need_alloc); extern int acct_gather_energy_g_update_node_energy(void); extern int acct_gather_energy_g_get_data(enum acct_energy_type data_type, diff --git a/src/common/slurm_acct_gather_profile.c b/src/common/slurm_acct_gather_profile.c index 8d5d766dbed690e076e6ea143663b36749927859..dc51887c51d195ba224fe3f9e94e4fc0a865c891 100644 --- a/src/common/slurm_acct_gather_profile.c +++ b/src/common/slurm_acct_gather_profile.c @@ -70,8 +70,12 @@ typedef struct slurm_acct_gather_profile_ops { int (*node_step_end) (void); int (*task_start) (uint32_t); int (*task_end) (pid_t); - int (*add_sample_data) (uint32_t, void*); + int (*create_group) (const char*); + int (*create_dataset) (const char*, int, + acct_gather_profile_dataset_t *); + int (*add_sample_data) (uint32_t, void*, time_t); void (*conf_values) (List 
*data); + bool (*is_active) (uint32_t); } slurm_acct_gather_profile_ops_t; @@ -88,8 +92,11 @@ static const char *syms[] = { "acct_gather_profile_p_node_step_end", "acct_gather_profile_p_task_start", "acct_gather_profile_p_task_end", + "acct_gather_profile_p_create_group", + "acct_gather_profile_p_create_dataset", "acct_gather_profile_p_add_sample_data", "acct_gather_profile_p_conf_values", + "acct_gather_profile_p_is_active", }; acct_gather_profile_timer_t acct_gather_profile_timer[PROFILE_CNT]; @@ -348,6 +355,40 @@ extern char *acct_gather_profile_type_t_name(acct_gather_profile_type_t type) return "Unknown"; } +extern char *acct_gather_profile_dataset_str( + acct_gather_profile_dataset_t *dataset, void *data, + char *str, int str_len) +{ + int cur_loc = 0; + + while (dataset && (dataset->type != PROFILE_FIELD_NOT_SET)) { + switch (dataset->type) { + case PROFILE_FIELD_UINT64: + cur_loc += snprintf(str+cur_loc, str_len-cur_loc, + "%s%s=%"PRIu64, + cur_loc ? " " : "", + dataset->name, *(uint64_t *)data); + data += sizeof(uint64_t); + break; + case PROFILE_FIELD_DOUBLE: + cur_loc += snprintf(str+cur_loc, str_len-cur_loc, + "%s%s=%lf", + cur_loc ? " " : "", + dataset->name, *(double *)data); + data += sizeof(double); + break; + case PROFILE_FIELD_NOT_SET: + break; + } + + if (cur_loc >= str_len) + break; + dataset++; + } + + return str; +} + extern int acct_gather_profile_startpoll(char *freq, char *freq_def) { int retval = SLURM_SUCCESS; @@ -548,7 +589,22 @@ extern int acct_gather_profile_g_task_end(pid_t taskpid) return retval; } -extern int acct_gather_profile_g_add_sample_data(uint32_t type, void* data) +extern int acct_gather_profile_g_create_group(const char *name) +{ + int retval = SLURM_ERROR; + + if (acct_gather_profile_init() < 0) + return retval; + + slurm_mutex_lock(&profile_mutex); + retval = (*(ops.create_group))(name); + slurm_mutex_unlock(&profile_mutex); + return retval; +} + +extern int acct_gather_profile_g_create_dataset( + const char *name, int parent, + acct_gather_profile_dataset_t *dataset) { int retval = SLURM_ERROR; @@ -556,7 +612,21 @@ extern int acct_gather_profile_g_add_sample_data(uint32_t type, void* data) return retval; slurm_mutex_lock(&profile_mutex); - retval = (*(ops.add_sample_data))(type, data); + retval = (*(ops.create_dataset))(name, parent, dataset); + slurm_mutex_unlock(&profile_mutex); + return retval; +} + +extern int acct_gather_profile_g_add_sample_data(int dataset_id, void* data, + time_t sample_time) +{ + int retval = SLURM_ERROR; + + if (acct_gather_profile_init() < 0) + return retval; + + slurm_mutex_lock(&profile_mutex); + retval = (*(ops.add_sample_data))(dataset_id, data, sample_time); slurm_mutex_unlock(&profile_mutex); return retval; } @@ -568,3 +638,11 @@ extern void acct_gather_profile_g_conf_values(void *data) (*(ops.conf_values))(data); } + +extern bool acct_gather_profile_g_is_active(uint32_t type) +{ + if (acct_gather_profile_init() < 0) + return false; + + return (*(ops.is_active))(type); +} diff --git a/src/common/slurm_acct_gather_profile.h b/src/common/slurm_acct_gather_profile.h index 5a6504e9514dfa5513c7e5f26b1271cbf1418c35..b5defd222a185137a94b605c69f6e5df72514a79 100644 --- a/src/common/slurm_acct_gather_profile.h +++ b/src/common/slurm_acct_gather_profile.h @@ -66,6 +66,8 @@ #include "src/common/slurm_acct_gather.h" #include "src/slurmd/slurmstepd/slurmstepd_job.h" +#define NO_PARENT -1 + typedef enum { PROFILE_ENERGY, PROFILE_TASK, @@ -74,6 +76,17 @@ typedef enum { PROFILE_CNT } acct_gather_profile_type_t; +typedef enum { 
+ PROFILE_FIELD_NOT_SET, + PROFILE_FIELD_UINT64, + PROFILE_FIELD_DOUBLE +} acct_gather_profile_field_type_t; + +typedef struct { + char *name; + acct_gather_profile_field_type_t type; +} acct_gather_profile_dataset_t; + typedef struct { int freq; time_t last_notify; @@ -104,7 +117,9 @@ extern char *acct_gather_profile_type_to_string(uint32_t series); extern uint32_t acct_gather_profile_type_from_string(char *series_str); extern char *acct_gather_profile_type_t_name(acct_gather_profile_type_t type); - +extern char *acct_gather_profile_dataset_str( + acct_gather_profile_dataset_t *dataset, void *data, + char *str, int str_len); extern int acct_gather_profile_startpoll(char *freq, char *freq_def); extern void acct_gather_profile_endpoll(void); @@ -184,19 +199,46 @@ extern int acct_gather_profile_g_task_start(uint32_t taskid); */ extern int acct_gather_profile_g_task_end(pid_t taskpid); +/* + * Create a new group which can contain datasets. + * + * Returns -- the identifier of the group on success, + * a negative value on failure + */ +extern int acct_gather_profile_g_create_group(const char* name); + +/* + * Create a new dataset to record profiling data in the group "parent". + * Must be called by each accounting plugin in order to record data. + * A "Time" field is automatically added. + * + * Parameters + * name -- name of the dataset + * parent -- id of the parent group created with + * acct_gather_profile_g_create_group, or NO_PARENT for + * default group + * profile_series -- profile_series_def_t array filled in with the + * series definition + * Returns -- an identifier to the dataset on success + * a negative value on failure + */ +extern int acct_gather_profile_g_create_dataset( + const char *name, int parent, acct_gather_profile_dataset_t *dataset); + /* * Put data at the Node Samples level. Typically called from something called * at either job_acct_gather interval or acct_gather_energy interval. - * All samples in the same group will eventually be consolidated in one - * dataset + * Time is automatically added. * * Parameters - * type -- identifies the type of data. - * data -- data structure to be put to the file. + * dataset_id -- identifies the dataset to add data to. + * data -- data structure to be recorded + * sample_time-- when the sample happened * * Returns -- SLURM_SUCCESS or SLURM_ERROR */ -extern int acct_gather_profile_g_add_sample_data(uint32_t type, void *data); +extern int acct_gather_profile_g_add_sample_data(int dataset_id, void *data, + time_t sample_time); /* Get the values from the plugin that are setup in the .conf * file. This function should most likely only be called from @@ -204,4 +246,7 @@ extern int acct_gather_profile_g_add_sample_data(uint32_t type, void *data); */ extern void acct_gather_profile_g_conf_values(void *data); +/* Return true if the given type of plugin must be profiled */ +extern bool acct_gather_profile_g_is_active(uint32_t type); + #endif /*__SLURM_ACCT_GATHER_PROFILE_H__*/ diff --git a/src/common/slurm_jobacct_gather.c b/src/common/slurm_jobacct_gather.c index 2c6a460367596412b77d0684ecad0f8918303712..5585b53708162d05d1e77924d26648f56ace925d 100644 --- a/src/common/slurm_jobacct_gather.c +++ b/src/common/slurm_jobacct_gather.c @@ -82,7 +82,8 @@ strong_alias(jobacctinfo_destroy, slurm_jobacctinfo_destroy); * at the end of the structure. 
*/ typedef struct slurm_jobacct_gather_ops { - void (*poll_data) (List task_list, bool pgid_plugin, uint64_t cont_id); + void (*poll_data) (List task_list, bool pgid_plugin, uint64_t cont_id, + bool profile); int (*endpoll) (); int (*add_task) (pid_t pid, jobacct_id_t *jobacct_id); } slurm_jobacct_gather_ops_t; @@ -168,12 +169,12 @@ unpack_error: return SLURM_ERROR; } -static void _poll_data(void) +static void _poll_data(bool profile) { /* Update the data */ slurm_mutex_lock(&task_list_lock); if (task_list) - (*(ops.poll_data))(task_list, pgid_plugin, cont_id); + (*(ops.poll_data))(task_list, pgid_plugin, cont_id, profile); slurm_mutex_unlock(&task_list_lock); } @@ -200,13 +201,14 @@ static void *_watch_tasks(void *arg) _task_sleep(1); while (!jobacct_shutdown && acct_gather_profile_running) { /* Do this until shutdown is requested */ - _poll_data(); slurm_mutex_lock(&acct_gather_profile_timer[type].notify_mutex); pthread_cond_wait( &acct_gather_profile_timer[type].notify, &acct_gather_profile_timer[type].notify_mutex); slurm_mutex_unlock(&acct_gather_profile_timer[type]. notify_mutex); + /* The initial poll is done after the last task is added */ + _poll_data(1); } return NULL; } @@ -376,6 +378,7 @@ extern int jobacct_gather_add_task(pid_t pid, jobacct_id_t *jobacct_id, } jobacct->pid = pid; + memcpy(&jobacct->id, jobacct_id, sizeof(jobacct_id_t)); jobacct->min_cpu = 0; debug2("adding task %u pid %d on node %u to jobacct", jobacct_id->taskid, pid, jobacct_id->nodeid); @@ -385,7 +388,7 @@ extern int jobacct_gather_add_task(pid_t pid, jobacct_id_t *jobacct_id, (*(ops.add_task))(pid, jobacct_id); if (poll == 1) - _poll_data(); + _poll_data(1); return SLURM_SUCCESS; error: @@ -403,7 +406,7 @@ extern jobacctinfo_t *jobacct_gather_stat_task(pid_t pid) struct jobacctinfo *ret_jobacct = NULL; ListIterator itr = NULL; - _poll_data(); + _poll_data(0); slurm_mutex_lock(&task_list_lock); if (!task_list) { @@ -433,7 +436,7 @@ extern jobacctinfo_t *jobacct_gather_stat_task(pid_t pid) * spawned, which would prevent a valid checkpoint/restart * with some systems */ _task_sleep(1); - _poll_data(); + _poll_data(0); return NULL; } } @@ -448,7 +451,7 @@ extern jobacctinfo_t *jobacct_gather_remove_task(pid_t pid) /* poll data one last time before removing task * mainly for updating energy consumption */ - _poll_data(); + _poll_data(1); if (jobacct_shutdown) return NULL; @@ -585,6 +588,7 @@ extern jobacctinfo_t *jobacctinfo_create(jobacct_id_t *jobacct_id) jobacct_id = &temp_id; } memset(jobacct, 0, sizeof(struct jobacctinfo)); + jobacct->dataset_id = -1; jobacct->sys_cpu_sec = 0; jobacct->sys_cpu_usec = 0; jobacct->user_cpu_sec = 0; diff --git a/src/common/slurm_jobacct_gather.h b/src/common/slurm_jobacct_gather.h index e5270f5290a8dffbd35951404201a85fba6c8b1a..e6f14d8f892b4a69a9a5613935bbd903b4337cb7 100644 --- a/src/common/slurm_jobacct_gather.h +++ b/src/common/slurm_jobacct_gather.h @@ -128,6 +128,14 @@ struct jobacctinfo { double max_disk_write; /* max disk write data */ jobacct_id_t max_disk_write_id; /* max disk write data task id */ double tot_disk_write; /* total local disk writes in megabytes */ + + jobacct_id_t id; + int dataset_id; /* dataset associated to this task when profiling */ + + double last_tot_disk_read; + double last_tot_disk_write; + time_t cur_time; + time_t last_time; }; /* Define jobacctinfo_t below to avoid including extraneous slurm headers */ diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 
d3840020e36f8234461f6e45bd29bb48c523af4d..46e383bea08ccde1f96cf88af310736999fbe1fd 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -3137,28 +3137,59 @@ static void _pack_acct_gather_node_resp_msg(acct_gather_node_resp_msg_t *msg, Buf buffer, uint16_t protocol_version) { + unsigned int i; + xassert(msg != NULL); - packstr(msg->node_name, buffer); - acct_gather_energy_pack(msg->energy, buffer, protocol_version); + if (protocol_version >= SLURM_15_08_PROTOCOL_VERSION) { + packstr(msg->node_name, buffer); + pack16(msg->sensor_cnt, buffer); + for (i = 0; i < msg->sensor_cnt; i++) + acct_gather_energy_pack(&msg->energy[i], + buffer, protocol_version); + } else { + acct_gather_energy_t *energy = NULL; + + packstr(msg->node_name, buffer); + if (msg->sensor_cnt) + energy = &msg->energy[0]; + acct_gather_energy_pack(energy, buffer, protocol_version); + } + } static int _unpack_acct_gather_node_resp_msg(acct_gather_node_resp_msg_t **msg, Buf buffer, uint16_t protocol_version) { + unsigned int i; acct_gather_node_resp_msg_t *node_data_ptr; uint32_t uint32_tmp; + acct_gather_energy_t *e; /* alloc memory for structure */ xassert(msg != NULL); node_data_ptr = xmalloc(sizeof(acct_gather_node_resp_msg_t)); *msg = node_data_ptr; - - safe_unpackstr_xmalloc(&node_data_ptr->node_name, - &uint32_tmp, buffer); - if (acct_gather_energy_unpack(&node_data_ptr->energy, buffer, - protocol_version) != SLURM_SUCCESS) - goto unpack_error; - + if (protocol_version >= SLURM_15_08_PROTOCOL_VERSION) { + safe_unpackstr_xmalloc(&node_data_ptr->node_name, + &uint32_tmp, buffer); + safe_unpack16(&node_data_ptr->sensor_cnt, buffer); + node_data_ptr->energy = xmalloc(sizeof(acct_gather_energy_t) + * node_data_ptr->sensor_cnt); + for (i = 0; i < node_data_ptr->sensor_cnt; ++i) { + e = &node_data_ptr->energy[i]; + if (acct_gather_energy_unpack( + &e, buffer, protocol_version, 0) + != SLURM_SUCCESS) + goto unpack_error; + } + } else { + safe_unpackstr_xmalloc(&node_data_ptr->node_name, + &uint32_tmp, buffer); + if (acct_gather_energy_unpack(&node_data_ptr->energy, buffer, + protocol_version, 1) + != SLURM_SUCCESS) + goto unpack_error; + } return SLURM_SUCCESS; unpack_error: @@ -3358,7 +3389,7 @@ _unpack_node_registration_status_msg(slurm_node_registration_status_msg_t gres_info_size); } if (acct_gather_energy_unpack(&node_reg_ptr->energy, buffer, - protocol_version) + protocol_version, 1) != SLURM_SUCCESS) goto unpack_error; safe_unpackstr_xmalloc(&node_reg_ptr->version, @@ -3415,7 +3446,7 @@ _unpack_node_registration_status_msg(slurm_node_registration_status_msg_t gres_info_size); } if (acct_gather_energy_unpack(&node_reg_ptr->energy, buffer, - protocol_version) + protocol_version, 1) != SLURM_SUCCESS) goto unpack_error; safe_unpackstr_xmalloc(&node_reg_ptr->version, @@ -3858,7 +3889,7 @@ _unpack_node_info_members(node_info_t * node, Buf buffer, safe_unpackstr_xmalloc(&node->os, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&node->reason, &uint32_tmp, buffer); if (acct_gather_energy_unpack(&node->energy, buffer, - protocol_version) + protocol_version, 1) != SLURM_SUCCESS) goto unpack_error; if (ext_sensors_data_unpack(&node->ext_sensors, buffer, @@ -3909,7 +3940,7 @@ _unpack_node_info_members(node_info_t * node, Buf buffer, safe_unpackstr_xmalloc(&node->os, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&node->reason, &uint32_tmp, buffer); if (acct_gather_energy_unpack(&node->energy, buffer, - protocol_version) + protocol_version, 1) != SLURM_SUCCESS) goto unpack_error; if 
(ext_sensors_data_unpack(&node->ext_sensors, buffer, @@ -3949,7 +3980,7 @@ _unpack_node_info_members(node_info_t * node, Buf buffer, safe_unpackstr_xmalloc(&node->os, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&node->reason, &uint32_tmp, buffer); if (acct_gather_energy_unpack(&node->energy, buffer, - protocol_version) + protocol_version, 1) != SLURM_SUCCESS) goto unpack_error; if (ext_sensors_data_unpack(&node->ext_sensors, buffer, diff --git a/src/plugins/acct_gather_energy/cray/acct_gather_energy_cray.c b/src/plugins/acct_gather_energy/cray/acct_gather_energy_cray.c index 2a911be60d6d059f7f11faa5319f883443d66488..ffb4ec95a3e68bdbb94040bb0440d6b5b23bb501 100644 --- a/src/plugins/acct_gather_energy/cray/acct_gather_energy_cray.c +++ b/src/plugins/acct_gather_energy/cray/acct_gather_energy_cray.c @@ -188,7 +188,13 @@ static int _running_profile(void) static int _send_profile(void) { - acct_energy_data_t ener; + uint64_t curr_watts; + acct_gather_profile_dataset_t dataset[] = { + { "Power", PROFILE_FIELD_UINT64 }, + { NULL, PROFILE_FIELD_NOT_SET } + }; + + static int dataset_id = -1; /* id of the dataset for profile data */ if (!_running_profile()) return SLURM_SUCCESS; @@ -197,14 +203,26 @@ static int _send_profile(void) info("_send_profile: consumed %d watts", local_energy->current_watts); - memset(&ener, 0, sizeof(acct_energy_data_t)); - ener.cpu_freq = 1; - ener.time = time(NULL); - ener.power = local_energy->current_watts; - acct_gather_profile_g_add_sample_data( - ACCT_GATHER_PROFILE_ENERGY, &ener); + if (dataset_id < 0) { + dataset_id = acct_gather_profile_g_create_dataset( + "Energy", NO_PARENT, dataset); + if (debug_flags & DEBUG_FLAG_ENERGY) + debug("Energy: dataset created (id = %d)", dataset_id); + if (dataset_id == SLURM_ERROR) { + error("Energy: Failed to create the dataset for RAPL"); + return SLURM_ERROR; + } + } + + curr_watts = (uint64_t)local_energy->current_watts; + + if (debug_flags & DEBUG_FLAG_PROFILE) { + info("PROFILE-Energy: power=%u", local_energy->current_watts); + } - return SLURM_ERROR; + return acct_gather_profile_g_add_sample_data(dataset_id, + (void *)&curr_watts, + local_energy->poll_time); } extern int acct_gather_energy_p_update_node_energy(void) @@ -316,7 +334,7 @@ extern void acct_gather_energy_p_conf_set(s_p_hashtbl_t *tbl) if (!flag_init) { flag_init = 1; - local_energy = acct_gather_energy_alloc(); + local_energy = acct_gather_energy_alloc(1); if (!_get_latest_stats(GET_ENERGY)) local_energy->current_watts = NO_VAL; else diff --git a/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi.c b/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi.c index fb88a9dc88a388eac3c900d25cd65a6df44404bf..53e1022a0b5aa37a6e886b09a981f6462aded0ee 100644 --- a/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi.c +++ b/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi.c @@ -2,7 +2,7 @@ * acct_gather_energy_ipmi.c - slurm energy accounting plugin for ipmi. ***************************************************************************** * Copyright (C) 2012 - * Written by Bull- Thomas Cadeau + * Initially written by Thomas Cadeau @ Bull. Adapted by Yoann Blein @ Bull. * * This file is part of SLURM, a resource management program. * For details, see <http://slurm.schedmd.com/>. 
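Taken together, the acct_gather_profile changes above reduce the producer side to two calls: create a dataset once (a "Time" field is added implicitly), then push samples keyed by the returned dataset id. A minimal caller-side sketch, modeled on the cray plugin hunk above, follows; the function name, the single "Power" field and the local_energy variable are illustrative only and not part of this patch.

    #include "src/common/slurm_acct_gather_profile.h"  /* assumed include path */

    /* Illustrative sketch only -- not part of this patch. */
    static int _example_send_power(acct_gather_energy_t *local_energy)
    {
        static int dataset_id = -1;  /* created once, then reused */
        uint64_t curr_watts;
        acct_gather_profile_dataset_t dataset[] = {
            { "Power", PROFILE_FIELD_UINT64 },
            { NULL, PROFILE_FIELD_NOT_SET }  /* terminator entry */
        };

        if (dataset_id < 0) {
            /* "Time" is added automatically; NO_PARENT means default group */
            dataset_id = acct_gather_profile_g_create_dataset(
                "Energy", NO_PARENT, dataset);
            if (dataset_id == SLURM_ERROR)
                return SLURM_ERROR;
        }

        /* the sample buffer must match the dataset layout field for field */
        curr_watts = (uint64_t)local_energy->current_watts;
        return acct_gather_profile_g_add_sample_data(
            dataset_id, (void *)&curr_watts, local_energy->poll_time);
    }
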
@@ -82,7 +82,7 @@ slurmd_conf_t *conf = NULL; #define _DEBUG 1 #define _DEBUG_ENERGY 1 -#define IPMI_VERSION 1 /* Data structure version number */ +#define IPMI_VERSION 2 /* Data structure version number */ #define NBFIRSTREAD 3 /* @@ -130,10 +130,31 @@ char *sensor_config_file = NULL; /* * internal variables */ -static uint32_t last_update_watt = 0; static time_t last_update_time = 0; static time_t previous_update_time = 0; -static acct_gather_energy_t *local_energy = NULL; + +/* array of struct to track the status of multiple sensors */ +typedef struct sensor_status { + uint32_t id; + uint32_t last_update_watt; + acct_gather_energy_t energy; +} sensor_status_t; +static sensor_status_t *sensors; +static uint16_t sensors_len; +static uint32_t *start_current_energies; + +/* array of struct describing the configuration of the sensors */ +typedef struct description { + const char* label; + uint16_t sensor_cnt; + uint16_t *sensor_idxs; +} description_t; +static description_t *descriptions; +static uint16_t descriptions_len; +static const char *NODE_DESC = "Node"; + +static int dataset_id = -1; /* id of the dataset for profile data */ + static slurm_ipmi_conf_t slurm_ipmi_conf; static uint64_t debug_flags = 0; static bool flag_energy_accounting_shutdown = false; @@ -304,53 +325,61 @@ static int _init_ipmi_config (void) */ static int _check_power_sensor(void) { - unsigned int record_ids[] = {(int) slurm_ipmi_conf.power_sensor_num}; - unsigned int record_ids_length = 1; + /* check the sensors list */ + void *sensor_reading; + int rc; int sensor_units; - void* sensor_reading; - - if ((ipmi_monitoring_sensor_readings_by_record_id( - ipmi_ctx, - hostname, - &ipmi_config, - sensor_reading_flags, - record_ids, - record_ids_length, - NULL, NULL)) != record_ids_length) { + uint16_t i; + unsigned int ids[sensors_len]; + + for (i = 0; i < sensors_len; ++i) + ids[i] = sensors[i].id; + rc = ipmi_monitoring_sensor_readings_by_record_id(ipmi_ctx, + hostname, + &ipmi_config, + sensor_reading_flags, + ids, + sensors_len, + NULL, + NULL); + if (rc != sensors_len) { error("ipmi_monitoring_sensor_readings_by_record_id: %s", ipmi_monitoring_ctx_errormsg(ipmi_ctx)); return SLURM_FAILURE; } - if ((sensor_units = ipmi_monitoring_sensor_read_sensor_units(ipmi_ctx)) - < 0) { - error("ipmi_monitoring_sensor_read_sensor_units: %s", - ipmi_monitoring_ctx_errormsg(ipmi_ctx)); - return SLURM_FAILURE; - } + i = 0; + do { + /* check if the sensor unit is watts */ + sensor_units = + ipmi_monitoring_sensor_read_sensor_units(ipmi_ctx); + if (sensor_units < 0) { + error("ipmi_monitoring_sensor_read_sensor_units: %s", + ipmi_monitoring_ctx_errormsg(ipmi_ctx)); + return SLURM_FAILURE; + } + if (sensor_units != slurm_ipmi_conf.variable) { + error("Configured sensor is not in Watt, " + "please check ipmi.conf"); + return SLURM_FAILURE; + } - if (sensor_units != slurm_ipmi_conf.variable) { - error("Configured sensor is not in Watt, " - "please check ipmi.conf"); - return SLURM_FAILURE; - } + /* update current value of the sensor */ + sensor_reading = + ipmi_monitoring_sensor_read_sensor_reading(ipmi_ctx); + if (sensor_reading) { + sensors[i].last_update_watt = + (uint32_t) (*((double *)sensor_reading)); + } else { + error("ipmi read an empty value for power consumption"); + return SLURM_FAILURE; + } + ++i; + } while (ipmi_monitoring_sensor_iterator_next(ipmi_ctx)); - ipmi_monitoring_sensor_iterator_first(ipmi_ctx); - if (ipmi_monitoring_sensor_read_record_id(ipmi_ctx) < 0) { - error("ipmi_monitoring_sensor_read_record_id: %s", - 
ipmi_monitoring_ctx_errormsg(ipmi_ctx)); - return SLURM_FAILURE; - } + previous_update_time = last_update_time; + last_update_time = time(NULL); - sensor_reading = ipmi_monitoring_sensor_read_sensor_reading(ipmi_ctx); - if (sensor_reading) { - last_update_watt = (uint32_t)(*((double *)sensor_reading)); - previous_update_time = last_update_time; - last_update_time = time(NULL); - } else { - error("ipmi read an empty value for power consumption"); - return SLURM_FAILURE; - } return SLURM_SUCCESS; } @@ -383,9 +412,9 @@ static int _find_power_sensor(void) for (i = 0; i < sensor_count; i++, ipmi_monitoring_sensor_iterator_next(ipmi_ctx)) { - if ((sensor_units = - ipmi_monitoring_sensor_read_sensor_units(ipmi_ctx)) - < 0) { + sensor_units = + ipmi_monitoring_sensor_read_sensor_units(ipmi_ctx); + if (sensor_units < 0) { error("ipmi_monitoring_sensor_read_sensor_units: %s", ipmi_monitoring_ctx_errormsg(ipmi_ctx)); return SLURM_FAILURE; @@ -394,19 +423,34 @@ static int _find_power_sensor(void) if (sensor_units != slurm_ipmi_conf.variable) continue; - if ((record_id = - ipmi_monitoring_sensor_read_record_id(ipmi_ctx)) - < 0) { + record_id = ipmi_monitoring_sensor_read_record_id(ipmi_ctx); + if (record_id < 0) { error("ipmi_monitoring_sensor_read_record_id: %s", ipmi_monitoring_ctx_errormsg(ipmi_ctx)); return SLURM_FAILURE; } - slurm_ipmi_conf.power_sensor_num = (uint32_t) record_id; - sensor_reading = ipmi_monitoring_sensor_read_sensor_reading( - ipmi_ctx); + + sensor_reading = + ipmi_monitoring_sensor_read_sensor_reading(ipmi_ctx); if (sensor_reading) { - last_update_watt = - (uint32_t)(*((double *)sensor_reading)); + /* we found a valid sensor, allocate room for its + * status and its description as the main sensor */ + sensors_len = 1; + sensors = xmalloc(sizeof(sensor_status_t)); + sensors[0].id = (uint32_t)record_id; + sensors[0].last_update_watt = + (uint32_t) (*((double *)sensor_reading)); + + descriptions_len = 1; + descriptions = xmalloc(sizeof(description_t)); + descriptions[0].label = xstrdup(NODE_DESC); + descriptions[0].sensor_cnt = 1; + descriptions[0].sensor_idxs = xmalloc(sizeof(uint16_t)); + descriptions[0].sensor_idxs[0] = 0; + + start_current_energies = xmalloc(sizeof(uint32_t)); + + previous_update_time = last_update_time; last_update_time = time(NULL); } else { error("ipmi read an empty value for power consumption"); @@ -431,41 +475,73 @@ static int _find_power_sensor(void) */ static int _read_ipmi_values(void) { - unsigned int record_ids[] = {(int) slurm_ipmi_conf.power_sensor_num}; - unsigned int record_ids_length = 1; - void* sensor_reading; - - if ((ipmi_monitoring_sensor_readings_by_record_id( - ipmi_ctx, - hostname, - &ipmi_config, - sensor_reading_flags, - record_ids, - record_ids_length, - NULL,NULL)) != record_ids_length) { + /* read sensors list */ + void *sensor_reading; + int rc; + uint16_t i; + unsigned int ids[sensors_len]; + + for (i = 0; i < sensors_len; ++i) + ids[i] = sensors[i].id; + rc = ipmi_monitoring_sensor_readings_by_record_id(ipmi_ctx, + hostname, + &ipmi_config, + sensor_reading_flags, + ids, + sensors_len, + NULL, + NULL); + if (rc != sensors_len) { error("ipmi_monitoring_sensor_readings_by_record_id: %s", ipmi_monitoring_ctx_errormsg(ipmi_ctx)); return SLURM_FAILURE; } - ipmi_monitoring_sensor_iterator_first(ipmi_ctx); - if (ipmi_monitoring_sensor_read_record_id(ipmi_ctx) < 0) { - error("ipmi_monitoring_sensor_read_record_id: %s", - ipmi_monitoring_ctx_errormsg(ipmi_ctx)); - return SLURM_FAILURE; - } - sensor_reading = 
ipmi_monitoring_sensor_read_sensor_reading(ipmi_ctx); - if (sensor_reading) { - last_update_watt = (uint32_t)(*((double *)sensor_reading)); - previous_update_time = last_update_time; - last_update_time = time(NULL); - } else { - error("ipmi read an empty value for power consumption"); - return SLURM_FAILURE; - } + + i = 0; + do { + sensor_reading = + ipmi_monitoring_sensor_read_sensor_reading(ipmi_ctx); + if (sensor_reading) { + sensors[i].last_update_watt = + (uint32_t) (*((double *)sensor_reading)); + } else { + error("ipmi read an empty value for power consumption"); + return SLURM_FAILURE; + } + ++i; + } while (ipmi_monitoring_sensor_iterator_next(ipmi_ctx)); + + previous_update_time = last_update_time; + last_update_time = time(NULL); return SLURM_SUCCESS; } +/* updates the given energy according to the last watt reading of the sensor */ +static void _update_energy(acct_gather_energy_t *e, uint32_t last_update_watt) +{ + if (e->current_watts != 0) { + e->base_watts = e->current_watts; + e->current_watts = last_update_watt; + if (previous_update_time == 0) + e->base_consumed_energy = 0; + else + e->base_consumed_energy = + _get_additional_consumption( + previous_update_time, + last_update_time, + e->base_watts, + e->current_watts); + e->previous_consumed_energy = e->consumed_energy; + e->consumed_energy += e->base_consumed_energy; + } else if (e->current_watts == 0) { + e->consumed_energy = 0; + e->base_watts = 0; + e->current_watts = last_update_watt; + } + e->poll_time = time(NULL); +} + /* * _thread_update_node_energy calls _read_ipmi_values and updates all values * for node consumption @@ -473,46 +549,31 @@ static int _read_ipmi_values(void) static int _thread_update_node_energy(void) { int rc = SLURM_SUCCESS; - - if (local_energy->current_watts == NO_VAL) - return rc; + uint16_t i; rc = _read_ipmi_values(); if (rc == SLURM_SUCCESS) { - if (local_energy->current_watts != 0) { - local_energy->base_watts = local_energy->current_watts; - local_energy->current_watts = last_update_watt; - if (previous_update_time == 0) - local_energy->base_consumed_energy = 0; - else - local_energy->base_consumed_energy = - _get_additional_consumption( - previous_update_time, - last_update_time, - local_energy->base_watts, - local_energy->current_watts); - local_energy->previous_consumed_energy = - local_energy->consumed_energy; - local_energy->consumed_energy += - local_energy->base_consumed_energy; + /* sensors list */ + for (i = 0; i < sensors_len; ++i) { + if (sensors[i].energy.current_watts == NO_VAL) + return rc; + _update_energy(&sensors[i].energy, + sensors[i].last_update_watt); } + if (previous_update_time == 0) previous_update_time = last_update_time; - if (local_energy->current_watts == 0) { - local_energy->consumed_energy = 0; - local_energy->base_watts = 0; - local_energy->current_watts = last_update_watt; - } - local_energy->poll_time = time(NULL); } + if (debug_flags & DEBUG_FLAG_ENERGY) { - info("ipmi-thread = %d sec, current %d Watts, " - "consumed %d Joules %d new", - (int) (last_update_time - previous_update_time), - local_energy->current_watts, - local_energy->consumed_energy, - local_energy->base_consumed_energy); + for (i = 0; i < sensors_len; ++i) + info("ipmi-thread: sensor %u current_watts: %u, " + "consumed %d, new %d", + sensors[i].id, + sensors[i].energy.current_watts, + sensors[i].energy.consumed_energy, + sensors[i].energy.base_consumed_energy); } return rc; @@ -526,6 +587,7 @@ static int _thread_init(void) static bool first = true; static bool first_init = SLURM_FAILURE; 
int rc = SLURM_SUCCESS; + uint16_t i; if (!first) return first_init; @@ -535,12 +597,17 @@ static int _thread_init(void) //TODO verbose error? rc = SLURM_FAILURE; } else { - if ((slurm_ipmi_conf.power_sensor_num == -1 - && _find_power_sensor() != SLURM_SUCCESS) + if ((sensors_len == 0 && _find_power_sensor() != SLURM_SUCCESS) || _check_power_sensor() != SLURM_SUCCESS) { - local_energy->current_watts = NO_VAL; + /* no valid sensors found */ + for (i = 0; i < sensors_len; ++i) { + sensors[i].energy.current_watts = NO_VAL; + } } else { - local_energy->current_watts = last_update_watt; + for (i = 0; i < sensors_len; ++i) { + sensors[i].energy.current_watts = + sensors[i].last_update_watt; + } } if (slurm_ipmi_conf.reread_sdr_cache) //IPMI cache is reread only on initialisation @@ -548,8 +615,6 @@ static int _thread_init(void) sensor_reading_flags ^= IPMI_MONITORING_SENSOR_READING_FLAGS_REREAD_SDR_CACHE; } - local_energy->consumed_energy = 0; - local_energy->base_watts = 0; slurm_mutex_unlock(&ipmi_mutex); if (rc != SLURM_SUCCESS) @@ -566,25 +631,53 @@ static int _thread_init(void) static int _ipmi_send_profile(void) { - acct_energy_data_t ener; + uint16_t i, j; + uint64_t data[descriptions_len]; + uint32_t id; if (!_running_profile()) return SLURM_SUCCESS; - if (debug_flags & DEBUG_FLAG_ENERGY) - info("_ipmi_send_profile: consumed %d watts", - local_energy->current_watts); - - memset(&ener, 0, sizeof(acct_energy_data_t)); - /*TODO function to calculate Average CPUs Frequency*/ - /*ener->cpu_freq = // read /proc/...*/ - ener.cpu_freq = 1; - ener.time = time(NULL); - ener.power = local_energy->current_watts; - acct_gather_profile_g_add_sample_data( - ACCT_GATHER_PROFILE_ENERGY, &ener); + if (dataset_id < 0) { + acct_gather_profile_dataset_t dataset[descriptions_len+1]; + for (i = 0; i < descriptions_len; i++) { + dataset[i].name = xstrdup_printf( + "%sPower", descriptions[i].label); + dataset[i].type = PROFILE_FIELD_UINT64; + } + dataset[i].name = NULL; + dataset[i].type = PROFILE_FIELD_NOT_SET; + dataset_id = acct_gather_profile_g_create_dataset( + "Energy", NO_PARENT, dataset); + for (i = 0; i < descriptions_len; ++i) + xfree(dataset[i].name); + if (debug_flags & DEBUG_FLAG_ENERGY) + debug("Energy: dataset created (id = %d)", dataset_id); + if (dataset_id == SLURM_ERROR) { + error("Energy: Failed to create the dataset for IPMI"); + return SLURM_ERROR; + } + } - return SLURM_ERROR; + /* pack an array of uint64_t with current power of sensors */ + memset(data, 0, sizeof(data)); + for (i = 0; i < descriptions_len; ++i) { + for (j = 0; j < descriptions[i].sensor_cnt; ++j) { + id = descriptions[i].sensor_idxs[j]; + data[i] += sensors[id].energy.current_watts; + } + } + + if (debug_flags & DEBUG_FLAG_PROFILE) { + for (i = 0; i < descriptions_len; i++) { + id = descriptions[i].sensor_idxs[j]; + info("PROFILE-Energy: %sPower=%d", + descriptions[i].label, + sensors[id].energy.current_watts); + } + } + return acct_gather_profile_g_add_sample_data(dataset_id, (void *)data, + last_update_time); } @@ -699,60 +792,101 @@ static void *_thread_launcher(void *no_data) static int _get_joules_task(uint16_t delta) { - acct_gather_energy_t *last_energy = NULL; - time_t now; + time_t now = time(NULL); static bool first = true; - static uint32_t start_current_energy = 0; uint32_t adjustment = 0; + uint16_t i; + acct_gather_energy_t *new; + acct_gather_energy_t *old; - last_energy = local_energy; - local_energy = NULL; + /* sensors list */ + acct_gather_energy_t *energies; + uint16_t sensor_cnt; - if 
(slurm_get_node_energy(NULL, delta, &local_energy)) { + if (slurm_get_node_energy(NULL, delta, &sensor_cnt, &energies)) { error("_get_joules_task: can't get info from slurmd"); - local_energy = last_energy; return SLURM_ERROR; } - now = time(NULL); + if (sensor_cnt != sensors_len) { + error("_get_joules_task: received %u sensors, %u expected", + sensor_cnt, sensors_len); + return SLURM_ERROR; + } - local_energy->previous_consumed_energy = last_energy->consumed_energy; + for (i = 0; i < sensor_cnt; ++i) { + new = &energies[i]; + old = &sensors[i].energy; + new->previous_consumed_energy = old->consumed_energy; + + if (slurm_ipmi_conf.adjustment) + adjustment = _get_additional_consumption( + new->poll_time, now, + new->current_watts, + new->current_watts); + + if (!first) { + new->consumed_energy -= start_current_energies[i]; + new->base_consumed_energy = adjustment + + (new->consumed_energy - old->consumed_energy); + } else { + /* This is just for the step, so take all the pervious + consumption out of the mix. + */ + start_current_energies[i] = + new->consumed_energy + adjustment; + new->base_consumed_energy = 0; + //first = false; + } - if (slurm_ipmi_conf.adjustment) - adjustment = _get_additional_consumption( - local_energy->poll_time, now, - local_energy->current_watts, - local_energy->current_watts); + new->consumed_energy = new->previous_consumed_energy + + new->base_consumed_energy; + memcpy(old, new, sizeof(acct_gather_energy_t)); + } - if (!first) { - local_energy->consumed_energy -= start_current_energy; + xfree(energies); - local_energy->base_consumed_energy = - (local_energy->consumed_energy - - last_energy->consumed_energy) - + adjustment; - } else { - /* This is just for the step, so take all the pervious - consumption out of the mix. - */ - start_current_energy = - local_energy->consumed_energy + adjustment; - local_energy->base_consumed_energy = 0; - first = false; + first = false; + + if (debug_flags & DEBUG_FLAG_ENERGY) { + for (i = 0; i < sensors_len; ++i) + info("_get_joules_task: consumed %u Joules " + "(received %u(%u watts) from slurmd)", + sensors[i].energy.consumed_energy, + sensors[i].energy.base_consumed_energy, + sensors[i].energy.current_watts); } - local_energy->consumed_energy = local_energy->previous_consumed_energy - + local_energy->base_consumed_energy; + return SLURM_SUCCESS; +} - acct_gather_energy_destroy(last_energy); +static void _get_node_energy(acct_gather_energy_t *energy) +{ + uint16_t i, j, id; + acct_gather_energy_t *e; - if (debug_flags & DEBUG_FLAG_ENERGY) - info("_get_joules_task: consumed %u Joules " - "(received %u(%u watts) from slurmd)", - local_energy->consumed_energy, - local_energy->base_consumed_energy, - local_energy->current_watts); + /* find the "Node" description */ + for (i = 0; i < descriptions_len; ++i) + if (xstrcmp(descriptions[i].label, NODE_DESC) == 0) + break; + /* not found, init is not finished or there is no watt sensors */ + if (i >= descriptions_len) + return; - return SLURM_SUCCESS; + /* sum the energy of all sensors described for "Node" */ + memset(energy, 0, sizeof(acct_gather_energy_t)); + for (j = 0; j < descriptions[i].sensor_cnt; ++j) { + id = descriptions[i].sensor_idxs[j]; + e = &sensors[id].energy; + energy->base_consumed_energy += e->base_consumed_energy; + energy->base_watts += e->base_watts; + energy->consumed_energy += e->consumed_energy; + energy->current_watts += e->current_watts; + energy->previous_consumed_energy += e->previous_consumed_energy; + /* node poll_time is computed as the oldest poll_time 
of + the sensors */ + if (energy->poll_time == 0 || energy->poll_time > e->poll_time) + energy->poll_time = e->poll_time; + } } /* @@ -771,6 +905,8 @@ extern int init(void) extern int fini(void) { + uint16_t i; + if (!_run_in_daemon()) return SLURM_SUCCESS; @@ -783,8 +919,15 @@ extern int fini(void) pthread_join(cleanup_handler_thread, NULL); slurm_mutex_unlock(&ipmi_mutex); - acct_gather_energy_destroy(local_energy); - local_energy = NULL; + xfree(sensors); sensors = NULL; + xfree(start_current_energies); start_current_energies = NULL; + + for (i = 0; i < descriptions_len; ++i) { + xfree(descriptions[i].label); + xfree(descriptions[i].sensor_idxs); + } + xfree(descriptions); descriptions = NULL; + return SLURM_SUCCESS; } @@ -799,39 +942,46 @@ extern int acct_gather_energy_p_update_node_energy(void) extern int acct_gather_energy_p_get_data(enum acct_energy_type data_type, void *data) { + uint16_t i; int rc = SLURM_SUCCESS; acct_gather_energy_t *energy = (acct_gather_energy_t *)data; time_t *last_poll = (time_t *)data; + uint16_t *sensor_cnt = (uint16_t *)data; xassert(_run_in_daemon()); switch (data_type) { - case ENERGY_DATA_JOULES_TASK: + case ENERGY_DATA_NODE_ENERGY: slurm_mutex_lock(&ipmi_mutex); - if (_is_thread_launcher()) { - if (_thread_init() == SLURM_SUCCESS) - _thread_update_node_energy(); - } else - _get_joules_task(10); /* Since we don't have - access to the - frequency here just - send in something. - */ - memcpy(energy, local_energy, sizeof(acct_gather_energy_t)); + _get_node_energy(energy); + slurm_mutex_unlock(&ipmi_mutex); + break; + case ENERGY_DATA_LAST_POLL: + slurm_mutex_lock(&ipmi_mutex); + *last_poll = last_update_time; slurm_mutex_unlock(&ipmi_mutex); break; + case ENERGY_DATA_SENSOR_CNT: + *sensor_cnt = sensors_len; + break; case ENERGY_DATA_STRUCT: slurm_mutex_lock(&ipmi_mutex); - memcpy(energy, local_energy, sizeof(acct_gather_energy_t)); + for (i = 0; i < sensors_len; ++i) + memcpy(&energy[i], &sensors[i].energy, + sizeof(acct_gather_energy_t)); slurm_mutex_unlock(&ipmi_mutex); - if (debug_flags & DEBUG_FLAG_ENERGY) { - info("_get_joules_node_ipmi = consumed %d Joules", - energy->consumed_energy); - } break; - case ENERGY_DATA_LAST_POLL: + case ENERGY_DATA_JOULES_TASK: slurm_mutex_lock(&ipmi_mutex); - *last_poll = local_energy->poll_time; + if (_is_thread_launcher()) { + if (_thread_init() == SLURM_SUCCESS) + _thread_update_node_energy(); + } else { + _get_joules_task(10); + } + for (i = 0; i < sensors_len; ++i) + memcpy(&energy[i], &sensors[i].energy, + sizeof(acct_gather_energy_t)); slurm_mutex_unlock(&ipmi_mutex); break; default: @@ -870,6 +1020,117 @@ extern int acct_gather_energy_p_set_data(enum acct_energy_type data_type, return rc; } +/* Parse the sensor descriptions stored into slurm_ipmi_conf.power_sensors. + * Expected format: comma-separated sensors ids and semi-colon-separated + * sensors descriptions. Also expects a mandatory description with label + * "Node". 
*/ +static int _parse_sensor_descriptions(void) +{ + /* TODO: error propagation */ + + const char *sep1 = ";"; + const char *sep2 = ","; + char *str_desc_list, *str_desc, *str_id, *mid, *endptr; + char *saveptr1, *saveptr2; // pointers for strtok_r storage + uint16_t i, j, k; + uint16_t id; + uint16_t *idx; + description_t *d; + bool found; + + if (slurm_ipmi_conf.power_sensors == NULL + || xstrcmp(slurm_ipmi_conf.power_sensors, "") == 0) { + return SLURM_ERROR; + } + + /* count the number of descriptions */ + str_desc_list = xstrdup(slurm_ipmi_conf.power_sensors); + descriptions_len = 0; + str_desc = strtok_r(str_desc_list, sep1, &saveptr1); + while (str_desc) { + ++descriptions_len; + str_desc = strtok_r(NULL, sep1, &saveptr1); + } + + descriptions = xmalloc(sizeof(description_t) * descriptions_len); + + /* parse descriptions */ + strcpy(str_desc_list, slurm_ipmi_conf.power_sensors); + i = 0; + str_desc = strtok_r(str_desc_list, sep1, &saveptr1); + while (str_desc) { + mid = xstrchr(str_desc, '='); + if (!mid || mid == str_desc) { + goto error; + } + /* label */ + *mid = '\0'; + d = &descriptions[i]; + d->label = xstrdup(str_desc); + /* associated sensors */ + ++mid; + str_id = strtok_r(mid, sep2, &saveptr2); + /* parse sensor ids of the current description */ + while (str_id) { + id = strtol(str_id, &endptr, 10); + if (*endptr != '\0') + goto error; + d->sensor_cnt++; + xrealloc(d->sensor_idxs, + sizeof(uint16_t) * d->sensor_cnt); + d->sensor_idxs[d->sensor_cnt - 1] = id; + str_id = strtok_r(NULL, sep2, &saveptr2); + } + ++i; + str_desc = strtok_r(NULL, sep1, &saveptr1); + } + xfree(str_desc_list); + + /* Ensure that the "Node" description is provided */ + found = false; + for (i = 0; i < descriptions_len && !found; ++i) + found = (xstrcasecmp(descriptions[i].label, NODE_DESC) == 0); + if (!found) + goto error; + + /* Here we have the list of descriptions with sensors ids in the + * sensors_idxs field instead of their indexes. We still have to + * gather the unique sensors ids and replace sensors_idxs by their + * indexes in the sensors array */ + for (i = 0; i < descriptions_len; ++i) { + for (j = 0; j < descriptions[i].sensor_cnt; ++j) { + idx = &descriptions[i].sensor_idxs[j]; + found = false; + for (k = 0; k < sensors_len && !found; ++k) + found = (*idx == sensors[k].id); + if (found) { + *idx = k - 1; + } else { + ++sensors_len; + xrealloc(sensors, sensors_len + * sizeof(sensor_status_t)); + sensors[sensors_len - 1].id = *idx; + *idx = sensors_len - 1;; + } + } + } + + start_current_energies = xmalloc(sensors_len * sizeof(uint32_t)); + + return SLURM_SUCCESS; + +error: + error("Configuration of EnergyIPMIPowerSensors is malformed. 
" + "Make sure that the expected format is respected and that " + "the \"Node\" label is provided."); + for (i = 0; i < descriptions_len; ++i) { + xfree(descriptions[i].label); + xfree(descriptions[i].sensor_idxs); + } + xfree(descriptions); descriptions = NULL; + return SLURM_ERROR; +} + extern void acct_gather_energy_p_conf_options(s_p_options_t **full_options, int *full_options_cnt) { @@ -903,7 +1164,7 @@ extern void acct_gather_energy_p_conf_options(s_p_options_t **full_options, {"EnergyIPMIEntitySensorNames", S_P_BOOLEAN}, {"EnergyIPMIFrequency", S_P_UINT32}, {"EnergyIPMICalcAdjustment", S_P_BOOLEAN}, - {"EnergyIPMIPowerSensor", S_P_UINT32}, + {"EnergyIPMIPowerSensors", S_P_STRING}, {"EnergyIPMITimeout", S_P_UINT32}, {"EnergyIPMIVariable", S_P_STRING}, {NULL} }; @@ -999,8 +1260,8 @@ extern void acct_gather_energy_p_conf_set(s_p_hashtbl_t *tbl) "EnergyIPMICalcAdjustment", tbl)) slurm_ipmi_conf.adjustment = false; - s_p_get_uint32(&slurm_ipmi_conf.power_sensor_num, - "EnergyIPMIPowerSensor", tbl); + s_p_get_string(&slurm_ipmi_conf.power_sensors, + "EnergyIPMIPowerSensors", tbl); s_p_get_uint32(&slurm_ipmi_conf.timeout, "EnergyIPMITimeout", tbl); @@ -1017,10 +1278,9 @@ extern void acct_gather_energy_p_conf_set(s_p_hashtbl_t *tbl) return; if (!flag_init) { - local_energy = acct_gather_energy_alloc(); - local_energy->consumed_energy=0; - local_energy->base_consumed_energy=0; - local_energy->base_watts=0; + /* try to parse the PowerSensors settings */ + _parse_sensor_descriptions(); + flag_init = true; if (_is_thread_launcher()) { pthread_attr_t attr; @@ -1191,9 +1451,9 @@ extern void acct_gather_energy_p_conf_values(List *data) list_append(*data, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); - key_pair->name = xstrdup("EnergyIPMIPowerSensor"); - key_pair->value = xstrdup_printf( - "%u", slurm_ipmi_conf.power_sensor_num); + key_pair->name = xstrdup("EnergyIPMIPowerSensors"); + key_pair->value = + xstrdup_printf("%s", slurm_ipmi_conf.power_sensors); list_append(*data, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); diff --git a/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi_config.c b/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi_config.c index 23d1d32259a5cf477cd6d22b0fd4f8146b9e7fda..0883e7ed9293735f5900925341b51867928a0917 100644 --- a/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi_config.c +++ b/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi_config.c @@ -61,6 +61,8 @@ extern void reset_slurm_ipmi_conf(slurm_ipmi_conf_t *slurm_ipmi_conf) { if (slurm_ipmi_conf) { slurm_ipmi_conf->power_sensor_num = -1; + xfree(slurm_ipmi_conf->power_sensors); + slurm_ipmi_conf->power_sensors = NULL; slurm_ipmi_conf->freq = DEFAULT_IPMI_FREQ; slurm_ipmi_conf->adjustment = false; slurm_ipmi_conf->timeout = TIMEOUT; diff --git a/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi_config.h b/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi_config.h index e104f92c16ba59c15c7da31e744ba5bf35c66a74..1cf7a65ccfcb1569c87bb0a9d1f96ce9d709786c 100644 --- a/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi_config.h +++ b/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi_config.h @@ -136,6 +136,7 @@ typedef struct slurm_ipmi_conf { /* Options for SLURM IPMI plugin*/ /* sensor num (only for power) */ uint32_t power_sensor_num; + char *power_sensors; /* Out-of-band Communication Configuration */ /* Indicate the IPMI protocol version to use * IPMI_MONITORING_PROTOCOL_VERSION_1_5 = 0x00, diff --git 
a/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c b/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c index cc4bc12d828e0556931aa20a7f45c82b882c6738..2db71a500217e927eb18c18a71c744cc03bcc9a1 100644 --- a/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c +++ b/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c @@ -130,6 +130,8 @@ const uint32_t plugin_version = SLURM_VERSION_NUMBER; static acct_gather_energy_t *local_energy = NULL; static uint64_t debug_flags = 0; +static int dataset_id = -1; /* id of the dataset for profile data */ + /* one cpu in the package */ static int pkg2cpu[MAX_PKGS] = {[0 ... MAX_PKGS-1] = -1}; static int pkg_fd[MAX_PKGS] = {[0 ... MAX_PKGS-1] = -1}; @@ -389,25 +391,38 @@ static int _running_profile(void) static int _send_profile(void) { - acct_energy_data_t ener; + uint64_t curr_watts; + acct_gather_profile_dataset_t dataset[] = { + { "Power", PROFILE_FIELD_UINT64 }, + { NULL, PROFILE_FIELD_NOT_SET } + }; if (!_running_profile()) return SLURM_SUCCESS; if (debug_flags & DEBUG_FLAG_ENERGY) - info("_send_profile: consumed %d watts", + info("_send_profile: consumed %u watts", local_energy->current_watts); - memset(&ener, 0, sizeof(acct_energy_data_t)); - /*TODO function to calculate Average CPUs Frequency*/ - /*ener->cpu_freq = // read /proc/...*/ - ener.cpu_freq = 1; - ener.time = time(NULL); - ener.power = local_energy->current_watts; - acct_gather_profile_g_add_sample_data( - ACCT_GATHER_PROFILE_ENERGY, &ener); + if (dataset_id < 0) { + dataset_id = acct_gather_profile_g_create_dataset( + "Energy", NO_PARENT, dataset); + if (debug_flags & DEBUG_FLAG_ENERGY) + debug("Energy: dataset created (id = %d)", dataset_id); + if (dataset_id == SLURM_ERROR) { + error("Energy: Failed to create the dataset for RAPL"); + return SLURM_ERROR; + } + } + + curr_watts = (uint64_t)local_energy->current_watts; + if (debug_flags & DEBUG_FLAG_PROFILE) { + info("PROFILE-Energy: power=%u", local_energy->current_watts); + } - return SLURM_ERROR; + return acct_gather_profile_g_add_sample_data(dataset_id, + (void *)&curr_watts, + local_energy->poll_time); } extern int acct_gather_energy_p_update_node_energy(void) @@ -416,7 +431,7 @@ extern int acct_gather_energy_p_update_node_energy(void) xassert(_run_in_daemon()); - if (!local_energy || local_energy->current_watts == NO_VAL) + if (local_energy->current_watts == NO_VAL) return rc; _get_joules_task(local_energy); @@ -466,6 +481,7 @@ extern int acct_gather_energy_p_get_data(enum acct_energy_type data_type, int rc = SLURM_SUCCESS; acct_gather_energy_t *energy = (acct_gather_energy_t *)data; time_t *last_poll = (time_t *)data; + uint16_t *sensor_cnt = (uint16_t *)data; xassert(_run_in_daemon()); @@ -476,12 +492,16 @@ extern int acct_gather_energy_p_get_data(enum acct_energy_type data_type, else _get_joules_task(energy); break; + case ENERGY_DATA_NODE_ENERGY: case ENERGY_DATA_STRUCT: memcpy(energy, local_energy, sizeof(acct_gather_energy_t)); break; case ENERGY_DATA_LAST_POLL: *last_poll = local_energy->poll_time; break; + case ENERGY_DATA_SENSOR_CNT: + *sensor_cnt = 1; + break; default: error("acct_gather_energy_p_get_data: unknown enum %d", data_type); @@ -533,7 +553,7 @@ extern void acct_gather_energy_p_conf_set(s_p_hashtbl_t *tbl) for (i = 0; i < nb_pkg; i++) pkg_fd[i] = _open_msr(pkg2cpu[i]); - local_energy = acct_gather_energy_alloc(); + local_energy = acct_gather_energy_alloc(1); result = _read_msr(pkg_fd[0], MSR_RAPL_POWER_UNIT); if (result == 0) diff --git 
a/src/plugins/acct_gather_filesystem/lustre/acct_gather_filesystem_lustre.c b/src/plugins/acct_gather_filesystem/lustre/acct_gather_filesystem_lustre.c index b4495de162bcb83f85ede07d8fde2484f0f92aa8..579d4d62cf9d00bc248c99b85f607677fff63996 100644 --- a/src/plugins/acct_gather_filesystem/lustre/acct_gather_filesystem_lustre.c +++ b/src/plugins/acct_gather_filesystem/lustre/acct_gather_filesystem_lustre.c @@ -270,57 +270,83 @@ static int _read_lustre_counters(void) */ static int _update_node_filesystem(void) { - static acct_filesystem_data_t fls; - static acct_filesystem_data_t current; static acct_filesystem_data_t previous; + static int dataset_id = -1; static bool first = true; - int cc; + acct_filesystem_data_t current; + + enum { + FIELD_READ, + FIELD_READMB, + FIELD_WRITE, + FIELD_WRITEMB, + FIELD_CNT + }; + + acct_gather_profile_dataset_t dataset[] = { + { "Reads", PROFILE_FIELD_UINT64 }, + { "ReadMB", PROFILE_FIELD_DOUBLE }, + { "Writes", PROFILE_FIELD_UINT64 }, + { "WriteMB", PROFILE_FIELD_DOUBLE }, + { NULL, PROFILE_FIELD_NOT_SET } + }; + + union { + double d; + uint64_t u64; + } data[FIELD_CNT]; slurm_mutex_lock(&lustre_lock); - cc = _read_lustre_counters(); - if (cc != SLURM_SUCCESS) { + if (_read_lustre_counters() != SLURM_SUCCESS) { error("%s: Cannot read lustre counters", __func__); slurm_mutex_unlock(&lustre_lock); return SLURM_FAILURE; } if (first) { - /* First time initialize the counters and return. - */ + dataset_id = acct_gather_profile_g_create_dataset("Network", + NO_PARENT, dataset); + if (dataset_id == SLURM_ERROR) { + error("FileSystem: Failed to create the dataset " + "for Lustre"); + return SLURM_ERROR; + } + previous.reads = lustre_se.all_lustre_nb_reads; previous.writes = lustre_se.all_lustre_nb_writes; - previous.read_size - = (double)lustre_se.all_lustre_read_bytes/1048576.0; - previous.write_size - = (double)lustre_se.all_lustre_write_bytes/1048576.0; + previous.read_size = (double)lustre_se.all_lustre_read_bytes; + previous.write_size = (double)lustre_se.all_lustre_write_bytes; first = false; - memset(&lustre_se, 0, sizeof(lustre_sens_t)); - slurm_mutex_unlock(&lustre_lock); + } - return SLURM_SUCCESS; + if (dataset_id < 0) { + slurm_mutex_unlock(&lustre_lock); + return SLURM_ERROR; } - /* Compute the current values read from all lustre-xxxx - * directories - */ + /* Compute the current values read from all lustre-xxxx directories */ current.reads = lustre_se.all_lustre_nb_reads; current.writes = lustre_se.all_lustre_nb_writes; - current.read_size = (double)lustre_se.all_lustre_read_bytes/1048576.0; - current.write_size = (double)lustre_se.all_lustre_write_bytes/1048576.0; - - /* Now compute the difference between the two snapshots - * and send it to hdf5 log. 
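/*
 * Illustrative sketch, not part of the patch: the gather plugins above all
 * follow the same profiling pattern -- describe the sample record once with
 * a NULL-terminated acct_gather_profile_dataset_t array, create the dataset
 * a single time, then pack one fixed-layout record per sample whose fields
 * match the descriptor order.  A minimal caller built only from types and
 * calls already used in this patch (example_send_sample, "Example", "Reads"
 * and "ReadMB" are made-up names) could look like:
 */
#include <stdint.h>
#include <time.h>
#include "src/common/slurm_acct_gather_profile.h"

static int example_dataset_id = -1;

static int example_send_sample(uint64_t reads, double read_mb)
{
        enum { FIELD_READS, FIELD_READMB, FIELD_CNT };

        acct_gather_profile_dataset_t dataset[] = {
                { "Reads",  PROFILE_FIELD_UINT64 },
                { "ReadMB", PROFILE_FIELD_DOUBLE },
                { NULL,     PROFILE_FIELD_NOT_SET }     /* terminator */
        };
        union { double d; uint64_t u64; } data[FIELD_CNT];

        /* create the dataset once; the returned id names an HDF5 table */
        if (example_dataset_id < 0) {
                example_dataset_id = acct_gather_profile_g_create_dataset(
                        "Example", NO_PARENT, dataset);
                if (example_dataset_id == SLURM_ERROR)
                        return SLURM_ERROR;
        }

        /* pack one record; field order must match the descriptor above */
        data[FIELD_READS].u64 = reads;
        data[FIELD_READMB].d = read_mb;
        return acct_gather_profile_g_add_sample_data(example_dataset_id,
                                                     (void *)data,
                                                     time(NULL));
}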
- */ - fls.reads = fls.reads + (current.reads - previous.reads); - fls.writes = fls.writes + (current.writes - previous.writes); - fls.read_size = fls.read_size - + (current.read_size - previous.read_size); - fls.write_size = fls.write_size - + (current.write_size - previous.write_size); - - acct_gather_profile_g_add_sample_data(ACCT_GATHER_PROFILE_LUSTRE, &fls); + current.read_size = (double)lustre_se.all_lustre_read_bytes; + current.write_size = (double)lustre_se.all_lustre_write_bytes; + + /* record sample */ + data[FIELD_READ].u64 = current.reads - previous.reads; + data[FIELD_READMB].d = (current.read_size - previous.read_size) / + (1 << 20); + data[FIELD_WRITE].u64 = current.writes - previous.writes; + data[FIELD_WRITEMB].d = (current.write_size - previous.write_size) / + (1 << 20); + + if (debug_flags & DEBUG_FLAG_PROFILE) { + char str[256]; + info("PROFILE-Lustre: %s", acct_gather_profile_dataset_str( + dataset, data, str, sizeof(str))); + } + acct_gather_profile_g_add_sample_data(dataset_id, (void *)data, + lustre_se.update_time); /* Save current as previous and clean up the working * data structure. @@ -328,10 +354,6 @@ static int _update_node_filesystem(void) memcpy(&previous, ¤t, sizeof(acct_filesystem_data_t)); memset(&lustre_se, 0, sizeof(lustre_sens_t)); - info("%s: num reads %"PRIu64" nums write %"PRIu64" " - "read %f MB wrote %f MB", - __func__, fls.reads, fls.writes, fls.read_size, fls.write_size); - slurm_mutex_unlock(&lustre_lock); return SLURM_SUCCESS; diff --git a/src/plugins/acct_gather_infiniband/ofed/acct_gather_infiniband_ofed.c b/src/plugins/acct_gather_infiniband/ofed/acct_gather_infiniband_ofed.c index 6fdb0c35aa75c37c112eaceea2bb1972e7d47ee5..ed6c02512eed859d2044cecd2df7604c6047c598 100644 --- a/src/plugins/acct_gather_infiniband/ofed/acct_gather_infiniband_ofed.c +++ b/src/plugins/acct_gather_infiniband/ofed/acct_gather_infiniband_ofed.c @@ -137,6 +137,8 @@ static slurm_ofed_conf_t ofed_conf; static uint64_t debug_flags = 0; static pthread_mutex_t ofed_lock = PTHREAD_MUTEX_INITIALIZER; +static int dataset_id = -1; /* id of the dataset for profile data */ + static uint8_t *_slurm_pma_query_via(void *rcvbuf, ib_portid_t * dest, int port, unsigned timeout, unsigned id, const struct ibmad_port *srcport) @@ -260,20 +262,50 @@ static int _read_ofed_values(void) */ static int _update_node_infiniband(void) { - acct_network_data_t net; - int rc = SLURM_SUCCESS; + int rc; + + enum { + FIELD_PACKIN, + FIELD_PACKOUT, + FIELD_MBIN, + FIELD_MBOUT, + FIELD_CNT + }; + + acct_gather_profile_dataset_t dataset[] = { + { "PacketsIn", PROFILE_FIELD_UINT64 }, + { "PacketsOut", PROFILE_FIELD_UINT64 }, + { "InMB", PROFILE_FIELD_DOUBLE }, + { "OutMB", PROFILE_FIELD_DOUBLE }, + { NULL, PROFILE_FIELD_NOT_SET } + }; + + union { + double d; + uint64_t u64; + } data[FIELD_CNT]; + + if (dataset_id < 0) { + dataset_id = acct_gather_profile_g_create_dataset("Network", + NO_PARENT, dataset); + if (debug_flags & DEBUG_FLAG_INFINIBAND) + debug("IB: dataset created (id = %d)", dataset_id); + if (dataset_id == SLURM_ERROR) { + error("IB: Failed to create the dataset for ofed"); + return SLURM_ERROR; + } + } slurm_mutex_lock(&ofed_lock); - rc = _read_ofed_values(); - - memset(&net, 0, sizeof(acct_network_data_t)); + if ((rc = _read_ofed_values()) != SLURM_SUCCESS) { + slurm_mutex_unlock(&ofed_lock); + return rc; + } - net.packets_in = ofed_sens.rcvpkts; - net.packets_out = ofed_sens.xmtpkts; - net.size_in = (double) ofed_sens.rcvdata / 1048576; - net.size_out = (double) ofed_sens.xmtdata / 
1048576; - acct_gather_profile_g_add_sample_data(ACCT_GATHER_PROFILE_NETWORK, - &net); + data[FIELD_PACKIN].u64 = ofed_sens.rcvpkts; + data[FIELD_PACKOUT].u64 = ofed_sens.xmtpkts; + data[FIELD_MBIN].d = (double) ofed_sens.rcvdata / (1 << 20); + data[FIELD_MBOUT].d = (double) ofed_sens.xmtdata / (1 << 20); if (debug_flags & DEBUG_FLAG_INFINIBAND) { info("ofed-thread = %d sec, transmitted %"PRIu64" bytes, " @@ -283,7 +315,13 @@ static int _update_node_infiniband(void) } slurm_mutex_unlock(&ofed_lock); - return rc; + if (debug_flags & DEBUG_FLAG_PROFILE) { + char str[256]; + info("PROFILE-Network: %s", acct_gather_profile_dataset_str( + dataset, data, str, sizeof(str))); + } + return acct_gather_profile_g_add_sample_data(dataset_id, (void *)data, + ofed_sens.update_time); } static bool _run_in_daemon(void) diff --git a/src/plugins/acct_gather_profile/hdf5/acct_gather_profile_hdf5.c b/src/plugins/acct_gather_profile/hdf5/acct_gather_profile_hdf5.c index aa86c9be4760496458138e1cb49d9140f2e68a4b..d68be3a5c39f2a71290fbae2f12ac9502b21e4d3 100644 --- a/src/plugins/acct_gather_profile/hdf5/acct_gather_profile_hdf5.c +++ b/src/plugins/acct_gather_profile/hdf5/acct_gather_profile_hdf5.c @@ -4,10 +4,12 @@ ***************************************************************************** * Copyright (C) 2013 Bull S. A. S. * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. - * Written by Rod Schultz <rod.schultz@bull.com> * * Portions Copyright (C) 2013 SchedMD LLC. - * Written by Danny Auble <da@schedmd.com> + * + * Initially written by Rod Schultz <rod.schultz@bull.com> @ Bull + * and Danny Auble <da@schedmd.com> @ SchedMD. + * Adapted by Yoann Blein <yoann.blein@bull.net> @ Bull. * * This file is part of SLURM, a resource management program. * For details, see <http://www.schedmd.com/slurmdocs/>. @@ -62,6 +64,13 @@ #include "src/slurmd/common/proctrack.h" #include "hdf5_api.h" +#define HDF5_CHUNK_SIZE 10 +/* Compression level, a value of 0 through 9. Level 0 is faster but offers the + * least compression; level 9 is slower but offers maximum compression. + * A setting of -1 indicates that no compression is desired. */ +/* TODO: Make this configurable with a parameter */ +#define HDF5_COMPRESS 0 + /* * These variables are required by the generic plugin interface. If they * are not found in the plugin, the plugin loader will ignore it. @@ -91,13 +100,16 @@ const char plugin_name[] = "AcctGatherProfile hdf5 plugin"; const char plugin_type[] = "acct_gather_profile/hdf5"; const uint32_t plugin_version = SLURM_VERSION_NUMBER; -hid_t typTOD; - typedef struct { char *dir; uint32_t def; } slurm_hdf5_conf_t; +typedef struct { + hid_t table_id; + size_t type_size; +} table_t; + // Global HDF5 Variables // The HDF5 file and base objects will remain open for the duration of the // step. 
This avoids reconstruction on every acct_gather_sample and @@ -113,6 +125,13 @@ static slurm_hdf5_conf_t hdf5_conf; static uint64_t debug_flags = 0; static uint32_t g_profile_running = ACCT_GATHER_PROFILE_NOT_SET; static stepd_step_rec_t *g_job = NULL; +static time_t step_start_time; + +static hid_t *groups = NULL; +static size_t groups_len = 0; +static table_t *tables = NULL; +static size_t tables_max_len = 0; +static size_t tables_cur_len = 0; static void _reset_slurm_profile_conf(void) { @@ -136,22 +155,6 @@ static uint32_t _determine_profile(void) return profile; } -static int _get_taskid_from_pid(pid_t pid, uint32_t *gtid) -{ - int tx; - - xassert(g_job); - - for (tx=0; tx<g_job->node_tasks; tx++) { - if (g_job->task[tx]->pid == pid) { - *gtid = g_job->task[tx]->gtid; - return SLURM_SUCCESS; - } - } - - return SLURM_ERROR; -} - static int _create_directories(void) { int rc; @@ -196,17 +199,6 @@ static int _create_directories(void) return SLURM_SUCCESS; } -static bool _do_profile(uint32_t profile, uint32_t req_profiles) -{ - if (req_profiles <= ACCT_GATHER_PROFILE_NONE) - return false; - if ((profile == ACCT_GATHER_PROFILE_NOT_SET) - || (req_profiles & profile)) - return true; - - return false; -} - static bool _run_in_daemon(void) { static bool set = false; @@ -231,11 +223,17 @@ extern int init(void) debug_flags = slurm_get_debug_flags(); + /* Move HDF5 trace printing to log file instead of stderr */ + H5Eset_auto(H5E_DEFAULT, (herr_t (*)(hid_t, void *))H5Eprint, + log_fp()); + return SLURM_SUCCESS; } extern int fini(void) { + xfree(tables); + xfree(groups); xfree(hdf5_conf.dir); return SLURM_SUCCESS; } @@ -303,8 +301,7 @@ extern int acct_gather_profile_p_node_step_start(stepd_step_rec_t* job) { int rc = SLURM_SUCCESS; - time_t start_time; - char *profile_file_name; + char *profile_file_name; char *profile_str; xassert(_run_in_daemon()); @@ -343,7 +340,6 @@ extern int acct_gather_profile_p_node_step_start(stepd_step_rec_t* job) } // Create a new file using the default properties. 
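/*
 * Illustrative sketch, not part of the patch: the hdf5 plugin hands callers
 * a small integer dataset id instead of a raw hid_t -- every packet table it
 * creates is appended to a growable array and the array index becomes the
 * id.  A minimal model of that registry, assuming Slurm's xrealloc()
 * behaviour as used elsewhere in this patch; table_reg_t and reg_add are
 * made-up names and the doubling policy only approximates the plugin's.
 */
#include <stddef.h>
#include <hdf5.h>
#include "src/common/xmalloc.h"

typedef struct {
        hid_t table_id;         /* H5PT packet-table handle             */
        size_t type_size;       /* bytes per record, incl. time header  */
} table_reg_t;

static table_reg_t *reg = NULL;
static size_t reg_len = 0, reg_max = 0;

/* remember a new table and return its index (the public dataset id) */
static int reg_add(hid_t table_id, size_t type_size)
{
        if (reg_len == reg_max) {
                reg_max = reg_max ? reg_max * 2 : 1;
                reg = xrealloc(reg, reg_max * sizeof(table_reg_t));
        }
        reg[reg_len].table_id = table_id;
        reg[reg_len].type_size = type_size;
        return (int)reg_len++;
}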
- profile_init(); file_id = H5Fcreate(profile_file_name, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); if (chown(profile_file_name, (uid_t)g_job->uid, @@ -357,10 +353,9 @@ extern int acct_gather_profile_p_node_step_start(stepd_step_rec_t* job) return SLURM_FAILURE; } /* fd_set_close_on_exec(file_id); Not supported for HDF5 */ - sprintf(group_node, "/%s_%s", GRP_NODE, g_job->node_name); - gid_node = H5Gcreate(file_id, group_node, H5P_DEFAULT, - H5P_DEFAULT, H5P_DEFAULT); - if (gid_node < 1) { + sprintf(group_node, "/%s", g_job->node_name); + gid_node = make_group(file_id, group_node); + if (gid_node < 0) { H5Fclose(file_id); file_id = -1; info("PROFILE: Failed to create Node group"); @@ -368,9 +363,11 @@ extern int acct_gather_profile_p_node_step_start(stepd_step_rec_t* job) } put_string_attribute(gid_node, ATTR_NODENAME, g_job->node_name); put_int_attribute(gid_node, ATTR_NTASKS, g_job->node_tasks); - start_time = time(NULL); + put_int_attribute(gid_node, ATTR_CPUPERTASK, g_job->cpus_per_task); + + step_start_time = time(NULL); put_string_attribute(gid_node, ATTR_STARTTIME, - slurm_ctime2(&start_time)); + slurm_ctime2(&step_start_time)); return rc; } @@ -394,6 +391,7 @@ extern int acct_gather_profile_p_child_forked(void) extern int acct_gather_profile_p_node_step_end(void) { int rc = SLURM_SUCCESS; + size_t i; xassert(_run_in_daemon()); @@ -412,6 +410,15 @@ extern int acct_gather_profile_p_node_step_end(void) if (debug_flags & DEBUG_FLAG_PROFILE) info("PROFILE: node_step_end (shutdown)"); + /* close tables */ + for (i = 0; i < tables_cur_len; ++i) { + H5PTclose(tables[i].table_id); + } + /* close groups */ + for (i = 0; i < groups_len; ++i) { + H5Gclose(groups[i]); + } + if (gid_totals > 0) H5Gclose(gid_totals); if (gid_samples > 0) @@ -451,181 +458,165 @@ extern int acct_gather_profile_p_task_start(uint32_t taskid) extern int acct_gather_profile_p_task_end(pid_t taskpid) { - hid_t gid_task; - char group_task[MAX_GROUP_NAME+1]; - uint32_t task_id; - int rc = SLURM_SUCCESS; - - xassert(_run_in_daemon()); - xassert(g_job); - - if (g_job->stepid == NO_VAL) - return rc; - - xassert(g_profile_running != ACCT_GATHER_PROFILE_NOT_SET); - - if (!_do_profile(ACCT_GATHER_PROFILE_NOT_SET, g_profile_running)) - return rc; - - if (_get_taskid_from_pid(taskpid, &task_id) != SLURM_SUCCESS) - return SLURM_FAILURE; - if (file_id == -1) { - info("PROFILE: add_task_data, HDF5 file is not open"); - return SLURM_FAILURE; - } - if (gid_tasks < 0) { - gid_tasks = make_group(gid_node, GRP_TASKS); - if (gid_tasks < 1) { - info("PROFILE: Failed to create Tasks group"); - return SLURM_FAILURE; - } - } - sprintf(group_task, "%s_%d", GRP_TASK, task_id); - gid_task = get_group(gid_tasks, group_task); - if (gid_task == -1) { - gid_task = make_group(gid_tasks, group_task); - if (gid_task < 0) { - info("Failed to open tasks %s", group_task); - return SLURM_FAILURE; - } - put_int_attribute(gid_task, ATTR_TASKID, task_id); - } - put_int_attribute(gid_task, ATTR_CPUPERTASK, g_job->cpus_per_task); - if (debug_flags & DEBUG_FLAG_PROFILE) info("PROFILE: task_end"); - return rc; + return SLURM_SUCCESS; } -extern int acct_gather_profile_p_add_sample_data(uint32_t type, void *data) +extern int acct_gather_profile_p_create_group(const char* name) { - hid_t g_sample_grp; - char group[MAX_GROUP_NAME+1]; - char group_sample[MAX_GROUP_NAME+1]; - static uint32_t sample_no = 0; - uint32_t task_id = 0; - void *send_profile = NULL; - char *type_name = NULL; - - profile_task_t profile_task; - profile_network_t profile_network; - profile_energy_t 
profile_energy; - profile_io_t profile_io; - - struct jobacctinfo *jobacct = (struct jobacctinfo *)data; - acct_network_data_t *net = (acct_network_data_t *)data; - acct_energy_data_t *ener = (acct_energy_data_t *)data; - struct lustre_data *lus = (struct lustre_data *)data; - - xassert(_run_in_daemon()); - xassert(g_job); + hid_t gid_group = make_group(gid_node, name); + if (gid_group < 0) { + return SLURM_ERROR; + } - if (g_job->stepid == NO_VAL) - return SLURM_SUCCESS; + /* store the group to keep track of it */ + groups = xrealloc(groups, (groups_len + 1) * sizeof(hid_t)); + groups[groups_len] = gid_group; + ++groups_len; - xassert(g_profile_running != ACCT_GATHER_PROFILE_NOT_SET); + return gid_group; +} - if (!_do_profile(type, g_profile_running)) - return SLURM_SUCCESS; +extern int acct_gather_profile_p_create_dataset( + const char* name, int parent, acct_gather_profile_dataset_t *dataset) +{ + size_t type_size; + size_t offset, field_size; + hid_t dtype_id; + hid_t field_id; + hid_t table_id; + acct_gather_profile_dataset_t *dataset_loc = dataset; - switch (type) { - case ACCT_GATHER_PROFILE_ENERGY: - snprintf(group, sizeof(group), "%s", GRP_ENERGY); + if (g_profile_running <= ACCT_GATHER_PROFILE_NONE) + return SLURM_ERROR; - memset(&profile_energy, 0, sizeof(profile_energy_t)); - profile_energy.time = ener->time; - profile_energy.cpu_freq = ener->cpu_freq; - profile_energy.power = ener->power; + debug("acct_gather_profile_p_create_dataset %s", name); + + /* compute the size of the type needed to create the table */ + type_size = sizeof(uint64_t) * 2; /* size for time field */ + while (dataset_loc && (dataset_loc->type != PROFILE_FIELD_NOT_SET)) { + switch (dataset_loc->type) { + case PROFILE_FIELD_UINT64: + type_size += sizeof(uint64_t); + break; + case PROFILE_FIELD_DOUBLE: + type_size += sizeof(double); + break; + case PROFILE_FIELD_NOT_SET: + break; + } + dataset_loc++; + } - send_profile = &profile_energy; - break; - case ACCT_GATHER_PROFILE_TASK: - if (_get_taskid_from_pid(jobacct->pid, &task_id) - != SLURM_SUCCESS) - return SLURM_ERROR; + /* create the datatype for the dataset */ + if ((dtype_id = H5Tcreate(H5T_COMPOUND, type_size)) < 0) { + debug3("PROFILE: failed to create datatype for table %s", + name); + return SLURM_ERROR; + } - snprintf(group, sizeof(group), "%s_%u", GRP_TASK, task_id); + /* insert fields */ + if (H5Tinsert(dtype_id, "ElapsedTime", sizeof(uint64_t), + H5T_NATIVE_UINT64) < 0) + return SLURM_ERROR; + if (H5Tinsert(dtype_id, "EpochTime", 0, H5T_NATIVE_UINT64) < 0) + return SLURM_ERROR; - memset(&profile_task, 0, sizeof(profile_task_t)); - profile_task.time = time(NULL); - profile_task.cpu_freq = jobacct->act_cpufreq; - profile_task.cpu_time = jobacct->tot_cpu; - profile_task.cpu_utilization = jobacct->tot_cpu; - profile_task.pages = jobacct->tot_pages; - profile_task.read_size = jobacct->tot_disk_read; - profile_task.rss = jobacct->tot_rss; - profile_task.vm_size = jobacct->tot_vsize; - profile_task.write_size = jobacct->tot_disk_write; + dataset_loc = dataset; + + offset = sizeof(uint64_t) * 2; + while (dataset_loc && (dataset_loc->type != PROFILE_FIELD_NOT_SET)) { + switch (dataset_loc->type) { + case PROFILE_FIELD_UINT64: + field_id = H5T_NATIVE_UINT64; + field_size = sizeof(uint64_t); + break; + case PROFILE_FIELD_DOUBLE: + field_id = H5T_NATIVE_DOUBLE; + field_size = sizeof(double); + break; + case PROFILE_FIELD_NOT_SET: + break; + } + if (H5Tinsert(dtype_id, dataset_loc->name, + offset, field_id) < 0) + return SLURM_ERROR; + offset += field_size; + 
dataset_loc++; + } - send_profile = &profile_task; - break; - case ACCT_GATHER_PROFILE_LUSTRE: - snprintf(group, sizeof(group), "%s", GRP_LUSTRE); + /* create the table */ + if (parent < 0) + parent = gid_node; /* default parent is the node group */ + table_id = H5PTcreate_fl(parent, name, dtype_id, HDF5_CHUNK_SIZE, + HDF5_COMPRESS); + if (table_id < 0) { + error("PROFILE: Impossible to create the table %s", name); + H5Tclose(dtype_id); + return SLURM_ERROR; + } + H5Tclose(dtype_id); /* close the datatype since H5PT keeps a copy */ + + /* resize the tables array if full */ + if (tables_cur_len == tables_max_len) { + if (tables_max_len == 0) + ++tables_max_len; + tables_max_len *= 2; + tables = xrealloc(tables, tables_max_len * sizeof(table_t)); + } - memset(&profile_io, 0, sizeof(profile_io_t)); - profile_io.time = time(NULL); - profile_io.reads = lus->reads; - profile_io.read_size = lus->read_size; - profile_io.writes = lus->writes; - profile_io.write_size = lus->write_size; + /* reserve a new table */ + tables[tables_cur_len].table_id = table_id; + tables[tables_cur_len].type_size = type_size; + ++tables_cur_len; - send_profile = &profile_io; + return tables_cur_len - 1; +} - break; - case ACCT_GATHER_PROFILE_NETWORK: +extern int acct_gather_profile_p_add_sample_data(int table_id, void *data, + time_t sample_time) +{ + table_t *ds = &tables[table_id]; + uint8_t send_data[ds->type_size]; + int header_size = 0; + debug("acct_gather_profile_p_add_sample_data %d", table_id); - snprintf(group, sizeof(group), "%s", GRP_NETWORK); + if (file_id < 0) { + debug("PROFILE: Trying to add data but profiling is over"); + return SLURM_SUCCESS; + } - memset(&profile_network, 0, sizeof(profile_network_t)); - profile_network.time = time(NULL); - profile_network.packets_in = net->packets_in; - profile_network.size_in = net->size_in; - profile_network.packets_out = net->packets_out; - profile_network.size_out = net->size_out; + if (table_id < 0 || table_id >= tables_cur_len) { + error("PROFILE: trying to add samples to an invalid table %d", + table_id); + return SLURM_ERROR; + } - send_profile = &profile_network; + /* ensure that we have to record something */ + xassert(_run_in_daemon()); + xassert(g_job); + if (g_job->stepid == NO_VAL) + return SLURM_SUCCESS; + xassert(g_profile_running != ACCT_GATHER_PROFILE_NOT_SET); - break; - default: - error("acct_gather_profile_p_add_sample_data: " - "Unknown type %d sent", type); + if (g_profile_running <= ACCT_GATHER_PROFILE_NONE) return SLURM_ERROR; - } - type_name = acct_gather_profile_type_to_string(type); + /* prepend timestampe and relative time */ + ((uint64_t *)send_data)[0] = difftime(sample_time, step_start_time); + header_size += sizeof(uint64_t); + ((uint64_t *)send_data)[1] = sample_time; + header_size += sizeof(uint64_t); - if (debug_flags & DEBUG_FLAG_PROFILE) - info("PROFILE: add_sample_data Group-%s Type=%s", - group, type_name); - - if (file_id == -1) { - if (debug_flags & DEBUG_FLAG_PROFILE) { - // This can happen from samples from the gather threads - // before the step actually starts. 
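/*
 * Illustrative sketch, not part of the patch: each record appended by the
 * new acct_gather_profile_p_add_sample_data() starts with a two-field time
 * header (elapsed seconds since step start, then the epoch time) and is
 * followed by the caller's payload copied verbatim.  Assuming a single
 * uint64 payload column ("Example", "Value" and append_one are made-up
 * names), building the compound type and appending one record with the
 * HDF5 packet-table API looks roughly like:
 */
#include <stdint.h>
#include <time.h>
#include <hdf5.h>
#include <hdf5_hl.h>

static herr_t append_one(hid_t parent, time_t step_start, uint64_t value)
{
        hid_t dtype, table;
        uint64_t rec[3];
        herr_t rc;

        /* compound record: ElapsedTime, EpochTime, then the payload */
        dtype = H5Tcreate(H5T_COMPOUND, sizeof(rec));
        H5Tinsert(dtype, "ElapsedTime", 0, H5T_NATIVE_UINT64);
        H5Tinsert(dtype, "EpochTime", sizeof(uint64_t), H5T_NATIVE_UINT64);
        H5Tinsert(dtype, "Value", 2 * sizeof(uint64_t), H5T_NATIVE_UINT64);

        /* chunked packet table: 10 records per chunk, no compression */
        table = H5PTcreate_fl(parent, "Example", dtype, 10, 0);
        H5Tclose(dtype);
        if (table < 0)
                return -1;

        rec[0] = (uint64_t)difftime(time(NULL), step_start); /* elapsed */
        rec[1] = (uint64_t)time(NULL);                       /* epoch   */
        rec[2] = value;                                      /* payload */
        rc = H5PTappend(table, 1, rec);

        H5PTclose(table);
        return rc;
}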
- info("PROFILE: add_sample_data, HDF5 file not open"); - } - return SLURM_FAILURE; - } - if (gid_samples < 0) { - gid_samples = make_group(gid_node, GRP_SAMPLES); - if (gid_samples < 1) { - info("PROFILE: failed to create TimeSeries group"); - return SLURM_FAILURE; - } - } - g_sample_grp = get_group(gid_samples, group); - if (g_sample_grp < 0) { - g_sample_grp = make_group(gid_samples, group); - if (g_sample_grp < 0) { - info("PROFILE: failed to open TimeSeries %s", group); - return SLURM_FAILURE; - } - put_string_attribute(g_sample_grp, ATTR_DATATYPE, type_name); + memcpy(send_data + header_size, data, ds->type_size - header_size); + + /* append the record to the table */ + if (H5PTappend(ds->table_id, 1, send_data) < 0) { + error("PROFILE: Impossible to add data to the table %d; " + "maybe the table has not been created?", table_id); + return SLURM_ERROR; } - sprintf(group_sample, "%s_%10.10d", group, ++sample_no); - put_hdf5_data(g_sample_grp, type, SUBDATA_SAMPLE, - group_sample, send_profile, 1); - H5Gclose(g_sample_grp); return SLURM_SUCCESS; } @@ -649,3 +640,12 @@ extern void acct_gather_profile_p_conf_values(List *data) return; } + +extern bool acct_gather_profile_p_is_active(uint32_t type) +{ + if (g_profile_running <= ACCT_GATHER_PROFILE_NONE) + return false; + return (type == ACCT_GATHER_PROFILE_NOT_SET) + || (g_profile_running & type); +} + diff --git a/src/plugins/acct_gather_profile/hdf5/hdf5_api.c b/src/plugins/acct_gather_profile/hdf5/hdf5_api.c index fcce7693e4be9c03535b7d1be267f90e3349b3e3..67368fd304ea64153fa567b2281aa6c986c7070e 100644 --- a/src/plugins/acct_gather_profile/hdf5/hdf5_api.c +++ b/src/plugins/acct_gather_profile/hdf5/hdf5_api.c @@ -38,1583 +38,23 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. \****************************************************************************/ +#include <string.h> + #include "src/common/macros.h" -#include "src/common/slurm_time.h" #include "src/common/xassert.h" #include "src/common/xstring.h" +#include "src/common/xmalloc.h" +#include "src/common/slurm_acct_gather_profile.h" #include "hdf5_api.h" - -// Static variables ok as add function are inside a lock. -static time_t seriesStart; -static hid_t typTOD; -static int i; // General index used in some macros. 
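/*
 * Illustrative sketch, not part of the patch: the MEM_ADD_* and FILE_ADD_*
 * helper macros removed below are thin wrappers around H5Tinsert().  The
 * memory variants locate each field with HOFFSET() inside the corresponding
 * profile_*_t C struct, while the file variants walk a running byte offset.
 * Expanded by hand for a hypothetical two-field record (sample_rec,
 * build_memory_type and build_file_type are made-up names):
 */
#include <stddef.h>
#include <stdint.h>
#include <hdf5.h>

struct sample_rec {
        uint64_t time;
        double value;
};

static hid_t build_memory_type(void)
{
        hid_t t = H5Tcreate(H5T_COMPOUND, sizeof(struct sample_rec));

        if (t < 0)
                return -1;
        /* memory layout follows the C struct, hence HOFFSET() */
        H5Tinsert(t, "Time", HOFFSET(struct sample_rec, time),
                  H5T_NATIVE_UINT64);
        H5Tinsert(t, "Value", HOFFSET(struct sample_rec, value),
                  H5T_NATIVE_DOUBLE);
        return t;
}

static hid_t build_file_type(void)
{
        size_t offset = 0;
        hid_t t = H5Tcreate(H5T_COMPOUND, 2 * 8);

        if (t < 0)
                return -1;
        /* file layout is packed: each field lands at the running offset */
        H5Tinsert(t, "Time", offset, H5T_NATIVE_UINT64);
        offset += 8;
        H5Tinsert(t, "Value", offset, H5T_NATIVE_DOUBLE);
        return t;
}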
-static int moffset; // General variable used by insert macros - -/* - * Macro to insert a date string type into a compound memory type - * - * Parameters - * p parent (group) memory type - * label description of item - * type profile struct type - * item data item in type - */ -#define MEM_ADD_DATE_TIME(p, label, type, item) \ - if(H5Tinsert(p, label, HOFFSET(type, item), typTOD) < 0) { \ - debug3("PROFILE: failed insert into memory datatype"); \ - H5Tclose(p); \ - return -1; \ - } -/* - * Macro to insert a date string type into a compound file type - * - * Parameters - * p parent (group) file type - * label description of item - * offset offset into record - */ -#define FILE_ADD_DATE_TIME(p, label, offset) \ - if(H5Tinsert(p, label, offset, typTOD) < 0) { \ - debug3("PROFILE: failed insert into file datatype"); \ - H5Tclose(p); \ - return -1; \ - } - -/* - * Macro to insert an uint64 into a compound memory type - * - * Parameters - * p parent (group) memory type - * label description of item - * type profile struct type - * item data item in type - */ -#define MEM_ADD_UINT64(p, label, type, item) \ - if(H5Tinsert(p, label, HOFFSET(type, item), H5T_NATIVE_UINT64) < 0) { \ - debug3("PROFILE: failed insert64 into memory datatype"); \ - H5Tclose(p); \ - return -1; \ - } -/* - * Macro to insert a uint64 into a compound file type - * - * Parameters - * p parent (group) file type - * label description of item - */ -#define FILE_ADD_UINT64(p, label) \ - if(H5Tinsert(p, label, moffset, H5T_NATIVE_UINT64) < 0) { \ - debug3("PROFILE: failed insert64 into file datatype"); \ - H5Tclose(p); \ - return -1; \ - } \ - moffset += 8; - -/* - * Macro to insert a double into a compound memory type - * - * Parameters - * p parent (group) memory type - * label description of item - * type profile struct type - * item data item in type - */ -#define MEM_ADD_DBL(p, label, type, item) \ - if(H5Tinsert(p, label, HOFFSET(type, item), H5T_NATIVE_DOUBLE) < 0) { \ - debug3("PROFILE: failed insertdbl into memory datatype"); \ - H5Tclose(p); \ - return -1; \ - } -/* - * Macro to insert a double into a compound file type - * - * Parameters - * p parent (group) file type - * label description of item - */ -#define FILE_ADD_DBL(p, label) \ - if(H5Tinsert(p, label, moffset, H5T_NATIVE_DOUBLE) < 0) { \ - debug3("PROFILE: failed insertdbl into file datatype"); \ - H5Tclose(p); \ - return -1; \ - } \ - moffset += 8; - -/* - * Macro to increment a sample in a difference series - * -- Difference means each sample represents counts for only that interval - * (assumes consistent naming convention) - * - * - * Parameters - * tot total pointer - * smp sample pointer - * var variable name in sample - * count number of items in series - */ -#define INCR_DIF_SAMPLE(tot, smp, var, count) \ - for (i=0; i<count; i++) { \ - if (i == 0) { \ - total->var.min = smp[i].var; \ - } \ - tot->var.total += smp[i].var; \ - tot->var.min = MIN(smp[i].var, tot->var.min); \ - tot->var.max = MAX(smp[i].var, tot->var.max); \ - } \ - tot->var.ave = tot->var.total / count; - -/* - * Macro to increment a sample in a running total - * -- Running total means first sample is initial conditions - * (assumes consistent naming convention) - * - * - * Parameters - * tot total pointer - * smp sample pointer - * var variable name in sample - * count number of items in series - */ -#define INCR_RT_SAMPLE(tot, smp, var, count) \ - for (i=1; i<count; i++) { \ - if (i == 1) { \ - total->var.min = smp[i].var; \ - } \ - tot->var.total += smp[i].var; \ - tot->var.min = 
MIN(smp[i].var, tot->var.min); \ - tot->var.max = MAX(smp[i].var, tot->var.max); \ - } \ - tot->var.ave = tot->var.total / count; - -/* Macro to put an int min,ave,max,total for a variable to extract file - * - * Parameters - * fp file descriptor - * var variable name - * prf prefix for series (usually ',' - */ -#define PUT_UINT_SUM(fp, var, prfx) \ - fprintf(fp, "%s%ld,%ld,%ld,%ld", prfx, \ - var.min, var.ave, var.max, var.total); -/* Macro to put an int min,ave,max,total for a variable to extract file - * - * Parameters - * fp file descriptor - * var variable name - * prf prefix for series (usually ',' - */ -#define PUT_DBL_SUM(fp, var, prfx) \ - fprintf(fp, "%s%.3f,%.3f,%.3f,%.3f", prfx, \ - var.min, var.ave, var.max, var.total); - - -// ============================================================================ -// Routines supporting Energy Data type -// ============================================================================ - -static int _energy_dataset_size(void) -{ - return sizeof(profile_energy_t); -} - -static hid_t _energy_create_memory_datatype(void) -{ - hid_t mtyp_energy = H5Tcreate(H5T_COMPOUND, sizeof(profile_energy_t)); - if (mtyp_energy < 0) { - debug3("PROFILE: failed to create Energy memory datatype"); - return -1; - } - MEM_ADD_DATE_TIME(mtyp_energy, "Date_Time", profile_energy_t, tod); - MEM_ADD_UINT64(mtyp_energy, "Time", profile_energy_t, time); - MEM_ADD_UINT64(mtyp_energy, "Power", profile_energy_t, power); - MEM_ADD_UINT64(mtyp_energy, "CPU_Frequency", - profile_energy_t, cpu_freq); - - return mtyp_energy; -} - -static hid_t _energy_create_file_datatype(void) -{ - hid_t ftyp_energy = H5Tcreate(H5T_COMPOUND, (TOD_LEN+3*8)); - if (ftyp_energy < 0) { - debug3("PROFILE: failed to create Energy file datatype"); - return -1; - } - moffset = TOD_LEN; - FILE_ADD_DATE_TIME(ftyp_energy, "Date_Time", 0); - FILE_ADD_UINT64(ftyp_energy, "Time"); - FILE_ADD_UINT64(ftyp_energy, "Power"); - FILE_ADD_UINT64(ftyp_energy, "CPU_Frequency"); - - return ftyp_energy; -} - -static hid_t _energy_s_create_memory_datatype(void) -{ - hid_t mtyp_energy = H5Tcreate(H5T_COMPOUND, - sizeof(profile_energy_s_t)); - if (mtyp_energy < 0) { - debug3("PROFILE: failed to create Energy_s memory datatype"); - return -1; - } - MEM_ADD_DATE_TIME(mtyp_energy, "Start Time", - profile_energy_s_t, start_time); - MEM_ADD_UINT64(mtyp_energy, "Elapsed Time", - profile_energy_s_t, elapsed_time); - MEM_ADD_UINT64(mtyp_energy, "Min Power", profile_energy_s_t, power.min); - MEM_ADD_UINT64(mtyp_energy, "Ave Power", profile_energy_s_t, power.ave); - MEM_ADD_UINT64(mtyp_energy, "Max Power", profile_energy_s_t, power.max); - MEM_ADD_UINT64(mtyp_energy, "Total Power", - profile_energy_s_t, power.total); - MEM_ADD_UINT64(mtyp_energy, "Min CPU Frequency", profile_energy_s_t, - cpu_freq.min); - MEM_ADD_UINT64(mtyp_energy, "Ave CPU Frequency", profile_energy_s_t, - cpu_freq.ave); - MEM_ADD_UINT64(mtyp_energy, "Max CPU Frequency", profile_energy_s_t, - cpu_freq.max); - MEM_ADD_UINT64(mtyp_energy, "Total CPU Frequency", profile_energy_s_t, - cpu_freq.total); - - return mtyp_energy; -} - -static hid_t _energy_s_create_file_datatype(void) -{ - hid_t ftyp_energy = H5Tcreate(H5T_COMPOUND, (TOD_LEN+9*8)); - if (ftyp_energy < 0) { - debug3("PROFILE: failed to create Energy_s file datatype"); - return -1; - } - moffset = TOD_LEN; - FILE_ADD_DATE_TIME(ftyp_energy, "Start Time", 0); - FILE_ADD_UINT64(ftyp_energy, "Elapsed Time"); - FILE_ADD_UINT64(ftyp_energy, "Min Power"); - FILE_ADD_UINT64(ftyp_energy, "Ave Power"); - 
FILE_ADD_UINT64(ftyp_energy, "Max Power"); - FILE_ADD_UINT64(ftyp_energy, "Total Power"); - FILE_ADD_UINT64(ftyp_energy, "Min CPU Frequency"); - FILE_ADD_UINT64(ftyp_energy, "Ave CPU Frequency"); - FILE_ADD_UINT64(ftyp_energy, "Max CPU Frequency"); - FILE_ADD_UINT64(ftyp_energy, "Total CPU Frequency"); - - return ftyp_energy; -} - -static void *_energy_init_job_series(int n_samples) -{ - profile_energy_t* energy_data; - - energy_data = xmalloc(n_samples * sizeof(profile_energy_t)); - if (energy_data == NULL) { - debug3("PROFILE: failed to get memory for energy data"); - return NULL; - } - return (void*) energy_data; -} - -static char** _energy_get_series_tod(void* data, int nsmp) -{ - int ix; - char **tod_values = NULL; - profile_energy_t* energy_series = (profile_energy_t*) data; - tod_values = (char**) xmalloc(nsmp*sizeof(char*)); - if (tod_values == NULL) { - info("Failed to get memory for energy tod"); - return NULL; - } - for (ix=0; ix < nsmp; ix++) { - tod_values[ix] = xstrdup(energy_series[ix].tod); - } - return tod_values; -} - -static double* _energy_get_series_values(char* data_name, void* data, int nsmp) -{ - int ix; - profile_energy_t* energy_series = (profile_energy_t*) data; - double *energy_values = NULL; - energy_values = xmalloc(nsmp*sizeof(double)); - if (energy_values == NULL) { - info("PROFILE: Failed to get memory for energy data"); - return NULL; - } - if (strcasecmp(data_name,"Time") == 0) { - for (ix=0; ix < nsmp; ix++) { - energy_values[ix] = (double) energy_series[ix].time; - - } - return energy_values; - } else if (strcasecmp(data_name,"Power") == 0) { - for (ix=0; ix < nsmp; ix++) { - energy_values[ix] = (double) energy_series[ix].power; - - } - return energy_values; - } else if (strcasecmp(data_name,"CPU_Frequency") == 0) { - for (ix=0; ix < nsmp; ix++) { - energy_values[ix] = (double) energy_series[ix].cpu_freq; - - } - return energy_values; - } - xfree(energy_values); - info("PROFILE: %s is invalid data item for energy data", data_name); - return NULL; -} - -static void _energy_merge_step_series( - hid_t group, void *prior, void *cur, void *buf) -{ -// This is a difference series - profile_energy_t* prf_cur = (profile_energy_t*) cur; - profile_energy_t* prf_buf = (profile_energy_t*) buf; - struct tm *ts = slurm_localtime(&prf_cur->time); - strftime(prf_buf->tod, TOD_LEN, TOD_FMT, ts); - if (prior == NULL) { - // First sample. 
- seriesStart = prf_cur->time; - prf_buf->time = 0; - - } else { - prf_buf->time = prf_cur->time - seriesStart; - } - prf_buf->power = prf_cur->power; - prf_buf->cpu_freq = prf_cur->cpu_freq; - return; -} - -static void *_energy_series_total(int n_samples, void *data) -{ - profile_energy_t* energy_data; - profile_energy_s_t* total; - if (n_samples < 1) - return NULL; - energy_data = (profile_energy_t*) data; - total = xmalloc(sizeof(profile_energy_s_t)); - if (total == NULL) { - error("PROFILE: Out of memory getting energy total"); - return NULL; - } - // Assuming energy series are a difference series - strcpy(total->start_time, energy_data[0].tod); - total->elapsed_time = energy_data[n_samples-1].time; - INCR_DIF_SAMPLE(total, energy_data, power, n_samples); - INCR_DIF_SAMPLE(total, energy_data, cpu_freq, n_samples); - return total; -} - -static void _energy_extract_series( - FILE* fp, bool put_header, int job, int step, - char *node, char *series, void *data, int size_data) -{ - - int n_items, ix; - profile_energy_t* energy_data = (profile_energy_t*) data; - if (put_header) { - fprintf(fp, "Job,Step,Node,Series,Date_Time,Elapsed_Time," - "Power, CPU_Frequency\n"); - } - n_items = size_data / sizeof(profile_energy_t); - for (ix=0; ix < n_items; ix++) { - fprintf(fp, "%d,%d,%s,%s,%s,%ld,%ld,%ld\n", job, step, node, - series, energy_data[ix].tod, energy_data[ix].time, - energy_data[ix].power, energy_data[ix].cpu_freq); - } - return; -} - -static void _energy_extract_total( - FILE* fp, bool put_header, int job, int step, - char *node, char *series, void *data, int size_data) -{ - profile_energy_s_t* energy_data = (profile_energy_s_t*) data; - if (put_header) { - fprintf(fp, "Job,Step,Node,Series,Start_Time,Elapsed_Time," - "Min_Power,Ave_Power,Max_Power,Total_Power," - "Min_CPU Frequency,Ave_CPU Frequency," - "Max_CPU Frequency,Total_CPU Frequency\n"); - } - fprintf(fp, "%d,%d,%s,%s,%s,%ld", job, step, node, series, - energy_data->start_time, energy_data->elapsed_time); - PUT_UINT_SUM(fp, energy_data->power, ","); - PUT_UINT_SUM(fp, energy_data->cpu_freq, ","); - fprintf(fp, "\n"); - return; -} - -static hdf5_api_ops_t* _energy_profile_factory(void) -{ - hdf5_api_ops_t* ops = xmalloc(sizeof(hdf5_api_ops_t)); - ops->dataset_size = &_energy_dataset_size; - ops->create_memory_datatype = &_energy_create_memory_datatype; - ops->create_file_datatype = &_energy_create_file_datatype; - ops->create_s_memory_datatype = &_energy_s_create_memory_datatype; - ops->create_s_file_datatype = &_energy_s_create_file_datatype; - ops->init_job_series = &_energy_init_job_series; - ops->get_series_tod = &_energy_get_series_tod; - ops->get_series_values = &_energy_get_series_values; - ops->merge_step_series = &_energy_merge_step_series; - ops->series_total = &_energy_series_total; - ops->extract_series = &_energy_extract_series; - ops->extract_total = &_energy_extract_total; - return ops; -} - - -// ============================================================================ -// Routines supporting I/O Data type -// ============================================================================ - -static int _io_dataset_size(void) -{ - return sizeof(profile_io_t); -} - -static hid_t _io_create_memory_datatype(void) -{ - hid_t mtyp_io = -1; - - mtyp_io = H5Tcreate(H5T_COMPOUND, sizeof(profile_io_t)); - if (mtyp_io < 0) { - debug3("PROFILE: failed to create IO memory datatype"); - return -1; - } - MEM_ADD_DATE_TIME(mtyp_io, "Date_Time", profile_io_t, tod); - MEM_ADD_UINT64(mtyp_io, "Time", profile_io_t, time); - 
MEM_ADD_UINT64(mtyp_io, "Reads", profile_io_t, reads); - MEM_ADD_DBL(mtyp_io, "Megabytes_Read", profile_io_t, read_size); - MEM_ADD_UINT64(mtyp_io, "Writes", profile_io_t, writes); - MEM_ADD_DBL(mtyp_io, "Megabytes_Write", profile_io_t, write_size); - return mtyp_io; -} - -static hid_t _io_create_file_datatype(void) -{ - hid_t ftyp_io = -1; - - ftyp_io = H5Tcreate(H5T_COMPOUND, TOD_LEN+5*8); - if (ftyp_io < 0) { - debug3("PROFILE: failed to create IO file datatype"); - return -1; - } - moffset = TOD_LEN; - FILE_ADD_DATE_TIME(ftyp_io, "Date_Time", 0); - FILE_ADD_UINT64(ftyp_io, "Time"); - FILE_ADD_UINT64(ftyp_io, "Reads"); - FILE_ADD_DBL(ftyp_io, "Megabytes_Read"); - FILE_ADD_UINT64(ftyp_io, "Writes"); - FILE_ADD_DBL(ftyp_io, "Megabytes_Write"); - - return ftyp_io; -} - -static hid_t _io_s_create_memory_datatype(void) -{ - hid_t mtyp_io = -1; - - mtyp_io = H5Tcreate(H5T_COMPOUND, sizeof(profile_io_s_t)); - if (mtyp_io < 0) { - debug3("PROFILE: failed to create IO memory datatype"); - return -1; - } - MEM_ADD_DATE_TIME(mtyp_io, "Start Time", profile_io_s_t, start_time); - MEM_ADD_UINT64(mtyp_io, "Elapsed Time", profile_io_s_t, elapsed_time); - MEM_ADD_UINT64(mtyp_io, "Min Reads", profile_io_s_t, reads.min); - MEM_ADD_UINT64(mtyp_io, "Ave Reads", profile_io_s_t, reads.ave); - MEM_ADD_UINT64(mtyp_io, "Max Reads", profile_io_s_t, reads.max); - MEM_ADD_UINT64(mtyp_io, "Total Reads", profile_io_s_t, reads.total); - MEM_ADD_DBL(mtyp_io, "Min Read Megabytes", - profile_io_s_t, read_size.min); - MEM_ADD_DBL(mtyp_io, "Ave Read Megabytes", - profile_io_s_t, read_size.ave); - MEM_ADD_DBL(mtyp_io, "Max Read Megabytes", - profile_io_s_t, read_size.max); - MEM_ADD_DBL(mtyp_io, "Total Read Megabytes", profile_io_s_t, - read_size.total); - MEM_ADD_UINT64(mtyp_io, "Min Writes", profile_io_s_t, writes.min); - MEM_ADD_UINT64(mtyp_io, "Ave Writes", profile_io_s_t, writes.ave); - MEM_ADD_UINT64(mtyp_io, "Max Writes", profile_io_s_t, writes.max); - MEM_ADD_UINT64(mtyp_io, "Total Writes", profile_io_s_t, writes.total); - MEM_ADD_DBL(mtyp_io, "Min Write Megabytes", profile_io_s_t, - write_size.min); - MEM_ADD_DBL(mtyp_io, "Ave Write Megabytes", profile_io_s_t, - write_size.ave); - MEM_ADD_DBL(mtyp_io, "Max Write Megabytes", profile_io_s_t, - write_size.max); - MEM_ADD_DBL(mtyp_io, "Total Write Megabytes", profile_io_s_t, - write_size.total); - - return mtyp_io; -} - -static hid_t _io_s_create_file_datatype(void) -{ - hid_t ftyp_io = -1; - - ftyp_io = H5Tcreate(H5T_COMPOUND, TOD_LEN+17*8); - if (ftyp_io < 0) { - debug3("PROFILE: failed to create IO file datatype"); - return -1; - } - moffset = TOD_LEN; - FILE_ADD_DATE_TIME(ftyp_io, "Start Time", 0); - FILE_ADD_UINT64(ftyp_io, "Elapsed Time"); - FILE_ADD_UINT64(ftyp_io, "Min Reads"); - FILE_ADD_UINT64(ftyp_io, "Ave Reads"); - FILE_ADD_UINT64(ftyp_io, "Max Reads"); - FILE_ADD_UINT64(ftyp_io, "Total Reads"); - FILE_ADD_DBL(ftyp_io, "Min Read Megabytes"); - FILE_ADD_DBL(ftyp_io, "Ave Read Megabytes"); - FILE_ADD_DBL(ftyp_io, "Max Read Megabytes"); - FILE_ADD_DBL(ftyp_io, "Total Read Megabytes"); - FILE_ADD_UINT64(ftyp_io, "Min Writes"); - FILE_ADD_UINT64(ftyp_io, "Ave Writes"); - FILE_ADD_UINT64(ftyp_io, "Max Writes"); - FILE_ADD_UINT64(ftyp_io, "Total Writes"); - FILE_ADD_DBL(ftyp_io, "Min Write Megabytes"); - FILE_ADD_DBL(ftyp_io, "Ave Write Megabytes"); - FILE_ADD_DBL(ftyp_io, "Max Write Megabytes"); - FILE_ADD_DBL(ftyp_io, "Total Write Megabytes"); - - return ftyp_io; -} - -static void *_io_init_job_series(int n_samples) -{ - profile_io_t* io_data; - io_data = 
xmalloc(n_samples * sizeof(profile_io_t)); - if (io_data == NULL) { - debug3("PROFILE: failed to get memory for combined io data"); - return NULL; - } - return (void*) io_data; -} - -static char** _io_get_series_tod(void* data, int nsmp) -{ - int ix; - char **tod_values = NULL; - profile_io_t* io_series = (profile_io_t*) data; - tod_values = (char**) xmalloc(nsmp*sizeof(char*)); - if (tod_values == NULL) { - info("Failed to get memory for io tod"); - return NULL; - } - for (ix=0; ix < nsmp; ix++) { - tod_values[ix] = xstrdup(io_series[ix].tod); - } - return tod_values; -} - -static double* _io_get_series_values(char* data_name, void* data, int nsmp) -{ - int ix; - profile_io_t* io_series = (profile_io_t*) data; - double *io_values = NULL; - io_values = xmalloc(nsmp*sizeof(double)); - if (io_values == NULL) { - info("PROFILE: Failed to get memory for io data"); - return NULL; - } - if (strcasecmp(data_name,"Time") == 0) { - for (ix=0; ix < nsmp; ix++) { - io_values[ix] = (double) io_series[ix].time; - - } - return io_values; - } else if (strcasecmp(data_name,"Reads") == 0) { - for (ix=0; ix < nsmp; ix++) { - io_values[ix] = (double) io_series[ix].reads; - - } - return io_values; - } else if (strcasecmp(data_name,"Megabytes_Read") == 0) { - for (ix=0; ix < nsmp; ix++) { - io_values[ix] = io_series[ix].read_size; - - } - return io_values; - } else if (strcasecmp(data_name,"Writes") == 0) { - for (ix=0; ix < nsmp; ix++) { - io_values[ix] = (double) io_series[ix].writes; - - } - return io_values; - } else if (strcasecmp(data_name,"Megabytes_Write") == 0) { - for (ix=0; ix < nsmp; ix++) { - io_values[ix] = io_series[ix].write_size; - - } - return io_values; - } - xfree(io_values); - info("PROFILE: %s is invalid data item for io data", data_name); - return NULL; -} - -static void _io_merge_step_series( - hid_t group, void *prior, void *cur, void *buf) -{ - // This is a difference series - static uint64_t start_reads = 0; - static uint64_t start_writes = 0; - static double start_read_size = 0; - static double start_write_size = 0; - profile_io_t* prfCur = (profile_io_t*) cur; - profile_io_t* prfBuf = (profile_io_t*) buf; - struct tm *ts = slurm_localtime(&prfCur->time); - strftime(prfBuf->tod, TOD_LEN, TOD_FMT, ts); - if (prior == NULL) { - // First sample. 
- seriesStart = prfCur->time; - prfBuf->time = 0; - start_reads = prfCur->reads; - prfBuf->reads = 0; - start_writes = prfCur->writes; - prfBuf->writes = 0; - start_read_size = prfCur->read_size; - prfBuf->read_size = 0; - start_write_size = prfCur->write_size; - prfBuf->write_size = 0; - } else { - prfBuf->time = prfCur->time - seriesStart; - prfBuf->reads = prfCur->reads - start_reads; - prfBuf->writes = prfCur->writes - start_writes; - prfBuf->read_size = prfCur->read_size - start_read_size; - prfBuf->write_size = prfCur->write_size - start_write_size; - } - return; -} - -static void *_io_series_total(int n_samples, void *data) -{ - profile_io_t* io_data; - profile_io_s_t* total; - if (n_samples < 1) - return NULL; - io_data = (profile_io_t*) data; - total = xmalloc(sizeof(profile_io_s_t)); - if (total == NULL) { - error("PROFILE: Out of memory getting I/O total"); - return NULL; - } - // Assuming io series are a running total, and the first - // sample just sets the initial conditions - strcpy(total->start_time, io_data[0].tod); - total->elapsed_time = io_data[n_samples-1].time; - INCR_DIF_SAMPLE(total, io_data, reads, n_samples); - INCR_DIF_SAMPLE(total, io_data, read_size, n_samples); - INCR_DIF_SAMPLE(total, io_data, writes, n_samples); - INCR_DIF_SAMPLE(total, io_data, write_size, n_samples); - return total; -} - -static void _io_extract_series( - FILE* fp, bool put_header, int job, int step, - char *node, char *series, void *data, int size_data) -{ - int n_items, ix; - profile_io_t* io_data = (profile_io_t*) data; - if (put_header) { - fprintf(fp,"Job,Step,Node,Series,Date_Time,Elapsed_time," - "Reads,Read Megabytes,Writes,Write Megabytes\n"); - } - n_items = size_data / sizeof(profile_io_t); - for (ix=0; ix < n_items; ix++) { - fprintf(fp,"%d,%d,%s,%s,%s,%ld,%ld,%.3f,%ld,%.3f\n", - job, step, node, series, - io_data[ix].tod, io_data[ix].time, - io_data[ix].reads, io_data[ix].read_size, - io_data[ix].writes, io_data[ix].write_size); - } - return; -} - -static void _io_extract_total( - FILE* fp, bool put_header, int job, int step, - char *node, char *series, void *data, int size_data) -{ - profile_io_s_t* io_data = (profile_io_s_t*) data; - if (put_header) { - fprintf(fp,"Job,Step,Node,Series,Start_Time,Elapsed_time," - "Min_Reads,Ave_Reads,Max_Reads,Total_Reads," - "Min_Read_Megabytes,Ave_Read_Megabytes," - "Max_Read_Megabytes,Total_Read_Megabytes," - "Min_Writes,Ave_Writes,Max_Writes,Total_Writes," - "Min_Write_Megabytes,Ave_Write_Megabytes," - "Max_Write_Megabytes,Total_Write_Megabytes\n"); - } - fprintf(fp, "%d,%d,%s,%s,%s,%ld", job, step, node, series, - io_data->start_time, io_data->elapsed_time); - PUT_UINT_SUM(fp, io_data->reads, ","); - PUT_DBL_SUM(fp, io_data->read_size, ","); - PUT_UINT_SUM(fp, io_data->writes, ","); - PUT_DBL_SUM(fp, io_data->write_size, ","); - fprintf(fp, "\n"); - return; -} - -static hdf5_api_ops_t* _io_profile_factory(void) -{ - hdf5_api_ops_t* ops = xmalloc(sizeof(hdf5_api_ops_t)); - ops->dataset_size = &_io_dataset_size; - ops->create_memory_datatype = &_io_create_memory_datatype; - ops->create_file_datatype = &_io_create_file_datatype; - ops->create_s_memory_datatype = &_io_s_create_memory_datatype; - ops->create_s_file_datatype = &_io_s_create_file_datatype; - ops->init_job_series = &_io_init_job_series; - ops->get_series_tod = &_io_get_series_tod; - ops->get_series_values = &_io_get_series_values; - ops->merge_step_series = &_io_merge_step_series; - ops->series_total = &_io_series_total; - ops->extract_series = &_io_extract_series; - 
ops->extract_total = &_io_extract_total; - return ops; -} - - -// ============================================================================ -// Routines supporting Network Data type -// ============================================================================ - -static int _network_dataset_size(void) -{ - return sizeof(profile_network_t); -} - -static hid_t _network_create_memory_datatype(void) -{ - hid_t mtyp_network = H5Tcreate(H5T_COMPOUND, - sizeof(profile_network_t)); - if (mtyp_network < 0) { - debug3("PROFILE: failed to create Network memory datatype"); - return -1; - } - MEM_ADD_DATE_TIME(mtyp_network, "Date_Time", profile_network_t, tod); - MEM_ADD_UINT64(mtyp_network, "Time", profile_network_t, time); - MEM_ADD_UINT64(mtyp_network, "Packets_In", - profile_network_t, packets_in); - MEM_ADD_DBL(mtyp_network, "Megabytes_In", profile_network_t, size_in); - MEM_ADD_UINT64(mtyp_network, "Packets_Out", - profile_network_t, packets_out); - MEM_ADD_DBL(mtyp_network, "Megabytes_Out", profile_network_t, size_out); - - return mtyp_network; -} - -static hid_t _network_create_file_datatype(void) -{ - hid_t ftyp_network = H5Tcreate(H5T_COMPOUND, TOD_LEN+5*8); - if (ftyp_network < 0) { - debug3("PROFILE: failed to create Network file datatype"); - return -1; - } - moffset = TOD_LEN; - FILE_ADD_DATE_TIME(ftyp_network, "Date_Time", 0); - FILE_ADD_UINT64(ftyp_network, "Time"); - FILE_ADD_UINT64(ftyp_network, "Packets_In"); - FILE_ADD_DBL(ftyp_network, "Megabytes_In"); - FILE_ADD_UINT64(ftyp_network, "Packets_Out"); - FILE_ADD_DBL(ftyp_network, "Megabytes_Out"); - - return ftyp_network; -} - -static hid_t _network_s_create_memory_datatype(void) -{ - hid_t mtyp_network = -1; - - mtyp_network = H5Tcreate(H5T_COMPOUND, sizeof(profile_network_s_t)); - if (mtyp_network < 0) { - debug3("PROFILE: failed to create Network memory datatype"); - return -1; - } - MEM_ADD_DATE_TIME(mtyp_network, "Start Time", profile_network_s_t, - start_time); - MEM_ADD_UINT64(mtyp_network, "Elapsed Time", profile_network_s_t, - elapsed_time); - MEM_ADD_UINT64(mtyp_network, "Min Packets In", profile_network_s_t, - packets_in.min); - MEM_ADD_UINT64(mtyp_network, "Ave Packets In", profile_network_s_t, - packets_in.ave); - MEM_ADD_UINT64(mtyp_network, "Max Packets In", profile_network_s_t, - packets_in.max); - MEM_ADD_UINT64(mtyp_network, "Total Packets In", profile_network_s_t, - packets_in.total); - MEM_ADD_DBL(mtyp_network, "Min Megabytes In", profile_network_s_t, - size_in.min); - MEM_ADD_DBL(mtyp_network, "Ave Megabytes In", profile_network_s_t, - size_in.ave); - MEM_ADD_DBL(mtyp_network, "Max Megabytes In", profile_network_s_t, - size_in.max); - MEM_ADD_DBL(mtyp_network, "Total Megabytes In", profile_network_s_t, - size_in.total); - MEM_ADD_UINT64(mtyp_network, "Min Packets Out", profile_network_s_t, - packets_out.min); - MEM_ADD_UINT64(mtyp_network, "Ave Packets Out", profile_network_s_t, - packets_out.ave); - MEM_ADD_UINT64(mtyp_network, "Max Packets Out", profile_network_s_t, - packets_out.max); - MEM_ADD_UINT64(mtyp_network, "Total Packets Out", profile_network_s_t, - packets_out.total); - MEM_ADD_DBL(mtyp_network, "Min Megabytes Out", profile_network_s_t, - size_out.min); - MEM_ADD_DBL(mtyp_network, "Ave Megabytes Out", profile_network_s_t, - size_out.ave); - MEM_ADD_DBL(mtyp_network, "Max Megabytes Out", profile_network_s_t, - size_out.max); - MEM_ADD_DBL(mtyp_network, "Total Megabytes Out", profile_network_s_t, - size_out.total); - - return mtyp_network; -} - -static hid_t _network_s_create_file_datatype(void) 
-{ - hid_t ftyp_network = H5Tcreate(H5T_COMPOUND, TOD_LEN+17*8); - if (ftyp_network < 0) { - debug3("PROFILE: failed to create Network file datatype"); - return -1; - } - moffset = TOD_LEN; - FILE_ADD_DATE_TIME(ftyp_network, "Start Time", 0); - FILE_ADD_UINT64(ftyp_network, "Elapsed Time"); - FILE_ADD_UINT64(ftyp_network, "Min Packets In"); - FILE_ADD_UINT64(ftyp_network, "Ave Packets In"); - FILE_ADD_UINT64(ftyp_network, "Max Packets In"); - FILE_ADD_UINT64(ftyp_network, "Total Packets In"); - FILE_ADD_DBL(ftyp_network, "Min Megabytes In"); - FILE_ADD_DBL(ftyp_network, "Ave Megabytes In"); - FILE_ADD_DBL(ftyp_network, "Max Megabytes In"); - FILE_ADD_DBL(ftyp_network, "Total Megabytes In"); - FILE_ADD_UINT64(ftyp_network, "Min Packets Out"); - FILE_ADD_UINT64(ftyp_network, "Ave Packets Out"); - FILE_ADD_UINT64(ftyp_network, "Max Packets Out"); - FILE_ADD_UINT64(ftyp_network, "Total Packets Out"); - FILE_ADD_DBL(ftyp_network, "Min Megabytes Out"); - FILE_ADD_DBL(ftyp_network, "Ave Megabytes Out"); - FILE_ADD_DBL(ftyp_network, "Max Megabytes Out"); - FILE_ADD_DBL(ftyp_network, "Total Megabytes Out"); - - return ftyp_network; -} - -static void *_network_init_job_series(int n_samples) -{ - profile_network_t* network_data; - - network_data = xmalloc(n_samples * sizeof(profile_network_t)); - if (network_data == NULL) { - debug3("PROFILE: failed to get memory for network data"); - return NULL; - } - return (void*) network_data; -} - -static char** _network_get_series_tod(void* data, int nsmp) -{ - int ix; - char **tod_values = NULL; - profile_network_t* network_series = (profile_network_t*) data; - tod_values = (char**) xmalloc(nsmp*sizeof(char*)); - if (tod_values == NULL) { - info("Failed to get memory for network tod"); - return NULL; - } - for (ix=0; ix < nsmp; ix++) { - tod_values[ix] = xstrdup(network_series[ix].tod); - } - return tod_values; -} - -static double* _network_get_series_values(char* data_name, void* data, int nsmp) -{ - int ix; - profile_network_t* network_series = (profile_network_t*) data; - double *network_values = NULL; - network_values = xmalloc(nsmp*sizeof(double)); - if (network_values == NULL) { - info("PROFILE: Failed to get memory for network data"); - return NULL; - } - if (strcasecmp(data_name,"Time") == 0) { - for (ix=0; ix < nsmp; ix++) { - network_values[ix] = (double) network_series[ix].time; - - } - return network_values; - } else if (strcasecmp(data_name,"Packets_In") == 0) { - for (ix=0; ix < nsmp; ix++) { - network_values[ix] = - (double) network_series[ix].packets_in; - - } - return network_values; - } else if (strcasecmp(data_name,"Megabytes_In") == 0) { - for (ix=0; ix < nsmp; ix++) { - network_values[ix] = network_series[ix].size_in; - - } - return network_values; - } else if (strcasecmp(data_name,"Packets_Out") == 0) { - for (ix=0; ix < nsmp; ix++) { - network_values[ix] = - (double) network_series[ix].packets_out; - - } - return network_values; - } else if (strcasecmp(data_name,"Megabytes_Out") == 0) { - for (ix=0; ix < nsmp; ix++) { - network_values[ix] = network_series[ix].size_out; - - } - return network_values; - } - xfree(network_values); - info("PROFILE: %s is invalid data item for network data", data_name); - return NULL; -} - -static void _network_merge_step_series( - hid_t group, void *prior, void *cur, void *buf) -{ -// This is a difference series - profile_network_t* prf_cur = (profile_network_t*) cur; - profile_network_t* prf_buf = (profile_network_t*) buf; - struct tm *ts = slurm_localtime(&prf_cur->time); - strftime(prf_buf->tod, TOD_LEN, 
TOD_FMT, ts); - if (prior == NULL) { - // First sample. - seriesStart = prf_cur->time; - prf_buf->time = 0; - } else { - prf_buf->time = prf_cur->time - seriesStart; - } - prf_buf->packets_in = prf_cur->packets_in; - prf_buf->packets_out = prf_cur->packets_out; - prf_buf->size_in = prf_cur->size_in; - prf_buf->size_out = prf_cur->size_out; - return; -} - -static void *_network_series_total(int n_samples, void *data) -{ - profile_network_t* network_data; - profile_network_s_t* total; - if (n_samples < 1) - return NULL; - network_data = (profile_network_t*) data; - total = xmalloc(sizeof(profile_network_s_t)); - if (total == NULL) { - error("PROFILE: Out of memory getting network total"); - return NULL; - } - // Assuming network series are a running total, and the first - // sample just sets the initial conditions - strcpy(total->start_time, network_data[0].tod); - total->elapsed_time = network_data[n_samples-1].time; - INCR_DIF_SAMPLE(total, network_data, packets_in, n_samples); - INCR_DIF_SAMPLE(total, network_data, size_in, n_samples); - INCR_DIF_SAMPLE(total, network_data, packets_out, n_samples); - INCR_DIF_SAMPLE(total, network_data, size_out, n_samples); - return total; -} - -static void _network_extract_series( - FILE* fp, bool put_header, int job, int step, - char *node, char *series, void *data, int size_data) -{ - int n_items, ix; - profile_network_t* network_data = (profile_network_t*) data; - - if (put_header) { - fprintf(fp,"Job,Step,Node,Series,Date_Time,Elapsed_time," - "Packets_In,MegaBytes_In,Packets_Out,MegaBytes_Out\n"); - } - n_items = size_data / sizeof(profile_network_t); - for (ix=0; ix < n_items; ix++) { - fprintf(fp,"%d,%d,%s,%s,%s,%ld,%ld,%.3f,%ld,%.3f\n", - job, step, node,series, - network_data[ix].tod, network_data[ix].time, - network_data[ix].packets_in, network_data[ix].size_in, - network_data[ix].packets_out, - network_data[ix].size_out); - } - return; -} - -static void _network_extract_total( - FILE* fp, bool put_header, int job, int step, - char *node, char *series, void *data, int size_data) -{ - profile_network_s_t* network_data = (profile_network_s_t*) data; - if (put_header) { - fprintf(fp,"Job,Step,Node,Series,Start_Time,Elapsed_time," - "Min_Packets_In,Ave_Packets_In," - "Max_Packets_In,Total_Packets_In," - "Min_Megabytes_In,Ave_Megabytes_In," - "Max_Megabytes_In,Total_Megabytes_In," - "Min_Packets_Out,Ave_Packets_Out," - "Max_Packets_Out,Total_Packets_Out," - "Min_Megabytes_Out,Ave_Megabytes_Out," - "Max_Megabytes_Out,Total_Megabytes_Out\n"); - } - fprintf(fp, "%d,%d,%s,%s,%s,%ld", job, step, node, series, - network_data->start_time, network_data->elapsed_time); - PUT_UINT_SUM(fp, network_data->packets_in, ","); - PUT_DBL_SUM(fp, network_data->size_in, ","); - PUT_UINT_SUM(fp, network_data->packets_out, ","); - PUT_DBL_SUM(fp, network_data->size_out, ","); - fprintf(fp, "\n"); - return; -} - -static hdf5_api_ops_t *_network_profile_factory(void) -{ - hdf5_api_ops_t* ops = xmalloc(sizeof(hdf5_api_ops_t)); - ops->dataset_size = &_network_dataset_size; - ops->create_memory_datatype = &_network_create_memory_datatype; - ops->create_file_datatype = &_network_create_file_datatype; - ops->create_s_memory_datatype = &_network_s_create_memory_datatype; - ops->create_s_file_datatype = &_network_s_create_file_datatype; - ops->init_job_series = &_network_init_job_series; - ops->get_series_tod = &_network_get_series_tod; - ops->get_series_values = &_network_get_series_values; - ops->merge_step_series = &_network_merge_step_series; - ops->series_total = 
&_network_series_total; - ops->extract_series = &_network_extract_series; - ops->extract_total = &_network_extract_total; - return ops; -} - -// ============================================================================ -// Routines supporting Task Data type -// ============================================================================ - -static int _task_dataset_size(void) -{ - return sizeof(profile_task_t); -} - -static hid_t _task_create_memory_datatype(void) -{ - hid_t mtyp_task = H5Tcreate(H5T_COMPOUND, sizeof(profile_task_t)); - if (mtyp_task < 0) { - debug3("PROFILE: failed to create Task memory datatype"); - return -1; - } - MEM_ADD_DATE_TIME(mtyp_task, "Date_Time", profile_task_t, tod); - MEM_ADD_UINT64(mtyp_task, "Time", profile_task_t, time); - MEM_ADD_UINT64(mtyp_task, "CPU_Frequency", profile_task_t, cpu_freq); - MEM_ADD_UINT64(mtyp_task, "CPU_Time", profile_task_t, cpu_time); - MEM_ADD_DBL(mtyp_task, "CPU_Utilization", - profile_task_t, cpu_utilization); - MEM_ADD_UINT64(mtyp_task, "RSS", profile_task_t, rss); - MEM_ADD_UINT64(mtyp_task, "VM_Size", profile_task_t, vm_size); - MEM_ADD_UINT64(mtyp_task, "Pages", profile_task_t, pages); - MEM_ADD_DBL(mtyp_task, "Read_Megabytes", profile_task_t, read_size); - MEM_ADD_DBL(mtyp_task, "Write_Megabytes", profile_task_t, write_size); - - return mtyp_task; -} - -static hid_t _task_create_file_datatype(void) -{ - hid_t ftyp_task = H5Tcreate(H5T_COMPOUND, TOD_LEN+9*8); - if (ftyp_task < 0) { - debug3("PROFILE: failed to create Task file datatype"); - return -1; - } - moffset = TOD_LEN; - FILE_ADD_DATE_TIME(ftyp_task, "Date_Time", 0); - FILE_ADD_UINT64(ftyp_task, "Time"); - FILE_ADD_UINT64(ftyp_task, "CPU_Frequency"); - FILE_ADD_UINT64(ftyp_task, "CPU_Time"); - FILE_ADD_DBL(ftyp_task, "CPU_Utilization"); - FILE_ADD_UINT64(ftyp_task, "RSS"); - FILE_ADD_UINT64(ftyp_task, "VM_Size"); - FILE_ADD_UINT64(ftyp_task, "Pages"); - FILE_ADD_DBL(ftyp_task, "Read_Megabytes"); - FILE_ADD_DBL(ftyp_task, "Write_Megabytes"); - - return ftyp_task; -} - -static hid_t _task_s_create_memory_datatype(void) -{ - hid_t mtyp_task = H5Tcreate(H5T_COMPOUND, sizeof(profile_task_s_t)); - if (mtyp_task < 0) { - debug3("PROFILE: failed to create Task memory datatype"); - return -1; - } - MEM_ADD_DATE_TIME(mtyp_task, "Start Time", profile_task_s_t, - start_time); - MEM_ADD_UINT64(mtyp_task, "Elapsed Time", profile_task_s_t, - elapsed_time); - MEM_ADD_UINT64(mtyp_task, "Min CPU Frequency", profile_task_s_t, - cpu_freq.min); - MEM_ADD_UINT64(mtyp_task, "Ave CPU Frequency", profile_task_s_t, - cpu_freq.ave); - MEM_ADD_UINT64(mtyp_task, "Max CPU Frequency", profile_task_s_t, - cpu_freq.max); - MEM_ADD_UINT64(mtyp_task, "Total CPU Frequency", profile_task_s_t, - cpu_freq.total); - MEM_ADD_UINT64(mtyp_task, "Min CPU Time", profile_task_s_t, - cpu_time.min); - MEM_ADD_UINT64(mtyp_task, "Ave CPU Time", profile_task_s_t, - cpu_time.ave); - MEM_ADD_UINT64(mtyp_task, "Max CPU Time", profile_task_s_t, - cpu_time.max); - MEM_ADD_UINT64(mtyp_task, "Total CPU Time", profile_task_s_t, - cpu_time.total); - MEM_ADD_DBL(mtyp_task, "Min CPU Utilization", profile_task_s_t, - cpu_utilization.min); - MEM_ADD_DBL(mtyp_task, "Ave CPU Utilization", profile_task_s_t, - cpu_utilization.ave); - MEM_ADD_DBL(mtyp_task, "Max CPU Utilization", profile_task_s_t, - cpu_utilization.max); - MEM_ADD_DBL(mtyp_task, "Total CPU Utilization", profile_task_s_t, - cpu_utilization.total); - MEM_ADD_UINT64(mtyp_task, "Min RSS", profile_task_s_t, rss.min); - MEM_ADD_UINT64(mtyp_task, "Ave RSS", profile_task_s_t, 
rss.ave); - MEM_ADD_UINT64(mtyp_task, "Max RSS", profile_task_s_t, rss.max); - MEM_ADD_UINT64(mtyp_task, "Total RSS", profile_task_s_t, rss.total); - MEM_ADD_UINT64(mtyp_task, "Min VM Size", profile_task_s_t, vm_size.min); - MEM_ADD_UINT64(mtyp_task, "Ave VM Size", profile_task_s_t, vm_size.ave); - MEM_ADD_UINT64(mtyp_task, "Max VM Size", profile_task_s_t, vm_size.max); - MEM_ADD_UINT64(mtyp_task, "Total VM Size", - profile_task_s_t, vm_size.total); - MEM_ADD_UINT64(mtyp_task, "Min Pages", profile_task_s_t, pages.min); - MEM_ADD_UINT64(mtyp_task, "Ave Pages", profile_task_s_t, pages.ave); - MEM_ADD_UINT64(mtyp_task, "Max Pages", profile_task_s_t, pages.max); - MEM_ADD_UINT64(mtyp_task, "Total Pages", profile_task_s_t, pages.total); - MEM_ADD_DBL(mtyp_task, "Min Read Megabytes", profile_task_s_t, - read_size.min); - MEM_ADD_DBL(mtyp_task, "Ave Read Megabytes", profile_task_s_t, - read_size.ave); - MEM_ADD_DBL(mtyp_task, "Max Read Megabytes", profile_task_s_t, - read_size.max); - MEM_ADD_DBL(mtyp_task, "Total Read Megabytes", profile_task_s_t, - read_size.total); - MEM_ADD_DBL(mtyp_task, "Min Write Megabytes", profile_task_s_t, - write_size.min); - MEM_ADD_DBL(mtyp_task, "Ave Write Megabytes", profile_task_s_t, - write_size.ave); - MEM_ADD_DBL(mtyp_task, "Max Write Megabytes", profile_task_s_t, - write_size.max); - MEM_ADD_DBL(mtyp_task, "Total Write Megabytes", profile_task_s_t, - write_size.total); - - return mtyp_task; -} - -static hid_t _task_s_create_file_datatype(void) -{ - hid_t ftyp_task = H5Tcreate(H5T_COMPOUND, TOD_LEN+33*8); - if (ftyp_task < 0) { - debug3("PROFILE: failed to create Task file datatype"); - return -1; - } - moffset = TOD_LEN; - FILE_ADD_DATE_TIME(ftyp_task, "Start Time", 0); - FILE_ADD_UINT64(ftyp_task, "Elapsed Time"); - FILE_ADD_UINT64(ftyp_task, "Min CPU Frequency"); - FILE_ADD_UINT64(ftyp_task, "Ave CPU Frequency"); - FILE_ADD_UINT64(ftyp_task, "Max CPU Frequency"); - FILE_ADD_UINT64(ftyp_task, "Total CPU Frequency"); - FILE_ADD_UINT64(ftyp_task, "Min CPU Time"); - FILE_ADD_UINT64(ftyp_task, "Ave CPU Time"); - FILE_ADD_UINT64(ftyp_task, "Max CPU Time"); - FILE_ADD_UINT64(ftyp_task, "Total CPU Time"); - FILE_ADD_DBL(ftyp_task, "Min CPU Utilization"); - FILE_ADD_DBL(ftyp_task, "Ave CPU Utilization"); - FILE_ADD_DBL(ftyp_task, "Max CPU Utilization"); - FILE_ADD_DBL(ftyp_task, "Total CPU Utilization"); - FILE_ADD_UINT64(ftyp_task, "Min RSS"); - FILE_ADD_UINT64(ftyp_task, "Ave RSS"); - FILE_ADD_UINT64(ftyp_task, "Max RSS"); - FILE_ADD_UINT64(ftyp_task, "Total RSS"); - FILE_ADD_UINT64(ftyp_task, "Min VM Size"); - FILE_ADD_UINT64(ftyp_task, "Ave VM Size"); - FILE_ADD_UINT64(ftyp_task, "Max VM Size"); - FILE_ADD_UINT64(ftyp_task, "Total VM Size"); - FILE_ADD_UINT64(ftyp_task, "Min Pages"); - FILE_ADD_UINT64(ftyp_task, "Ave Pages"); - FILE_ADD_UINT64(ftyp_task, "Max Pages"); - FILE_ADD_UINT64(ftyp_task, "Total Pages"); - FILE_ADD_DBL(ftyp_task, "Min Read Megabytes"); - FILE_ADD_DBL(ftyp_task, "Ave Read Megabytes"); - FILE_ADD_DBL(ftyp_task, "Max Read Megabytes"); - FILE_ADD_DBL(ftyp_task, "Total Read Megabytes"); - FILE_ADD_DBL(ftyp_task, "Min Write Megabytes"); - FILE_ADD_DBL(ftyp_task, "Ave Write Megabytes"); - FILE_ADD_DBL(ftyp_task, "Max Write Megabytes"); - FILE_ADD_DBL(ftyp_task, "Total Write Megabytes"); - - return ftyp_task; -} - -static void *_task_init_job_series(int n_samples) -{ - profile_task_t* task_data; - task_data = xmalloc(n_samples * sizeof(profile_task_t)); - if (task_data == NULL) { - debug3("PROFILE: failed to get memory for combined task data"); - 
return NULL; - } - return (void*) task_data; -} - -static char** _task_get_series_tod(void* data, int nsmp) -{ - int ix; - char **tod_values = NULL; - profile_task_t* task_series = (profile_task_t*) data; - tod_values = (char**) xmalloc(nsmp*sizeof(char*)); - if (tod_values == NULL) { - info("Failed to get memory for task tod"); - return NULL; - } - for (ix=0; ix < nsmp; ix++) { - tod_values[ix] = xstrdup(task_series[ix].tod); - } - return tod_values; -} - -static double* _task_get_series_values(char* data_name, void* data, int nsmp) -{ - int ix; - profile_task_t* task_series = (profile_task_t*) data; - double *task_values = NULL; - task_values = xmalloc(nsmp*sizeof(double)); - if (task_values == NULL) { - info("PROFILE: Failed to get memory for task data"); - return NULL; - } - if (strcasecmp(data_name,"Time") == 0) { - for (ix=0; ix < nsmp; ix++) { - task_values[ix] = (double) task_series[ix].time; - - } - return task_values; - } else if (strcasecmp(data_name,"CPU_Frequency") == 0) { - for (ix=0; ix < nsmp; ix++) { - task_values[ix] = (double) task_series[ix].cpu_freq; - - } - return task_values; - } else if (strcasecmp(data_name,"CPU_Time") == 0) { - for (ix=0; ix < nsmp; ix++) { - task_values[ix] = (double) task_series[ix].cpu_time; - - } - return task_values; - } else if (strcasecmp(data_name,"CPU_Utilization") == 0) { - for (ix=0; ix < nsmp; ix++) { - task_values[ix] = task_series[ix].cpu_utilization; - - } - return task_values; - } else if (strcasecmp(data_name,"RSS") == 0) { - for (ix=0; ix < nsmp; ix++) { - task_values[ix] = (double) task_series[ix].rss; - - } - return task_values; - } else if (strcasecmp(data_name,"VM_Size") == 0) { - for (ix=0; ix < nsmp; ix++) { - task_values[ix] = (double) task_series[ix].vm_size; - - } - return task_values; - } else if (strcasecmp(data_name,"Pages") == 0) { - for (ix=0; ix < nsmp; ix++) { - task_values[ix] = (double) task_series[ix].pages; - - } - return task_values; - } else if (strcasecmp(data_name,"Read_Megabytes") == 0) { - for (ix=0; ix < nsmp; ix++) { - task_values[ix] = task_series[ix].read_size; - - } - return task_values; - } else if (strcasecmp(data_name,"Write_Megabytes") == 0) { - for (ix=0; ix < nsmp; ix++) { - task_values[ix] = task_series[ix].write_size; - - } - return task_values; - } - xfree(task_values); - info("PROFILE: %s is invalid data item for task data", data_name); - return NULL; -} - -static void _task_merge_step_series( - hid_t group, void *prior, void *cur, void *buf) -{ -// This is a running total series - profile_task_t* prf_prior = (profile_task_t*) prior; - profile_task_t* prf_cur = (profile_task_t*) cur; - profile_task_t* buf_prv = NULL; - profile_task_t* buf_cur = (profile_task_t*) buf; - struct tm *ts; - - ts = slurm_localtime(&prf_cur->time); - strftime(buf_cur->tod, TOD_LEN, TOD_FMT, ts); - if (prf_prior == NULL) { - // First sample. 
- seriesStart = prf_cur->time; - buf_cur->time = 0; - buf_cur->cpu_time = 0; - buf_cur->cpu_utilization = 0; - buf_cur->read_size = 0.0; - buf_cur->write_size = 0.0; - } else { - buf_prv = buf_cur - 1; - buf_cur->time = prf_cur->time - seriesStart; - buf_cur->cpu_time = prf_cur->cpu_time - prf_prior->cpu_time; - buf_cur->cpu_utilization = 100.0*((double) buf_cur->cpu_time / - (double) (buf_cur->time - buf_prv->time)); - buf_cur->read_size = - prf_cur->read_size - prf_prior->read_size; - buf_cur->write_size = - prf_cur->write_size - prf_prior->write_size; - } - buf_cur->cpu_freq = prf_cur->cpu_freq; - buf_cur->rss = prf_cur->rss; - buf_cur->vm_size = prf_cur->vm_size; - buf_cur->pages = prf_cur->pages; - return; -} - -static void *_task_series_total(int n_samples, void *data) -{ - profile_task_t* task_data; - profile_task_s_t* total; - task_data = (profile_task_t*) data; - total = xmalloc(sizeof(profile_task_s_t)); - if (total == NULL) { - error("PROFILE: Out of memory getting task total"); - return NULL; - } - strcpy(total->start_time, task_data[0].tod); - total->elapsed_time = task_data[n_samples-1].time; - INCR_DIF_SAMPLE(total, task_data, cpu_freq, n_samples); - INCR_RT_SAMPLE(total, task_data, cpu_time, n_samples); - INCR_DIF_SAMPLE(total, task_data, cpu_utilization, n_samples); - INCR_DIF_SAMPLE(total, task_data, rss, n_samples); - INCR_DIF_SAMPLE(total, task_data, vm_size , n_samples); - INCR_DIF_SAMPLE(total, task_data, pages, n_samples); - INCR_RT_SAMPLE(total, task_data, read_size, n_samples); - INCR_RT_SAMPLE(total, task_data, write_size, n_samples); - return total; -} - -static void _task_extract_series( - FILE* fp, bool put_header, int job, int step, - char *node, char *series, void *data, int size_data) -{ - int n_items, ix; - profile_task_t* task_data = (profile_task_t*) data; - if (put_header) { - fprintf(fp,"Job,Step,Node,Series,Date Time,ElapsedTime," - "CPU Frequency,CPU Time," - "CPU Utilization,rss,VM Size,Pages," - "Read_bytes,Write_bytes\n"); - } - n_items = size_data / sizeof(profile_task_t); - for (ix=0; ix < n_items; ix++) { - fprintf(fp,"%d,%d,%s,%s,%s,%ld,%ld,%ld,%.3f", - job, step, node, series, - task_data[ix].tod, task_data[ix].time, - task_data[ix].cpu_freq, - task_data[ix].cpu_time, task_data[ix].cpu_utilization); - fprintf(fp,",%ld,%ld,%ld,%.3f,%.3f\n", task_data[ix].rss, - task_data[ix].vm_size, task_data[ix].pages, - task_data[ix].read_size, task_data[ix].write_size); - } - return; -} - -static void _task_extract_total( - FILE* fp, bool put_header, int job, int step, - char *node, char *series, void *data, int size_data) -{ - - profile_task_s_t* task_data = (profile_task_s_t*) data; - if (put_header) { - fprintf(fp,"Job,Step,Node,Series,Start_Time,Elapsed_time," - "Min CPU Frequency,Ave CPU Frequency," - "Ave CPU Frequency,Total CPU Frequency," - "Min_CPU_Time,Ave_CPU_Time," - "Max_CPU_Time,Total_CPU_Time," - "Min_CPU_Utilization,Ave_CPU_Utilization," - "Max_CPU_Utilization,Total_CPU_Utilization," - "Min_RSS,Ave_RSS,Max_RSS,Total_RSS," - "Min_VMSize,Ave_VMSize,Max_VMSize,Total_VMSize," - "Min_Pages,Ave_Pages,Max_Pages,Total_Pages," - "Min_Read_Megabytes,Ave_Read_Megabytes," - "Max_Read_Megabytes,Total_Read_Megabytes," - "Min_Write_Megabytes,Ave_Write_Megabytes," - "Max_Write_Megabytes,Total_Write_Megabytes\n"); - } - fprintf(fp, "%d,%d,%s,%s,%s,%ld", job, step, node, series, - task_data->start_time, task_data->elapsed_time); - PUT_UINT_SUM(fp, task_data->cpu_freq, ","); - PUT_UINT_SUM(fp, task_data->cpu_time, ","); - PUT_DBL_SUM(fp, 
task_data->cpu_utilization, ","); - PUT_UINT_SUM(fp, task_data->rss, ","); - PUT_UINT_SUM(fp, task_data->vm_size, ","); - PUT_UINT_SUM(fp, task_data->pages, ","); - PUT_DBL_SUM(fp, task_data->read_size, ","); - PUT_DBL_SUM(fp, task_data->write_size, ","); - fprintf(fp, "\n"); - return; -} - -static hdf5_api_ops_t *_task_profile_factory(void) -{ - hdf5_api_ops_t* ops = xmalloc(sizeof(hdf5_api_ops_t)); - ops->dataset_size = &_task_dataset_size; - ops->create_memory_datatype = &_task_create_memory_datatype; - ops->create_file_datatype = &_task_create_file_datatype; - ops->create_s_memory_datatype = &_task_s_create_memory_datatype; - ops->create_s_file_datatype = &_task_s_create_file_datatype; - ops->init_job_series = &_task_init_job_series; - ops->get_series_tod = &_task_get_series_tod; - ops->get_series_values = &_task_get_series_values; - ops->merge_step_series = &_task_merge_step_series; - ops->series_total = &_task_series_total; - ops->extract_series = &_task_extract_series; - ops->extract_total = &_task_extract_total; - return ops; -} - -/* ============================================================================ - * Common support functions - ===========================================================================*/ - -extern hdf5_api_ops_t* profile_factory(uint32_t type) -{ - switch (type) { - case ACCT_GATHER_PROFILE_ENERGY: - return _energy_profile_factory(); - break; - case ACCT_GATHER_PROFILE_TASK: - return _task_profile_factory(); - break; - case ACCT_GATHER_PROFILE_LUSTRE: - return _io_profile_factory(); - break; - case ACCT_GATHER_PROFILE_NETWORK: - return _network_profile_factory(); - break; - default: - error("profile_factory: Unknown type %d sent", type); - return NULL; - } -} - - -extern void profile_init(void) -{ - typTOD = H5Tcopy (H5T_C_S1); - H5Tset_size (typTOD, TOD_LEN); /* create string of length TOD_LEN */ - - return; -} - extern void profile_fini(void) { - H5Tclose(typTOD); H5close(); /* make sure all H5 Objects are closed */ return; } -extern char *get_data_set_name(char *type) -{ - static char dset_name[MAX_DATASET_NAME+1]; - dset_name[0] = '\0'; - sprintf(dset_name, "%s Data", type); - - return dset_name; -} - - -static char* _H5O_type_t2str(H5O_type_t type) -{ - switch (type) - { - case H5O_TYPE_UNKNOWN: - return "H5O_TYPE_UNKNOWN"; - case H5O_TYPE_GROUP: - return "H5O_TYPE_GROUP"; - case H5O_TYPE_DATASET: - return "H5O_TYPE_DATASET"; - case H5O_TYPE_NAMED_DATATYPE: - return "H5O_TYPE_NAMED_DATATYPE"; - case H5O_TYPE_NTYPES: - return "H5O_TYPE_NTYPES"; - default: - return "Invalid H5O_TYPE"; - } -} - - -extern void hdf5_obj_info(hid_t group, char *nam_group) -{ - char buf[MAX_GROUP_NAME+1]; - hsize_t nobj, nattr; - hid_t aid; - int i, len; - H5G_info_t group_info; - H5O_info_t object_info; - - if (group < 0) { - info("PROFILE: Group is not HDF5 object"); - return; - } - H5Gget_info(group, &group_info); - nobj = group_info.nlinks; - H5Oget_info(group, &object_info); - nattr = object_info.num_attrs; - info("PROFILE group: %s NumObject=%d NumAttributes=%d", - nam_group, (int) nobj, (int) nattr); - for (i = 0; (nobj>0) && (i<nobj); i++) { - H5Oget_info_by_idx(group, ".", H5_INDEX_NAME, H5_ITER_INC, i, - &object_info, H5P_DEFAULT); - len = H5Lget_name_by_idx(group, ".", H5_INDEX_NAME, - H5_ITER_INC, i, buf, MAX_GROUP_NAME, - H5P_DEFAULT); - if ((len > 0) && (len < MAX_GROUP_NAME)) { - info("PROFILE: Obj=%d Type=%s Name=%s", - i, _H5O_type_t2str(object_info.type), buf); - } else { - info("PROFILE: Obj=%d Type=%s Name=%s (is truncated)", - i, 
_H5O_type_t2str(object_info.type), buf); - } - } - for (i = 0; (nattr>0) && (i<nattr); i++) { - aid = H5Aopen_by_idx(group, ".", H5_INDEX_NAME, H5_ITER_INC, - i, H5P_DEFAULT, H5P_DEFAULT); - // Get the name of the attribute. - len = H5Aget_name(aid, MAX_ATTR_NAME, buf); - if (len < MAX_ATTR_NAME) { - info("PROFILE: Attr=%d Name=%s", i, buf); - } else { - info("PROFILE: Attr=%d Name=%s (is truncated)", i, buf); - } - H5Aclose(aid); - } - - return; -} - extern hid_t get_attribute_handle(hid_t parent, char *name) { char buf[MAX_ATTR_NAME+1]; @@ -1646,7 +86,7 @@ extern hid_t get_attribute_handle(hid_t parent, char *name) return -1; } -extern hid_t get_group(hid_t parent, char *name) +extern hid_t get_group(hid_t parent, const char *name) { char buf[MAX_GROUP_NAME]; hsize_t nobj; @@ -1679,7 +119,7 @@ extern hid_t get_group(hid_t parent, char *name) return -1; } -extern hid_t make_group(hid_t parent, char *name) +extern hid_t make_group(hid_t parent, const char *name) { hid_t gid = -1; @@ -1739,47 +179,6 @@ extern void put_string_attribute(hid_t parent, char *name, char *value) return; } -extern char *get_string_attribute(hid_t parent, char *name) -{ - char *value = NULL; - - hid_t attr, type; - size_t size; - - attr = get_attribute_handle(parent, name); - if (attr < 0) { - debug3("PROFILE: Attribute=%s does not exist", name); - return NULL; - } - type = H5Aget_type(attr); - if (H5Tget_class(type) != H5T_STRING) { - H5Aclose(attr); - debug3("PROFILE: Attribute=%s is not a string", name); - return NULL; - } - size = H5Tget_size(type); - value = xmalloc(size+1); - if (value == NULL) { - H5Tclose(type); - H5Aclose(attr); - debug3("PROFILE: failed to malloc %d bytes for attribute=%s", - (int) size, - name); - return NULL; - } - if (H5Aread(attr, type, value) < 0) { - xfree(value); - H5Tclose(type); - H5Aclose(attr); - debug3("PROFILE: failed to read attribute=%s", name); - return NULL; - } - H5Tclose(type); - H5Aclose(attr); - - return value; -} - extern void put_int_attribute(hid_t parent, char *name, int value) { hid_t attr, space_attr; @@ -1807,243 +206,4 @@ extern void put_int_attribute(hid_t parent, char *name, int value) return; } -extern int get_int_attribute(hid_t parent, char *name) -{ - int value = 0; - - hid_t attr; - attr = get_attribute_handle(parent, name); - if (attr < 0) { - debug3("PROFILE: Attribute=%s does not exist, returning", name); - return value; - } - if (H5Aread(attr, H5T_NATIVE_INT, &value) < 0) { - debug3("PROFILE: failed to read attribute=%s, returning", name); - } - H5Aclose(attr); - - return value; -} - - -extern void put_uint32_attribute(hid_t parent, char *name, uint32_t value) -{ - hid_t attr, space_attr; - hsize_t dim_attr[1] = {1}; // Single dimension array of values - - space_attr = H5Screate_simple(1, dim_attr, NULL); - if (space_attr < 0) { - debug3("PROFILE: failed to create space for attribute %s", - name); - return; - } - attr = H5Acreate(parent, name, H5T_NATIVE_UINT32, space_attr, - H5P_DEFAULT, H5P_DEFAULT); - if (attr < 0) { - H5Sclose(space_attr); - debug3("PROFILE: failed to create attribute %s", name); - return; - } - if (H5Awrite(attr, H5T_NATIVE_UINT32, &value) < 0) { - debug3("PROFILE: failed to write attribute %s", name); - // Fall through to release resources - } - H5Sclose(space_attr); - H5Aclose(attr); - - return; -} - -extern uint32_t get_uint32_attribute(hid_t parent, char *name) -{ - int value = 0; - hid_t attr; - - attr = get_attribute_handle(parent, name); - if (attr < 0) { - debug3("PROFILE: Attribute=%s does not exist, returning", name); 
- return value; - } - if (H5Aread(attr, H5T_NATIVE_UINT32, &value) < 0) { - debug3("PROFILE: failed to read attribute=%s, returning", name); - } - H5Aclose(attr); - - return value; -} - -extern void *get_hdf5_data(hid_t parent, uint32_t type, - char *nam_group, int *size_data) -{ - void * data = NULL; - - hid_t id_data_set, dtyp_memory; - hsize_t szDset; - herr_t ec; - char *subtype = NULL; - hdf5_api_ops_t* ops = profile_factory(type); - char *type_name = acct_gather_profile_type_to_string(type); - - if (ops == NULL) { - debug3("PROFILE: failed to create %s operations", - type_name); - return NULL; - } - subtype = get_string_attribute(parent, ATTR_SUBDATATYPE); - if (subtype < 0) { - xfree(ops); - debug3("PROFILE: failed to get %s attribute", - ATTR_SUBDATATYPE); - return NULL; - } - id_data_set = H5Dopen(parent, get_data_set_name(nam_group), - H5P_DEFAULT); - if (id_data_set < 0) { - xfree(subtype); - xfree(ops); - debug3("PROFILE: failed to open %s Data Set", - type_name); - return NULL; - } - if (strcmp(subtype, SUBDATA_SUMMARY)) - dtyp_memory = (*(ops->create_memory_datatype))(); - else - dtyp_memory = (*(ops->create_s_memory_datatype))(); - xfree(subtype); - if (dtyp_memory < 0) { - H5Dclose(id_data_set); - xfree(ops); - debug3("PROFILE: failed to create %s memory datatype", - type_name); - return NULL; - } - szDset = H5Dget_storage_size(id_data_set); - *size_data = (int) szDset; - if (szDset == 0) { - H5Tclose(dtyp_memory); - H5Dclose(id_data_set); - xfree(ops); - debug3("PROFILE: %s data set is empty", - type_name); - return NULL; - } - data = xmalloc(szDset); - if (data == NULL) { - H5Tclose(dtyp_memory); - H5Dclose(id_data_set); - xfree(ops); - debug3("PROFILE: failed to get memory for %s data set", - type_name); - return NULL; - } - ec = H5Dread(id_data_set, dtyp_memory, H5S_ALL, H5S_ALL, H5P_DEFAULT, - data); - if (ec < 0) { - H5Tclose(dtyp_memory); - H5Dclose(id_data_set); - xfree(data); - xfree(ops); - debug3("PROFILE: failed to read %s data", - type_name); - return NULL; - } - H5Tclose(dtyp_memory); - H5Dclose(id_data_set); - xfree(ops); - - return data; -} - -extern void put_hdf5_data(hid_t parent, uint32_t type, char *subtype, - char *group, void *data, int n_item) -{ - hid_t id_group, dtyp_memory, dtyp_file, id_data_space, id_data_set; - hsize_t dims[1]; - herr_t ec; - hdf5_api_ops_t* ops = profile_factory(type); - char *type_name = acct_gather_profile_type_to_string(type); - - if (ops == NULL) { - debug3("PROFILE: failed to create %s operations", - type_name); - return; - } - // Create the datatypes. 
- if (strcmp(subtype, SUBDATA_SUMMARY)) { - dtyp_memory = (*(ops->create_memory_datatype))(); - dtyp_file = (*(ops->create_file_datatype))(); - } else { - dtyp_memory = (*(ops->create_s_memory_datatype))(); - dtyp_file = (*(ops->create_s_file_datatype))(); - } - - if (dtyp_memory < 0) { - xfree(ops); - debug3("PROFILE: failed to create %s memory datatype", - type_name); - return; - } - - if (dtyp_file < 0) { - H5Tclose(dtyp_memory); - xfree(ops); - debug3("PROFILE: failed to create %s file datatype", - type_name); - return; - } - - dims[0] = n_item; - id_data_space = H5Screate_simple(1, dims, NULL); - if (id_data_space < 0) { - H5Tclose(dtyp_file); - H5Tclose(dtyp_memory); - xfree(ops); - debug3("PROFILE: failed to create %s space descriptor", - type_name); - return; - } - - id_group = H5Gcreate(parent, group, H5P_DEFAULT, - H5P_DEFAULT, H5P_DEFAULT); - if (id_group < 0) { - H5Sclose(id_data_space); - H5Tclose(dtyp_file); - H5Tclose(dtyp_memory); - xfree(ops); - debug3("PROFILE: failed to create %s group", group); - return; - } - - put_string_attribute(id_group, ATTR_DATATYPE, type_name); - put_string_attribute(id_group, ATTR_SUBDATATYPE, subtype); - - id_data_set = H5Dcreate(id_group, get_data_set_name(group), dtyp_file, - id_data_space, H5P_DEFAULT, H5P_DEFAULT, - H5P_DEFAULT); - if (id_data_set < 0) { - H5Gclose(id_group); - H5Sclose(id_data_space); - H5Tclose(dtyp_file); - H5Tclose(dtyp_memory); - xfree(ops); - debug3("PROFILE: failed to create %s dataset", group); - return; - } - - ec = H5Dwrite(id_data_set, dtyp_memory, H5S_ALL, H5S_ALL, H5P_DEFAULT, - data); - if (ec < 0) { - debug3("PROFILE: failed to create write task data"); - // Fall through to release resources - } - H5Dclose(id_data_set); - H5Gclose(id_group); - H5Sclose(id_data_space); - H5Tclose(dtyp_file); - H5Tclose(dtyp_memory); - xfree(ops); - - - return; -} diff --git a/src/plugins/acct_gather_profile/hdf5/hdf5_api.h b/src/plugins/acct_gather_profile/hdf5/hdf5_api.h index 721bb189735e5c0f6c146351e0084b797062e2eb..16239875f68a103f8b57669e7eba3cf27aefbbfb 100644 --- a/src/plugins/acct_gather_profile/hdf5/hdf5_api.h +++ b/src/plugins/acct_gather_profile/hdf5/hdf5_api.h @@ -59,244 +59,31 @@ #include <stdlib.h> #include <hdf5.h> -#include "src/common/slurm_acct_gather_profile.h" +#include <hdf5_hl.h> #define MAX_PROFILE_PATH 1024 #define MAX_ATTR_NAME 64 #define MAX_GROUP_NAME 64 -#define MAX_DATASET_NAME 64 #define ATTR_NODENAME "Node Name" -#define ATTR_STARTTIME "Start Time" #define ATTR_NSTEPS "Number of Steps" #define ATTR_NNODES "Number of Nodes" #define ATTR_NTASKS "Number of Tasks" -#define ATTR_TASKID "Task Id" #define ATTR_CPUPERTASK "CPUs per Task" -#define ATTR_DATATYPE "Data Type" -#define ATTR_SUBDATATYPE "Subdata Type" #define ATTR_STARTTIME "Start Time" -#define ATTR_STARTSEC "Start Second" -#define SUBDATA_DATA "Data" -#define SUBDATA_NODE "Node" -#define SUBDATA_SAMPLE "Sample" -#define SUBDATA_SERIES "Series" -#define SUBDATA_TOTAL "Total" -#define SUBDATA_SUMMARY "Summary" #define GRP_ENERGY "Energy" #define GRP_LUSTRE "Lustre" -#define GRP_STEP "Step" +#define GRP_STEPS "Steps" #define GRP_NODES "Nodes" -#define GRP_NODE "Node" #define GRP_NETWORK "Network" -#define GRP_SAMPLES "Time Series" -#define GRP_SAMPLE "Sample" -#define GRP_TASKS "Tasks" #define GRP_TASK "Task" -#define GRP_TOTALS "Totals" - -// Data types supported by all HDF5 plugins of this type - -#define TOD_LEN 24 -#define TOD_FMT "%F %T" - -/* - * prof_uint_sum is a low level structure intended to hold the - * minimum, average, maximum, 
and total values of a data item. - * It is usually used in a summary data structure for an item - * that occurs in a time series. - */ -typedef struct prof_uint_sum { - uint64_t min; // Minumum value - uint64_t ave; // Average value - uint64_t max; // Maximum value - uint64_t total; // Accumlated value -} prof_uint_sum_t; - -// Save as prof_uint_sum, but for double precision items -typedef struct prof_dbl_sum { - double min; // Minumum value - double ave; // Average value - double max; // Maximum value - double total; // Accumlated value -} prof_dbl_sum_t; - -#define PROFILE_ENERGY_DATA "Energy" -// energy data structures -// node_step file -typedef struct profile_energy { - char tod[TOD_LEN]; // Not used in node-step - time_t time; - uint64_t power; - uint64_t cpu_freq; -} profile_energy_t; -// summary data in job-node-totals -typedef struct profile_energy_s { - char start_time[TOD_LEN]; - uint64_t elapsed_time; - prof_uint_sum_t power; - prof_uint_sum_t cpu_freq; -} profile_energy_s_t; // series summary - -#define PROFILE_IO_DATA "I/O" -// io data structure -// node_step file -typedef struct profile_io { - char tod[TOD_LEN]; // Not used in node-step - time_t time; - uint64_t reads; - double read_size; // currently in megabytes - uint64_t writes; - double write_size; // currently in megabytes -} profile_io_t; -// summary data in job-node-totals -typedef struct profile_io_s { - char start_time[TOD_LEN]; - uint64_t elapsed_time; - prof_uint_sum_t reads; - prof_dbl_sum_t read_size; // currently in megabytes - prof_uint_sum_t writes; - prof_dbl_sum_t write_size; // currently in megabytes -} profile_io_s_t; - -#define PROFILE_NETWORK_DATA "Network" -// Network data structure -// node_step file -typedef struct profile_network { - char tod[TOD_LEN]; // Not used in node-step - time_t time; - uint64_t packets_in; - double size_in; // currently in megabytes - uint64_t packets_out; - double size_out; // currently in megabytes -} profile_network_t; -// summary data in job-node-totals -typedef struct profile_network_s { - char start_time[TOD_LEN]; - uint64_t elapsed_time; - prof_uint_sum_t packets_in; - prof_dbl_sum_t size_in; // currently in megabytes - prof_uint_sum_t packets_out; - prof_dbl_sum_t size_out; // currently in megabytes -} profile_network_s_t; - -#define PROFILE_TASK_DATA "Task" -// task data structure -// node_step file -typedef struct profile_task { - char tod[TOD_LEN]; // Not used in node-step - time_t time; - uint64_t cpu_freq; - uint64_t cpu_time; - double cpu_utilization; - uint64_t rss; - uint64_t vm_size; - uint64_t pages; - double read_size; // currently in megabytes - double write_size; // currently in megabytes -} profile_task_t; -// summary data in job-node-totals -typedef struct profile_task_s { - char start_time[TOD_LEN]; - uint64_t elapsed_time; - prof_uint_sum_t cpu_freq; - prof_uint_sum_t cpu_time; - prof_dbl_sum_t cpu_utilization; - prof_uint_sum_t rss; - prof_uint_sum_t vm_size; - prof_uint_sum_t pages; - prof_dbl_sum_t read_size; // currently in megabytes - prof_dbl_sum_t write_size; // currently in megabytes -} profile_task_s_t; - -/* - * Structure of function pointers of common operations on a profile data type. - * dataset_size -- size of one dataset (structure size) - * create_memory_datatype -- creates hdf5 memory datatype corresponding - * to the datatype structure. - * create_file_datatype -- creates hdf5 file datatype corresponding - * to the datatype structure. 
- * create_s_memory_datatype -- creates hdf5 memory datatype corresponding - * to the summary datatype structure. - * create_s_file_datatype -- creates hdf5 file datatype corresponding - * to the summary datatype structure. - * init_job_series -- allocates a buffer for a complete time series - * (in job merge) and initializes each member - * get_series_tod -- get the date/time value of each sample in the series - * get_series_values -- gets a specific data item from each sample in the - * series - * merge_step_series -- merges all the individual time samples into a - * single data set with one item per sample. - * Data items can be scaled (e.g. subtracting beginning time) - * differenced (to show counts in interval) or other things - * appropriate for the series. - * series_total -- accumulate or average members in the entire series to - * be added to the file as totals for the node or task. - * extract_series -- format members of a structure for putting to - * to a file data extracted from a time series to be imported into - * another analysis tool. (e.g. format as comma separated value.) - * extract_totals -- format members of a structure for putting to - * to a file data extracted from a time series total to be - * imported into another analysis tool. - * (format as comma,separated value, for example.) - */ -typedef struct hdf5_api_ops { - int (*dataset_size) (void); - hid_t (*create_memory_datatype) (void); - hid_t (*create_file_datatype) (void); - hid_t (*create_s_memory_datatype) (void); - hid_t (*create_s_file_datatype) (void); - void* (*init_job_series) (int); - char** (*get_series_tod) (void*, int); - double* (*get_series_values) (char*, void*, int); - void (*merge_step_series) (hid_t, void*, void*, void*); - void* (*series_total) (int, void*); - void (*extract_series) (FILE*, bool, int, int, char*, char*, void*, - int); - void (*extract_total) (FILE*, bool, int, int, char*, char*, void*, - int); -} hdf5_api_ops_t; - -/* ============================================================================ - * Common support functions - ==========================================================================*/ - -/* - * Create a opts group from type - */ -hdf5_api_ops_t* profile_factory(uint32_t type); - -/* - * Initialize profile (initialize static memory) - */ -void profile_init(void); /* - * Finialize profile (initialize static memory) + * Finalize profile (initialize static memory) */ void profile_fini(void); -/* - * Make a dataset name - * - * Parameters - * type - series name - * - * Returns - * common data set name based on type in static memory - */ -char* get_data_set_name(char* type); - -/* - * print info on an object for debugging - * - * Parameters - * group - handle to group. - * namGroup - name of the group - */ -void hdf5_obj_info(hid_t group, char* namGroup); - /* * get attribute handle by name. * @@ -317,7 +104,7 @@ hid_t get_attribute_handle(hid_t parent, char* name); * * Returns - handle for group (or -1 when not found), caller must close */ -hid_t get_group(hid_t parent, char* name); +hid_t get_group(hid_t parent, const char* name); /* * make group by name. 
@@ -328,7 +115,7 @@ hid_t get_group(hid_t parent, char* name); * * Returns - handle for group (or -1 on error), caller must close */ -hid_t make_group(hid_t parent, char* name); +hid_t make_group(hid_t parent, const char* name); /* * Put string attribute @@ -340,17 +127,6 @@ hid_t make_group(hid_t parent, char* name); */ void put_string_attribute(hid_t parent, char* name, char* value); -/* - * get string attribute - * - * Parameters - * parent - handle to parent group. - * name - name of the attribute - * - * Return: pointer to value. Caller responsibility to free!!! - */ -char* get_string_attribute(hid_t parent, char* name); - /* * Put integer attribute * @@ -361,63 +137,4 @@ char* get_string_attribute(hid_t parent, char* name); */ void put_int_attribute(hid_t parent, char* name, int value); -/* - * get int attribute - * - * Parameters - * parent - handle to parent group. - * name - name of the attribute - * - * Return: value - */ -int get_int_attribute(hid_t parent, char* name); - -/* - * Put uint32_t attribute - * - * Parameters - * parent - handle to parent group. - * name - name of the attribute - * value - value of the attribute - */ -void put_uint32_attribute(hid_t parent, char* name, uint32_t value); - -/* - * get uint32_t attribute - * - * Parameters - * parent - handle to parent group. - * name - name of the attribute - * - * Return: value - */ -uint32_t get_uint32_attribute(hid_t parent, char* name); - -/* - * Get data from a group of a HDF5 file - * - * Parameters - * parent - handle to parent. - * type - type of data (ACCT_GATHER_PROFILE_* in slurm.h) - * namGroup - name of group - * sizeData - pointer to variable into which to put size of dataset - * - * Returns -- data set of type (or null), caller must free. - */ -void* get_hdf5_data(hid_t parent, uint32_t type, char* namGroup, int* sizeData); - -/* - * Put one data sample into a new group in an HDF5 file - * - * Parameters - * parent - handle to parent group. - * type - type of data (ACCT_GATHER_PROFILE_* in slurm.h) - * subtype - generally source (node, series, ...) or summary - * group - name of new group - * data - data for the sample - * nItems - number of items of type in the data - */ -void put_hdf5_data(hid_t parent, uint32_t type, char* subtype, char* group, - void* data, int nItems); - #endif /*__ACCT_GATHER_HDF5_API_H__*/ diff --git a/src/plugins/acct_gather_profile/hdf5/sh5util/Makefile.am b/src/plugins/acct_gather_profile/hdf5/sh5util/Makefile.am index b3169fb5f834c927b20c82f907253cfd6d9ddbf6..c1be619677b3b65b60209b233ac5968bafde9e04 100644 --- a/src/plugins/acct_gather_profile/hdf5/sh5util/Makefile.am +++ b/src/plugins/acct_gather_profile/hdf5/sh5util/Makefile.am @@ -7,15 +7,17 @@ AUTOMAKE_OPTIONS = foreign # hdf5 could of been installed with a link to the generic mpi.h. 
AM_CPPFLAGS = -I$(top_srcdir) -I../ $(HDF5_CPPFLAGS) -SHDF5_SOURCES = sh5util.c +SHDF5_SOURCES = sh5util.c sh5util.h if BUILD_HDF5 +SUBDIRS = libsh5util_old + bin_PROGRAMS = sh5util sh5util_SOURCES = $(SHDF5_SOURCES) sh5util_LDADD = $(top_builddir)/src/api/libslurm.o $(DL_LIBS) \ - ../libhdf5_api.la + ../libhdf5_api.la libsh5util_old/libsh5util_old.la sh5util_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) \ $(HDF5_LDFLAGS) $(HDF5_LIBS) diff --git a/src/plugins/acct_gather_profile/hdf5/sh5util/Makefile.in b/src/plugins/acct_gather_profile/hdf5/sh5util/Makefile.in index 3ae5ea953cfbdd016698af32862c6ce2c0072b31..513f3672a4b5ec313661cbd71975a5370178e4c7 100644 --- a/src/plugins/acct_gather_profile/hdf5/sh5util/Makefile.in +++ b/src/plugins/acct_gather_profile/hdf5/sh5util/Makefile.in @@ -138,15 +138,16 @@ CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__installdirs = "$(DESTDIR)$(bindir)" PROGRAMS = $(bin_PROGRAMS) -am__sh5util_SOURCES_DIST = sh5util.c +am__sh5util_SOURCES_DIST = sh5util.c sh5util.h am__objects_1 = sh5util.$(OBJEXT) @BUILD_HDF5_TRUE@am_sh5util_OBJECTS = $(am__objects_1) -am__EXTRA_sh5util_SOURCES_DIST = sh5util.c +am__EXTRA_sh5util_SOURCES_DIST = sh5util.c sh5util.h sh5util_OBJECTS = $(am_sh5util_OBJECTS) am__DEPENDENCIES_1 = @BUILD_HDF5_TRUE@sh5util_DEPENDENCIES = \ @BUILD_HDF5_TRUE@ $(top_builddir)/src/api/libslurm.o \ -@BUILD_HDF5_TRUE@ $(am__DEPENDENCIES_1) ../libhdf5_api.la +@BUILD_HDF5_TRUE@ $(am__DEPENDENCIES_1) ../libhdf5_api.la \ +@BUILD_HDF5_TRUE@ libsh5util_old/libsh5util_old.la AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent @@ -191,11 +192,27 @@ am__v_CCLD_1 = SOURCES = $(sh5util_SOURCES) $(EXTRA_sh5util_SOURCES) DIST_SOURCES = $(am__sh5util_SOURCES_DIST) \ $(am__EXTRA_sh5util_SOURCES_DIST) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. 
Input order is @@ -215,7 +232,33 @@ am__define_uniq_tagged_files = \ done | $(am__uniquify_input)` ETAGS = etags CTAGS = ctags +DIST_SUBDIRS = libsh5util_old DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ @@ -469,16 +512,17 @@ AUTOMAKE_OPTIONS = foreign # Do not put a link to common here. src/common contains an mpi.h which # hdf5 could of been installed with a link to the generic mpi.h. AM_CPPFLAGS = -I$(top_srcdir) -I../ $(HDF5_CPPFLAGS) -SHDF5_SOURCES = sh5util.c +SHDF5_SOURCES = sh5util.c sh5util.h +@BUILD_HDF5_TRUE@SUBDIRS = libsh5util_old @BUILD_HDF5_TRUE@sh5util_SOURCES = $(SHDF5_SOURCES) @BUILD_HDF5_TRUE@sh5util_LDADD = $(top_builddir)/src/api/libslurm.o $(DL_LIBS) \ -@BUILD_HDF5_TRUE@ ../libhdf5_api.la +@BUILD_HDF5_TRUE@ ../libhdf5_api.la libsh5util_old/libsh5util_old.la @BUILD_HDF5_TRUE@sh5util_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) \ @BUILD_HDF5_TRUE@ $(HDF5_LDFLAGS) $(HDF5_LIBS) @BUILD_HDF5_FALSE@EXTRA_sh5util_SOURCES = $(SHDF5_SOURCES) -all: all-am +all: all-recursive .SUFFIXES: .SUFFIXES: .c .lo .o .obj @@ -601,14 +645,61 @@ mostlyclean-libtool: clean-libtool: -rm -rf .libs _libs +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique -tags: tags-am +tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ @@ -621,7 +712,7 @@ tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $$unique; \ fi; \ fi -ctags: ctags-am +ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) @@ -634,7 +725,7 @@ GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" -cscopelist: cscopelist-am +cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ @@ -683,22 +774,48 @@ distdir: $(DISTFILES) || exit 1; \ fi; \ done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done check-am: all-am -check: check-am +check: check-recursive all-am: Makefile $(PROGRAMS) -installdirs: +installdirs: installdirs-recursive +installdirs-am: for dir in "$(DESTDIR)$(bindir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done -install: install-am -install-exec: install-exec-am -install-data: install-data-am -uninstall: uninstall-am +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am -installcheck: installcheck-am +installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ @@ -720,91 +837,92 @@ distclean-generic: 
maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." -clean: clean-am +clean: clean-recursive clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am -distclean: distclean-am +distclean: distclean-recursive -rm -rf ./$(DEPDIR) -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags -dvi: dvi-am +dvi: dvi-recursive dvi-am: -html: html-am +html: html-recursive html-am: -info: info-am +info: info-recursive info-am: install-data-am: -install-dvi: install-dvi-am +install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-binPROGRAMS -install-html: install-html-am +install-html: install-html-recursive install-html-am: -install-info: install-info-am +install-info: install-info-recursive install-info-am: install-man: -install-pdf: install-pdf-am +install-pdf: install-pdf-recursive install-pdf-am: -install-ps: install-ps-am +install-ps: install-ps-recursive install-ps-am: installcheck-am: -maintainer-clean: maintainer-clean-am +maintainer-clean: maintainer-clean-recursive -rm -rf ./$(DEPDIR) -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic -mostlyclean: mostlyclean-am +mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool -pdf: pdf-am +pdf: pdf-recursive pdf-am: -ps: ps-am +ps: ps-recursive ps-am: uninstall-am: uninstall-binPROGRAMS -.MAKE: install-am install-strip - -.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean \ - clean-binPROGRAMS clean-generic clean-libtool cscopelist-am \ - ctags ctags-am distclean distclean-compile distclean-generic \ - distclean-libtool distclean-tags distdir dvi dvi-am html \ - html-am info info-am install install-am install-binPROGRAMS \ - install-data install-data-am install-dvi install-dvi-am \ - install-exec install-exec-am install-html install-html-am \ - install-info install-info-am install-man install-pdf \ - install-pdf-am install-ps install-ps-am install-strip \ - installcheck installcheck-am installdirs maintainer-clean \ - maintainer-clean-generic mostlyclean mostlyclean-compile \ - mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ - tags tags-am uninstall uninstall-am uninstall-binPROGRAMS +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-binPROGRAMS clean-generic clean-libtool \ + cscopelist-am ctags ctags-am distclean distclean-compile \ + distclean-generic distclean-libtool distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-binPROGRAMS install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am uninstall-binPROGRAMS @BUILD_HDF5_TRUE@force: diff --git a/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/Makefile.am b/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..82f29109d9cdd72efc2136000e05100e7dfe7a77 --- /dev/null +++ 
b/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/Makefile.am @@ -0,0 +1,13 @@ +# +# Makefile for sh5util_old lib, can be taken out 2 versions past 15.08 +# + +AUTOMAKE_OPTIONS = foreign + +AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/src/common -I. + +noinst_LTLIBRARIES = libsh5util_old.la + +libsh5util_old_la_SOURCES = sh5util.c sh5util_old.h hdf5_api.c hdf5_api.h + +libsh5util_old_la_LDFLAGS = $(LIB_LDFLAGS) -module --export-dynamic diff --git a/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/Makefile.in b/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..233acc3809305fdee831299e20fc9521e0e92a98 --- /dev/null +++ b/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/Makefile.in @@ -0,0 +1,756 @@ +# Makefile.in generated by automake 1.14.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Makefile for sh5util_old lib, can be taken out 2 versions past 15.08 +# + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old +DIST_COMMON = 
$(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/auxdir/depcomp +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/ax_lib_hdf5.m4 \ + $(top_srcdir)/auxdir/ax_pthread.m4 \ + $(top_srcdir)/auxdir/libtool.m4 \ + $(top_srcdir)/auxdir/ltoptions.m4 \ + $(top_srcdir)/auxdir/ltsugar.m4 \ + $(top_srcdir)/auxdir/ltversion.m4 \ + $(top_srcdir)/auxdir/lt~obsolete.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ + $(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_blcr.m4 \ + $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 \ + $(top_srcdir)/auxdir/x_ac_cray.m4 \ + $(top_srcdir)/auxdir/x_ac_curl.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_dlfcn.m4 \ + $(top_srcdir)/auxdir/x_ac_env.m4 \ + $(top_srcdir)/auxdir/x_ac_freeipmi.m4 \ + $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + $(top_srcdir)/auxdir/x_ac_hwloc.m4 \ + $(top_srcdir)/auxdir/x_ac_iso.m4 \ + $(top_srcdir)/auxdir/x_ac_json.m4 \ + $(top_srcdir)/auxdir/x_ac_lua.m4 \ + $(top_srcdir)/auxdir/x_ac_man2html.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_netloc.m4 \ + $(top_srcdir)/auxdir/x_ac_nrt.m4 \ + $(top_srcdir)/auxdir/x_ac_ofed.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_printf_null.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_rrdtool.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_sun_const.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libsh5util_old_la_LIBADD = +am_libsh5util_old_la_OBJECTS = sh5util.lo hdf5_api.lo +libsh5util_old_la_OBJECTS = $(am_libsh5util_old_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libsh5util_old_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(libsh5util_old_la_LDFLAGS) $(LDFLAGS) \ + -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm +depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC 
$(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libsh5util_old_la_SOURCES) +DIST_SOURCES = $(libsh5util_old_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BGL_LOADED = @BGL_LOADED@ +BGQ_LOADED = @BGQ_LOADED@ +BG_INCLUDES = @BG_INCLUDES@ +BG_LDFLAGS = @BG_LDFLAGS@ +BG_L_P_LOADED = @BG_L_P_LOADED@ +BLCR_CPPFLAGS = @BLCR_CPPFLAGS@ +BLCR_HOME = @BLCR_HOME@ +BLCR_LDFLAGS = @BLCR_LDFLAGS@ +BLCR_LIBS = @BLCR_LIBS@ +BLUEGENE_LOADED = @BLUEGENE_LOADED@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_JOB_CPPFLAGS = @CRAY_JOB_CPPFLAGS@ +CRAY_JOB_LDFLAGS = @CRAY_JOB_LDFLAGS@ +CRAY_SELECT_CPPFLAGS = @CRAY_SELECT_CPPFLAGS@ +CRAY_SELECT_LDFLAGS = @CRAY_SELECT_LDFLAGS@ +CRAY_SWITCH_CPPFLAGS = @CRAY_SWITCH_CPPFLAGS@ +CRAY_SWITCH_LDFLAGS = @CRAY_SWITCH_LDFLAGS@ +CRAY_TASK_CPPFLAGS = @CRAY_TASK_CPPFLAGS@ +CRAY_TASK_LDFLAGS = @CRAY_TASK_LDFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DATAWARP_CPPFLAGS = @DATAWARP_CPPFLAGS@ +DATAWARP_LDFLAGS = @DATAWARP_LDFLAGS@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FREEIPMI_CPPFLAGS = @FREEIPMI_CPPFLAGS@ +FREEIPMI_LDFLAGS = @FREEIPMI_LDFLAGS@ +FREEIPMI_LIBS = @FREEIPMI_LIBS@ +GLIB_CFLAGS = @GLIB_CFLAGS@ +GLIB_COMPILE_RESOURCES = @GLIB_COMPILE_RESOURCES@ +GLIB_GENMARSHAL = @GLIB_GENMARSHAL@ +GLIB_LIBS = @GLIB_LIBS@ +GLIB_MKENUMS = @GLIB_MKENUMS@ +GOBJECT_QUERY = @GOBJECT_QUERY@ +GREP = @GREP@ +GTK_CFLAGS = @GTK_CFLAGS@ +GTK_LIBS = @GTK_LIBS@ +H5CC = @H5CC@ +H5FC = @H5FC@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_MAN2HTML = @HAVE_MAN2HTML@ +HAVE_NRT = @HAVE_NRT@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +HDF5_CC = @HDF5_CC@ +HDF5_CFLAGS = @HDF5_CFLAGS@ +HDF5_CPPFLAGS = @HDF5_CPPFLAGS@ +HDF5_FC = @HDF5_FC@ +HDF5_FFLAGS = @HDF5_FFLAGS@ +HDF5_FLIBS = 
@HDF5_FLIBS@ +HDF5_LDFLAGS = @HDF5_LDFLAGS@ +HDF5_LIBS = @HDF5_LIBS@ +HDF5_VERSION = @HDF5_VERSION@ +HWLOC_CPPFLAGS = @HWLOC_CPPFLAGS@ +HWLOC_LDFLAGS = @HWLOC_LDFLAGS@ +HWLOC_LIBS = @HWLOC_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JSON_CPPFLAGS = @JSON_CPPFLAGS@ +JSON_LDFLAGS = @JSON_LDFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBCURL = @LIBCURL@ +LIBCURL_CPPFLAGS = @LIBCURL_CPPFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_DIR = @MUNGE_DIR@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NETLOC_CPPFLAGS = @NETLOC_CPPFLAGS@ +NETLOC_LDFLAGS = @NETLOC_LDFLAGS@ +NETLOC_LIBS = @NETLOC_LIBS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NRT_CPPFLAGS = @NRT_CPPFLAGS@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OFED_CPPFLAGS = @OFED_CPPFLAGS@ +OFED_LDFLAGS = @OFED_LDFLAGS@ +OFED_LIBS = @OFED_LIBS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_DIR = @PAM_DIR@ +PAM_LIBS = @PAM_LIBS@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ +REAL_BGQ_LOADED = @REAL_BGQ_LOADED@ +REAL_BG_L_P_LOADED = @REAL_BG_L_P_LOADED@ +RELEASE = @RELEASE@ +RRDTOOL_CPPFLAGS = @RRDTOOL_CPPFLAGS@ +RRDTOOL_LDFLAGS = @RRDTOOL_LDFLAGS@ +RRDTOOL_LIBS = @RRDTOOL_LIBS@ +RUNJOB_LDFLAGS = @RUNJOB_LDFLAGS@ +SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ +SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLEEP_CMD = @SLEEP_CMD@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ +SLURMCTLD_PORT_COUNT = @SLURMCTLD_PORT_COUNT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = @SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ +SLURM_VERSION_NUMBER = @SLURM_VERSION_NUMBER@ +SLURM_VERSION_STRING = @SLURM_VERSION_STRING@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +SUCMD = @SUCMD@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +_libcurl_config = @_libcurl_config@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +ac_have_man2html = @ac_have_man2html@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +ax_pthread_config = @ax_pthread_config@ 
+bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lua_CFLAGS = @lua_CFLAGS@ +lua_LIBS = @lua_LIBS@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = foreign +AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/src/common -I. +noinst_LTLIBRARIES = libsh5util_old.la +libsh5util_old_la_SOURCES = sh5util.c sh5util_old.h hdf5_api.c hdf5_api.h +libsh5util_old_la_LDFLAGS = $(LIB_LDFLAGS) -module --export-dynamic +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libsh5util_old.la: $(libsh5util_old_la_OBJECTS) $(libsh5util_old_la_DEPENDENCIES) $(EXTRA_libsh5util_old_la_DEPENDENCIES) + $(AM_V_CCLD)$(libsh5util_old_la_LINK) $(libsh5util_old_la_OBJECTS) $(libsh5util_old_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hdf5_api.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sh5util.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + 
$(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/hdf5_api.c b/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/hdf5_api.c new file mode 100644 index 0000000000000000000000000000000000000000..0772c0692fcc70e7041ec5660b5e8f26c37da640 --- /dev/null +++ b/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/hdf5_api.c @@ -0,0 +1,1868 @@ +/****************************************************************************\ + * hdf5_api.c + ***************************************************************************** + * Copyright (C) 2013 Bull S. A. S. + * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. + * + * Written by Rod Schultz <rod.schultz@bull.com> + * + * Provide support for acct_gather_profile plugins based on HDF5 files. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. 
If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\****************************************************************************/ + +#include "src/common/macros.h" +#include "src/common/slurm_time.h" +#include "src/common/xassert.h" +#include "src/common/xstring.h" + +#include "hdf5_api.h" + + +// Static variables ok as add function are inside a lock. +static time_t seriesStart; +static hid_t typTOD; +static int i; // General index used in some macros. +static int moffset; // General variable used by insert macros + +/* + * Macro to insert a date string type into a compound memory type + * + * Parameters + * p parent (group) memory type + * label description of item + * type profile struct type + * item data item in type + */ +#define MEM_ADD_DATE_TIME(p, label, type, item) \ + if(H5Tinsert(p, label, HOFFSET(type, item), typTOD) < 0) { \ + debug3("PROFILE: failed insert into memory datatype"); \ + H5Tclose(p); \ + return -1; \ + } +/* + * Macro to insert a date string type into a compound file type + * + * Parameters + * p parent (group) file type + * label description of item + * offset offset into record + */ +#define FILE_ADD_DATE_TIME(p, label, offset) \ + if(H5Tinsert(p, label, offset, typTOD) < 0) { \ + debug3("PROFILE: failed insert into file datatype"); \ + H5Tclose(p); \ + return -1; \ + } + +/* + * Macro to insert an uint64 into a compound memory type + * + * Parameters + * p parent (group) memory type + * label description of item + * type profile struct type + * item data item in type + */ +#define MEM_ADD_UINT64(p, label, type, item) \ + if(H5Tinsert(p, label, HOFFSET(type, item), H5T_NATIVE_UINT64) < 0) { \ + debug3("PROFILE: failed insert64 into memory datatype"); \ + H5Tclose(p); \ + return -1; \ + } +/* + * Macro to insert a uint64 into a compound file type + * + * Parameters + * p parent (group) file type + * label description of item + */ +#define FILE_ADD_UINT64(p, label) \ + if(H5Tinsert(p, label, moffset, H5T_NATIVE_UINT64) < 0) { \ + debug3("PROFILE: failed insert64 into file datatype"); \ + H5Tclose(p); \ + return -1; \ + } \ + moffset += 8; + +/* + * Macro to insert a double into a compound memory type + * + * Parameters + * p parent (group) memory type + * label description of item + * type profile struct type + * item data item in type + */ +#define MEM_ADD_DBL(p, label, type, item) \ + if(H5Tinsert(p, label, HOFFSET(type, item), H5T_NATIVE_DOUBLE) < 0) { \ + debug3("PROFILE: failed insertdbl into memory datatype"); \ + H5Tclose(p); \ + return -1; \ + } +/* + * Macro to insert a double into a compound file type + * + * Parameters + * p parent (group) file type + * label description of item + */ +#define FILE_ADD_DBL(p, label) \ + if(H5Tinsert(p, label, moffset, H5T_NATIVE_DOUBLE) < 0) { \ + debug3("PROFILE: failed insertdbl into 
file datatype"); \ + H5Tclose(p); \ + return -1; \ + } \ + moffset += 8; + +/* + * Macro to increment a sample in a difference series + * -- Difference means each sample represents counts for only that interval + * (assumes consistent naming convention) + * + * + * Parameters + * tot total pointer + * smp sample pointer + * var variable name in sample + * count number of items in series + */ +#define INCR_DIF_SAMPLE(tot, smp, var, count) \ + for (i=0; i<count; i++) { \ + if (i == 0) { \ + total->var.min = smp[i].var; \ + } \ + tot->var.total += smp[i].var; \ + tot->var.min = MIN(smp[i].var, tot->var.min); \ + tot->var.max = MAX(smp[i].var, tot->var.max); \ + } \ + tot->var.ave = tot->var.total / count; + +/* + * Macro to increment a sample in a running total + * -- Running total means first sample is initial conditions + * (assumes consistent naming convention) + * + * + * Parameters + * tot total pointer + * smp sample pointer + * var variable name in sample + * count number of items in series + */ +#define INCR_RT_SAMPLE(tot, smp, var, count) \ + for (i=1; i<count; i++) { \ + if (i == 1) { \ + total->var.min = smp[i].var; \ + } \ + tot->var.total += smp[i].var; \ + tot->var.min = MIN(smp[i].var, tot->var.min); \ + tot->var.max = MAX(smp[i].var, tot->var.max); \ + } \ + tot->var.ave = tot->var.total / count; + +/* Macro to put an int min,ave,max,total for a variable to extract file + * + * Parameters + * fp file descriptor + * var variable name + * prf prefix for series (usually ',' + */ +#define PUT_UINT_SUM(fp, var, prfx) \ + fprintf(fp, "%s%ld,%ld,%ld,%ld", prfx, \ + var.min, var.ave, var.max, var.total); +/* Macro to put an int min,ave,max,total for a variable to extract file + * + * Parameters + * fp file descriptor + * var variable name + * prf prefix for series (usually ',' + */ +#define PUT_DBL_SUM(fp, var, prfx) \ + fprintf(fp, "%s%.3f,%.3f,%.3f,%.3f", prfx, \ + var.min, var.ave, var.max, var.total); + + +// ============================================================================ +// Routines supporting Energy Data type +// ============================================================================ + +static int _energy_dataset_size(void) +{ + return sizeof(profile_energy_t); +} + +static hid_t _energy_create_memory_datatype(void) +{ + hid_t mtyp_energy = H5Tcreate(H5T_COMPOUND, sizeof(profile_energy_t)); + if (mtyp_energy < 0) { + debug3("PROFILE: failed to create Energy memory datatype"); + return -1; + } + MEM_ADD_DATE_TIME(mtyp_energy, "Date_Time", profile_energy_t, tod); + MEM_ADD_UINT64(mtyp_energy, "Time", profile_energy_t, time); + MEM_ADD_UINT64(mtyp_energy, "Power", profile_energy_t, power); + MEM_ADD_UINT64(mtyp_energy, "CPU_Frequency", + profile_energy_t, cpu_freq); + + return mtyp_energy; +} + +static hid_t _energy_create_file_datatype(void) +{ + hid_t ftyp_energy = H5Tcreate(H5T_COMPOUND, (TOD_LEN+3*8)); + if (ftyp_energy < 0) { + debug3("PROFILE: failed to create Energy file datatype"); + return -1; + } + moffset = TOD_LEN; + FILE_ADD_DATE_TIME(ftyp_energy, "Date_Time", 0); + FILE_ADD_UINT64(ftyp_energy, "Time"); + FILE_ADD_UINT64(ftyp_energy, "Power"); + FILE_ADD_UINT64(ftyp_energy, "CPU_Frequency"); + + return ftyp_energy; +} + +static hid_t _energy_s_create_memory_datatype(void) +{ + hid_t mtyp_energy = H5Tcreate(H5T_COMPOUND, + sizeof(profile_energy_s_t)); + if (mtyp_energy < 0) { + debug3("PROFILE: failed to create Energy_s memory datatype"); + return -1; + } + MEM_ADD_DATE_TIME(mtyp_energy, "Start Time", + profile_energy_s_t, start_time); + 
MEM_ADD_UINT64(mtyp_energy, "Elapsed Time", + profile_energy_s_t, elapsed_time); + MEM_ADD_UINT64(mtyp_energy, "Min Power", profile_energy_s_t, power.min); + MEM_ADD_UINT64(mtyp_energy, "Ave Power", profile_energy_s_t, power.ave); + MEM_ADD_UINT64(mtyp_energy, "Max Power", profile_energy_s_t, power.max); + MEM_ADD_UINT64(mtyp_energy, "Total Power", + profile_energy_s_t, power.total); + MEM_ADD_UINT64(mtyp_energy, "Min CPU Frequency", profile_energy_s_t, + cpu_freq.min); + MEM_ADD_UINT64(mtyp_energy, "Ave CPU Frequency", profile_energy_s_t, + cpu_freq.ave); + MEM_ADD_UINT64(mtyp_energy, "Max CPU Frequency", profile_energy_s_t, + cpu_freq.max); + MEM_ADD_UINT64(mtyp_energy, "Total CPU Frequency", profile_energy_s_t, + cpu_freq.total); + + return mtyp_energy; +} + +static hid_t _energy_s_create_file_datatype(void) +{ + hid_t ftyp_energy = H5Tcreate(H5T_COMPOUND, (TOD_LEN+9*8)); + if (ftyp_energy < 0) { + debug3("PROFILE: failed to create Energy_s file datatype"); + return -1; + } + moffset = TOD_LEN; + FILE_ADD_DATE_TIME(ftyp_energy, "Start Time", 0); + FILE_ADD_UINT64(ftyp_energy, "Elapsed Time"); + FILE_ADD_UINT64(ftyp_energy, "Min Power"); + FILE_ADD_UINT64(ftyp_energy, "Ave Power"); + FILE_ADD_UINT64(ftyp_energy, "Max Power"); + FILE_ADD_UINT64(ftyp_energy, "Total Power"); + FILE_ADD_UINT64(ftyp_energy, "Min CPU Frequency"); + FILE_ADD_UINT64(ftyp_energy, "Ave CPU Frequency"); + FILE_ADD_UINT64(ftyp_energy, "Max CPU Frequency"); + FILE_ADD_UINT64(ftyp_energy, "Total CPU Frequency"); + + return ftyp_energy; +} + +static void *_energy_init_job_series(int n_samples) +{ + profile_energy_t* energy_data; + + energy_data = xmalloc(n_samples * sizeof(profile_energy_t)); + if (energy_data == NULL) { + debug3("PROFILE: failed to get memory for energy data"); + return NULL; + } + return (void*) energy_data; +} + +static char** _energy_get_series_tod(void* data, int nsmp) +{ + int ix; + char **tod_values = NULL; + profile_energy_t* energy_series = (profile_energy_t*) data; + tod_values = (char**) xmalloc(nsmp*sizeof(char*)); + if (tod_values == NULL) { + info("Failed to get memory for energy tod"); + return NULL; + } + for (ix=0; ix < nsmp; ix++) { + tod_values[ix] = xstrdup(energy_series[ix].tod); + } + return tod_values; +} + +static double* _energy_get_series_values(char* data_name, void* data, int nsmp) +{ + int ix; + profile_energy_t* energy_series = (profile_energy_t*) data; + double *energy_values = NULL; + energy_values = xmalloc(nsmp*sizeof(double)); + if (energy_values == NULL) { + info("PROFILE: Failed to get memory for energy data"); + return NULL; + } + if (strcasecmp(data_name,"Time") == 0) { + for (ix=0; ix < nsmp; ix++) { + energy_values[ix] = (double) energy_series[ix].time; + + } + return energy_values; + } else if (strcasecmp(data_name,"Power") == 0) { + for (ix=0; ix < nsmp; ix++) { + energy_values[ix] = (double) energy_series[ix].power; + + } + return energy_values; + } else if (strcasecmp(data_name,"CPU_Frequency") == 0) { + for (ix=0; ix < nsmp; ix++) { + energy_values[ix] = (double) energy_series[ix].cpu_freq; + + } + return energy_values; + } + xfree(energy_values); + info("PROFILE: %s is invalid data item for energy data", data_name); + return NULL; +} + +static void _energy_merge_step_series( + hid_t group, void *prior, void *cur, void *buf) +{ +// This is a difference series + profile_energy_t* prf_cur = (profile_energy_t*) cur; + profile_energy_t* prf_buf = (profile_energy_t*) buf; + struct tm *ts = slurm_localtime(&prf_cur->time); + strftime(prf_buf->tod, TOD_LEN, 
TOD_FMT, ts); + if (prior == NULL) { + // First sample. + seriesStart = prf_cur->time; + prf_buf->time = 0; + + } else { + prf_buf->time = prf_cur->time - seriesStart; + } + prf_buf->power = prf_cur->power; + prf_buf->cpu_freq = prf_cur->cpu_freq; + return; +} + +static void *_energy_series_total(int n_samples, void *data) +{ + profile_energy_t* energy_data; + profile_energy_s_t* total; + if (n_samples < 1) + return NULL; + energy_data = (profile_energy_t*) data; + total = xmalloc(sizeof(profile_energy_s_t)); + if (total == NULL) { + error("PROFILE: Out of memory getting energy total"); + return NULL; + } + // Assuming energy series are a difference series + strcpy(total->start_time, energy_data[0].tod); + total->elapsed_time = energy_data[n_samples-1].time; + INCR_DIF_SAMPLE(total, energy_data, power, n_samples); + INCR_DIF_SAMPLE(total, energy_data, cpu_freq, n_samples); + return total; +} + +static void _energy_extract_series( + FILE* fp, bool put_header, int job, int step, + char *node, char *series, void *data, int size_data) +{ + + int n_items, ix; + profile_energy_t* energy_data = (profile_energy_t*) data; + if (put_header) { + fprintf(fp, "Job,Step,Node,Series,Date_Time,Elapsed_Time," + "Power, CPU_Frequency\n"); + } + n_items = size_data / sizeof(profile_energy_t); + for (ix=0; ix < n_items; ix++) { + fprintf(fp, "%d,%d,%s,%s,%s,%ld,%ld,%ld\n", job, step, node, + series, energy_data[ix].tod, energy_data[ix].time, + energy_data[ix].power, energy_data[ix].cpu_freq); + } + return; +} + +static void _energy_extract_total( + FILE* fp, bool put_header, int job, int step, + char *node, char *series, void *data, int size_data) +{ + profile_energy_s_t* energy_data = (profile_energy_s_t*) data; + if (put_header) { + fprintf(fp, "Job,Step,Node,Series,Start_Time,Elapsed_Time," + "Min_Power,Ave_Power,Max_Power,Total_Power," + "Min_CPU Frequency,Ave_CPU Frequency," + "Max_CPU Frequency,Total_CPU Frequency\n"); + } + fprintf(fp, "%d,%d,%s,%s,%s,%ld", job, step, node, series, + energy_data->start_time, energy_data->elapsed_time); + PUT_UINT_SUM(fp, energy_data->power, ","); + PUT_UINT_SUM(fp, energy_data->cpu_freq, ","); + fprintf(fp, "\n"); + return; +} + +static hdf5_api_ops_t* _energy_profile_factory(void) +{ + hdf5_api_ops_t* ops = xmalloc(sizeof(hdf5_api_ops_t)); + ops->dataset_size = &_energy_dataset_size; + ops->create_memory_datatype = &_energy_create_memory_datatype; + ops->create_file_datatype = &_energy_create_file_datatype; + ops->create_s_memory_datatype = &_energy_s_create_memory_datatype; + ops->create_s_file_datatype = &_energy_s_create_file_datatype; + ops->init_job_series = &_energy_init_job_series; + ops->get_series_tod = &_energy_get_series_tod; + ops->get_series_values = &_energy_get_series_values; + ops->merge_step_series = &_energy_merge_step_series; + ops->series_total = &_energy_series_total; + ops->extract_series = &_energy_extract_series; + ops->extract_total = &_energy_extract_total; + return ops; +} + + +// ============================================================================ +// Routines supporting I/O Data type +// ============================================================================ + +static int _io_dataset_size(void) +{ + return sizeof(profile_io_t); +} + +static hid_t _io_create_memory_datatype(void) +{ + hid_t mtyp_io = -1; + + mtyp_io = H5Tcreate(H5T_COMPOUND, sizeof(profile_io_t)); + if (mtyp_io < 0) { + debug3("PROFILE: failed to create IO memory datatype"); + return -1; + } + MEM_ADD_DATE_TIME(mtyp_io, "Date_Time", profile_io_t, tod); + 
MEM_ADD_UINT64(mtyp_io, "Time", profile_io_t, time); + MEM_ADD_UINT64(mtyp_io, "Reads", profile_io_t, reads); + MEM_ADD_DBL(mtyp_io, "Megabytes_Read", profile_io_t, read_size); + MEM_ADD_UINT64(mtyp_io, "Writes", profile_io_t, writes); + MEM_ADD_DBL(mtyp_io, "Megabytes_Write", profile_io_t, write_size); + return mtyp_io; +} + +static hid_t _io_create_file_datatype(void) +{ + hid_t ftyp_io = -1; + + ftyp_io = H5Tcreate(H5T_COMPOUND, TOD_LEN+5*8); + if (ftyp_io < 0) { + debug3("PROFILE: failed to create IO file datatype"); + return -1; + } + moffset = TOD_LEN; + FILE_ADD_DATE_TIME(ftyp_io, "Date_Time", 0); + FILE_ADD_UINT64(ftyp_io, "Time"); + FILE_ADD_UINT64(ftyp_io, "Reads"); + FILE_ADD_DBL(ftyp_io, "Megabytes_Read"); + FILE_ADD_UINT64(ftyp_io, "Writes"); + FILE_ADD_DBL(ftyp_io, "Megabytes_Write"); + + return ftyp_io; +} + +static hid_t _io_s_create_memory_datatype(void) +{ + hid_t mtyp_io = -1; + + mtyp_io = H5Tcreate(H5T_COMPOUND, sizeof(profile_io_s_t)); + if (mtyp_io < 0) { + debug3("PROFILE: failed to create IO memory datatype"); + return -1; + } + MEM_ADD_DATE_TIME(mtyp_io, "Start Time", profile_io_s_t, start_time); + MEM_ADD_UINT64(mtyp_io, "Elapsed Time", profile_io_s_t, elapsed_time); + MEM_ADD_UINT64(mtyp_io, "Min Reads", profile_io_s_t, reads.min); + MEM_ADD_UINT64(mtyp_io, "Ave Reads", profile_io_s_t, reads.ave); + MEM_ADD_UINT64(mtyp_io, "Max Reads", profile_io_s_t, reads.max); + MEM_ADD_UINT64(mtyp_io, "Total Reads", profile_io_s_t, reads.total); + MEM_ADD_DBL(mtyp_io, "Min Read Megabytes", + profile_io_s_t, read_size.min); + MEM_ADD_DBL(mtyp_io, "Ave Read Megabytes", + profile_io_s_t, read_size.ave); + MEM_ADD_DBL(mtyp_io, "Max Read Megabytes", + profile_io_s_t, read_size.max); + MEM_ADD_DBL(mtyp_io, "Total Read Megabytes", profile_io_s_t, + read_size.total); + MEM_ADD_UINT64(mtyp_io, "Min Writes", profile_io_s_t, writes.min); + MEM_ADD_UINT64(mtyp_io, "Ave Writes", profile_io_s_t, writes.ave); + MEM_ADD_UINT64(mtyp_io, "Max Writes", profile_io_s_t, writes.max); + MEM_ADD_UINT64(mtyp_io, "Total Writes", profile_io_s_t, writes.total); + MEM_ADD_DBL(mtyp_io, "Min Write Megabytes", profile_io_s_t, + write_size.min); + MEM_ADD_DBL(mtyp_io, "Ave Write Megabytes", profile_io_s_t, + write_size.ave); + MEM_ADD_DBL(mtyp_io, "Max Write Megabytes", profile_io_s_t, + write_size.max); + MEM_ADD_DBL(mtyp_io, "Total Write Megabytes", profile_io_s_t, + write_size.total); + + return mtyp_io; +} + +static hid_t _io_s_create_file_datatype(void) +{ + hid_t ftyp_io = -1; + + ftyp_io = H5Tcreate(H5T_COMPOUND, TOD_LEN+17*8); + if (ftyp_io < 0) { + debug3("PROFILE: failed to create IO file datatype"); + return -1; + } + moffset = TOD_LEN; + FILE_ADD_DATE_TIME(ftyp_io, "Start Time", 0); + FILE_ADD_UINT64(ftyp_io, "Elapsed Time"); + FILE_ADD_UINT64(ftyp_io, "Min Reads"); + FILE_ADD_UINT64(ftyp_io, "Ave Reads"); + FILE_ADD_UINT64(ftyp_io, "Max Reads"); + FILE_ADD_UINT64(ftyp_io, "Total Reads"); + FILE_ADD_DBL(ftyp_io, "Min Read Megabytes"); + FILE_ADD_DBL(ftyp_io, "Ave Read Megabytes"); + FILE_ADD_DBL(ftyp_io, "Max Read Megabytes"); + FILE_ADD_DBL(ftyp_io, "Total Read Megabytes"); + FILE_ADD_UINT64(ftyp_io, "Min Writes"); + FILE_ADD_UINT64(ftyp_io, "Ave Writes"); + FILE_ADD_UINT64(ftyp_io, "Max Writes"); + FILE_ADD_UINT64(ftyp_io, "Total Writes"); + FILE_ADD_DBL(ftyp_io, "Min Write Megabytes"); + FILE_ADD_DBL(ftyp_io, "Ave Write Megabytes"); + FILE_ADD_DBL(ftyp_io, "Max Write Megabytes"); + FILE_ADD_DBL(ftyp_io, "Total Write Megabytes"); + + return ftyp_io; +} + +static void *_io_init_job_series(int 
n_samples) +{ + profile_io_t* io_data; + io_data = xmalloc(n_samples * sizeof(profile_io_t)); + if (io_data == NULL) { + debug3("PROFILE: failed to get memory for combined io data"); + return NULL; + } + return (void*) io_data; +} + +static char** _io_get_series_tod(void* data, int nsmp) +{ + int ix; + char **tod_values = NULL; + profile_io_t* io_series = (profile_io_t*) data; + tod_values = (char**) xmalloc(nsmp*sizeof(char*)); + if (tod_values == NULL) { + info("Failed to get memory for io tod"); + return NULL; + } + for (ix=0; ix < nsmp; ix++) { + tod_values[ix] = xstrdup(io_series[ix].tod); + } + return tod_values; +} + +static double* _io_get_series_values(char* data_name, void* data, int nsmp) +{ + int ix; + profile_io_t* io_series = (profile_io_t*) data; + double *io_values = NULL; + io_values = xmalloc(nsmp*sizeof(double)); + if (io_values == NULL) { + info("PROFILE: Failed to get memory for io data"); + return NULL; + } + if (strcasecmp(data_name,"Time") == 0) { + for (ix=0; ix < nsmp; ix++) { + io_values[ix] = (double) io_series[ix].time; + + } + return io_values; + } else if (strcasecmp(data_name,"Reads") == 0) { + for (ix=0; ix < nsmp; ix++) { + io_values[ix] = (double) io_series[ix].reads; + + } + return io_values; + } else if (strcasecmp(data_name,"Megabytes_Read") == 0) { + for (ix=0; ix < nsmp; ix++) { + io_values[ix] = io_series[ix].read_size; + + } + return io_values; + } else if (strcasecmp(data_name,"Writes") == 0) { + for (ix=0; ix < nsmp; ix++) { + io_values[ix] = (double) io_series[ix].writes; + + } + return io_values; + } else if (strcasecmp(data_name,"Megabytes_Write") == 0) { + for (ix=0; ix < nsmp; ix++) { + io_values[ix] = io_series[ix].write_size; + + } + return io_values; + } + xfree(io_values); + info("PROFILE: %s is invalid data item for io data", data_name); + return NULL; +} + +static void _io_merge_step_series( + hid_t group, void *prior, void *cur, void *buf) +{ + // This is a difference series + static uint64_t start_reads = 0; + static uint64_t start_writes = 0; + static double start_read_size = 0; + static double start_write_size = 0; + profile_io_t* prfCur = (profile_io_t*) cur; + profile_io_t* prfBuf = (profile_io_t*) buf; + struct tm *ts = slurm_localtime(&prfCur->time); + strftime(prfBuf->tod, TOD_LEN, TOD_FMT, ts); + if (prior == NULL) { + // First sample. 
+ seriesStart = prfCur->time; + prfBuf->time = 0; + start_reads = prfCur->reads; + prfBuf->reads = 0; + start_writes = prfCur->writes; + prfBuf->writes = 0; + start_read_size = prfCur->read_size; + prfBuf->read_size = 0; + start_write_size = prfCur->write_size; + prfBuf->write_size = 0; + } else { + prfBuf->time = prfCur->time - seriesStart; + prfBuf->reads = prfCur->reads - start_reads; + prfBuf->writes = prfCur->writes - start_writes; + prfBuf->read_size = prfCur->read_size - start_read_size; + prfBuf->write_size = prfCur->write_size - start_write_size; + } + return; +} + +static void *_io_series_total(int n_samples, void *data) +{ + profile_io_t* io_data; + profile_io_s_t* total; + if (n_samples < 1) + return NULL; + io_data = (profile_io_t*) data; + total = xmalloc(sizeof(profile_io_s_t)); + if (total == NULL) { + error("PROFILE: Out of memory getting I/O total"); + return NULL; + } + // Assuming io series are a running total, and the first + // sample just sets the initial conditions + strcpy(total->start_time, io_data[0].tod); + total->elapsed_time = io_data[n_samples-1].time; + INCR_DIF_SAMPLE(total, io_data, reads, n_samples); + INCR_DIF_SAMPLE(total, io_data, read_size, n_samples); + INCR_DIF_SAMPLE(total, io_data, writes, n_samples); + INCR_DIF_SAMPLE(total, io_data, write_size, n_samples); + return total; +} + +static void _io_extract_series( + FILE* fp, bool put_header, int job, int step, + char *node, char *series, void *data, int size_data) +{ + int n_items, ix; + profile_io_t* io_data = (profile_io_t*) data; + if (put_header) { + fprintf(fp,"Job,Step,Node,Series,Date_Time,Elapsed_time," + "Reads,Read Megabytes,Writes,Write Megabytes\n"); + } + n_items = size_data / sizeof(profile_io_t); + for (ix=0; ix < n_items; ix++) { + fprintf(fp,"%d,%d,%s,%s,%s,%ld,%ld,%.3f,%ld,%.3f\n", + job, step, node, series, + io_data[ix].tod, io_data[ix].time, + io_data[ix].reads, io_data[ix].read_size, + io_data[ix].writes, io_data[ix].write_size); + } + return; +} + +static void _io_extract_total( + FILE* fp, bool put_header, int job, int step, + char *node, char *series, void *data, int size_data) +{ + profile_io_s_t* io_data = (profile_io_s_t*) data; + if (put_header) { + fprintf(fp,"Job,Step,Node,Series,Start_Time,Elapsed_time," + "Min_Reads,Ave_Reads,Max_Reads,Total_Reads," + "Min_Read_Megabytes,Ave_Read_Megabytes," + "Max_Read_Megabytes,Total_Read_Megabytes," + "Min_Writes,Ave_Writes,Max_Writes,Total_Writes," + "Min_Write_Megabytes,Ave_Write_Megabytes," + "Max_Write_Megabytes,Total_Write_Megabytes\n"); + } + fprintf(fp, "%d,%d,%s,%s,%s,%ld", job, step, node, series, + io_data->start_time, io_data->elapsed_time); + PUT_UINT_SUM(fp, io_data->reads, ","); + PUT_DBL_SUM(fp, io_data->read_size, ","); + PUT_UINT_SUM(fp, io_data->writes, ","); + PUT_DBL_SUM(fp, io_data->write_size, ","); + fprintf(fp, "\n"); + return; +} + +static hdf5_api_ops_t* _io_profile_factory(void) +{ + hdf5_api_ops_t* ops = xmalloc(sizeof(hdf5_api_ops_t)); + ops->dataset_size = &_io_dataset_size; + ops->create_memory_datatype = &_io_create_memory_datatype; + ops->create_file_datatype = &_io_create_file_datatype; + ops->create_s_memory_datatype = &_io_s_create_memory_datatype; + ops->create_s_file_datatype = &_io_s_create_file_datatype; + ops->init_job_series = &_io_init_job_series; + ops->get_series_tod = &_io_get_series_tod; + ops->get_series_values = &_io_get_series_values; + ops->merge_step_series = &_io_merge_step_series; + ops->series_total = &_io_series_total; + ops->extract_series = &_io_extract_series; + 
ops->extract_total = &_io_extract_total; + return ops; +} + + +// ============================================================================ +// Routines supporting Network Data type +// ============================================================================ + +static int _network_dataset_size(void) +{ + return sizeof(profile_network_t); +} + +static hid_t _network_create_memory_datatype(void) +{ + hid_t mtyp_network = H5Tcreate(H5T_COMPOUND, + sizeof(profile_network_t)); + if (mtyp_network < 0) { + debug3("PROFILE: failed to create Network memory datatype"); + return -1; + } + MEM_ADD_DATE_TIME(mtyp_network, "Date_Time", profile_network_t, tod); + MEM_ADD_UINT64(mtyp_network, "Time", profile_network_t, time); + MEM_ADD_UINT64(mtyp_network, "Packets_In", + profile_network_t, packets_in); + MEM_ADD_DBL(mtyp_network, "Megabytes_In", profile_network_t, size_in); + MEM_ADD_UINT64(mtyp_network, "Packets_Out", + profile_network_t, packets_out); + MEM_ADD_DBL(mtyp_network, "Megabytes_Out", profile_network_t, size_out); + + return mtyp_network; +} + +static hid_t _network_create_file_datatype(void) +{ + hid_t ftyp_network = H5Tcreate(H5T_COMPOUND, TOD_LEN+5*8); + if (ftyp_network < 0) { + debug3("PROFILE: failed to create Network file datatype"); + return -1; + } + moffset = TOD_LEN; + FILE_ADD_DATE_TIME(ftyp_network, "Date_Time", 0); + FILE_ADD_UINT64(ftyp_network, "Time"); + FILE_ADD_UINT64(ftyp_network, "Packets_In"); + FILE_ADD_DBL(ftyp_network, "Megabytes_In"); + FILE_ADD_UINT64(ftyp_network, "Packets_Out"); + FILE_ADD_DBL(ftyp_network, "Megabytes_Out"); + + return ftyp_network; +} + +static hid_t _network_s_create_memory_datatype(void) +{ + hid_t mtyp_network = -1; + + mtyp_network = H5Tcreate(H5T_COMPOUND, sizeof(profile_network_s_t)); + if (mtyp_network < 0) { + debug3("PROFILE: failed to create Network memory datatype"); + return -1; + } + MEM_ADD_DATE_TIME(mtyp_network, "Start Time", profile_network_s_t, + start_time); + MEM_ADD_UINT64(mtyp_network, "Elapsed Time", profile_network_s_t, + elapsed_time); + MEM_ADD_UINT64(mtyp_network, "Min Packets In", profile_network_s_t, + packets_in.min); + MEM_ADD_UINT64(mtyp_network, "Ave Packets In", profile_network_s_t, + packets_in.ave); + MEM_ADD_UINT64(mtyp_network, "Max Packets In", profile_network_s_t, + packets_in.max); + MEM_ADD_UINT64(mtyp_network, "Total Packets In", profile_network_s_t, + packets_in.total); + MEM_ADD_DBL(mtyp_network, "Min Megabytes In", profile_network_s_t, + size_in.min); + MEM_ADD_DBL(mtyp_network, "Ave Megabytes In", profile_network_s_t, + size_in.ave); + MEM_ADD_DBL(mtyp_network, "Max Megabytes In", profile_network_s_t, + size_in.max); + MEM_ADD_DBL(mtyp_network, "Total Megabytes In", profile_network_s_t, + size_in.total); + MEM_ADD_UINT64(mtyp_network, "Min Packets Out", profile_network_s_t, + packets_out.min); + MEM_ADD_UINT64(mtyp_network, "Ave Packets Out", profile_network_s_t, + packets_out.ave); + MEM_ADD_UINT64(mtyp_network, "Max Packets Out", profile_network_s_t, + packets_out.max); + MEM_ADD_UINT64(mtyp_network, "Total Packets Out", profile_network_s_t, + packets_out.total); + MEM_ADD_DBL(mtyp_network, "Min Megabytes Out", profile_network_s_t, + size_out.min); + MEM_ADD_DBL(mtyp_network, "Ave Megabytes Out", profile_network_s_t, + size_out.ave); + MEM_ADD_DBL(mtyp_network, "Max Megabytes Out", profile_network_s_t, + size_out.max); + MEM_ADD_DBL(mtyp_network, "Total Megabytes Out", profile_network_s_t, + size_out.total); + + return mtyp_network; +} + +static hid_t _network_s_create_file_datatype(void) 
+{ + hid_t ftyp_network = H5Tcreate(H5T_COMPOUND, TOD_LEN+17*8); + if (ftyp_network < 0) { + debug3("PROFILE: failed to create Network file datatype"); + return -1; + } + moffset = TOD_LEN; + FILE_ADD_DATE_TIME(ftyp_network, "Start Time", 0); + FILE_ADD_UINT64(ftyp_network, "Elapsed Time"); + FILE_ADD_UINT64(ftyp_network, "Min Packets In"); + FILE_ADD_UINT64(ftyp_network, "Ave Packets In"); + FILE_ADD_UINT64(ftyp_network, "Max Packets In"); + FILE_ADD_UINT64(ftyp_network, "Total Packets In"); + FILE_ADD_DBL(ftyp_network, "Min Megabytes In"); + FILE_ADD_DBL(ftyp_network, "Ave Megabytes In"); + FILE_ADD_DBL(ftyp_network, "Max Megabytes In"); + FILE_ADD_DBL(ftyp_network, "Total Megabytes In"); + FILE_ADD_UINT64(ftyp_network, "Min Packets Out"); + FILE_ADD_UINT64(ftyp_network, "Ave Packets Out"); + FILE_ADD_UINT64(ftyp_network, "Max Packets Out"); + FILE_ADD_UINT64(ftyp_network, "Total Packets Out"); + FILE_ADD_DBL(ftyp_network, "Min Megabytes Out"); + FILE_ADD_DBL(ftyp_network, "Ave Megabytes Out"); + FILE_ADD_DBL(ftyp_network, "Max Megabytes Out"); + FILE_ADD_DBL(ftyp_network, "Total Megabytes Out"); + + return ftyp_network; +} + +static void *_network_init_job_series(int n_samples) +{ + profile_network_t* network_data; + + network_data = xmalloc(n_samples * sizeof(profile_network_t)); + if (network_data == NULL) { + debug3("PROFILE: failed to get memory for network data"); + return NULL; + } + return (void*) network_data; +} + +static char** _network_get_series_tod(void* data, int nsmp) +{ + int ix; + char **tod_values = NULL; + profile_network_t* network_series = (profile_network_t*) data; + tod_values = (char**) xmalloc(nsmp*sizeof(char*)); + if (tod_values == NULL) { + info("Failed to get memory for network tod"); + return NULL; + } + for (ix=0; ix < nsmp; ix++) { + tod_values[ix] = xstrdup(network_series[ix].tod); + } + return tod_values; +} + +static double* _network_get_series_values(char* data_name, void* data, int nsmp) +{ + int ix; + profile_network_t* network_series = (profile_network_t*) data; + double *network_values = NULL; + network_values = xmalloc(nsmp*sizeof(double)); + if (network_values == NULL) { + info("PROFILE: Failed to get memory for network data"); + return NULL; + } + if (strcasecmp(data_name,"Time") == 0) { + for (ix=0; ix < nsmp; ix++) { + network_values[ix] = (double) network_series[ix].time; + + } + return network_values; + } else if (strcasecmp(data_name,"Packets_In") == 0) { + for (ix=0; ix < nsmp; ix++) { + network_values[ix] = + (double) network_series[ix].packets_in; + + } + return network_values; + } else if (strcasecmp(data_name,"Megabytes_In") == 0) { + for (ix=0; ix < nsmp; ix++) { + network_values[ix] = network_series[ix].size_in; + + } + return network_values; + } else if (strcasecmp(data_name,"Packets_Out") == 0) { + for (ix=0; ix < nsmp; ix++) { + network_values[ix] = + (double) network_series[ix].packets_out; + + } + return network_values; + } else if (strcasecmp(data_name,"Megabytes_Out") == 0) { + for (ix=0; ix < nsmp; ix++) { + network_values[ix] = network_series[ix].size_out; + + } + return network_values; + } + xfree(network_values); + info("PROFILE: %s is invalid data item for network data", data_name); + return NULL; +} + +static void _network_merge_step_series( + hid_t group, void *prior, void *cur, void *buf) +{ +// This is a difference series + profile_network_t* prf_cur = (profile_network_t*) cur; + profile_network_t* prf_buf = (profile_network_t*) buf; + struct tm *ts = slurm_localtime(&prf_cur->time); + strftime(prf_buf->tod, TOD_LEN, 
TOD_FMT, ts); + if (prior == NULL) { + // First sample. + seriesStart = prf_cur->time; + prf_buf->time = 0; + } else { + prf_buf->time = prf_cur->time - seriesStart; + } + prf_buf->packets_in = prf_cur->packets_in; + prf_buf->packets_out = prf_cur->packets_out; + prf_buf->size_in = prf_cur->size_in; + prf_buf->size_out = prf_cur->size_out; + return; +} + +static void *_network_series_total(int n_samples, void *data) +{ + profile_network_t* network_data; + profile_network_s_t* total; + if (n_samples < 1) + return NULL; + network_data = (profile_network_t*) data; + total = xmalloc(sizeof(profile_network_s_t)); + if (total == NULL) { + error("PROFILE: Out of memory getting network total"); + return NULL; + } + // Assuming network series are a running total, and the first + // sample just sets the initial conditions + strcpy(total->start_time, network_data[0].tod); + total->elapsed_time = network_data[n_samples-1].time; + INCR_DIF_SAMPLE(total, network_data, packets_in, n_samples); + INCR_DIF_SAMPLE(total, network_data, size_in, n_samples); + INCR_DIF_SAMPLE(total, network_data, packets_out, n_samples); + INCR_DIF_SAMPLE(total, network_data, size_out, n_samples); + return total; +} + +static void _network_extract_series( + FILE* fp, bool put_header, int job, int step, + char *node, char *series, void *data, int size_data) +{ + int n_items, ix; + profile_network_t* network_data = (profile_network_t*) data; + + if (put_header) { + fprintf(fp,"Job,Step,Node,Series,Date_Time,Elapsed_time," + "Packets_In,MegaBytes_In,Packets_Out,MegaBytes_Out\n"); + } + n_items = size_data / sizeof(profile_network_t); + for (ix=0; ix < n_items; ix++) { + fprintf(fp,"%d,%d,%s,%s,%s,%ld,%ld,%.3f,%ld,%.3f\n", + job, step, node,series, + network_data[ix].tod, network_data[ix].time, + network_data[ix].packets_in, network_data[ix].size_in, + network_data[ix].packets_out, + network_data[ix].size_out); + } + return; +} + +static void _network_extract_total( + FILE* fp, bool put_header, int job, int step, + char *node, char *series, void *data, int size_data) +{ + profile_network_s_t* network_data = (profile_network_s_t*) data; + if (put_header) { + fprintf(fp,"Job,Step,Node,Series,Start_Time,Elapsed_time," + "Min_Packets_In,Ave_Packets_In," + "Max_Packets_In,Total_Packets_In," + "Min_Megabytes_In,Ave_Megabytes_In," + "Max_Megabytes_In,Total_Megabytes_In," + "Min_Packets_Out,Ave_Packets_Out," + "Max_Packets_Out,Total_Packets_Out," + "Min_Megabytes_Out,Ave_Megabytes_Out," + "Max_Megabytes_Out,Total_Megabytes_Out\n"); + } + fprintf(fp, "%d,%d,%s,%s,%s,%ld", job, step, node, series, + network_data->start_time, network_data->elapsed_time); + PUT_UINT_SUM(fp, network_data->packets_in, ","); + PUT_DBL_SUM(fp, network_data->size_in, ","); + PUT_UINT_SUM(fp, network_data->packets_out, ","); + PUT_DBL_SUM(fp, network_data->size_out, ","); + fprintf(fp, "\n"); + return; +} + +static hdf5_api_ops_t *_network_profile_factory(void) +{ + hdf5_api_ops_t* ops = xmalloc(sizeof(hdf5_api_ops_t)); + ops->dataset_size = &_network_dataset_size; + ops->create_memory_datatype = &_network_create_memory_datatype; + ops->create_file_datatype = &_network_create_file_datatype; + ops->create_s_memory_datatype = &_network_s_create_memory_datatype; + ops->create_s_file_datatype = &_network_s_create_file_datatype; + ops->init_job_series = &_network_init_job_series; + ops->get_series_tod = &_network_get_series_tod; + ops->get_series_values = &_network_get_series_values; + ops->merge_step_series = &_network_merge_step_series; + ops->series_total = 
&_network_series_total; + ops->extract_series = &_network_extract_series; + ops->extract_total = &_network_extract_total; + return ops; +} + +// ============================================================================ +// Routines supporting Task Data type +// ============================================================================ + +static int _task_dataset_size(void) +{ + return sizeof(profile_task_t); +} + +static hid_t _task_create_memory_datatype(void) +{ + hid_t mtyp_task = H5Tcreate(H5T_COMPOUND, sizeof(profile_task_t)); + if (mtyp_task < 0) { + debug3("PROFILE: failed to create Task memory datatype"); + return -1; + } + MEM_ADD_DATE_TIME(mtyp_task, "Date_Time", profile_task_t, tod); + MEM_ADD_UINT64(mtyp_task, "Time", profile_task_t, time); + MEM_ADD_UINT64(mtyp_task, "CPU_Frequency", profile_task_t, cpu_freq); + MEM_ADD_UINT64(mtyp_task, "CPU_Time", profile_task_t, cpu_time); + MEM_ADD_DBL(mtyp_task, "CPU_Utilization", + profile_task_t, cpu_utilization); + MEM_ADD_UINT64(mtyp_task, "RSS", profile_task_t, rss); + MEM_ADD_UINT64(mtyp_task, "VM_Size", profile_task_t, vm_size); + MEM_ADD_UINT64(mtyp_task, "Pages", profile_task_t, pages); + MEM_ADD_DBL(mtyp_task, "Read_Megabytes", profile_task_t, read_size); + MEM_ADD_DBL(mtyp_task, "Write_Megabytes", profile_task_t, write_size); + + return mtyp_task; +} + +static hid_t _task_create_file_datatype(void) +{ + hid_t ftyp_task = H5Tcreate(H5T_COMPOUND, TOD_LEN+9*8); + if (ftyp_task < 0) { + debug3("PROFILE: failed to create Task file datatype"); + return -1; + } + moffset = TOD_LEN; + FILE_ADD_DATE_TIME(ftyp_task, "Date_Time", 0); + FILE_ADD_UINT64(ftyp_task, "Time"); + FILE_ADD_UINT64(ftyp_task, "CPU_Frequency"); + FILE_ADD_UINT64(ftyp_task, "CPU_Time"); + FILE_ADD_DBL(ftyp_task, "CPU_Utilization"); + FILE_ADD_UINT64(ftyp_task, "RSS"); + FILE_ADD_UINT64(ftyp_task, "VM_Size"); + FILE_ADD_UINT64(ftyp_task, "Pages"); + FILE_ADD_DBL(ftyp_task, "Read_Megabytes"); + FILE_ADD_DBL(ftyp_task, "Write_Megabytes"); + + return ftyp_task; +} + +static hid_t _task_s_create_memory_datatype(void) +{ + hid_t mtyp_task = H5Tcreate(H5T_COMPOUND, sizeof(profile_task_s_t)); + if (mtyp_task < 0) { + debug3("PROFILE: failed to create Task memory datatype"); + return -1; + } + MEM_ADD_DATE_TIME(mtyp_task, "Start Time", profile_task_s_t, + start_time); + MEM_ADD_UINT64(mtyp_task, "Elapsed Time", profile_task_s_t, + elapsed_time); + MEM_ADD_UINT64(mtyp_task, "Min CPU Frequency", profile_task_s_t, + cpu_freq.min); + MEM_ADD_UINT64(mtyp_task, "Ave CPU Frequency", profile_task_s_t, + cpu_freq.ave); + MEM_ADD_UINT64(mtyp_task, "Max CPU Frequency", profile_task_s_t, + cpu_freq.max); + MEM_ADD_UINT64(mtyp_task, "Total CPU Frequency", profile_task_s_t, + cpu_freq.total); + MEM_ADD_UINT64(mtyp_task, "Min CPU Time", profile_task_s_t, + cpu_time.min); + MEM_ADD_UINT64(mtyp_task, "Ave CPU Time", profile_task_s_t, + cpu_time.ave); + MEM_ADD_UINT64(mtyp_task, "Max CPU Time", profile_task_s_t, + cpu_time.max); + MEM_ADD_UINT64(mtyp_task, "Total CPU Time", profile_task_s_t, + cpu_time.total); + MEM_ADD_DBL(mtyp_task, "Min CPU Utilization", profile_task_s_t, + cpu_utilization.min); + MEM_ADD_DBL(mtyp_task, "Ave CPU Utilization", profile_task_s_t, + cpu_utilization.ave); + MEM_ADD_DBL(mtyp_task, "Max CPU Utilization", profile_task_s_t, + cpu_utilization.max); + MEM_ADD_DBL(mtyp_task, "Total CPU Utilization", profile_task_s_t, + cpu_utilization.total); + MEM_ADD_UINT64(mtyp_task, "Min RSS", profile_task_s_t, rss.min); + MEM_ADD_UINT64(mtyp_task, "Ave RSS", profile_task_s_t, 
rss.ave); + MEM_ADD_UINT64(mtyp_task, "Max RSS", profile_task_s_t, rss.max); + MEM_ADD_UINT64(mtyp_task, "Total RSS", profile_task_s_t, rss.total); + MEM_ADD_UINT64(mtyp_task, "Min VM Size", profile_task_s_t, vm_size.min); + MEM_ADD_UINT64(mtyp_task, "Ave VM Size", profile_task_s_t, vm_size.ave); + MEM_ADD_UINT64(mtyp_task, "Max VM Size", profile_task_s_t, vm_size.max); + MEM_ADD_UINT64(mtyp_task, "Total VM Size", + profile_task_s_t, vm_size.total); + MEM_ADD_UINT64(mtyp_task, "Min Pages", profile_task_s_t, pages.min); + MEM_ADD_UINT64(mtyp_task, "Ave Pages", profile_task_s_t, pages.ave); + MEM_ADD_UINT64(mtyp_task, "Max Pages", profile_task_s_t, pages.max); + MEM_ADD_UINT64(mtyp_task, "Total Pages", profile_task_s_t, pages.total); + MEM_ADD_DBL(mtyp_task, "Min Read Megabytes", profile_task_s_t, + read_size.min); + MEM_ADD_DBL(mtyp_task, "Ave Read Megabytes", profile_task_s_t, + read_size.ave); + MEM_ADD_DBL(mtyp_task, "Max Read Megabytes", profile_task_s_t, + read_size.max); + MEM_ADD_DBL(mtyp_task, "Total Read Megabytes", profile_task_s_t, + read_size.total); + MEM_ADD_DBL(mtyp_task, "Min Write Megabytes", profile_task_s_t, + write_size.min); + MEM_ADD_DBL(mtyp_task, "Ave Write Megabytes", profile_task_s_t, + write_size.ave); + MEM_ADD_DBL(mtyp_task, "Max Write Megabytes", profile_task_s_t, + write_size.max); + MEM_ADD_DBL(mtyp_task, "Total Write Megabytes", profile_task_s_t, + write_size.total); + + return mtyp_task; +} + +static hid_t _task_s_create_file_datatype(void) +{ + hid_t ftyp_task = H5Tcreate(H5T_COMPOUND, TOD_LEN+33*8); + if (ftyp_task < 0) { + debug3("PROFILE: failed to create Task file datatype"); + return -1; + } + moffset = TOD_LEN; + FILE_ADD_DATE_TIME(ftyp_task, "Start Time", 0); + FILE_ADD_UINT64(ftyp_task, "Elapsed Time"); + FILE_ADD_UINT64(ftyp_task, "Min CPU Frequency"); + FILE_ADD_UINT64(ftyp_task, "Ave CPU Frequency"); + FILE_ADD_UINT64(ftyp_task, "Max CPU Frequency"); + FILE_ADD_UINT64(ftyp_task, "Total CPU Frequency"); + FILE_ADD_UINT64(ftyp_task, "Min CPU Time"); + FILE_ADD_UINT64(ftyp_task, "Ave CPU Time"); + FILE_ADD_UINT64(ftyp_task, "Max CPU Time"); + FILE_ADD_UINT64(ftyp_task, "Total CPU Time"); + FILE_ADD_DBL(ftyp_task, "Min CPU Utilization"); + FILE_ADD_DBL(ftyp_task, "Ave CPU Utilization"); + FILE_ADD_DBL(ftyp_task, "Max CPU Utilization"); + FILE_ADD_DBL(ftyp_task, "Total CPU Utilization"); + FILE_ADD_UINT64(ftyp_task, "Min RSS"); + FILE_ADD_UINT64(ftyp_task, "Ave RSS"); + FILE_ADD_UINT64(ftyp_task, "Max RSS"); + FILE_ADD_UINT64(ftyp_task, "Total RSS"); + FILE_ADD_UINT64(ftyp_task, "Min VM Size"); + FILE_ADD_UINT64(ftyp_task, "Ave VM Size"); + FILE_ADD_UINT64(ftyp_task, "Max VM Size"); + FILE_ADD_UINT64(ftyp_task, "Total VM Size"); + FILE_ADD_UINT64(ftyp_task, "Min Pages"); + FILE_ADD_UINT64(ftyp_task, "Ave Pages"); + FILE_ADD_UINT64(ftyp_task, "Max Pages"); + FILE_ADD_UINT64(ftyp_task, "Total Pages"); + FILE_ADD_DBL(ftyp_task, "Min Read Megabytes"); + FILE_ADD_DBL(ftyp_task, "Ave Read Megabytes"); + FILE_ADD_DBL(ftyp_task, "Max Read Megabytes"); + FILE_ADD_DBL(ftyp_task, "Total Read Megabytes"); + FILE_ADD_DBL(ftyp_task, "Min Write Megabytes"); + FILE_ADD_DBL(ftyp_task, "Ave Write Megabytes"); + FILE_ADD_DBL(ftyp_task, "Max Write Megabytes"); + FILE_ADD_DBL(ftyp_task, "Total Write Megabytes"); + + return ftyp_task; +} + +static void *_task_init_job_series(int n_samples) +{ + profile_task_t* task_data; + task_data = xmalloc(n_samples * sizeof(profile_task_t)); + if (task_data == NULL) { + debug3("PROFILE: failed to get memory for combined task data"); + 
return NULL; + } + return (void*) task_data; +} + +static char** _task_get_series_tod(void* data, int nsmp) +{ + int ix; + char **tod_values = NULL; + profile_task_t* task_series = (profile_task_t*) data; + tod_values = (char**) xmalloc(nsmp*sizeof(char*)); + if (tod_values == NULL) { + info("Failed to get memory for task tod"); + return NULL; + } + for (ix=0; ix < nsmp; ix++) { + tod_values[ix] = xstrdup(task_series[ix].tod); + } + return tod_values; +} + +static double* _task_get_series_values(char* data_name, void* data, int nsmp) +{ + int ix; + profile_task_t* task_series = (profile_task_t*) data; + double *task_values = NULL; + task_values = xmalloc(nsmp*sizeof(double)); + if (task_values == NULL) { + info("PROFILE: Failed to get memory for task data"); + return NULL; + } + if (strcasecmp(data_name,"Time") == 0) { + for (ix=0; ix < nsmp; ix++) { + task_values[ix] = (double) task_series[ix].time; + + } + return task_values; + } else if (strcasecmp(data_name,"CPU_Frequency") == 0) { + for (ix=0; ix < nsmp; ix++) { + task_values[ix] = (double) task_series[ix].cpu_freq; + + } + return task_values; + } else if (strcasecmp(data_name,"CPU_Time") == 0) { + for (ix=0; ix < nsmp; ix++) { + task_values[ix] = (double) task_series[ix].cpu_time; + + } + return task_values; + } else if (strcasecmp(data_name,"CPU_Utilization") == 0) { + for (ix=0; ix < nsmp; ix++) { + task_values[ix] = task_series[ix].cpu_utilization; + + } + return task_values; + } else if (strcasecmp(data_name,"RSS") == 0) { + for (ix=0; ix < nsmp; ix++) { + task_values[ix] = (double) task_series[ix].rss; + + } + return task_values; + } else if (strcasecmp(data_name,"VM_Size") == 0) { + for (ix=0; ix < nsmp; ix++) { + task_values[ix] = (double) task_series[ix].vm_size; + + } + return task_values; + } else if (strcasecmp(data_name,"Pages") == 0) { + for (ix=0; ix < nsmp; ix++) { + task_values[ix] = (double) task_series[ix].pages; + + } + return task_values; + } else if (strcasecmp(data_name,"Read_Megabytes") == 0) { + for (ix=0; ix < nsmp; ix++) { + task_values[ix] = task_series[ix].read_size; + + } + return task_values; + } else if (strcasecmp(data_name,"Write_Megabytes") == 0) { + for (ix=0; ix < nsmp; ix++) { + task_values[ix] = task_series[ix].write_size; + + } + return task_values; + } + xfree(task_values); + info("PROFILE: %s is invalid data item for task data", data_name); + return NULL; +} + +static void _task_merge_step_series( + hid_t group, void *prior, void *cur, void *buf) +{ +// This is a running total series + profile_task_t* prf_prior = (profile_task_t*) prior; + profile_task_t* prf_cur = (profile_task_t*) cur; + profile_task_t* buf_prv = NULL; + profile_task_t* buf_cur = (profile_task_t*) buf; + struct tm *ts; + + ts = slurm_localtime(&prf_cur->time); + strftime(buf_cur->tod, TOD_LEN, TOD_FMT, ts); + if (prf_prior == NULL) { + // First sample. 
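+		// The task series is a running total: the first sample only
+		// establishes the series start time and zeroes the differenced
+		// fields.  Later samples are differenced against the previous
+		// sample and CPU utilization is recomputed over the interval,
+		// while the gauge values (frequency, RSS, VM size, pages) are
+		// copied through unchanged.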
+		seriesStart = prf_cur->time;
+		buf_cur->time = 0;
+		buf_cur->cpu_time = 0;
+		buf_cur->cpu_utilization = 0;
+		buf_cur->read_size = 0.0;
+		buf_cur->write_size = 0.0;
+	} else {
+		buf_prv = buf_cur - 1;
+		buf_cur->time = prf_cur->time - seriesStart;
+		buf_cur->cpu_time = prf_cur->cpu_time - prf_prior->cpu_time;
+		buf_cur->cpu_utilization = 100.0*((double) buf_cur->cpu_time /
+				(double) (buf_cur->time - buf_prv->time));
+		buf_cur->read_size =
+			prf_cur->read_size - prf_prior->read_size;
+		buf_cur->write_size =
+			prf_cur->write_size - prf_prior->write_size;
+	}
+	buf_cur->cpu_freq = prf_cur->cpu_freq;
+	buf_cur->rss = prf_cur->rss;
+	buf_cur->vm_size = prf_cur->vm_size;
+	buf_cur->pages = prf_cur->pages;
+	return;
+}
+
+static void *_task_series_total(int n_samples, void *data)
+{
+	profile_task_t* task_data;
+	profile_task_s_t* total;
+	if (n_samples < 1)
+		return NULL;
+	task_data = (profile_task_t*) data;
+	total = xmalloc(sizeof(profile_task_s_t));
+	if (total == NULL) {
+		error("PROFILE: Out of memory getting task total");
+		return NULL;
+	}
+	strcpy(total->start_time, task_data[0].tod);
+	total->elapsed_time = task_data[n_samples-1].time;
+	INCR_DIF_SAMPLE(total, task_data, cpu_freq, n_samples);
+	INCR_RT_SAMPLE(total, task_data, cpu_time, n_samples);
+	INCR_DIF_SAMPLE(total, task_data, cpu_utilization, n_samples);
+	INCR_DIF_SAMPLE(total, task_data, rss, n_samples);
+	INCR_DIF_SAMPLE(total, task_data, vm_size, n_samples);
+	INCR_DIF_SAMPLE(total, task_data, pages, n_samples);
+	INCR_RT_SAMPLE(total, task_data, read_size, n_samples);
+	INCR_RT_SAMPLE(total, task_data, write_size, n_samples);
+	return total;
+}
+
+static void _task_extract_series(
+	FILE* fp, bool put_header, int job, int step,
+	char *node, char *series, void *data, int size_data)
+{
+	int n_items, ix;
+	profile_task_t* task_data = (profile_task_t*) data;
+	if (put_header) {
+		fprintf(fp,"Job,Step,Node,Series,Date_Time,Elapsed_time,"
+			"CPU_Frequency,CPU_Time,"
+			"CPU_Utilization,RSS,VM_Size,Pages,"
+			"Read_Megabytes,Write_Megabytes\n");
+	}
+	n_items = size_data / sizeof(profile_task_t);
+	for (ix=0; ix < n_items; ix++) {
+		fprintf(fp,"%d,%d,%s,%s,%s,%ld,%ld,%ld,%.3f",
+			job, step, node, series,
+			task_data[ix].tod, task_data[ix].time,
+			task_data[ix].cpu_freq,
+			task_data[ix].cpu_time, task_data[ix].cpu_utilization);
+		fprintf(fp,",%ld,%ld,%ld,%.3f,%.3f\n", task_data[ix].rss,
+			task_data[ix].vm_size, task_data[ix].pages,
+			task_data[ix].read_size, task_data[ix].write_size);
+	}
+	return;
+}
+
+static void _task_extract_total(
+	FILE* fp, bool put_header, int job, int step,
+	char *node, char *series, void *data, int size_data)
+{
+
+	profile_task_s_t* task_data = (profile_task_s_t*) data;
+	if (put_header) {
+		fprintf(fp,"Job,Step,Node,Series,Start_Time,Elapsed_time,"
+			"Min_CPU_Frequency,Ave_CPU_Frequency,"
+			"Max_CPU_Frequency,Total_CPU_Frequency,"
+			"Min_CPU_Time,Ave_CPU_Time,"
+			"Max_CPU_Time,Total_CPU_Time,"
+			"Min_CPU_Utilization,Ave_CPU_Utilization,"
+			"Max_CPU_Utilization,Total_CPU_Utilization,"
+			"Min_RSS,Ave_RSS,Max_RSS,Total_RSS,"
+			"Min_VMSize,Ave_VMSize,Max_VMSize,Total_VMSize,"
+			"Min_Pages,Ave_Pages,Max_Pages,Total_Pages,"
+			"Min_Read_Megabytes,Ave_Read_Megabytes,"
+			"Max_Read_Megabytes,Total_Read_Megabytes,"
+			"Min_Write_Megabytes,Ave_Write_Megabytes,"
+			"Max_Write_Megabytes,Total_Write_Megabytes\n");
+	}
+	fprintf(fp, "%d,%d,%s,%s,%s,%ld", job, step, node, series,
+		task_data->start_time, task_data->elapsed_time);
+	PUT_UINT_SUM(fp, task_data->cpu_freq, ",");
+	PUT_UINT_SUM(fp, task_data->cpu_time, ",");
+	PUT_DBL_SUM(fp, 
task_data->cpu_utilization, ","); + PUT_UINT_SUM(fp, task_data->rss, ","); + PUT_UINT_SUM(fp, task_data->vm_size, ","); + PUT_UINT_SUM(fp, task_data->pages, ","); + PUT_DBL_SUM(fp, task_data->read_size, ","); + PUT_DBL_SUM(fp, task_data->write_size, ","); + fprintf(fp, "\n"); + return; +} + +static hdf5_api_ops_t *_task_profile_factory(void) +{ + hdf5_api_ops_t* ops = xmalloc(sizeof(hdf5_api_ops_t)); + ops->dataset_size = &_task_dataset_size; + ops->create_memory_datatype = &_task_create_memory_datatype; + ops->create_file_datatype = &_task_create_file_datatype; + ops->create_s_memory_datatype = &_task_s_create_memory_datatype; + ops->create_s_file_datatype = &_task_s_create_file_datatype; + ops->init_job_series = &_task_init_job_series; + ops->get_series_tod = &_task_get_series_tod; + ops->get_series_values = &_task_get_series_values; + ops->merge_step_series = &_task_merge_step_series; + ops->series_total = &_task_series_total; + ops->extract_series = &_task_extract_series; + ops->extract_total = &_task_extract_total; + return ops; +} + +/* ============================================================================ + * Common support functions + ===========================================================================*/ + +extern hdf5_api_ops_t* profile_factory(uint32_t type) +{ + switch (type) { + case ACCT_GATHER_PROFILE_ENERGY: + return _energy_profile_factory(); + break; + case ACCT_GATHER_PROFILE_TASK: + return _task_profile_factory(); + break; + case ACCT_GATHER_PROFILE_LUSTRE: + return _io_profile_factory(); + break; + case ACCT_GATHER_PROFILE_NETWORK: + return _network_profile_factory(); + break; + default: + error("profile_factory: Unknown type %d sent", type); + return NULL; + } +} + + +extern void profile_init_old(void) +{ + typTOD = H5Tcopy (H5T_C_S1); + H5Tset_size (typTOD, TOD_LEN); /* create string of length TOD_LEN */ + + return; +} + +extern void profile_fini_old(void) +{ + H5Tclose(typTOD); + + return; +} + +extern char *get_data_set_name(char *type) +{ + static char dset_name[MAX_DATASET_NAME+1]; + dset_name[0] = '\0'; + sprintf(dset_name, "%s Data", type); + + return dset_name; +} + + +static char* _H5O_type_t2str(H5O_type_t type) +{ + switch (type) + { + case H5O_TYPE_UNKNOWN: + return "H5O_TYPE_UNKNOWN"; + case H5O_TYPE_GROUP: + return "H5O_TYPE_GROUP"; + case H5O_TYPE_DATASET: + return "H5O_TYPE_DATASET"; + case H5O_TYPE_NAMED_DATATYPE: + return "H5O_TYPE_NAMED_DATATYPE"; + case H5O_TYPE_NTYPES: + return "H5O_TYPE_NTYPES"; + default: + return "Invalid H5O_TYPE"; + } +} + + +extern void hdf5_obj_info(hid_t group, char *nam_group) +{ + char buf[MAX_GROUP_NAME+1]; + hsize_t nobj, nattr; + hid_t aid; + int i, len; + H5G_info_t group_info; + H5O_info_t object_info; + + if (group < 0) { + info("PROFILE: Group is not HDF5 object"); + return; + } + H5Gget_info(group, &group_info); + nobj = group_info.nlinks; + H5Oget_info(group, &object_info); + nattr = object_info.num_attrs; + info("PROFILE group: %s NumObject=%d NumAttributes=%d", + nam_group, (int) nobj, (int) nattr); + for (i = 0; (nobj>0) && (i<nobj); i++) { + H5Oget_info_by_idx(group, ".", H5_INDEX_NAME, H5_ITER_INC, i, + &object_info, H5P_DEFAULT); + len = H5Lget_name_by_idx(group, ".", H5_INDEX_NAME, + H5_ITER_INC, i, buf, MAX_GROUP_NAME, + H5P_DEFAULT); + if ((len > 0) && (len < MAX_GROUP_NAME)) { + info("PROFILE: Obj=%d Type=%s Name=%s", + i, _H5O_type_t2str(object_info.type), buf); + } else { + info("PROFILE: Obj=%d Type=%s Name=%s (is truncated)", + i, _H5O_type_t2str(object_info.type), buf); + } + } + for 
(i = 0; (nattr>0) && (i<nattr); i++) { + aid = H5Aopen_by_idx(group, ".", H5_INDEX_NAME, H5_ITER_INC, + i, H5P_DEFAULT, H5P_DEFAULT); + // Get the name of the attribute. + len = H5Aget_name(aid, MAX_ATTR_NAME, buf); + if (len < MAX_ATTR_NAME) { + info("PROFILE: Attr=%d Name=%s", i, buf); + } else { + info("PROFILE: Attr=%d Name=%s (is truncated)", i, buf); + } + H5Aclose(aid); + } + + return; +} + +extern char *get_string_attribute(hid_t parent, char *name) +{ + char *value = NULL; + + hid_t attr, type; + size_t size; + + attr = get_attribute_handle(parent, name); + if (attr < 0) { + debug3("PROFILE: Attribute=%s does not exist", name); + return NULL; + } + type = H5Aget_type(attr); + if (H5Tget_class(type) != H5T_STRING) { + H5Aclose(attr); + debug3("PROFILE: Attribute=%s is not a string", name); + return NULL; + } + size = H5Tget_size(type); + value = xmalloc(size+1); + if (value == NULL) { + H5Tclose(type); + H5Aclose(attr); + debug3("PROFILE: failed to malloc %d bytes for attribute=%s", + (int) size, + name); + return NULL; + } + if (H5Aread(attr, type, value) < 0) { + xfree(value); + H5Tclose(type); + H5Aclose(attr); + debug3("PROFILE: failed to read attribute=%s", name); + return NULL; + } + H5Tclose(type); + H5Aclose(attr); + + return value; +} + +extern int get_int_attribute(hid_t parent, char *name) +{ + int value = 0; + + hid_t attr; + attr = get_attribute_handle(parent, name); + if (attr < 0) { + debug3("PROFILE: Attribute=%s does not exist, returning", name); + return value; + } + if (H5Aread(attr, H5T_NATIVE_INT, &value) < 0) { + debug3("PROFILE: failed to read attribute=%s, returning", name); + } + H5Aclose(attr); + + return value; +} + +extern uint32_t get_uint32_attribute(hid_t parent, char *name) +{ + int value = 0; + hid_t attr; + + attr = get_attribute_handle(parent, name); + if (attr < 0) { + debug3("PROFILE: Attribute=%s does not exist, returning", name); + return value; + } + if (H5Aread(attr, H5T_NATIVE_UINT32, &value) < 0) { + debug3("PROFILE: failed to read attribute=%s, returning", name); + } + H5Aclose(attr); + + return value; +} + +extern void *get_hdf5_data(hid_t parent, uint32_t type, + char *nam_group, int *size_data) +{ + void * data = NULL; + + hid_t id_data_set, dtyp_memory; + hsize_t szDset; + herr_t ec; + char *subtype = NULL; + hdf5_api_ops_t* ops = profile_factory(type); + char *type_name = acct_gather_profile_type_to_string(type); + + if (ops == NULL) { + debug3("PROFILE: failed to create %s operations", + type_name); + return NULL; + } + subtype = get_string_attribute(parent, ATTR_SUBDATATYPE); + if (subtype < 0) { + xfree(ops); + debug3("PROFILE: failed to get %s attribute", + ATTR_SUBDATATYPE); + return NULL; + } + id_data_set = H5Dopen(parent, get_data_set_name(nam_group), + H5P_DEFAULT); + if (id_data_set < 0) { + xfree(subtype); + xfree(ops); + debug3("PROFILE: failed to open %s Data Set", + type_name); + return NULL; + } + if (strcmp(subtype, SUBDATA_SUMMARY)) + dtyp_memory = (*(ops->create_memory_datatype))(); + else + dtyp_memory = (*(ops->create_s_memory_datatype))(); + xfree(subtype); + if (dtyp_memory < 0) { + H5Dclose(id_data_set); + xfree(ops); + debug3("PROFILE: failed to create %s memory datatype", + type_name); + return NULL; + } + szDset = H5Dget_storage_size(id_data_set); + *size_data = (int) szDset; + if (szDset == 0) { + H5Tclose(dtyp_memory); + H5Dclose(id_data_set); + xfree(ops); + debug3("PROFILE: %s data set is empty", + type_name); + return NULL; + } + data = xmalloc(szDset); + if (data == NULL) { + H5Tclose(dtyp_memory); + 
H5Dclose(id_data_set); + xfree(ops); + debug3("PROFILE: failed to get memory for %s data set", + type_name); + return NULL; + } + ec = H5Dread(id_data_set, dtyp_memory, H5S_ALL, H5S_ALL, H5P_DEFAULT, + data); + if (ec < 0) { + H5Tclose(dtyp_memory); + H5Dclose(id_data_set); + xfree(data); + xfree(ops); + debug3("PROFILE: failed to read %s data", + type_name); + return NULL; + } + H5Tclose(dtyp_memory); + H5Dclose(id_data_set); + xfree(ops); + + return data; +} + +extern void put_hdf5_data(hid_t parent, uint32_t type, char *subtype, + char *group, void *data, int n_item) +{ + hid_t id_group, dtyp_memory, dtyp_file, id_data_space, id_data_set; + hsize_t dims[1]; + herr_t ec; + hdf5_api_ops_t* ops = profile_factory(type); + char *type_name = acct_gather_profile_type_to_string(type); + + if (ops == NULL) { + debug3("PROFILE: failed to create %s operations", + type_name); + return; + } + // Create the datatypes. + if (strcmp(subtype, SUBDATA_SUMMARY)) { + dtyp_memory = (*(ops->create_memory_datatype))(); + dtyp_file = (*(ops->create_file_datatype))(); + } else { + dtyp_memory = (*(ops->create_s_memory_datatype))(); + dtyp_file = (*(ops->create_s_file_datatype))(); + } + + if (dtyp_memory < 0) { + xfree(ops); + debug3("PROFILE: failed to create %s memory datatype", + type_name); + return; + } + + if (dtyp_file < 0) { + H5Tclose(dtyp_memory); + xfree(ops); + debug3("PROFILE: failed to create %s file datatype", + type_name); + return; + } + + dims[0] = n_item; + id_data_space = H5Screate_simple(1, dims, NULL); + if (id_data_space < 0) { + H5Tclose(dtyp_file); + H5Tclose(dtyp_memory); + xfree(ops); + debug3("PROFILE: failed to create %s space descriptor", + type_name); + return; + } + + id_group = H5Gcreate(parent, group, H5P_DEFAULT, + H5P_DEFAULT, H5P_DEFAULT); + if (id_group < 0) { + H5Sclose(id_data_space); + H5Tclose(dtyp_file); + H5Tclose(dtyp_memory); + xfree(ops); + debug3("PROFILE: failed to create %s group", group); + return; + } + + put_string_attribute(id_group, ATTR_DATATYPE, type_name); + put_string_attribute(id_group, ATTR_SUBDATATYPE, subtype); + + id_data_set = H5Dcreate(id_group, get_data_set_name(group), dtyp_file, + id_data_space, H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + if (id_data_set < 0) { + H5Gclose(id_group); + H5Sclose(id_data_space); + H5Tclose(dtyp_file); + H5Tclose(dtyp_memory); + xfree(ops); + debug3("PROFILE: failed to create %s dataset", group); + return; + } + + ec = H5Dwrite(id_data_set, dtyp_memory, H5S_ALL, H5S_ALL, H5P_DEFAULT, + data); + if (ec < 0) { + debug3("PROFILE: failed to create write task data"); + // Fall through to release resources + } + H5Dclose(id_data_set); + H5Gclose(id_group); + H5Sclose(id_data_space); + H5Tclose(dtyp_file); + H5Tclose(dtyp_memory); + xfree(ops); + + + return; +} + diff --git a/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/hdf5_api.h b/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/hdf5_api.h new file mode 100644 index 0000000000000000000000000000000000000000..2066eadcdaca7fd15fcba25a9bfccabcb45c77cb --- /dev/null +++ b/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/hdf5_api.h @@ -0,0 +1,361 @@ +/****************************************************************************\ + * hdf5_api.h + ***************************************************************************** + * Copyright (C) 2013 Bull S. A. S. + * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. + * + * Written by Rod Schultz <rod.schultz@bull.com> + * + * Portions Copyright (C) 2013 SchedMD LLC. 
+ * Written by Danny Auble <da@schedmd.com> + * + * Provide support for acct_gather_profile plugins based on HDF5 files. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\****************************************************************************/ +#ifndef __ACCT_GATHER_HDF5_OLD_API_H__ +#define __ACCT_GATHER_HDF5_OLD_API_H__ + +#if HAVE_CONFIG_H +# include "config.h" +# if HAVE_INTTYPES_H +# include <inttypes.h> +# else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +# endif /* HAVE_INTTYPES_H */ +#else /* !HAVE_CONFIG_H */ +# include <inttypes.h> +#endif /* HAVE_CONFIG_H */ + +#include <stdlib.h> + +#include <hdf5.h> +#include "src/common/slurm_acct_gather_profile.h" +#include "../../hdf5_api.h" + +#define MAX_PROFILE_PATH 1024 +#define MAX_ATTR_NAME 64 +#define MAX_GROUP_NAME 64 +#define MAX_DATASET_NAME 64 + +#define ATTR_NODENAME "Node Name" +#define ATTR_STARTTIME "Start Time" +#define ATTR_NSTEPS "Number of Steps" +#define ATTR_NNODES "Number of Nodes" +#define ATTR_NTASKS "Number of Tasks" +#define ATTR_TASKID "Task Id" +#define ATTR_CPUPERTASK "CPUs per Task" +#define ATTR_DATATYPE "Data Type" +#define ATTR_SUBDATATYPE "Subdata Type" +#define ATTR_STARTTIME "Start Time" +#define ATTR_STARTSEC "Start Second" +#define SUBDATA_DATA "Data" +#define SUBDATA_NODE "Node" +#define SUBDATA_SAMPLE "Sample" +#define SUBDATA_SERIES "Series" +#define SUBDATA_TOTAL "Total" +#define SUBDATA_SUMMARY "Summary" + +#define GRP_ENERGY "Energy" +#define GRP_LUSTRE "Lustre" +#define GRP_STEP "Step" +#define GRP_NODES "Nodes" +#define GRP_NODE "Node" +#define GRP_NETWORK "Network" +#define GRP_SAMPLES "Time Series" +#define GRP_SAMPLE "Sample" +#define GRP_TASKS "Tasks" +#define GRP_TASK "Task" +#define GRP_TOTALS "Totals" + +// Data types supported by all HDF5 plugins of this type + +#define TOD_LEN 24 +#define TOD_FMT "%F %T" + +/* + * prof_uint_sum is a low level structure intended to hold the + * minimum, average, maximum, and total values 
of a data item. + * It is usually used in a summary data structure for an item + * that occurs in a time series. + */ +typedef struct prof_uint_sum { + uint64_t min; // Minumum value + uint64_t ave; // Average value + uint64_t max; // Maximum value + uint64_t total; // Accumlated value +} prof_uint_sum_t; + +// Save as prof_uint_sum, but for double precision items +typedef struct prof_dbl_sum { + double min; // Minumum value + double ave; // Average value + double max; // Maximum value + double total; // Accumlated value +} prof_dbl_sum_t; + +#define PROFILE_ENERGY_DATA "Energy" +// energy data structures +// node_step file +typedef struct profile_energy { + char tod[TOD_LEN]; // Not used in node-step + time_t time; + uint64_t power; + uint64_t cpu_freq; +} profile_energy_t; +// summary data in job-node-totals +typedef struct profile_energy_s { + char start_time[TOD_LEN]; + uint64_t elapsed_time; + prof_uint_sum_t power; + prof_uint_sum_t cpu_freq; +} profile_energy_s_t; // series summary + +#define PROFILE_IO_DATA "I/O" +// io data structure +// node_step file +typedef struct profile_io { + char tod[TOD_LEN]; // Not used in node-step + time_t time; + uint64_t reads; + double read_size; // currently in megabytes + uint64_t writes; + double write_size; // currently in megabytes +} profile_io_t; +// summary data in job-node-totals +typedef struct profile_io_s { + char start_time[TOD_LEN]; + uint64_t elapsed_time; + prof_uint_sum_t reads; + prof_dbl_sum_t read_size; // currently in megabytes + prof_uint_sum_t writes; + prof_dbl_sum_t write_size; // currently in megabytes +} profile_io_s_t; + +#define PROFILE_NETWORK_DATA "Network" +// Network data structure +// node_step file +typedef struct profile_network { + char tod[TOD_LEN]; // Not used in node-step + time_t time; + uint64_t packets_in; + double size_in; // currently in megabytes + uint64_t packets_out; + double size_out; // currently in megabytes +} profile_network_t; +// summary data in job-node-totals +typedef struct profile_network_s { + char start_time[TOD_LEN]; + uint64_t elapsed_time; + prof_uint_sum_t packets_in; + prof_dbl_sum_t size_in; // currently in megabytes + prof_uint_sum_t packets_out; + prof_dbl_sum_t size_out; // currently in megabytes +} profile_network_s_t; + +#define PROFILE_TASK_DATA "Task" +// task data structure +// node_step file +typedef struct profile_task { + char tod[TOD_LEN]; // Not used in node-step + time_t time; + uint64_t cpu_freq; + uint64_t cpu_time; + double cpu_utilization; + uint64_t rss; + uint64_t vm_size; + uint64_t pages; + double read_size; // currently in megabytes + double write_size; // currently in megabytes +} profile_task_t; +// summary data in job-node-totals +typedef struct profile_task_s { + char start_time[TOD_LEN]; + uint64_t elapsed_time; + prof_uint_sum_t cpu_freq; + prof_uint_sum_t cpu_time; + prof_dbl_sum_t cpu_utilization; + prof_uint_sum_t rss; + prof_uint_sum_t vm_size; + prof_uint_sum_t pages; + prof_dbl_sum_t read_size; // currently in megabytes + prof_dbl_sum_t write_size; // currently in megabytes +} profile_task_s_t; + +/* + * Structure of function pointers of common operations on a profile data type. + * dataset_size -- size of one dataset (structure size) + * create_memory_datatype -- creates hdf5 memory datatype corresponding + * to the datatype structure. + * create_file_datatype -- creates hdf5 file datatype corresponding + * to the datatype structure. + * create_s_memory_datatype -- creates hdf5 memory datatype corresponding + * to the summary datatype structure. 
+ * create_s_file_datatype -- creates hdf5 file datatype corresponding
+ *	to the summary datatype structure.
+ * init_job_series -- allocates a buffer for a complete time series
+ *	(in job merge) and initializes each member
+ * get_series_tod -- get the date/time value of each sample in the series
+ * get_series_values -- gets a specific data item from each sample in the
+ *	series
+ * merge_step_series -- merges all the individual time samples into a
+ *	single data set with one item per sample.
+ *	Data items can be scaled (e.g. subtracting the beginning time),
+ *	differenced (to show counts per interval), or otherwise transformed
+ *	as appropriate for the series.
+ * series_total -- accumulate or average members in the entire series to
+ *	be added to the file as totals for the node or task.
+ * extract_series -- format the members of a time-series structure and
+ *	write them to a file so the extracted data can be imported into
+ *	another analysis tool (e.g. as comma separated values).
+ * extract_total -- format the members of a time-series total structure and
+ *	write them to a file so the extracted data can be imported into
+ *	another analysis tool (e.g. as comma separated values).
+ */
+typedef struct hdf5_api_ops {
+	int   (*dataset_size) (void);
+	hid_t (*create_memory_datatype) (void);
+	hid_t (*create_file_datatype) (void);
+	hid_t (*create_s_memory_datatype) (void);
+	hid_t (*create_s_file_datatype) (void);
+	void* (*init_job_series) (int);
+	char** (*get_series_tod) (void*, int);
+	double* (*get_series_values) (char*, void*, int);
+	void  (*merge_step_series) (hid_t, void*, void*, void*);
+	void* (*series_total) (int, void*);
+	void  (*extract_series) (FILE*, bool, int, int, char*, char*, void*,
+				 int);
+	void  (*extract_total) (FILE*, bool, int, int, char*, char*, void*,
+				int);
+} hdf5_api_ops_t;
+
+/* ============================================================================
+ * Common support functions
+ ==========================================================================*/
+
+/*
+ * Create an ops vector for the given profile data type
+ */
+hdf5_api_ops_t* profile_factory(uint32_t type);
+
+/*
+ * Initialize profile (initialize static memory)
+ */
+void profile_init_old(void);
+
+/*
+ * Finalize profile (free static memory)
+ */
+void profile_fini_old(void);
+
+/*
+ * Make a dataset name
+ *
+ * Parameters
+ *	type - series name
+ *
+ * Returns
+ *	common data set name based on type in static memory
+ */
+char* get_data_set_name(char* type);
+
+/*
+ * print info on an object for debugging
+ *
+ * Parameters
+ *	group - handle to group.
+ *	namGroup - name of the group
+ */
+void hdf5_obj_info(hid_t group, char* namGroup);
+
+/*
+ * get string attribute
+ *
+ * Parameters
+ *	parent - handle to parent group.
+ *	name   - name of the attribute
+ *
+ * Return: pointer to value. The caller is responsible for freeing it.
+ */
+char* get_string_attribute(hid_t parent, char* name);
+
+/*
+ * get int attribute
+ *
+ * Parameters
+ *	parent - handle to parent group.
+ *	name   - name of the attribute
+ *
+ * Return: value
+ */
+int get_int_attribute(hid_t parent, char* name);
+
+/*
+ * get uint32_t attribute
+ *
+ * Parameters
+ *	parent - handle to parent group.
+ *	name   - name of the attribute
+ *
+ * Return: value
+ */
+uint32_t get_uint32_attribute(hid_t parent, char* name);
+
+/*
+ * Get data from a group of an HDF5 file
+ *
+ * Parameters
+ *	parent - handle to parent.
+ * type - type of data (ACCT_GATHER_PROFILE_* in slurm.h) + * namGroup - name of group + * sizeData - pointer to variable into which to put size of dataset + * + * Returns -- data set of type (or null), caller must free. + */ +void* get_hdf5_data(hid_t parent, uint32_t type, char* namGroup, int* sizeData); + +/* + * Put one data sample into a new group in an HDF5 file + * + * Parameters + * parent - handle to parent group. + * type - type of data (ACCT_GATHER_PROFILE_* in slurm.h) + * subtype - generally source (node, series, ...) or summary + * group - name of new group + * data - data for the sample + * nItems - number of items of type in the data + */ +void put_hdf5_data(hid_t parent, uint32_t type, char* subtype, char* group, + void* data, int nItems); + +#endif /*__ACCT_GATHER_HDF5_OLD_API_H__*/ diff --git a/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/sh5util.c b/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/sh5util.c new file mode 100644 index 0000000000000000000000000000000000000000..5ebdbcf9206e9bb0170c721e512108688f7c0458 --- /dev/null +++ b/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/sh5util.c @@ -0,0 +1,1571 @@ +/*****************************************************************************\ + * sh5util.c - slurm profile accounting plugin for io and energy using hdf5. + * - Utility to merge node-step files into a job file + * - or extract data from an job file + ***************************************************************************** + * Copyright (C) 2013 Bull S. A. S. + * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. + * + * Written by Rod Schultz <rod.schultz@bull.com> + * + * Copyright (C) 2013 SchedMD LLC + * + * Written by Danny Auble <da@schedmd.com> + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * +\*****************************************************************************/ + +#ifndef _GNU_SOURCE +# define _GNU_SOURCE +#endif + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#if HAVE_GETOPT_H +# include <getopt.h> +#else +# include "src/common/getopt.h" +#endif + +#include <dirent.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> + +#include "src/common/uid.h" +#include "src/common/read_config.h" +#include "src/common/proc_args.h" +#include "src/common/xstring.h" +#include "hdf5_api.h" +#include "../sh5util.h" + +static char **series_names; +static int num_series; + +static int _merge_step_files(void); +static int _extract_data(void); +static int _series_data(void); + +extern int run_old(int argc, char **argv) +{ + int cc; + + profile_init_old(); + + switch (params.mode) { + case SH5UTIL_MODE_MERGE: + cc = _merge_step_files(); + break; + case SH5UTIL_MODE_EXTRACT: + cc = _extract_data(); + break; + case SH5UTIL_MODE_ITEM_EXTRACT: + cc = _series_data(); + break; + case SH5UTIL_MODE_ITEM_LIST: + cc = SLURM_ERROR; + break; + default: + error("Unknown type %d", params.mode); + break; + } + + profile_fini_old(); + + return cc; +} + +/* + * delete list of strings + * + * Parameters + * list - xmalloc'd list of pointers of xmalloc'd strings. + * listlen - number of strings in the list + */ +static void _delete_string_list(char **list, int listLen) +{ + int ix; + + if (list == NULL) + return; + + for (ix = 0; ix < listLen; ix++) { + xfree(list[ix]); + } + + xfree(list); + +} + +/* ============================================================================ + * ============================================================================ + * Functions for merging samples from node step files into a job file + * ============================================================================ + * ========================================================================= */ + +static void* _get_all_samples(hid_t gid_series, char* nam_series, uint32_t type, + int nsamples) +{ + void* data = NULL; + + hid_t id_data_set, dtyp_memory, g_sample, sz_dest; + herr_t ec; + int smpx ,len; + void *data_prior = NULL, *data_cur = NULL; + char name_sample[MAX_GROUP_NAME+1]; + hdf5_api_ops_t* ops; + + ops = profile_factory(type); + if (ops == NULL) { + error("Failed to create operations for %s", + acct_gather_profile_type_to_string(type)); + return NULL; + } + data = (*(ops->init_job_series))(nsamples); + if (data == NULL) { + xfree(ops); + error("Failed to get memory for combined data"); + return NULL; + } + dtyp_memory = (*(ops->create_memory_datatype))(); + if (dtyp_memory < 0) { + xfree(ops); + xfree(data); + error("Failed to create %s memory datatype", + acct_gather_profile_type_to_string(type)); + return NULL; + } + for (smpx=0; smpx<nsamples; smpx++) { + len = H5Lget_name_by_idx(gid_series, ".", H5_INDEX_NAME, + H5_ITER_INC, smpx, name_sample, + MAX_GROUP_NAME, H5P_DEFAULT); + if (len<1 || len>MAX_GROUP_NAME) { + error("Invalid group name %s", name_sample); + continue; + } + g_sample = H5Gopen(gid_series, name_sample, H5P_DEFAULT); + if (g_sample < 0) { + info("Failed to open %s", name_sample); + } + id_data_set = H5Dopen(g_sample, get_data_set_name(name_sample), + H5P_DEFAULT); + if (id_data_set < 0) { + H5Gclose(g_sample); + error("Failed to open %s dataset", + acct_gather_profile_type_to_string(type)); + continue; + } + sz_dest = (*(ops->dataset_size))(); + data_cur = xmalloc(sz_dest); + if (data_cur == NULL) { 
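+			// Allocation for this sample's buffer failed; release
+			// the HDF5 handles and skip the sample, keeping what
+			// has already been merged.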
+ H5Dclose(id_data_set); + H5Gclose(g_sample); + error("Failed to get memory for prior data"); + continue; + } + ec = H5Dread(id_data_set, dtyp_memory, H5S_ALL, H5S_ALL, + H5P_DEFAULT, data_cur); + if (ec < 0) { + xfree(data_cur); + H5Dclose(id_data_set); + H5Gclose(g_sample); + error("Failed to read %s data", + acct_gather_profile_type_to_string(type)); + continue; + } + (*(ops->merge_step_series))(g_sample, data_prior, data_cur, + data+(smpx)*sz_dest); + + xfree(data_prior); + data_prior = data_cur; + H5Dclose(id_data_set); + H5Gclose(g_sample); + } + xfree(data_cur); + H5Tclose(dtyp_memory); + xfree(ops); + + return data; +} + +static void _merge_series_data(hid_t jgid_tasks, hid_t jg_node, hid_t nsg_node) +{ + hid_t jg_samples, nsg_samples; + hid_t g_series, g_series_total = -1; + hsize_t num_samples, n_series; + int idsx, len; + void *data = NULL, *series_total = NULL; + uint32_t type; + char *data_type; + char nam_series[MAX_GROUP_NAME+1]; + hdf5_api_ops_t* ops = NULL; + H5G_info_t group_info; + H5O_info_t object_info; + + if (jg_node < 0) { + info("Job Node is not HDF5 object"); + return; + } + if (nsg_node < 0) { + info("Node-Step is not HDF5 object"); + return; + } + + jg_samples = H5Gcreate(jg_node, GRP_SAMPLES, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (jg_samples < 0) { + info("Failed to create job node Samples"); + return; + } + nsg_samples = get_group(nsg_node, GRP_SAMPLES); + if (nsg_samples < 0) { + H5Gclose(jg_samples); + debug("Failed to get node-step Samples"); + return; + } + H5Gget_info(nsg_samples, &group_info); + n_series = group_info.nlinks; + if (n_series < 1) { + // No series? + H5Gclose(jg_samples); + H5Gclose(nsg_samples); + info("No Samples"); + return; + } + for (idsx = 0; idsx < n_series; idsx++) { + H5Oget_info_by_idx(nsg_samples, ".", H5_INDEX_NAME, H5_ITER_INC, + idsx, &object_info, H5P_DEFAULT); + if (object_info.type != H5O_TYPE_GROUP) + continue; + + len = H5Lget_name_by_idx(nsg_samples, ".", H5_INDEX_NAME, + H5_ITER_INC, idsx, nam_series, + MAX_GROUP_NAME, H5P_DEFAULT); + if (len<1 || len>MAX_GROUP_NAME) { + info("Invalid group name %s", nam_series); + continue; + } + g_series = H5Gopen(nsg_samples, nam_series, H5P_DEFAULT); + if (g_series < 0) { + info("Failed to open %s", nam_series); + continue; + } + H5Gget_info(g_series, &group_info); + num_samples = group_info.nlinks; + if (num_samples <= 0) { + H5Gclose(g_series); + info("_series %s has no samples", nam_series); + continue; + } + // Get first sample in series to find out how big the data is. 
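+		// The series' Data Type attribute selects the ops vector used
+		// to read and merge every sample into a single dataset in the
+		// job file; a per-series total is then computed and stored
+		// under the node's Totals group.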
+ data_type = get_string_attribute(g_series, ATTR_DATATYPE); + if (!data_type) { + H5Gclose(g_series); + info("Failed to get datatype for Time Series Dataset"); + continue; + } + type = acct_gather_profile_type_from_string(data_type); + xfree(data_type); + data = _get_all_samples(g_series, nam_series, type, + num_samples); + if (data == NULL) { + H5Gclose(g_series); + info("Failed to get memory for Time Series Dataset"); + continue; + } + put_hdf5_data(jg_samples, type, SUBDATA_SERIES, nam_series, + data, num_samples); + ops = profile_factory(type); + if (ops == NULL) { + xfree(data); + H5Gclose(g_series); + info("Failed to create operations for %s", + acct_gather_profile_type_to_string(type)); + continue; + } + series_total = (*(ops->series_total))(num_samples, data); + if (series_total != NULL) { + // Totals for series attaches to node + g_series_total = make_group(jg_node, GRP_TOTALS); + if (g_series_total < 0) { + H5Gclose(g_series); + xfree(series_total); + xfree(data); + xfree(ops); + info("Failed to make Totals for Node"); + continue; + } + put_hdf5_data(g_series_total, type, + SUBDATA_SUMMARY, + nam_series, series_total, 1); + H5Gclose(g_series_total); + } + xfree(series_total); + xfree(ops); + xfree(data); + H5Gclose(g_series); + } + + return; +} + +/* ============================================================================ + * Functions for merging tasks data into a job file + ==========================================================================*/ + +static void _merge_task_totals(hid_t jg_tasks, hid_t nsg_node, char* node_name) +{ + hid_t jg_task, jg_totals, nsg_totals, + g_total, nsg_tasks, nsg_task = -1; + hsize_t nobj, ntasks = -1; + int i, len, taskx, taskid, taskcpus, size_data; + void *data; + uint32_t type; + char buf[MAX_GROUP_NAME+1]; + char group_name[MAX_GROUP_NAME+1]; + H5G_info_t group_info; + + if (jg_tasks < 0) { + info("Job Tasks is not HDF5 object"); + return; + } + if (nsg_node < 0) { + info("Node-Step is not HDF5 object"); + return; + } + + nsg_tasks = get_group(nsg_node, GRP_TASKS); + if (nsg_tasks < 0) { + debug("No Tasks group in node-step file"); + return; + } + + H5Gget_info(nsg_tasks, &group_info); + ntasks = group_info.nlinks; + for (taskx = 0; ((int)ntasks>0) && (taskx<((int)ntasks)); taskx++) { + // Get the name of the group. 
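+		// Each Task_<taskid> group in the node-step file is recreated
+		// in the job file with its node name, task id and CPUs-per-task
+		// attributes, and the datasets under its Totals group are
+		// copied across.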
+ len = H5Lget_name_by_idx(nsg_tasks, ".", H5_INDEX_NAME, + H5_ITER_INC, taskx, buf, + MAX_GROUP_NAME, H5P_DEFAULT); + if (len<1 || len>MAX_GROUP_NAME) { + info("Invalid group name %s", buf); + continue; + } + nsg_task = H5Gopen(nsg_tasks, buf, H5P_DEFAULT); + if (nsg_task < 0) { + debug("Failed to open %s", buf); + continue; + } + taskid = get_int_attribute(nsg_task, ATTR_TASKID); + sprintf(group_name, "%s_%d", GRP_TASK, taskid); + jg_task = H5Gcreate(jg_tasks, group_name, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (jg_task < 0) { + H5Gclose(nsg_task); + info("Failed to create job task group"); + continue; + } + put_string_attribute(jg_task, ATTR_NODENAME, node_name); + put_int_attribute(jg_task, ATTR_TASKID, taskid); + taskcpus = get_int_attribute(nsg_task, ATTR_CPUPERTASK); + put_int_attribute(jg_task, ATTR_CPUPERTASK, taskcpus); + nsg_totals = get_group(nsg_task, GRP_TOTALS); + if (nsg_totals < 0) { + H5Gclose(jg_task); + H5Gclose(nsg_task); + continue; + } + jg_totals = H5Gcreate(jg_task, GRP_TOTALS, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (jg_totals < 0) { + H5Gclose(jg_task); + H5Gclose(nsg_task); + info("Failed to create job task totals"); + continue; + } + H5Gget_info(nsg_totals, &group_info); + nobj = group_info.nlinks; + for (i = 0; (nobj>0) && (i<nobj); i++) { + // Get the name of the group. + len = H5Lget_name_by_idx(nsg_totals, ".", H5_INDEX_NAME, + H5_ITER_INC, i, buf, + MAX_GROUP_NAME, H5P_DEFAULT); + + if (len<1 || len>MAX_GROUP_NAME) { + info("Invalid group name %s", buf); + continue; + } + g_total = H5Gopen(nsg_totals, buf, H5P_DEFAULT); + if (g_total < 0) { + info("Failed to open %s", buf); + continue; + } + type = get_uint32_attribute(g_total, ATTR_DATATYPE); + if (!type) { + H5Gclose(g_total); + info("No %s attribute", ATTR_DATATYPE); + continue; + } + data = get_hdf5_data(g_total, type, buf, &size_data); + if (data == NULL) { + H5Gclose(g_total); + info("Failed to get group %s type %s data", buf, + acct_gather_profile_type_to_string(type)); + continue; + } + put_hdf5_data(jg_totals, type, SUBDATA_DATA, + buf, data, 1); + xfree(data); + H5Gclose(g_total); + } + H5Gclose(nsg_totals); + H5Gclose(nsg_task); + H5Gclose(jg_totals); + H5Gclose(jg_task); + } + H5Gclose(nsg_tasks); +} + +/* ============================================================================ + * Functions for merging node totals into a job file + ==========================================================================*/ + +static void _merge_node_totals(hid_t jg_node, hid_t nsg_node) +{ + hid_t jg_totals, nsg_totals, g_total; + hsize_t nobj; + int i, len, size_data; + void *data; + uint32_t type; + char buf[MAX_GROUP_NAME+1]; + H5G_info_t group_info; + + if (jg_node < 0) { + info("Job Node is not HDF5 object"); + return; + } + if (nsg_node < 0) { + info("Node-Step is not HDF5 object"); + return; + } + jg_totals = H5Gcreate(jg_node, GRP_TOTALS, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (jg_totals < 0) { + info("Failed to create job node totals"); + return; + } + nsg_totals = get_group(nsg_node, GRP_TOTALS); + if (nsg_totals < 0) { + H5Gclose(jg_totals); + return; + } + + H5Gget_info(nsg_totals, &group_info); + nobj = group_info.nlinks; + for (i = 0; (nobj>0) && (i<nobj); i++) { + // Get the name of the group. 
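+		// Each total recorded for the node in the node-step file is
+		// copied into the job file's per-node Totals group, using its
+		// Data Type attribute to select the matching datatype.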
+ len = H5Lget_name_by_idx(nsg_totals, ".", H5_INDEX_NAME, + H5_ITER_INC, i, buf, + MAX_GROUP_NAME, H5P_DEFAULT); + if (len<1 || len>MAX_GROUP_NAME) { + info("invalid group name %s", buf); + continue; + } + g_total = H5Gopen(nsg_totals, buf, H5P_DEFAULT); + if (g_total < 0) { + info("Failed to open %s", buf); + continue; + } + type = get_uint32_attribute(g_total, ATTR_DATATYPE); + if (!type) { + H5Gclose(g_total); + info("No %s attribute", ATTR_DATATYPE); + continue; + } + data = get_hdf5_data(g_total, type, buf, &size_data); + if (data == NULL) { + H5Gclose(g_total); + info("Failed to get group %s type %s data", + buf, acct_gather_profile_type_to_string(type)); + continue; + } + put_hdf5_data(jg_totals, type, SUBDATA_DATA, buf, data, 1); + xfree(data); + H5Gclose(g_total); + } + H5Gclose(nsg_totals); + H5Gclose(jg_totals); + return; +} + +/* ============================================================================ + * Functions for merging step data into a job file + ==========================================================================*/ + +static void _merge_node_step_data(hid_t fid_job, char* file_name, int nodeIndex, + char* node_name, hid_t jgid_nodes, + hid_t jgid_tasks) +{ + hid_t fid_nodestep, jgid_node, nsgid_root, nsgid_node; + char *start_time; + char group_name[MAX_GROUP_NAME+1]; + + jgid_node = H5Gcreate(jgid_nodes, node_name, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (jgid_node < 0) { + error("Failed to create group %s",node_name); + return; + } + put_string_attribute(jgid_node, ATTR_NODENAME, node_name); + // Process node step file + // Open the file and the node group. + fid_nodestep = H5Fopen(file_name, H5F_ACC_RDONLY, H5P_DEFAULT); + if (fid_nodestep < 0) { + H5Gclose(jgid_node); + error("Failed to open %s",file_name); + return; + } + nsgid_root = H5Gopen(fid_nodestep,"/", H5P_DEFAULT); + sprintf(group_name, "/%s_%s", GRP_NODE, node_name); + nsgid_node = H5Gopen(nsgid_root, group_name, H5P_DEFAULT); + if (nsgid_node < 0) { + H5Gclose(fid_nodestep); + H5Gclose(jgid_node); + error("Failed to open node group"); + return;; + } + start_time = get_string_attribute(nsgid_node,ATTR_STARTTIME); + if (start_time == NULL) { + info("No %s attribute", ATTR_STARTTIME); + } else { + put_string_attribute(jgid_node, ATTR_STARTTIME, start_time); + xfree(start_time); + } + _merge_node_totals(jgid_node, nsgid_node); + _merge_task_totals(jgid_tasks, nsgid_node, node_name); + _merge_series_data(jgid_tasks, jgid_node, nsgid_node); + H5Gclose(nsgid_node); + H5Fclose(fid_nodestep); + H5Gclose(jgid_node); + + if (!params.keepfiles) + remove(file_name); + + return; +} + +static int _merge_step_files(void) +{ + hid_t fid_job = -1; + hid_t jgid_step = -1; + hid_t jgid_nodes = -1; + hid_t jgid_tasks = -1; + DIR *dir; + struct dirent *de; + char file_name[MAX_PROFILE_PATH+1]; + char step_dir[MAX_PROFILE_PATH+1]; + char step_path[MAX_PROFILE_PATH+1]; + char jgrp_step_name[MAX_GROUP_NAME+1]; + char jgrp_nodes_name[MAX_GROUP_NAME+1]; + char jgrp_tasks_name[MAX_GROUP_NAME+1]; + char *step_node; + char *pos_char; + char *stepno; + int stepx = 0; + int num_steps = 0; + int nodex = -1; + int max_step = -1; + int jobid, stepid; + bool found_files = false; + + sprintf(step_dir, "%s/%s", params.dir, params.user); + + while (max_step == -1 || stepx <= max_step) { + + if (!(dir = opendir(step_dir))) { + error("Cannot open %s job profile directory: %m", step_dir); + return -1; + } + + nodex = 0; + while ((de = readdir(dir))) { + + strcpy(file_name, de->d_name); + if (file_name[0] == '.') + continue; + 
+ pos_char = strstr(file_name,".h5"); + if (!pos_char) + continue; + *pos_char = 0; + + pos_char = strchr(file_name,'_'); + if (!pos_char) + continue; + *pos_char = 0; + + jobid = strtol(file_name, NULL, 10); + if (jobid != params.job_id) + continue; + + stepno = pos_char + 1; + pos_char = strchr(stepno,'_'); + if (!pos_char) { + continue; + } + *pos_char = 0; + + stepid = strtol(stepno, NULL, 10); + if (stepid > max_step) + max_step = stepid; + if (stepid != stepx) + continue; + + step_node = pos_char + 1; + + if (!found_files) { + fid_job = H5Fcreate(params.output, + H5F_ACC_TRUNC, + H5P_DEFAULT, + H5P_DEFAULT); + if (fid_job < 0) { + error("Failed create HDF5 file %s", params.output); + return -1; + } + found_files = true; + } + + if (nodex == 0) { + + num_steps++; + sprintf(jgrp_step_name, "/%s_%d", GRP_STEP, + stepx); + + jgid_step = make_group(fid_job, jgrp_step_name); + if (jgid_step < 0) { + error("Failed to create %s", jgrp_step_name); + continue; + } + + sprintf(jgrp_nodes_name,"%s/%s", + jgrp_step_name, + GRP_NODES); + jgid_nodes = make_group(jgid_step, + jgrp_nodes_name); + if (jgid_nodes < 0) { + error("Failed to create %s", jgrp_nodes_name); + continue; + } + + sprintf(jgrp_tasks_name,"%s/%s", + jgrp_step_name, + GRP_TASKS); + jgid_tasks = make_group(jgid_step, + jgrp_tasks_name); + if (jgid_tasks < 0) { + error("Failed to create %s", jgrp_tasks_name); + continue; + } + } + + sprintf(step_path, "%s/%s", step_dir, de->d_name); + debug("Adding %s to the job file", step_path); + _merge_node_step_data(fid_job, step_path, + nodex, step_node, + jgid_nodes, jgid_tasks); + nodex++; + } + + closedir(dir); + + if (nodex > 0) { + put_int_attribute(jgid_step, ATTR_NNODES, nodex); + H5Gclose(jgid_tasks); + H5Gclose(jgid_nodes); + H5Gclose(jgid_step); + } + + /* If we did not find the step 0 + * bail out. 
+ */ + if (stepx == 0 + && !found_files) + break; + + stepx++; + } + + if (!found_files) + info("No node-step files found for jobid %d", params.job_id); + else + put_int_attribute(fid_job, ATTR_NSTEPS, num_steps); + + if (fid_job != -1) + H5Fclose(fid_job); + + return 0; +} + +/* ============================================================================ + * ============================================================================ + * Functions for data extraction + * ============================================================================ + * ========================================================================= */ + +static hid_t _get_series_parent(hid_t group) +{ + hid_t gid_level = -1; + + if (strcasecmp(params.level, "Node:Totals") == 0) { + gid_level = get_group(group, GRP_TOTALS); + if (gid_level < 0) { + info("Failed to open group %s", GRP_TOTALS); + } + } else if (strcasecmp(params.level, "Node:TimeSeries") == 0) { + gid_level = get_group(group, GRP_SAMPLES); + if (gid_level < 0) { + info("Failed to open group %s", GRP_SAMPLES); + } + } else { + info("%s is an illegal level", params.level); + return -1; + + } + + return gid_level; +} + + +static void _get_series_names(hid_t group) +{ + int i, len; + char buf[MAX_GROUP_NAME+1]; + H5G_info_t group_info; + + H5Gget_info(group, &group_info); + num_series = (int)group_info.nlinks; + if (num_series < 0) { + debug("No Data Series in group"); + return; + } + series_names = xmalloc(sizeof(char*)*num_series); + for (i = 0; (num_series>0) && (i<num_series); i++) { + len = H5Lget_name_by_idx(group, ".", H5_INDEX_NAME, + H5_ITER_INC, i, buf, + MAX_GROUP_NAME, H5P_DEFAULT); + if ((len < 0) || (len > MAX_GROUP_NAME)) { + info("Invalid series name=%s", buf); + // put into list anyway so list doesn't have a null. + } + series_names[i] = xstrdup(buf); + } + +} + +static void _extract_series(FILE* fp, int stepx, bool header, hid_t gid_level, + char* node_name, char* data_set_name) { + hid_t gid_series; + int size_data; + void *data; + uint32_t type; + char *data_type, *subtype; + hdf5_api_ops_t* ops; + gid_series = get_group(gid_level, data_set_name); + if (gid_series < 0) { + // This is okay, may not have ran long enough for + // a sample (hostname????) 
+ // OR trying to get all tasks + return; + } + data_type = get_string_attribute(gid_series, ATTR_DATATYPE); + if (!data_type) { + H5Gclose(gid_series); + info("No datatype in %s", data_set_name); + return; + } + type = acct_gather_profile_type_from_string(data_type); + xfree(data_type); + subtype = get_string_attribute(gid_series, ATTR_SUBDATATYPE); + if (subtype == NULL) { + H5Gclose(gid_series); + info("No %s attribute", ATTR_SUBDATATYPE); + return; + } + ops = profile_factory(type); + if (ops == NULL) { + xfree(subtype); + H5Gclose(gid_series); + info("Failed to create operations for %s", + acct_gather_profile_type_to_string(type)); + return; + } + data = get_hdf5_data( + gid_series, type, data_set_name, &size_data); + if (data) { + if (strcmp(subtype,SUBDATA_SUMMARY) != 0) + (*(ops->extract_series)) (fp, header, params.job_id, + stepx, node_name, data_set_name, + data, size_data); + else + (*(ops->extract_total)) (fp, header, params.job_id, + stepx, node_name, data_set_name, + data, size_data); + xfree(data); + } else { + fprintf(fp, "%d,%d,%s,No %s Data\n", + params.job_id, stepx, node_name, + data_set_name); + } + xfree(ops); + H5Gclose(gid_series); + +} +static void _extract_node_level(FILE* fp, int stepx, hid_t jgid_nodes, + int nnodes, char* data_set_name) +{ + + hid_t jgid_node, gid_level; + int nodex, len; + char jgrp_node_name[MAX_GROUP_NAME+1]; + bool header = true; + for (nodex=0; nodex<nnodes; nodex++) { + len = H5Lget_name_by_idx(jgid_nodes, ".", H5_INDEX_NAME, + H5_ITER_INC, nodex, jgrp_node_name, + MAX_GROUP_NAME, H5P_DEFAULT); + if ((len < 0) || (len > MAX_GROUP_NAME)) { + info("Invalid node name=%s", jgrp_node_name); + continue; + } + jgid_node = get_group(jgid_nodes, jgrp_node_name); + if (jgid_node < 0) { + info("Failed to open group %s", jgrp_node_name); + continue; + } + if (params.node + && strcmp(params.node, "*") + && strcmp(params.node, jgrp_node_name)) + continue; + gid_level = _get_series_parent(jgid_node); + if (gid_level == -1) { + H5Gclose(jgid_node); + continue; + } + _extract_series(fp, stepx, header, gid_level, jgrp_node_name, + data_set_name); + header = false; + H5Gclose(gid_level); + H5Gclose(jgid_node); + } +} + +static void _extract_all_tasks(FILE *fp, hid_t gid_step, hid_t gid_nodes, + int nnodes, int stepx) +{ + + hid_t gid_tasks, gid_task = 0, gid_node = -1, gid_level = -1; + H5G_info_t group_info; + int ntasks, itx, len, task_id; + char task_name[MAX_GROUP_NAME+1]; + char* node_name; + char buf[MAX_GROUP_NAME+1]; + bool hd = true; + + gid_tasks = get_group(gid_step, GRP_TASKS); + if (gid_tasks < 0) + fatal("No tasks in step %d", stepx); + H5Gget_info(gid_tasks, &group_info); + ntasks = (int) group_info.nlinks; + if (ntasks <= 0) + fatal("No tasks in step %d", stepx); + + for (itx = 0; itx<ntasks; itx++) { + // Get the name of the group. 
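	/* The enumeration pattern used throughout this file: count the links
	 * with H5Gget_info(), then fetch each member name by index with
	 * H5Lget_name_by_idx().  A minimal sketch of the pattern (variable
	 * names hypothetical):
	 *
	 *	H5G_info_t info;
	 *	char name[MAX_GROUP_NAME + 1];
	 *	hsize_t n;
	 *	ssize_t len;
	 *
	 *	H5Gget_info(gid_tasks, &info);
	 *	for (n = 0; n < info.nlinks; n++) {
	 *		len = H5Lget_name_by_idx(gid_tasks, ".", H5_INDEX_NAME,
	 *					 H5_ITER_INC, n, name,
	 *					 MAX_GROUP_NAME, H5P_DEFAULT);
	 *		if ((len > 0) && (len < MAX_GROUP_NAME))
	 *			printf("member: %s\n", name);
	 *	}
	 */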
+ len = H5Lget_name_by_idx(gid_tasks, ".", H5_INDEX_NAME, + H5_ITER_INC, itx, buf, MAX_GROUP_NAME, + H5P_DEFAULT); + if ((len > 0) && (len < MAX_GROUP_NAME)) { + gid_task = H5Gopen(gid_tasks, buf, H5P_DEFAULT); + if (gid_task < 0) + fatal("Failed to open %s", buf); + } else + fatal("Illegal task name %s",buf); + task_id = get_int_attribute(gid_task, ATTR_TASKID); + node_name = get_string_attribute(gid_task, ATTR_NODENAME); + sprintf(task_name,"%s_%d", GRP_TASK, task_id); + gid_node = H5Gopen(gid_nodes, node_name, H5P_DEFAULT); + if (gid_node < 0) + fatal("Failed to open %s for Task_%d", + node_name, task_id); + gid_level = get_group(gid_node, GRP_SAMPLES); + if (gid_level < 0) + fatal("Failed to open group %s for node=%s task=%d", + GRP_SAMPLES,node_name, task_id); + _extract_series(fp, stepx, hd, gid_level, node_name, task_name); + + hd = false; + xfree(node_name); + H5Gclose(gid_level); + H5Gclose(gid_node); + H5Gclose(gid_task); + } + H5Gclose(gid_tasks); +} + +/* _extract_data() + */ +static int _extract_data(void) +{ + hid_t fid_job; + hid_t jgid_root; + hid_t jgid_step; + hid_t jgid_nodes; + hid_t jgid_node; + hid_t jgid_level; + int nsteps; + int nnodes; + int stepx; + int isx; + int len; + char jgrp_step_name[MAX_GROUP_NAME+1]; + char jgrp_node_name[MAX_GROUP_NAME+1]; + FILE *fp; + + fp = fopen(params.output, "w"); + if (fp == NULL) { + error("Failed to create output file %s -- %m", + params.output); + } + + fid_job = H5Fopen(params.input, H5F_ACC_RDONLY, H5P_DEFAULT); + if (fid_job < 0) { + error("Failed to open %s", params.input); + return -1; + } + + jgid_root = H5Gopen(fid_job, "/", H5P_DEFAULT); + if (jgid_root < 0) { + H5Fclose(fid_job); + error("Failed to open root"); + return -1; + } + + nsteps = get_int_attribute(jgid_root, ATTR_NSTEPS); + for (stepx = 0; stepx < nsteps; stepx++) { + + if ((params.step_id != -1) && (stepx != params.step_id)) + continue; + + sprintf(jgrp_step_name, "%s_%d", GRP_STEP, stepx); + jgid_step = get_group(jgid_root, jgrp_step_name); + if (jgid_step < 0) { + error("Failed to open group %s", jgrp_step_name); + continue; + } + + if (params.level && !strncasecmp(params.level, "Node:", 5)) { + + nnodes = get_int_attribute(jgid_step, ATTR_NNODES); + + jgid_nodes = get_group(jgid_step, GRP_NODES); + if (jgid_nodes < 0) { + H5Gclose(jgid_step); + error("Failed to open group %s", GRP_NODES); + continue; + } + + len = H5Lget_name_by_idx(jgid_nodes, ".", H5_INDEX_NAME, + H5_ITER_INC, 0, jgrp_node_name, + MAX_GROUP_NAME, H5P_DEFAULT); + if ((len < 0) || (len > MAX_GROUP_NAME)) { + H5Gclose(jgid_nodes); + H5Gclose(jgid_step); + error("Invalid node name %s", jgrp_node_name); + continue; + } + + jgid_node = get_group(jgid_nodes, jgrp_node_name); + if (jgid_node < 0) { + H5Gclose(jgid_nodes); + H5Gclose(jgid_step); + info("Failed to open group %s", jgrp_node_name); + continue; + } + + jgid_level = _get_series_parent(jgid_node); + if (jgid_level == -1) { + H5Gclose(jgid_node); + H5Gclose(jgid_nodes); + H5Gclose(jgid_step); + continue; + } + + _get_series_names(jgid_level); + H5Gclose(jgid_level); + H5Gclose(jgid_node); + + if (!params.series || !strcmp(params.series, "*")) { + for (isx = 0; isx < num_series; isx++) { + if (strncasecmp(series_names[isx], + GRP_TASK, + strlen(GRP_TASK)) == 0) + continue; + _extract_node_level(fp, stepx, jgid_nodes, + nnodes, + series_names[isx]); + // Now handle all tasks. 
+ } + } else if (strcasecmp(params.series, GRP_TASKS) == 0 + || strcasecmp(params.series, GRP_TASK) == 0) { + for (isx = 0; isx < num_series; isx++) { + if (strstr(series_names[isx], + GRP_TASK)) { + _extract_node_level(fp, stepx, jgid_nodes, + nnodes, + series_names[isx]); + } + } + } else { + _extract_node_level(fp, stepx, jgid_nodes, + nnodes, + params.series); + } + + _delete_string_list(series_names, num_series); + series_names = NULL; + num_series = 0; + if (!params.series || !strcmp(params.series, "*")) + _extract_all_tasks(fp, jgid_step, jgid_nodes, + nnodes, stepx); + + H5Gclose(jgid_nodes); + } else { + error("%s is an illegal level", params.level); + } + H5Gclose(jgid_step); + } + + H5Gclose(jgid_root); + H5Fclose(fid_job); + fclose(fp); + + return 0; +} + + +/* ============================================================================ + * ============================================================================ + * Functions for data item extraction + * ============================================================================ + * ========================================================================= */ + +// Get the data_set for a node +static void *_get_series_data(hid_t jgid_node, char* series, + hdf5_api_ops_t **ops_p, int *nsmp) +{ + + hid_t gid_level, gid_series; + int size_data; + void *data; + uint32_t type; + char *data_type; + hdf5_api_ops_t* ops; + + *nsmp = 0; // Initialize return arguments. + *ops_p = NULL; + + // Navigate from the node group to the data set + gid_level = get_group(jgid_node, GRP_SAMPLES); + if (gid_level == -1) { + return NULL; + } + gid_series = get_group(gid_level, series); + if (gid_series < 0) { + // This is okay, may not have ran long enough for + // a sample (srun hostname) + H5Gclose(gid_level); + return NULL; + } + data_type = get_string_attribute(gid_series, ATTR_DATATYPE); + if (!data_type) { + H5Gclose(gid_series); + H5Gclose(gid_level); + debug("No datatype in %s", series); + return NULL; + } + // Invoke the data type operator to get the data set + type = acct_gather_profile_type_from_string(data_type); + xfree(data_type); + ops = profile_factory(type); + if (ops == NULL) { + H5Gclose(gid_series); + H5Gclose(gid_level); + debug("Failed to create operations for %s", + acct_gather_profile_type_to_string(type)); + return NULL; + } + data = get_hdf5_data(gid_series, type, series, &size_data); + if (data) { + *nsmp = (size_data / ops->dataset_size()); + *ops_p = ops; + } else { + xfree(ops); + } + H5Gclose(gid_series); + H5Gclose(gid_level); + return data; +} + +static void _series_analysis(FILE *fp, bool hd, int stepx, int nseries, + int nsmp, char **series_name, char **tod, double *et, + double **all_series, uint64_t *series_smp) +{ + double *mn_series; // Min Value, each sample + double *mx_series; // Max value, each sample + double *sum_series; // Total of all series, each sample + double *smp_series; // all samples for one node + uint64_t *mn_sx; // Index of series with minimum value + uint64_t *mx_sx; // Index of series with maximum value + uint64_t *series_in_smp; // Number of series in the sample + int max_smpx = 0; + double max_smp_series = 0; + double ave_series; + int ix, isx; + + mn_series = xmalloc(nsmp * sizeof(double)); + mx_series = xmalloc(nsmp * sizeof(double)); + sum_series =xmalloc(nsmp * sizeof(double)); + mn_sx = xmalloc(nsmp * sizeof(uint64_t)); + mx_sx = xmalloc(nsmp * sizeof(uint64_t)); + series_in_smp = xmalloc(nsmp * sizeof(uint64_t)); + + for (ix = 0; ix < nsmp; ix++) { + for (isx=0; isx<nseries; isx++) 
{ + if (series_smp[isx]<nsmp && ix>=series_smp[isx]) + continue; + series_in_smp[ix]++; + smp_series = all_series[isx]; + if (smp_series) { + sum_series[ix] += smp_series[ix]; + if (mn_series[ix] == 0 + || smp_series[ix] < mn_series[ix]) { + mn_series[ix] = smp_series[ix]; + mn_sx[ix] = isx; + } + if (mx_series[ix] == 0 + || smp_series[ix] > mx_series[ix]) { + mx_series[ix] = smp_series[ix]; + mx_sx[ix] = isx; + } + } + } + } + + for (ix = 0; ix < nsmp; ix++) { + if (sum_series[ix] > max_smp_series) { + max_smpx = ix; + max_smp_series = sum_series[ix]; + } + } + + ave_series = sum_series[max_smpx] / series_in_smp[max_smpx]; + printf(" Step %d Maximum accumulated %s Value (%f) occurred " + "at %s (Elapsed Time=%d) Ave Node %f\n", + stepx, params.data_item, max_smp_series, + tod[max_smpx], (int) et[max_smpx], ave_series); + + // Put data for step + if (!hd) { + fprintf(fp,"TOD,Et,JobId,StepId,Min Node,Min %s," + "Ave %s,Max Node,Max %s,Total %s," + "Num Nodes",params.data_item,params.data_item, + params.data_item,params.data_item); + for (isx = 0; isx < nseries; isx++) { + fprintf(fp,",%s",series_name[isx]); + } + fprintf(fp,"\n"); + } + + for (ix = 0; ix < nsmp; ix++) { + fprintf(fp,"%s, %d",tod[ix], (int) et[ix]); + fprintf(fp,",%d,%d",params.job_id,stepx); + fprintf(fp,",%s,%f",series_name[mn_sx[ix]], + mn_series[ix]); + ave_series = sum_series[ix] / series_in_smp[ix]; + fprintf(fp,",%f",ave_series); + fprintf(fp,",%s,%f",series_name[mx_sx[ix]], + mx_series[ix]); + fprintf(fp,",%f",sum_series[ix]); + fprintf(fp,",%"PRIu64"",series_in_smp[ix]); + for (isx = 0; isx < nseries; isx++) { + if (series_smp[isx]<nsmp && ix>=series_smp[isx]) { + fprintf(fp,",0.0"); + } else { + smp_series = all_series[isx]; + fprintf(fp,",%f",smp_series[ix]); + } + } + fprintf(fp,"\n"); + } + + xfree(mn_series); + xfree(mx_series); + xfree(sum_series); + xfree(mn_sx); + xfree(mx_sx); +} + +static void _get_all_node_series(FILE *fp, bool hd, hid_t jgid_step, int stepx) +{ + char **tod = NULL; // Date time at each sample + char **node_name; // Node Names + double **all_series; // Pointers to all sampled for each node + double *et = NULL; // Elapsed time at each sample + uint64_t *series_smp; // Number of samples in this series + + hid_t jgid_nodes, jgid_node; + int nnodes, ndx, len, nsmp = 0, nitem = -1; + char jgrp_node_name[MAX_GROUP_NAME+1]; + void* series_data = NULL; + hdf5_api_ops_t* ops; + + nnodes = get_int_attribute(jgid_step, ATTR_NNODES); + // allocate node arrays + + series_smp = xmalloc(nnodes * (sizeof(uint64_t))); + if (series_smp == NULL) { + fatal("Failed to get memory for node_samples"); + return; /* fix for CLANG false positive */ + } + + node_name = xmalloc(nnodes * (sizeof(char*))); + if (node_name == NULL) { + fatal("Failed to get memory for node_name"); + return; /* fix for CLANG false positive */ + } + + all_series = xmalloc(nnodes * (sizeof(double*))); + if (all_series == NULL) { + fatal("Failed to get memory for all_series"); + return; /* fix for CLANG false positive */ + } + + jgid_nodes = get_group(jgid_step, GRP_NODES); + if (jgid_nodes < 0) + fatal("Failed to open group %s", GRP_NODES); + + for (ndx=0; ndx<nnodes; ndx++) { + len = H5Lget_name_by_idx(jgid_nodes, ".", H5_INDEX_NAME, + H5_ITER_INC, ndx, jgrp_node_name, + MAX_GROUP_NAME, H5P_DEFAULT); + if ((len < 0) || (len > MAX_GROUP_NAME)) { + debug("Invalid node name=%s", jgrp_node_name); + continue; + } + node_name[ndx] = xstrdup(jgrp_node_name); + jgid_node = get_group(jgid_nodes, jgrp_node_name); + if (jgid_node < 0) { + 
debug("Failed to open group %s", jgrp_node_name); + continue; + } + ops = NULL; + nitem = 0; + series_data = _get_series_data(jgid_node, params.series, + &ops, &nitem); + if (series_data==NULL || nitem==0 || ops==NULL) { + if (ops != NULL) + xfree(ops); + continue; + } + all_series[ndx] = ops->get_series_values( + params.data_item, series_data, nitem); + if (!all_series[ndx]) + fatal("No data item %s",params.data_item); + series_smp[ndx] = nitem; + if (ndx == 0) { + nsmp = nitem; + tod = ops->get_series_tod(series_data, nitem); + et = ops->get_series_values("time", + series_data, nitem); + } else { + if (nitem > nsmp) { + // new largest number of samples + _delete_string_list(tod, nsmp); + xfree(et); + nsmp = nitem; + tod = ops->get_series_tod(series_data, + nitem); + et = ops->get_series_values("time", + series_data, nitem); + } + } + xfree(ops); + xfree(series_data); + H5Gclose(jgid_node); + } + if (nsmp == 0) { + // May be bad series name + info("No values %s for series %s found in step %d", + params.data_item,params.series, + stepx); + } else { + _series_analysis(fp, hd, stepx, nnodes, nsmp, + node_name, tod, et, all_series, series_smp); + } + for (ndx=0; ndx<nnodes; ndx++) { + xfree(node_name[ndx]); + xfree(all_series[ndx]); + } + xfree(node_name); + xfree(all_series); + xfree(series_smp); + _delete_string_list(tod, nsmp); + xfree(et); + + H5Gclose(jgid_nodes); + +} + +static void _get_all_task_series(FILE *fp, bool hd, hid_t jgid_step, int stepx) +{ + + hid_t jgid_tasks, jgid_task = 0, jgid_nodes, jgid_node; + H5G_info_t group_info; + int ntasks,itx, tid; + uint64_t *task_id; + char **task_node_name; /* Node Name for each task */ + char **tod = NULL; /* Date time at each sample */ + char **series_name; /* Node Names */ + double **all_series; /* Pointers to all sampled for each node */ + double *et = NULL; /* Elapsed time at each sample */ + uint64_t *series_smp; /* Number of samples in this series */ + int nnodes, ndx, len, nsmp = 0, nitem = -1; + char jgrp_node_name[MAX_GROUP_NAME+1]; + char jgrp_task_name[MAX_GROUP_NAME+1]; + char buf[MAX_GROUP_NAME+1]; + void* series_data = NULL; + hdf5_api_ops_t* ops; + + jgid_nodes = get_group(jgid_step, GRP_NODES); + if (jgid_nodes < 0) + fatal("Failed to open group %s", GRP_NODES); + jgid_tasks = get_group(jgid_step, GRP_TASKS); + if (jgid_tasks < 0) + fatal("No tasks in step %d", stepx); + H5Gget_info(jgid_tasks, &group_info); + ntasks = (int) group_info.nlinks; + if (ntasks <= 0) + fatal("No tasks in step %d", stepx); + task_id = xmalloc(ntasks*sizeof(uint64_t)); + if (task_id == NULL) + fatal("Failed to get memory for task_ids"); + task_node_name = xmalloc(ntasks*sizeof(char*)); + if (task_node_name == NULL) + fatal("Failed to get memory for task_node_names"); + + for (itx = 0; itx<ntasks; itx++) { + // Get the name of the group. 
+ len = H5Lget_name_by_idx(jgid_tasks, ".", H5_INDEX_NAME, + H5_ITER_INC, itx, buf, MAX_GROUP_NAME, + H5P_DEFAULT); + if ((len > 0) && (len < MAX_GROUP_NAME)) { + jgid_task = H5Gopen(jgid_tasks, buf, H5P_DEFAULT); + if (jgid_task < 0) + fatal("Failed to open %s", buf); + } else + fatal("Illegal task name %s",buf); + task_id[itx] = get_int_attribute(jgid_task, ATTR_TASKID); + task_node_name[itx] = get_string_attribute(jgid_task, + ATTR_NODENAME); + H5Gclose(jgid_task); + } + H5Gclose(jgid_tasks); + + nnodes = get_int_attribute(jgid_step, ATTR_NNODES); + // allocate node arrays + series_smp = (uint64_t*) xmalloc(ntasks*(sizeof(uint64_t))); + if (series_smp == NULL) { + fatal("Failed to get memory for node_samples"); + return; /* Fix for CLANG false positive */ + } + series_name = (char**) xmalloc(ntasks*(sizeof(char*))); + if (series_name == NULL) { + fatal("Failed to get memory for series_name"); + return; /* Fix for CLANG false positive */ + } + all_series = (double**) xmalloc(ntasks*(sizeof(double*))); + if (all_series == NULL) { + fatal("Failed to get memory for all_series"); + return; /* Fix for CLANG false positive */ + } + + for (ndx=0; ndx<nnodes; ndx++) { + + len = H5Lget_name_by_idx(jgid_nodes, ".", H5_INDEX_NAME, + H5_ITER_INC, ndx, jgrp_node_name, + MAX_GROUP_NAME, H5P_DEFAULT); + if ((len < 0) || (len > MAX_GROUP_NAME)) + fatal("Invalid node name=%s", jgrp_node_name); + jgid_node = get_group(jgid_nodes, jgrp_node_name); + + if (jgid_node < 0) + fatal("Failed to open group %s", jgrp_node_name); + for (itx = 0; itx<ntasks; itx++) { + if (strcmp(jgrp_node_name, task_node_name[itx]) != 0) + continue; + tid = task_id[itx]; + series_name[itx] = xstrdup_printf("%s_%d %s", + GRP_TASK,tid,jgrp_node_name); + sprintf(jgrp_task_name,"%s_%d",GRP_TASK, tid); + + ops = NULL; + nitem = 0; + series_data = _get_series_data(jgid_node, + jgrp_task_name, &ops, &nitem); + if (series_data==NULL || nitem==0 || ops==NULL) { + if (ops != NULL) + xfree(ops); + continue; + } + all_series[itx] = ops->get_series_values( + params.data_item, series_data, nitem); + if (!all_series[ndx]) + fatal("No data item %s",params.data_item); + series_smp[itx] = nitem; + if (nsmp == 0) { + nsmp = nitem; + tod = ops->get_series_tod(series_data, nitem); + et = ops->get_series_values("time", + series_data, nitem); + } else { + if (nitem > nsmp) { + // new largest number of samples + _delete_string_list(tod, nsmp); + xfree(et); + nsmp = nitem; + tod = ops->get_series_tod(series_data, + nitem); + et = ops->get_series_values("time", + series_data, nitem); + } + } + xfree(ops); + xfree(series_data); + } + H5Gclose(jgid_node); + } + if (nsmp == 0) { + // May be bad series name + info("No values %s for series %s found in step %d", + params.data_item,params.series, + stepx); + } else { + _series_analysis(fp, hd, stepx, ntasks, nsmp, + series_name, tod, et, all_series, series_smp); + } + for (itx=0; itx<ntasks; itx++) { + xfree(all_series[itx]); + } + xfree(series_name); + xfree(all_series); + xfree(series_smp); + _delete_string_list(tod, nsmp); + xfree(et); + _delete_string_list(task_node_name, ntasks); + xfree(task_id); + + H5Gclose(jgid_nodes); +} + +static int _series_data(void) +{ + FILE *fp; + bool hd = false; + hid_t fid_job; + hid_t jgid_root; + hid_t jgid_step; + int nsteps; + int stepx; + char jgrp_step_name[MAX_GROUP_NAME + 1]; + + fp = fopen(params.output, "w"); + if (fp == NULL) { + error("Failed open file %s -- %m", params.output); + return -1; + } + + fid_job = H5Fopen(params.input, H5F_ACC_RDONLY, H5P_DEFAULT); + if 
(fid_job < 0) { + fclose(fp); + error("Failed to open %s", params.input); + return -1; + } + + jgid_root = H5Gopen(fid_job, "/", H5P_DEFAULT); + if (jgid_root < 0) { + fclose(fp); + H5Fclose(fid_job); + error("Failed to open root"); + return -1; + } + + nsteps = get_int_attribute(jgid_root, ATTR_NSTEPS); + for (stepx = 0; stepx < nsteps; stepx++) { + + if ((params.step_id != -1) && (stepx != params.step_id)) + continue; + + sprintf(jgrp_step_name, "%s_%d", GRP_STEP, stepx); + jgid_step = get_group(jgid_root, jgrp_step_name); + if (jgid_step < 0) { + error("Failed to open group %s", jgrp_step_name); + return -1; + } + + if (strncmp(params.series,GRP_TASK,strlen(GRP_TASK)) == 0) + _get_all_task_series(fp,hd,jgid_step, stepx); + else + _get_all_node_series(fp,hd,jgid_step, stepx); + + hd = true; + H5Gclose(jgid_step); + } + + H5Gclose(jgid_root); + H5Fclose(fid_job); + fclose(fp); + + return 0; +} diff --git a/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/sh5util_old.h b/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/sh5util_old.h new file mode 100644 index 0000000000000000000000000000000000000000..575ec2583c6bb42922607213a63d782fb87a1a69 --- /dev/null +++ b/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/sh5util_old.h @@ -0,0 +1,45 @@ +/****************************************************************************\ + * sh5util_old.h + ***************************************************************************** + * Copyright (C) 2015 SchedMD LLC. + * Written by Danny Auble <da@schedmd.com> + * + * Provide support for the old version of sh5util. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+\****************************************************************************/ +#ifndef __ACCT_SH5UTIL_OLD_H__ +#define __ACCT_SH5UTIL_OLD_H__ + +#include <stdlib.h> + +extern int run_old(int argc, char **argv); + +#endif diff --git a/src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.c b/src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.c index e6e7cc0211622520cb849f58899e9db8e7a5348c..b3399f6ba865a43f45dfb51e26dc727fe6ec1578 100644 --- a/src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.c +++ b/src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.c @@ -6,11 +6,11 @@ * Copyright (C) 2013 Bull S. A. S. * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois. * - * Written by Rod Schultz <rod.schultz@bull.com> - * * Copyright (C) 2013 SchedMD LLC * - * Written by Danny Auble <da@schedmd.com> + * Initially written by Rod Schultz <rod.schultz@bull.com> @ Bull + * and Danny Auble <da@schedmd.com> @ SchedMD. + * Adapted by Yoann Blein <yoann.blein@bull.net> @ Bull. * * This file is part of SLURM, a resource management program. * For details, see <http://www.schedmd.com/slurmdocs/>. @@ -69,43 +69,75 @@ #include "src/common/read_config.h" #include "src/common/proc_args.h" #include "src/common/xstring.h" +#include "src/common/slurm_acct_gather_profile.h" #include "../hdf5_api.h" - -typedef enum { - SH5UTIL_MODE_MERGE, - SH5UTIL_MODE_EXTRACT, - SH5UTIL_MODE_ITEM_EXTRACT, -} sh5util_mode_t; - -typedef struct { - char *dir; - int help; - char *input; - int job_id; - bool keepfiles; - char *level; - sh5util_mode_t mode; - char *node; - char *output; - char *series; - char *data_item; - int step_id; - char *user; - int verbose; -} sh5util_opts_t; - - -static sh5util_opts_t params; -static char **series_names; -static int num_series; - +#include "sh5util.h" + +#include "libsh5util_old/sh5util_old.h" + +#define MAX_PROFILE_PATH 1024 +// #define MAX_ATTR_NAME 64 +#define MAX_GROUP_NAME 64 +// #define MAX_DATASET_NAME 64 + +// #define ATTR_NODENAME "Node Name" +// #define ATTR_STARTTIME "Start Time" +#define ATTR_NSTEPS "Number of Steps" +#define ATTR_NNODES "Number of Nodes" +// #define ATTR_NTASKS "Number of Tasks" +// #define ATTR_TASKID "Task Id" +// #define ATTR_CPUPERTASK "CPUs per Task" +// #define ATTR_DATATYPE "Data Type" +// #define ATTR_SUBDATATYPE "Subdata Type" +// #define ATTR_STARTTIME "Start Time" +// #define ATTR_STARTSEC "Start Second" +// #define SUBDATA_DATA "Data" +// #define SUBDATA_NODE "Node" +// #define SUBDATA_SAMPLE "Sample" +// #define SUBDATA_SERIES "Series" +// #define SUBDATA_TOTAL "Total" +// #define SUBDATA_SUMMARY "Summary" + +#define GRP_ENERGY "Energy" +#define GRP_LUSTRE "Lustre" +// #define GRP_STEP "Step" +#define GRP_STEPS "Steps" +#define GRP_NODES "Nodes" +// #define GRP_NODE "Node" +#define GRP_NETWORK "Network" +// #define GRP_SAMPLES "Time Series" +// #define GRP_SAMPLE "Sample" +// #define GRP_TASKS "Tasks" +#define GRP_TASK "Task" +// #define GRP_TOTALS "Totals" + +// Data types supported by all HDF5 plugins of this type + +sh5util_opts_t params; + +typedef struct table { + const char *step; + const char *node; + const char *group; + const char *name; +} table_t; + +static FILE* output_file; +static bool group_mode = false; +static const char *current_step; +static const char *current_node; + +static void _cleanup(void); static int _set_options(const int argc, char **argv); static int _merge_step_files(void); -static int _extract_data(void); -static int _series_data(void); +static int _extract_series(void); +static int _extract_item(void); static 
int _check_params(void); static void _free_options(void); static void _remove_empty_output(void); +static int _list_items(void); +static int _fields_intersection(hid_t fid_job, List tables, List fields); + static void _help_msg(void) { @@ -113,6 +145,10 @@ static void _help_msg(void) Usage sh5util [<OPTION>] -j <job[.stepid]>\n" "\n" "Valid <OPTION> values are:\n" +" -L, --list Print the items of a series contained in a job file.\n" +" -i, --input merged file to extract from (default ./job_$jobid.h5)\n" +" -s, --series Name of series:\n" +" Energy | Lustre | Network | Tasks\n" " -E, --extract Extract data series from job file.\n" " -i, --input merged file to extract from (default ./job_$jobid.h5)\n" " -N, --node Node name to extract (default is all)\n" @@ -143,6 +179,8 @@ Usage sh5util [<OPTION>] -j <job[.stepid]>\n" " --usage Display brief usage message\n"); } + + int main(int argc, char **argv) { @@ -156,54 +194,49 @@ main(int argc, char **argv) if (cc < 0) goto ouch; - profile_init(); - switch (params.mode) { - case SH5UTIL_MODE_MERGE: - info("Merging node-step files into %s", params.output); cc = _merge_step_files(); - if (cc < 0) - goto ouch; break; - case SH5UTIL_MODE_EXTRACT: - info("Extracting job data from %s into %s", params.input, params.output); - cc = _extract_data(); - if (cc < 0) - goto ouch; + cc = _extract_series(); break; - case SH5UTIL_MODE_ITEM_EXTRACT: - info("Extracting '%s' from '%s' data from %s into %s", params.data_item, params.series, params.input, params.output); - cc = _series_data(); - if (cc < 0) - goto ouch; + cc = _extract_item(); + break; + case SH5UTIL_MODE_ITEM_LIST: + info("Listing items from %s", params.input); + cc = _list_items(); break; - default: error("Unknown type %d", params.mode); break; } - _remove_empty_output(); - profile_fini(); - _free_options(); + if (cc == SLURM_PROTOCOL_VERSION_ERROR) + cc = run_old(argc, argv); +ouch: + _cleanup(); - return 0; + return cc; +} -ouch: +static void _cleanup(void) +{ _remove_empty_output(); _free_options(); - - return -1; + log_fini(); + slurm_conf_destroy(); + jobacct_gather_fini(); + acct_gather_profile_fini(); + acct_gather_conf_destroy(); } /* _free_options() @@ -219,26 +252,15 @@ _free_options(void) xfree(params.data_item); xfree(params.user); } -/* - * delete list of strings - * - * Parameters - * list - xmalloc'd list of pointers of xmalloc'd strings. 
- * listlen - number of strings in the list - */ -static void _delete_string_list(char **list, int listLen) -{ - int ix; - - if (list == NULL) - return; - for (ix = 0; ix < listLen; ix++) { - xfree(list[ix]); - } - - xfree(list); +static void _void_free(void *str) +{ + xfree(str); +} +static int _str_cmp(void *str1, void *str2) +{ + return !xstrcmp((const char *)str1, (const char *)str2); } static void _remove_empty_output(void) @@ -284,6 +306,7 @@ static int _set_options(const int argc, char **argv) {"jobs", required_argument, 0, 'j'}, {"input", required_argument, 0, 'i'}, {"level", required_argument, 0, 'l'}, + {"list", no_argument, 0, 'L'}, {"node", required_argument, 0, 'N'}, {"output", required_argument, 0, 'o'}, {"profiledir", required_argument, 0, 'p'}, @@ -297,14 +320,24 @@ static int _set_options(const int argc, char **argv) log_init(xbasename(argv[0]), logopt, 0, NULL); +#if DEBUG + /* Move HDF5 trace printing to log file instead of stderr */ + H5Eset_auto(H5E_DEFAULT, (herr_t (*)(hid_t, void *))H5Eprint, + log_fp()); +#else + /* Silent HDF5 errors */ + H5Eset_auto(H5E_DEFAULT, NULL, NULL); +#endif + _init_opts(); - while ((cc = getopt_long(argc, argv, "d:Ehi:Ij:l:N:o:p:s:S:u:UvV", + while ((cc = getopt_long(argc, argv, "d:Ehi:Ij:l:LN:o:p:s:Su:UvV", long_options, &option_index)) != EOF) { switch (cc) { case 'd': params.data_item = xstrdup(optarg); - params.data_item = xstrtolower(params.data_item); + /* params.data_item = + xstrtolower(params.data_item); */ break; case 'E': params.mode = SH5UTIL_MODE_EXTRACT; @@ -312,6 +345,9 @@ static int _set_options(const int argc, char **argv) case 'I': params.mode = SH5UTIL_MODE_ITEM_EXTRACT; break; + case 'L': + params.mode = SH5UTIL_MODE_ITEM_LIST; + break; case 'h': _help_msg(); return -1; @@ -341,8 +377,10 @@ static int _set_options(const int argc, char **argv) if (strcmp(optarg, GRP_ENERGY) && strcmp(optarg, GRP_LUSTRE) && strcmp(optarg, GRP_NETWORK) - && strncmp(optarg,GRP_TASK,strlen(GRP_TASK))) { - error("Bad value for --series=\"%s\"", optarg); + && strncmp(optarg,GRP_TASK, + strlen(GRP_TASK))) { + error("Bad value for --series=\"%s\"", + optarg); return -1; } params.series = xstrdup(optarg); @@ -352,7 +390,8 @@ static int _set_options(const int argc, char **argv) break; case 'u': if (uid_from_string(optarg, &u) < 0) { - error("No such user --uid=\"%s\"", optarg); + error("No such user --uid=\"%s\"", + optarg); return -1; } params.user = uid_to_string(u); @@ -412,6 +451,8 @@ _check_params(void) if (!params.output) params.output = xstrdup_printf( "./extract_%d.csv", params.job_id); + if (!params.series) + fatal("Must specify series option --series"); } if (params.mode == SH5UTIL_MODE_ITEM_EXTRACT) { @@ -430,6 +471,13 @@ _check_params(void) params.data_item, params.job_id); } + if (params.mode == SH5UTIL_MODE_ITEM_LIST) { + if (!params.input) + params.input = xstrdup_printf( + "./job_%d.h5", params.job_id); + if (!params.series) + fatal("Must specify series option --series"); + } if (!params.output) params.output = xstrdup_printf("./job_%d.h5", params.job_id); @@ -437,466 +485,44 @@ _check_params(void) return 0; } -/* ============================================================================ - * ============================================================================ - * Functions for merging samples from node step files into a job file - * ============================================================================ - * ========================================================================= */ - -static void* 
_get_all_samples(hid_t gid_series, char* nam_series, uint32_t type, - int nsamples) -{ - void* data = NULL; - - hid_t id_data_set, dtyp_memory, g_sample, sz_dest; - herr_t ec; - int smpx ,len; - void *data_prior = NULL, *data_cur = NULL; - char name_sample[MAX_GROUP_NAME+1]; - hdf5_api_ops_t* ops; - - ops = profile_factory(type); - if (ops == NULL) { - error("Failed to create operations for %s", - acct_gather_profile_type_to_string(type)); - return NULL; - } - data = (*(ops->init_job_series))(nsamples); - if (data == NULL) { - xfree(ops); - error("Failed to get memory for combined data"); - return NULL; - } - dtyp_memory = (*(ops->create_memory_datatype))(); - if (dtyp_memory < 0) { - xfree(ops); - xfree(data); - error("Failed to create %s memory datatype", - acct_gather_profile_type_to_string(type)); - return NULL; - } - for (smpx=0; smpx<nsamples; smpx++) { - len = H5Lget_name_by_idx(gid_series, ".", H5_INDEX_NAME, - H5_ITER_INC, smpx, name_sample, - MAX_GROUP_NAME, H5P_DEFAULT); - if (len<1 || len>MAX_GROUP_NAME) { - error("Invalid group name %s", name_sample); - continue; - } - g_sample = H5Gopen(gid_series, name_sample, H5P_DEFAULT); - if (g_sample < 0) { - info("Failed to open %s", name_sample); - } - id_data_set = H5Dopen(g_sample, get_data_set_name(name_sample), - H5P_DEFAULT); - if (id_data_set < 0) { - H5Gclose(g_sample); - error("Failed to open %s dataset", - acct_gather_profile_type_to_string(type)); - continue; - } - sz_dest = (*(ops->dataset_size))(); - data_cur = xmalloc(sz_dest); - if (data_cur == NULL) { - H5Dclose(id_data_set); - H5Gclose(g_sample); - error("Failed to get memory for prior data"); - continue; - } - ec = H5Dread(id_data_set, dtyp_memory, H5S_ALL, H5S_ALL, - H5P_DEFAULT, data_cur); - if (ec < 0) { - xfree(data_cur); - H5Dclose(id_data_set); - H5Gclose(g_sample); - error("Failed to read %s data", - acct_gather_profile_type_to_string(type)); - continue; - } - (*(ops->merge_step_series))(g_sample, data_prior, data_cur, - data+(smpx)*sz_dest); - - xfree(data_prior); - data_prior = data_cur; - H5Dclose(id_data_set); - H5Gclose(g_sample); - } - xfree(data_cur); - H5Tclose(dtyp_memory); - xfree(ops); - - return data; -} - -static void _merge_series_data(hid_t jgid_tasks, hid_t jg_node, hid_t nsg_node) +/* Copy the group "/{NodeName}" of the hdf5 file file_name into the location + * jgid_nodes */ +static int _merge_node_step_data(char* file_name, char* node_name, + hid_t jgid_nodes, hid_t jgid_tasks) { - hid_t jg_samples, nsg_samples; - hid_t g_series, g_series_total = -1; - hsize_t num_samples, n_series; - int idsx, len; - void *data = NULL, *series_total = NULL; - uint32_t type; - char *data_type; - char nam_series[MAX_GROUP_NAME+1]; - hdf5_api_ops_t* ops = NULL; - H5G_info_t group_info; - H5O_info_t object_info; - - if (jg_node < 0) { - info("Job Node is not HDF5 object"); - return; - } - if (nsg_node < 0) { - info("Node-Step is not HDF5 object"); - return; - } - - jg_samples = H5Gcreate(jg_node, GRP_SAMPLES, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - if (jg_samples < 0) { - info("Failed to create job node Samples"); - return; - } - nsg_samples = get_group(nsg_node, GRP_SAMPLES); - if (nsg_samples < 0) { - H5Gclose(jg_samples); - debug("Failed to get node-step Samples"); - return; - } - H5Gget_info(nsg_samples, &group_info); - n_series = group_info.nlinks; - if (n_series < 1) { - // No series? 
- H5Gclose(jg_samples); - H5Gclose(nsg_samples); - info("No Samples"); - return; - } - for (idsx = 0; idsx < n_series; idsx++) { - H5Oget_info_by_idx(nsg_samples, ".", H5_INDEX_NAME, H5_ITER_INC, - idsx, &object_info, H5P_DEFAULT); - if (object_info.type != H5O_TYPE_GROUP) - continue; + hid_t fid_nodestep; + char group_name[MAX_GROUP_NAME+1]; - len = H5Lget_name_by_idx(nsg_samples, ".", H5_INDEX_NAME, - H5_ITER_INC, idsx, nam_series, - MAX_GROUP_NAME, H5P_DEFAULT); - if (len<1 || len>MAX_GROUP_NAME) { - info("Invalid group name %s", nam_series); - continue; - } - g_series = H5Gopen(nsg_samples, nam_series, H5P_DEFAULT); - if (g_series < 0) { - info("Failed to open %s", nam_series); - continue; - } - H5Gget_info(g_series, &group_info); - num_samples = group_info.nlinks; - if (num_samples <= 0) { - H5Gclose(g_series); - info("_series %s has no samples", nam_series); - continue; - } - // Get first sample in series to find out how big the data is. - data_type = get_string_attribute(g_series, ATTR_DATATYPE); - if (!data_type) { - H5Gclose(g_series); - info("Failed to get datatype for Time Series Dataset"); - continue; - } - type = acct_gather_profile_type_from_string(data_type); - xfree(data_type); - data = _get_all_samples(g_series, nam_series, type, - num_samples); - if (data == NULL) { - H5Gclose(g_series); - info("Failed to get memory for Time Series Dataset"); - continue; - } - put_hdf5_data(jg_samples, type, SUBDATA_SERIES, nam_series, - data, num_samples); - ops = profile_factory(type); - if (ops == NULL) { - xfree(data); - H5Gclose(g_series); - info("Failed to create operations for %s", - acct_gather_profile_type_to_string(type)); - continue; - } - series_total = (*(ops->series_total))(num_samples, data); - if (series_total != NULL) { - // Totals for series attaches to node - g_series_total = make_group(jg_node, GRP_TOTALS); - if (g_series_total < 0) { - H5Gclose(g_series); - xfree(series_total); - xfree(data); - xfree(ops); - info("Failed to make Totals for Node"); - continue; - } - put_hdf5_data(g_series_total, type, - SUBDATA_SUMMARY, - nam_series, series_total, 1); - H5Gclose(g_series_total); - } - xfree(series_total); - xfree(ops); - xfree(data); - H5Gclose(g_series); - } - - return; -} - -/* ============================================================================ - * Functions for merging tasks data into a job file - ==========================================================================*/ - -static void _merge_task_totals(hid_t jg_tasks, hid_t nsg_node, char* node_name) -{ - hid_t jg_task, jg_totals, nsg_totals, - g_total, nsg_tasks, nsg_task = -1; - hsize_t nobj, ntasks = -1; - int i, len, taskx, taskid, taskcpus, size_data; - void *data; - uint32_t type; - char buf[MAX_GROUP_NAME+1]; - char group_name[MAX_GROUP_NAME+1]; - H5G_info_t group_info; - - if (jg_tasks < 0) { - info("Job Tasks is not HDF5 object"); - return; - } - if (nsg_node < 0) { - info("Node-Step is not HDF5 object"); - return; - } - - nsg_tasks = get_group(nsg_node, GRP_TASKS); - if (nsg_tasks < 0) { - debug("No Tasks group in node-step file"); - return; - } - - H5Gget_info(nsg_tasks, &group_info); - ntasks = group_info.nlinks; - for (taskx = 0; ((int)ntasks>0) && (taskx<((int)ntasks)); taskx++) { - // Get the name of the group. 
- len = H5Lget_name_by_idx(nsg_tasks, ".", H5_INDEX_NAME, - H5_ITER_INC, taskx, buf, - MAX_GROUP_NAME, H5P_DEFAULT); - if (len<1 || len>MAX_GROUP_NAME) { - info("Invalid group name %s", buf); - continue; - } - nsg_task = H5Gopen(nsg_tasks, buf, H5P_DEFAULT); - if (nsg_task < 0) { - debug("Failed to open %s", buf); - continue; - } - taskid = get_int_attribute(nsg_task, ATTR_TASKID); - sprintf(group_name, "%s_%d", GRP_TASK, taskid); - jg_task = H5Gcreate(jg_tasks, group_name, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - if (jg_task < 0) { - H5Gclose(nsg_task); - info("Failed to create job task group"); - continue; - } - put_string_attribute(jg_task, ATTR_NODENAME, node_name); - put_int_attribute(jg_task, ATTR_TASKID, taskid); - taskcpus = get_int_attribute(nsg_task, ATTR_CPUPERTASK); - put_int_attribute(jg_task, ATTR_CPUPERTASK, taskcpus); - nsg_totals = get_group(nsg_task, GRP_TOTALS); - if (nsg_totals < 0) { - H5Gclose(jg_task); - H5Gclose(nsg_task); - continue; - } - jg_totals = H5Gcreate(jg_task, GRP_TOTALS, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - if (jg_totals < 0) { - H5Gclose(jg_task); - H5Gclose(nsg_task); - info("Failed to create job task totals"); - continue; - } - H5Gget_info(nsg_totals, &group_info); - nobj = group_info.nlinks; - for (i = 0; (nobj>0) && (i<nobj); i++) { - // Get the name of the group. - len = H5Lget_name_by_idx(nsg_totals, ".", H5_INDEX_NAME, - H5_ITER_INC, i, buf, - MAX_GROUP_NAME, H5P_DEFAULT); - - if (len<1 || len>MAX_GROUP_NAME) { - info("Invalid group name %s", buf); - continue; - } - g_total = H5Gopen(nsg_totals, buf, H5P_DEFAULT); - if (g_total < 0) { - info("Failed to open %s", buf); - continue; - } - type = get_uint32_attribute(g_total, ATTR_DATATYPE); - if (!type) { - H5Gclose(g_total); - info("No %s attribute", ATTR_DATATYPE); - continue; - } - data = get_hdf5_data(g_total, type, buf, &size_data); - if (data == NULL) { - H5Gclose(g_total); - info("Failed to get group %s type %s data", buf, - acct_gather_profile_type_to_string(type)); - continue; - } - put_hdf5_data(jg_totals, type, SUBDATA_DATA, - buf, data, 1); - xfree(data); - H5Gclose(g_total); - } - H5Gclose(nsg_totals); - H5Gclose(nsg_task); - H5Gclose(jg_totals); - H5Gclose(jg_task); - } - H5Gclose(nsg_tasks); -} - -/* ============================================================================ - * Functions for merging node totals into a job file - ==========================================================================*/ - -static void _merge_node_totals(hid_t jg_node, hid_t nsg_node) -{ - hid_t jg_totals, nsg_totals, g_total; - hsize_t nobj; - int i, len, size_data; - void *data; - uint32_t type; - char buf[MAX_GROUP_NAME+1]; - H5G_info_t group_info; - - if (jg_node < 0) { - info("Job Node is not HDF5 object"); - return; - } - if (nsg_node < 0) { - info("Node-Step is not HDF5 object"); - return; - } - jg_totals = H5Gcreate(jg_node, GRP_TOTALS, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - if (jg_totals < 0) { - info("Failed to create job node totals"); - return; - } - nsg_totals = get_group(nsg_node, GRP_TOTALS); - if (nsg_totals < 0) { - H5Gclose(jg_totals); - return; - } - - H5Gget_info(nsg_totals, &group_info); - nobj = group_info.nlinks; - for (i = 0; (nobj>0) && (i<nobj); i++) { - // Get the name of the group. 
- len = H5Lget_name_by_idx(nsg_totals, ".", H5_INDEX_NAME, - H5_ITER_INC, i, buf, - MAX_GROUP_NAME, H5P_DEFAULT); - if (len<1 || len>MAX_GROUP_NAME) { - info("invalid group name %s", buf); - continue; - } - g_total = H5Gopen(nsg_totals, buf, H5P_DEFAULT); - if (g_total < 0) { - info("Failed to open %s", buf); - continue; - } - type = get_uint32_attribute(g_total, ATTR_DATATYPE); - if (!type) { - H5Gclose(g_total); - info("No %s attribute", ATTR_DATATYPE); - continue; - } - data = get_hdf5_data(g_total, type, buf, &size_data); - if (data == NULL) { - H5Gclose(g_total); - info("Failed to get group %s type %s data", - buf, acct_gather_profile_type_to_string(type)); - continue; - } - put_hdf5_data(jg_totals, type, SUBDATA_DATA, buf, data, 1); - xfree(data); - H5Gclose(g_total); - } - H5Gclose(nsg_totals); - H5Gclose(jg_totals); - return; -} - -/* ============================================================================ - * Functions for merging step data into a job file - ==========================================================================*/ - -static void _merge_node_step_data(hid_t fid_job, char* file_name, int nodeIndex, - char* node_name, hid_t jgid_nodes, - hid_t jgid_tasks) -{ - hid_t fid_nodestep, jgid_node, nsgid_root, nsgid_node; - char *start_time; - char group_name[MAX_GROUP_NAME+1]; - - jgid_node = H5Gcreate(jgid_nodes, node_name, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - if (jgid_node < 0) { - error("Failed to create group %s",node_name); - return; - } - put_string_attribute(jgid_node, ATTR_NODENAME, node_name); - // Process node step file - // Open the file and the node group. fid_nodestep = H5Fopen(file_name, H5F_ACC_RDONLY, H5P_DEFAULT); if (fid_nodestep < 0) { - H5Gclose(jgid_node); error("Failed to open %s",file_name); - return; - } - nsgid_root = H5Gopen(fid_nodestep,"/", H5P_DEFAULT); - sprintf(group_name, "/%s_%s", GRP_NODE, node_name); - nsgid_node = H5Gopen(nsgid_root, group_name, H5P_DEFAULT); - if (nsgid_node < 0) { - H5Gclose(fid_nodestep); - H5Gclose(jgid_node); - error("Failed to open node group"); - return;; + return SLURM_ERROR; } - start_time = get_string_attribute(nsgid_node,ATTR_STARTTIME); - if (start_time == NULL) { - info("No %s attribute", ATTR_STARTTIME); - } else { - put_string_attribute(jgid_node, ATTR_STARTTIME, start_time); - xfree(start_time); + + sprintf(group_name, "/%s", node_name); + hid_t ocpypl_id = H5Pcreate(H5P_OBJECT_COPY); /* default copy */ + hid_t lcpl_id = H5Pcreate(H5P_LINK_CREATE); /* parameters */ + if (H5Ocopy(fid_nodestep, group_name, jgid_nodes, node_name, + ocpypl_id, lcpl_id) < 0) { + debug("Failed to copy node step data of %s into the job file, " + "trying with old method", + node_name); + return SLURM_PROTOCOL_VERSION_ERROR; } - _merge_node_totals(jgid_node, nsgid_node); - _merge_task_totals(jgid_tasks, nsgid_node, node_name); - _merge_series_data(jgid_tasks, jgid_node, nsgid_node); - H5Gclose(nsgid_node); + H5Fclose(fid_nodestep); - H5Gclose(jgid_node); if (!params.keepfiles) remove(file_name); - return; + return SLURM_SUCCESS; } +/* Look for step and node files and merge them together into one job file */ static int _merge_step_files(void) { hid_t fid_job = -1; + hid_t jgid_steps = -1; hid_t jgid_step = -1; hid_t jgid_nodes = -1; hid_t jgid_tasks = -1; @@ -907,7 +533,6 @@ static int _merge_step_files(void) char step_path[MAX_PROFILE_PATH+1]; char jgrp_step_name[MAX_GROUP_NAME+1]; char jgrp_nodes_name[MAX_GROUP_NAME+1]; - char jgrp_tasks_name[MAX_GROUP_NAME+1]; char *step_node; char *pos_char; char *stepno; @@ -917,6 
+542,7 @@ static int _merge_step_files(void) int max_step = -1; int jobid, stepid; bool found_files = false; + int rc = SLURM_SUCCESS; sprintf(step_dir, "%s/%s", params.dir, params.user); @@ -973,12 +599,18 @@ static int _merge_step_files(void) return -1; } found_files = true; + + jgid_steps = make_group(fid_job, GRP_STEPS); + if (jgid_steps < 0) { + error("Failed to create group %s", GRP_STEPS); + continue; + } } if (nodex == 0) { num_steps++; - sprintf(jgrp_step_name, "/%s_%d", GRP_STEP, + sprintf(jgrp_step_name, "/%s/%d", GRP_STEPS, stepx); jgid_step = make_group(fid_job, jgrp_step_name); @@ -997,6 +629,7 @@ static int _merge_step_files(void) continue; } + /* sprintf(jgrp_tasks_name,"%s/%s", jgrp_step_name, GRP_TASKS); @@ -1006,13 +639,13 @@ static int _merge_step_files(void) error("Failed to create %s", jgrp_tasks_name); continue; } + */ } sprintf(step_path, "%s/%s", step_dir, de->d_name); debug("Adding %s to the job file", step_path); - _merge_node_step_data(fid_job, step_path, - nodex, step_node, - jgid_nodes, jgid_tasks); + rc = _merge_node_step_data(step_path, step_node, + jgid_nodes, jgid_tasks); nodex++; } @@ -1040,10 +673,12 @@ static int _merge_step_files(void) else put_int_attribute(fid_job, ATTR_NSTEPS, num_steps); + if (jgid_steps != -1) + H5Gclose(jgid_steps); if (fid_job != -1) H5Fclose(fid_job); - return 0; + return rc; } /* ============================================================================ @@ -1052,827 +687,994 @@ static int _merge_step_files(void) * ============================================================================ * ========================================================================= */ -static hid_t _get_series_parent(hid_t group) +static void _table_free(void *table) { - hid_t gid_level = -1; + table_t *t = (table_t *)table; + xfree(t->step); + xfree(t->node); + xfree(t->group); + xfree(t->name); + xfree(table); +} - if (strcasecmp(params.level, "Node:Totals") == 0) { - gid_level = get_group(group, GRP_TOTALS); - if (gid_level < 0) { - info("Failed to open group %s", GRP_TOTALS); - } - } else if (strcasecmp(params.level, "Node:TimeSeries") == 0) { - gid_level = get_group(group, GRP_SAMPLES); - if (gid_level < 0) { - info("Failed to open group %s", GRP_SAMPLES); - } - } else { - info("%s is an illegal level", params.level); - return -1; +static void _table_path(table_t *t, char *path) +{ + snprintf(path, MAX_PROFILE_PATH, + "/"GRP_STEPS"/%s/"GRP_NODES"/%s/%s/%s", + t->step, t->node, t->group, t->name); +} + +static herr_t _collect_tables_group(hid_t g_id, const char *name, + const H5L_info_t *link_info, void *op_data) +{ + List tables = (List)op_data; + hid_t table_id = -1; + /* open the dataset. 
*/ + if ((table_id = H5Dopen(g_id, name, H5P_DEFAULT)) < 0) { + error("Failed to open the dataset %s", name); + return -1; } + H5Dclose(table_id); - return gid_level; -} + group_mode = true; + + table_t *t = xmalloc(sizeof(table_t)); + t->step = xstrdup(current_step); + t->node = xstrdup(current_node); + t->group = xstrdup(params.series); + t->name = xstrdup(name); + list_append(tables, t); + return 0; +} -static void _get_series_names(hid_t group) +static herr_t _collect_tables_node(hid_t g_id, const char *name, + const H5L_info_t *link_info, void *op_data) { - int i, len; - char buf[MAX_GROUP_NAME+1]; - H5G_info_t group_info; - - H5Gget_info(group, &group_info); - num_series = (int)group_info.nlinks; - if (num_series < 0) { - debug("No Data Series in group"); - return; + char object_path[MAX_PROFILE_PATH+1]; + List tables = (List)op_data; + hid_t object_id = -1; + herr_t err; + + /* node filter */ + if (params.node + && strcmp(params.node, "*") != 0 + && strcmp(params.node, name) != 0) + return 0; + + snprintf(object_path, MAX_PROFILE_PATH+1, "%s/%s", name, params.series); + current_node = name; + + /* open the dataset. */ + if ((object_id = H5Oopen(g_id, object_path, H5P_DEFAULT)) < 0) { + error("Series %s not found", params.series); + return -1; } - series_names = xmalloc(sizeof(char*)*num_series); - for (i = 0; (num_series>0) && (i<num_series); i++) { - len = H5Lget_name_by_idx(group, ".", H5_INDEX_NAME, - H5_ITER_INC, i, buf, - MAX_GROUP_NAME, H5P_DEFAULT); - if ((len < 0) || (len > MAX_GROUP_NAME)) { - info("Invalid series name=%s", buf); - // put into list anyway so list doesn't have a null. + + if (H5Iget_type(object_id) == H5I_DATASET) { + table_t *t = xmalloc(sizeof(table_t)); + t->step = xstrdup(current_step); + t->node = xstrdup(name); + t->group = xstrdup(""); + t->name = xstrdup(params.series); + list_append(tables, t); + } else if (H5Iget_type(object_id) == H5I_GROUP) { + err = H5Literate(object_id, H5_INDEX_NAME, H5_ITER_INC, NULL, + _collect_tables_group, op_data); + if (err < 0) { + debug("2 Failed to iterate through group %s", object_path); + return SLURM_PROTOCOL_VERSION_ERROR; } - series_names[i] = xstrdup(buf); + } else { + error("Object of unknown type: %s", object_path); + H5Oclose(object_id); + return -1; } + H5Oclose(object_id); + + return 0; } -static void _extract_series(FILE* fp, int stepx, bool header, hid_t gid_level, - char* node_name, char* data_set_name) { - hid_t gid_series; - int size_data; - void *data; - uint32_t type; - char *data_type, *subtype; - hdf5_api_ops_t* ops; - gid_series = get_group(gid_level, data_set_name); - if (gid_series < 0) { - // This is okay, may not have ran long enough for - // a sample (hostname????) 
- // OR trying to get all tasks - return; - } - data_type = get_string_attribute(gid_series, ATTR_DATATYPE); - if (!data_type) { - H5Gclose(gid_series); - info("No datatype in %s", data_set_name); - return; - } - type = acct_gather_profile_type_from_string(data_type); - xfree(data_type); - subtype = get_string_attribute(gid_series, ATTR_SUBDATATYPE); - if (subtype == NULL) { - H5Gclose(gid_series); - info("No %s attribute", ATTR_SUBDATATYPE); - return; +static herr_t _collect_tables_step(hid_t g_id, const char *name, + const H5L_info_t *link_info, void *op_data) +{ + char nodes_path[MAX_PROFILE_PATH]; + herr_t err; + + /* step filter */ + if ((params.step_id != -1) && (atoi(name) != params.step_id)) + return 0; + + snprintf(nodes_path, MAX_PROFILE_PATH, "%s/"GRP_NODES, name); + current_step = name; + + err = H5Literate_by_name(g_id, nodes_path, H5_INDEX_NAME, + H5_ITER_INC, NULL, _collect_tables_node, + op_data, H5P_DEFAULT); + if (err < 0) { + debug("3 Failed to iterate through group /"GRP_STEPS"/%s", + nodes_path); + return err; } - ops = profile_factory(type); - if (ops == NULL) { - xfree(subtype); - H5Gclose(gid_series); - info("Failed to create operations for %s", - acct_gather_profile_type_to_string(type)); - return; + + return 0; +} + +static int _tables_list(hid_t fid_job, List tables) +{ + herr_t err; + ListIterator it; + table_t *t; + + /* Find the list of tables to be extracted */ + err = H5Literate_by_name(fid_job, "/"GRP_STEPS, H5_INDEX_NAME, + H5_ITER_INC, NULL, _collect_tables_step, + (void *)tables, H5P_DEFAULT); + if (err < 0) { + debug("4 Failed to iterate through group /" GRP_STEPS); + return SLURM_PROTOCOL_VERSION_ERROR; } - data = get_hdf5_data( - gid_series, type, data_set_name, &size_data); - if (data) { - if (strcmp(subtype,SUBDATA_SUMMARY) != 0) - (*(ops->extract_series)) (fp, header, params.job_id, - stepx, node_name, data_set_name, - data, size_data); - else - (*(ops->extract_total)) (fp, header, params.job_id, - stepx, node_name, data_set_name, - data, size_data); - xfree(data); - } else { - fprintf(fp, "%d,%d,%s,No %s Data\n", - params.job_id, stepx, node_name, - data_set_name); + + debug("tables found (group mode: %d):", group_mode); + it = list_iterator_create(tables); + while ((t = list_next(it))) { + debug(" /"GRP_STEPS"/%s/"GRP_NODES"/%s/%s/%s", + t->step, t->node, t->group, t->name); } - xfree(ops); - H5Gclose(gid_series); + list_iterator_destroy(it); + return SLURM_SUCCESS; } -static void _extract_node_level(FILE* fp, int stepx, hid_t jgid_nodes, - int nnodes, char* data_set_name) -{ - hid_t jgid_node, gid_level; - int nodex, len; - char jgrp_node_name[MAX_GROUP_NAME+1]; - bool header = true; - for (nodex=0; nodex<nnodes; nodex++) { - len = H5Lget_name_by_idx(jgid_nodes, ".", H5_INDEX_NAME, - H5_ITER_INC, nodex, jgrp_node_name, - MAX_GROUP_NAME, H5P_DEFAULT); - if ((len < 0) || (len > MAX_GROUP_NAME)) { - info("Invalid node name=%s", jgrp_node_name); - continue; + +/** + * Print the total values of a table to the output file + * + * @param nb_fields Number of fields in the dataset + * @param offsets Offset of each field + * @param types Type of each field + * @param type_size Size of of a record in the dataset + * @param table_id ID of the table to extract from + * @param state State of the current extraction + * @param node_name Name of the node containing this table + * @param output output file + */ +static void _extract_totals(size_t nb_fields, size_t *offsets, hid_t *types, + hsize_t type_size, hid_t table_id, + table_t *table, FILE *output) +{ + 
hsize_t nrecords; + size_t i, j; + uint8_t data[type_size]; + + /* allocate space for aggregate values: 4 values (min, max, + * sum, avg) on 8 bytes (uint64_t/double) for each field */ + uint64_t agg_i[nb_fields * 4]; + double *agg_d = (double *)agg_i; + + memset(agg_i, 0, nb_fields * 4 * sizeof(uint64_t)); + H5PTget_num_packets(table_id, &nrecords); + + /* compute min/max/sum */ + for (i = 0; i < nrecords; ++i) { + H5PTget_next(table_id, 1, data); + for (j = 0; j < nb_fields; ++j) { + if (H5Tequal(types[j], H5T_NATIVE_UINT64)) { + uint64_t v = *(uint64_t *)(data + offsets[j]); + uint64_t *a = agg_i + j * 4; + if (i == 0 || v < a[0]) /* min */ + a[0] = v; + if (v > a[1]) /* max */ + a[1] = v; + a[2] += v; /* sum */ + } else if (H5Tequal(types[j], H5T_NATIVE_DOUBLE)) { + double v = *(double *)(data + offsets[j]); + double *a = agg_d + j * 4; + if (i == 0 || v < a[0]) /* min */ + a[0] = v; + if (v > a[1]) /* max */ + a[1] = v; + a[2] += v; /* sum */ + } } - jgid_node = get_group(jgid_nodes, jgrp_node_name); - if (jgid_node < 0) { - info("Failed to open group %s", jgrp_node_name); - continue; + } + + /* compute avg */ + if (nrecords) { + for (j = 0; j < nb_fields; ++j) { + if (H5Tequal(types[j], H5T_NATIVE_UINT64)) { + agg_d[j*4+3] = (double)agg_i[j*4+2] / nrecords; + } else if (H5Tequal(types[j], H5T_NATIVE_DOUBLE)) { + agg_d[j*4+3] = (double)agg_d[j*4+2] / nrecords; + } } - if (params.node - && strcmp(params.node, "*") - && strcmp(params.node, jgrp_node_name)) - continue; - gid_level = _get_series_parent(jgid_node); - if (gid_level == -1) { - H5Gclose(jgid_node); - continue; + } + + /* step, node */ + fprintf(output, "%s,%s", table->step, table->node); + + if (group_mode) + fprintf(output, ",%s", table->name); + + /* elapsed time (first field in the last record) */ + fprintf(output, ",%"PRIu64, *(uint64_t *)data); + + /* aggregate values */ + for (j = 0; j < nb_fields; ++j) { + if (H5Tequal(types[j], H5T_NATIVE_UINT64)) { + fprintf(output, ",%"PRIu64",%"PRIu64",%"PRIu64",%lf", + agg_i[j * 4 + 0], + agg_i[j * 4 + 1], + agg_i[j * 4 + 2], + agg_d[j * 4 + 3]); + } else if (H5Tequal(types[j], H5T_NATIVE_DOUBLE)) { + fprintf(output, ",%lf,%lf,%lf,%lf", + agg_d[j * 4 + 0], + agg_d[j * 4 + 1], + agg_d[j * 4 + 2], + agg_d[j * 4 + 3]); } - _extract_series(fp, stepx, header, gid_level, jgrp_node_name, - data_set_name); - header = false; - H5Gclose(gid_level); - H5Gclose(jgid_node); } + fputc('\n', output); } -static void _extract_all_tasks(FILE *fp, hid_t gid_step, hid_t gid_nodes, - int nnodes, int stepx) +/** + * Extract the content of a table within a node. This function first discovers + * the content of the table and then handles both timeseries and totals levels. + */ +static int _extract_series_table(hid_t fid_job, table_t *table, List fields, + FILE *output, bool level_total) { + char path[MAX_PROFILE_PATH]; + + size_t i, j; + + size_t max_fields = list_count(fields); + size_t nb_fields = 0; + size_t offsets[max_fields]; + hid_t types[max_fields]; + + hid_t did = -1; /* dataset id */ + hid_t tid = -1; /* file type ID */ + hid_t n_tid = -1; /* native type ID */ + hid_t m_tid = -1; /* member type ID */ + hid_t nm_tid = -1; /* native member ID */ + hid_t table_id = -1; + hsize_t nmembers; + hsize_t type_size; + hsize_t nrecords; + char *m_name; + + _table_path(table, path); + debug("Extracting from table %s", path); + + /* open the dataset. 
*/ + if ((did = H5Dopen(fid_job, path, H5P_DEFAULT)) < 0) { + error("Failed to open the table %s", path); + goto error; + } + + /* get the datatype */ + if ((tid = H5Dget_type(did)) < 0) + goto error; + if ((n_tid = H5Tget_native_type(tid, H5T_DIR_DEFAULT)) < 0) + goto error; + + type_size = H5Tget_size(n_tid); + + /* get the number of members */ + if ((nmembers = H5Tget_nmembers(tid)) == 0) + goto error; + + /* iterate through the members */ + for (i = 0; i < nmembers; i++) { + m_name = H5Tget_member_name(tid, (unsigned)i); + /* continue if the field must not be extracted */ + if (!list_find_first(fields, _str_cmp, m_name)) { + free(m_name); + continue; + } + free(m_name); + + /* get the member type */ + if ((m_tid = H5Tget_member_type(tid, (unsigned)i)) < 0) + goto error; + if ((nm_tid = H5Tget_native_type(m_tid, H5T_DIR_DEFAULT)) < 0) + goto error; - hid_t gid_tasks, gid_task = 0, gid_node = -1, gid_level = -1; - H5G_info_t group_info; - int ntasks, itx, len, task_id; - char task_name[MAX_GROUP_NAME+1]; - char* node_name; - char buf[MAX_GROUP_NAME+1]; - bool hd = true; - - gid_tasks = get_group(gid_step, GRP_TASKS); - if (gid_tasks < 0) - fatal("No tasks in step %d", stepx); - H5Gget_info(gid_tasks, &group_info); - ntasks = (int) group_info.nlinks; - if (ntasks <= 0) - fatal("No tasks in step %d", stepx); - - for (itx = 0; itx<ntasks; itx++) { - // Get the name of the group. - len = H5Lget_name_by_idx(gid_tasks, ".", H5_INDEX_NAME, - H5_ITER_INC, itx, buf, MAX_GROUP_NAME, - H5P_DEFAULT); - if ((len > 0) && (len < MAX_GROUP_NAME)) { - gid_task = H5Gopen(gid_tasks, buf, H5P_DEFAULT); - if (gid_task < 0) - fatal("Failed to open %s", buf); - } else - fatal("Illegal task name %s",buf); - task_id = get_int_attribute(gid_task, ATTR_TASKID); - node_name = get_string_attribute(gid_task, ATTR_NODENAME); - sprintf(task_name,"%s_%d", GRP_TASK, task_id); - gid_node = H5Gopen(gid_nodes, node_name, H5P_DEFAULT); - if (gid_node < 0) - fatal("Failed to open %s for Task_%d", - node_name, task_id); - gid_level = get_group(gid_node, GRP_SAMPLES); - if (gid_level < 0) - fatal("Failed to open group %s for node=%s task=%d", - GRP_SAMPLES,node_name, task_id); - _extract_series(fp, stepx, hd, gid_level, node_name, task_name); - - hd = false; - xfree(node_name); - H5Gclose(gid_level); - H5Gclose(gid_node); - H5Gclose(gid_task); + types[nb_fields] = nm_tid; + offsets[nb_fields] = H5Tget_member_offset(n_tid, (unsigned)i); + ++nb_fields; + + /*H5Tclose(nm_tid);*/ + H5Tclose(m_tid); + } + + H5Tclose(n_tid); + H5Tclose(tid); + H5Dclose(did); + + /* open the table */ + if ((table_id = H5PTopen(fid_job, path)) < 0) { + error("Failed to open the series %s", params.input); + goto error; + } + + if (level_total) { + _extract_totals(nb_fields, offsets, types, type_size, + table_id, table, output); + } else { + /* Timeseries level */ + H5PTget_num_packets(table_id, &nrecords); + uint8_t data[type_size]; + + /* print the expected fields of all the records */ + for (i = 0; i < nrecords; ++i) { + H5PTget_next(table_id, 1, data); + fprintf(output, "%s,%s", table->step, table->node); + if (group_mode) + fprintf(output, ",%s", table->name); + + for (j = 0; j < nb_fields; ++j) { + if (H5Tequal(types[j], H5T_NATIVE_UINT64)) { + fprintf(output, ",%"PRIu64, + *(uint64_t *)(data+offsets[j])); + } else if (H5Tequal(types[j], + H5T_NATIVE_DOUBLE)) { + fprintf(output, ",%lf", + *(double *)(data + offsets[j])); + } else { + error("Unknown type"); + goto error; + } + } + fputc('\n', output); + } } - H5Gclose(gid_tasks); + + 
H5PTclose(table_id); + + return SLURM_SUCCESS; + +error: + if (nm_tid >= 0) H5Dclose(nm_tid); + if (m_tid >= 0) H5Dclose(m_tid); + if (n_tid >= 0) H5Dclose(n_tid); + if (tid >= 0) H5Dclose(tid); + if (did >= 0) H5PTclose(did); + if (table_id >= 0) H5PTclose(table_id); + return SLURM_ERROR; } -/* _extract_data() +/* _extract_series() */ -static int _extract_data(void) +static int _extract_series(void) { - hid_t fid_job; - hid_t jgid_root; - hid_t jgid_step; - hid_t jgid_nodes; - hid_t jgid_node; - hid_t jgid_level; - int nsteps; - int nnodes; - int stepx; - int isx; - int len; - char jgrp_step_name[MAX_GROUP_NAME+1]; - char jgrp_node_name[MAX_GROUP_NAME+1]; - FILE *fp; - - fp = fopen(params.output, "w"); - if (fp == NULL) { + hid_t fid_job = -1; + bool level_total; + const char *field; + List tables = NULL; + List fields = NULL; + ListIterator it; + FILE *output = NULL; + int rc = SLURM_ERROR; + table_t *t; + + level_total = (xstrcasecmp(params.level, "Node:Totals") == 0); + + output = fopen(params.output, "w"); + if (output == NULL) { error("Failed to create output file %s -- %m", params.output); + goto error; } fid_job = H5Fopen(params.input, H5F_ACC_RDONLY, H5P_DEFAULT); if (fid_job < 0) { error("Failed to open %s", params.input); - return -1; + goto error; } - jgid_root = H5Gopen(fid_job, "/", H5P_DEFAULT); - if (jgid_root < 0) { - H5Fclose(fid_job); - error("Failed to open root"); - return -1; + /* Find the list of tables to be extracted */ + tables = list_create(_table_free); + if ((rc = _tables_list(fid_job, tables)) != SLURM_SUCCESS) { + debug("Failed to list tables %s", params.series); + goto error; } - nsteps = get_int_attribute(jgid_root, ATTR_NSTEPS); - for (stepx = 0; stepx < nsteps; stepx++) { - - if ((params.step_id != -1) && (stepx != params.step_id)) - continue; - - sprintf(jgrp_step_name, "%s_%d", GRP_STEP, stepx); - jgid_step = get_group(jgid_root, jgrp_step_name); - if (jgid_step < 0) { - error("Failed to open group %s", jgrp_step_name); - continue; - } - - if (params.level && !strncasecmp(params.level, "Node:", 5)) { - - nnodes = get_int_attribute(jgid_step, ATTR_NNODES); - - jgid_nodes = get_group(jgid_step, GRP_NODES); - if (jgid_nodes < 0) { - H5Gclose(jgid_step); - error("Failed to open group %s", GRP_NODES); - continue; - } - - len = H5Lget_name_by_idx(jgid_nodes, ".", H5_INDEX_NAME, - H5_ITER_INC, 0, jgrp_node_name, - MAX_GROUP_NAME, H5P_DEFAULT); - if ((len < 0) || (len > MAX_GROUP_NAME)) { - H5Gclose(jgid_nodes); - H5Gclose(jgid_step); - error("Invalid node name %s", jgrp_node_name); - continue; - } - - jgid_node = get_group(jgid_nodes, jgrp_node_name); - if (jgid_node < 0) { - H5Gclose(jgid_nodes); - H5Gclose(jgid_step); - info("Failed to open group %s", jgrp_node_name); - continue; - } + /* Find the fields to be extracted */ + fields = list_create(_void_free); + if ((rc = _fields_intersection(fid_job, tables, fields)) + != SLURM_SUCCESS) { + error("Failed to find data items for series %s", params.series); + goto error; + } - jgid_level = _get_series_parent(jgid_node); - if (jgid_level == -1) { - H5Gclose(jgid_node); - H5Gclose(jgid_nodes); - H5Gclose(jgid_step); - continue; - } + /* csv header */ + fprintf(output, "Step,Node"); - _get_series_names(jgid_level); - H5Gclose(jgid_level); - H5Gclose(jgid_node); - - if (!params.series || !strcmp(params.series, "*")) { - for (isx = 0; isx < num_series; isx++) { - if (strncasecmp(series_names[isx], - GRP_TASK, - strlen(GRP_TASK)) == 0) - continue; - _extract_node_level(fp, stepx, jgid_nodes, - nnodes, - 
series_names[isx]); - // Now handle all tasks. - } - } else if (strcasecmp(params.series, GRP_TASKS) == 0 - || strcasecmp(params.series, GRP_TASK) == 0) { - for (isx = 0; isx < num_series; isx++) { - if (strstr(series_names[isx], - GRP_TASK)) { - _extract_node_level(fp, stepx, jgid_nodes, - nnodes, - series_names[isx]); - } - } - } else { - _extract_node_level(fp, stepx, jgid_nodes, - nnodes, - params.series); - } + if (group_mode) { + fprintf(output, ",Series"); + } - _delete_string_list(series_names, num_series); - series_names = NULL; - num_series = 0; - if (!params.series || !strcmp(params.series, "*")) - _extract_all_tasks(fp, jgid_step, jgid_nodes, - nnodes, stepx); + if (level_total) { + /* do not aggregate time values */ + list_delete_all(fields, _str_cmp, "ElapsedTime"); + fputs(",ElapsedTime", output); + } - H5Gclose(jgid_nodes); + it = list_iterator_create(fields); + while ((field = list_next(it))) { + if (level_total) { + fprintf(output, ",Min_%s,Max_%s,Sum_%s,Avg_%s", + field, field, field, field); } else { - error("%s is an illegal level", params.level); + fprintf(output, ",%s", field); } - H5Gclose(jgid_step); } + fputc('\n', output); + list_iterator_destroy(it); - H5Gclose(jgid_root); - H5Fclose(fid_job); - fclose(fp); + /* Extract from every table */ + it = list_iterator_create(tables); + while ((t = list_next(it))) { + if (_extract_series_table(fid_job, t, fields, + output, level_total) < 0) { + error("Failed to extract series"); + goto error; + } + } - return 0; + list_destroy(tables); + list_destroy(fields); + H5Fclose(fid_job); + fclose(output); + return SLURM_SUCCESS; + +error: + if (fields) list_destroy(fields); + if (tables) list_destroy(tables); + if (output) fclose(output); + if (fid_job >= 0) H5Fclose(fid_job); + return rc; } - /* ============================================================================ * ============================================================================ * Functions for data item extraction * ============================================================================ * ========================================================================= */ -// Get the data_set for a node -static void *_get_series_data(hid_t jgid_node, char* series, - hdf5_api_ops_t **ops_p, int *nsmp) +/** + * Perform the analysis on a given item of type uint64_t, present in multiple + * tables. 
+ * + * @param nb_tables Number of table to analyze + * @param tables IDs of all the tables to analyze + * @param nb_records Number of records in each table + * @param buf_size Size of the largest record of the tables + * @param offsets Offset of the item analyzed in each table + * @param names Names of the tables + * @param nodes Name of the node for each table + * @param step_name Name of the current step + */ +static void _item_analysis_uint(hsize_t nb_tables, hid_t *tables, + hsize_t *nb_records, size_t buf_size, size_t *offsets, + const char *names[], const char *nodes[], const char *step_name) { + size_t i; + uint64_t min_val; + size_t min_idx; + uint64_t max_val; + size_t max_idx; + uint64_t sum, sum_max; + double avg, avg_max; + size_t nb_series_in_smp; + uint64_t v; + uint64_t values[nb_tables]; + uint8_t buffer[buf_size]; + uint64_t et, et_max; + + sum_max = 0; + + for (;;) { + min_val = UINT64_MAX; + max_val = 0; + sum = 0; + nb_series_in_smp = 0; + + /* compute aggregate values */ + for (i = 0; i < nb_tables; ++i) { + if (nb_records[i] == 0) + continue; + --nb_records[i]; + ++nb_series_in_smp; + /* read the value of the item in the series i */ + H5PTget_next(tables[i], 1, (void *)buffer); + v = *(uint64_t *)(buffer + offsets[i]); + values[i] = v; + /* compute the sum, min and max */ + sum += v; + if (v < min_val) { + min_val = v; + min_idx = i; + } + if (v > max_val) { + max_val = v; + max_idx = i; + } + /* Elapsed time is always at offset 0 */ + et = *(uint64_t *)buffer; + } - hid_t gid_level, gid_series; - int size_data; - void *data; - uint32_t type; - char *data_type; - hdf5_api_ops_t* ops; + if (nb_series_in_smp == 0) /* stop if no more samples */ + break; - *nsmp = 0; // Initialize return arguments. - *ops_p = NULL; + avg = (double)sum / (double)nb_series_in_smp; - // Navigate from the node group to the data set - gid_level = get_group(jgid_node, GRP_SAMPLES); - if (gid_level == -1) { - return NULL; - } - gid_series = get_group(gid_level, series); - if (gid_series < 0) { - // This is okay, may not have ran long enough for - // a sample (srun hostname) - H5Gclose(gid_level); - return NULL; - } - data_type = get_string_attribute(gid_series, ATTR_DATATYPE); - if (!data_type) { - H5Gclose(gid_series); - H5Gclose(gid_level); - debug("No datatype in %s", series); - return NULL; - } - // Invoke the data type operator to get the data set - type = acct_gather_profile_type_from_string(data_type); - xfree(data_type); - ops = profile_factory(type); - if (ops == NULL) { - H5Gclose(gid_series); - H5Gclose(gid_level); - debug("Failed to create operations for %s", - acct_gather_profile_type_to_string(type)); - return NULL; - } - data = get_hdf5_data(gid_series, type, series, &size_data); - if (data) { - *nsmp = (size_data / ops->dataset_size()); - *ops_p = ops; - } else { - xfree(ops); + /* store the greatest sum */ + if (sum > sum_max) { + sum_max = sum; + avg_max = avg; + et_max = et; + } + + if (group_mode) { + fprintf(output_file, + "%s,%"PRIu64",%s %s,%"PRIu64",%s %s," + "%"PRIu64",%"PRIu64",%lf,%"PRIu64, + step_name, et, + names[min_idx], nodes[min_idx], min_val, + names[max_idx], nodes[max_idx], max_val, + sum, avg, nb_series_in_smp); + } else { + fprintf(output_file, + "%s,%"PRIu64",%s,%"PRIu64",%s,%"PRIu64",%" + PRIu64",%lf,%"PRIu64, + step_name, et, + nodes[min_idx], min_val, + nodes[max_idx], max_val, + sum, avg, nb_series_in_smp); + } + + /* print value of each series */ + for (i = 0; i < nb_tables; ++i) { + fprintf(output_file, ",%"PRIu64, values[i]); + /* and set their 
values to zero if no more values */ + if (values[i] && nb_records[i] == 0) + values[i] = 0; + } + fputc('\n', output_file); } - H5Gclose(gid_series); - H5Gclose(gid_level); - return data; + + printf(" Step %s Maximum accumulated %s Value (%"PRIu64") occurred " + "at Time=%"PRIu64", Ave Node %lf\n", + step_name, params.data_item, sum_max, et_max, avg_max); } -static void _series_analysis(FILE *fp, bool hd, int stepx, int nseries, - int nsmp, char **series_name, char **tod, double *et, - double **all_series, uint64_t *series_smp) +/** + * Perform the analysis on a given item of type double, present in multiple + * tables. + * See _item_analysis_uint for parameters description. + */ +static void _item_analysis_double(hsize_t nb_tables, hid_t *tables, + hsize_t *nb_records, size_t buf_size, size_t *offsets, + const char *names[], const char *nodes[], const char *step_name) { - double *mn_series; // Min Value, each sample - double *mx_series; // Max value, each sample - double *sum_series; // Total of all series, each sample - double *smp_series; // all samples for one node - uint64_t *mn_sx; // Index of series with minimum value - uint64_t *mx_sx; // Index of series with maximum value - uint64_t *series_in_smp; // Number of series in the sample - int max_smpx = 0; - double max_smp_series = 0; - double ave_series; - int ix, isx; - - mn_series = xmalloc(nsmp * sizeof(double)); - mx_series = xmalloc(nsmp * sizeof(double)); - sum_series =xmalloc(nsmp * sizeof(double)); - mn_sx = xmalloc(nsmp * sizeof(uint64_t)); - mx_sx = xmalloc(nsmp * sizeof(uint64_t)); - series_in_smp = xmalloc(nsmp * sizeof(uint64_t)); - - for (ix = 0; ix < nsmp; ix++) { - for (isx=0; isx<nseries; isx++) { - if (series_smp[isx]<nsmp && ix>=series_smp[isx]) + size_t i; + double min_val; + size_t min_idx; + double max_val; + size_t max_idx; + double sum, sum_max; + double avg, avg_max; + size_t nb_series_in_smp; + double v; + double values[nb_tables]; + uint8_t buffer[buf_size]; + uint64_t et, et_max; + + sum_max = 0; + + for (;;) { + min_val = UINT64_MAX; + max_val = 0; + sum = 0; + nb_series_in_smp = 0; + + /* compute aggregate values */ + for (i = 0; i < nb_tables; ++i) { + if (nb_records[i] == 0) continue; - series_in_smp[ix]++; - smp_series = all_series[isx]; - if (smp_series) { - sum_series[ix] += smp_series[ix]; - if (mn_series[ix] == 0 - || smp_series[ix] < mn_series[ix]) { - mn_series[ix] = smp_series[ix]; - mn_sx[ix] = isx; - } - if (mx_series[ix] == 0 - || smp_series[ix] > mx_series[ix]) { - mx_series[ix] = smp_series[ix]; - mx_sx[ix] = isx; - } + --nb_records[i]; + ++nb_series_in_smp; + /* read the value of the item in the series i */ + H5PTget_next(tables[i], 1, (void *)buffer); + v = *(double *)(buffer + offsets[i]); + values[i] = v; + /* compute the sum, min and max */ + sum += v; + if (v < min_val) { + min_val = v; + min_idx = i; + } + if (v > max_val) { + max_val = v; + max_idx = i; } + /* Elapsed time is always at offset 0 */ + et = *(double *)buffer; } - } - for (ix = 0; ix < nsmp; ix++) { - if (sum_series[ix] > max_smp_series) { - max_smpx = ix; - max_smp_series = sum_series[ix]; - } - } + if (nb_series_in_smp == 0) /* stop if no more samples */ + break; + + avg = (double)sum / (double)nb_series_in_smp; - ave_series = sum_series[max_smpx] / series_in_smp[max_smpx]; - printf(" Step %d Maximum accumulated %s Value (%f) occurred " - "at %s (Elapsed Time=%d) Ave Node %f\n", - stepx, params.data_item, max_smp_series, - tod[max_smpx], (int) et[max_smpx], ave_series); - - // Put data for step - if (!hd) { - 
fprintf(fp,"TOD,Et,JobId,StepId,Min Node,Min %s," - "Ave %s,Max Node,Max %s,Total %s," - "Num Nodes",params.data_item,params.data_item, - params.data_item,params.data_item); - for (isx = 0; isx < nseries; isx++) { - fprintf(fp,",%s",series_name[isx]); + /* store the greatest sum */ + if (sum > sum_max) { + sum_max = sum; + avg_max = avg; + et_max = et; } - fprintf(fp,"\n"); - } - for (ix = 0; ix < nsmp; ix++) { - fprintf(fp,"%s, %d",tod[ix], (int) et[ix]); - fprintf(fp,",%d,%d",params.job_id,stepx); - fprintf(fp,",%s,%f",series_name[mn_sx[ix]], - mn_series[ix]); - ave_series = sum_series[ix] / series_in_smp[ix]; - fprintf(fp,",%f",ave_series); - fprintf(fp,",%s,%f",series_name[mx_sx[ix]], - mx_series[ix]); - fprintf(fp,",%f",sum_series[ix]); - fprintf(fp,",%"PRIu64"",series_in_smp[ix]); - for (isx = 0; isx < nseries; isx++) { - if (series_smp[isx]<nsmp && ix>=series_smp[isx]) { - fprintf(fp,",0.0"); - } else { - smp_series = all_series[isx]; - fprintf(fp,",%f",smp_series[ix]); - } + fprintf(output_file, + "%s,%"PRIu64",%s,%lf,%s,%lf,%lf,%lf,%"PRIu64, + step_name, et, + names[min_idx], min_val, names[max_idx], max_val, + sum, avg, nb_series_in_smp); + + /* print value of each series */ + for (i = 0; i < nb_tables; ++i) { + fprintf(output_file, ",%lf", values[i]); + /* and set their values to zero if no more values */ + if (values[i] && nb_records[i] == 0) + values[i] = 0; } - fprintf(fp,"\n"); + fputc('\n', output_file); } - xfree(mn_series); - xfree(mx_series); - xfree(sum_series); - xfree(mn_sx); - xfree(mx_sx); + printf(" Step %s Maximum accumulated %s Value (%lf) occurred " + "at Time=%"PRIu64", Ave Node %lf\n", + step_name, params.data_item, sum_max, et_max, avg_max); } -static void _get_all_node_series(FILE *fp, bool hd, hid_t jgid_step, int stepx) +static herr_t _extract_item_step(hid_t g_id, const char *step_name, + const H5L_info_t *link_info, void *op_data) { - char **tod = NULL; // Date time at each sample - char **node_name; // Node Names - double **all_series; // Pointers to all sampled for each node - double *et = NULL; // Elapsed time at each sample - uint64_t *series_smp; // Number of samples in this series - - hid_t jgid_nodes, jgid_node; - int nnodes, ndx, len, nsmp = 0, nitem = -1; - char jgrp_node_name[MAX_GROUP_NAME+1]; - void* series_data = NULL; - hdf5_api_ops_t* ops; - - nnodes = get_int_attribute(jgid_step, ATTR_NNODES); - // allocate node arrays - - series_smp = xmalloc(nnodes * (sizeof(uint64_t))); - if (series_smp == NULL) { - fatal("Failed to get memory for node_samples"); - return; /* fix for CLANG false positive */ + static bool first = true; + + char nodes_path[MAX_PROFILE_PATH]; + char path[MAX_PROFILE_PATH]; + + size_t i, j; + size_t buf_size = 0; + char *m_name; + + hid_t fid_job = *((hid_t *)op_data); + hid_t did = -1; /* dataset id */ + hid_t tid = -1; /* file type ID */ + hid_t n_tid = -1; /* native type ID */ + hid_t m_tid = -1; /* member type ID */ + hid_t nm_tid = -1; /* native member ID */ + hsize_t nmembers; + hid_t item_type = -1; + herr_t err; + + List tables = NULL; + ListIterator it = NULL; + table_t *t; + + /* step filter */ + if ((params.step_id != -1) && (atoi(step_name) != params.step_id)) + return 0; + + current_step = step_name; + + snprintf(nodes_path, MAX_PROFILE_PATH, "%s/"GRP_NODES, step_name); + + tables = list_create(_table_free); + err = H5Literate_by_name(g_id, nodes_path, H5_INDEX_NAME, + H5_ITER_INC, NULL, _collect_tables_node, + (void *)tables, H5P_DEFAULT); + if (err < 0) { + debug("1 Failed to iterate through group 
/"GRP_STEPS"/%s", + nodes_path); + list_destroy(tables); + return -1; } - node_name = xmalloc(nnodes * (sizeof(char*))); - if (node_name == NULL) { - fatal("Failed to get memory for node_name"); - return; /* fix for CLANG false positive */ - } + size_t nb_tables = list_count(tables); + hid_t tables_id[nb_tables]; + size_t offsets[nb_tables]; + hsize_t nb_records[nb_tables]; + const char *names[nb_tables]; + const char *nodes[nb_tables]; - all_series = xmalloc(nnodes * (sizeof(double*))); - if (all_series == NULL) { - fatal("Failed to get memory for all_series"); - return; /* fix for CLANG false positive */ - } + for (i = 0; i < nb_tables; ++i) + tables_id[i] = -1; - jgid_nodes = get_group(jgid_step, GRP_NODES); - if (jgid_nodes < 0) - fatal("Failed to open group %s", GRP_NODES); + it = list_iterator_create(tables); + i = 0; + while ((t = (table_t *)list_next(it))) { + names[i] = t->name; + nodes[i] = t->node; - for (ndx=0; ndx<nnodes; ndx++) { - len = H5Lget_name_by_idx(jgid_nodes, ".", H5_INDEX_NAME, - H5_ITER_INC, ndx, jgrp_node_name, - MAX_GROUP_NAME, H5P_DEFAULT); - if ((len < 0) || (len > MAX_GROUP_NAME)) { - debug("Invalid node name=%s", jgrp_node_name); - continue; + /* open the dataset. */ + _table_path(t, path); + if ((did = H5Dopen(fid_job, path, H5P_DEFAULT)) < 0) { + error("Failed to open the series %s", path); + goto error; } - node_name[ndx] = xstrdup(jgrp_node_name); - jgid_node = get_group(jgid_nodes, jgrp_node_name); - if (jgid_node < 0) { - debug("Failed to open group %s", jgrp_node_name); - continue; + + /* get the datatype */ + if ((tid = H5Dget_type(did)) < 0) + goto error; + if ((n_tid = H5Tget_native_type(tid, H5T_DIR_DEFAULT)) < 0) + goto error; + + buf_size = MAX(buf_size, H5Tget_size(n_tid)); + + /* get the number of members */ + if ((nmembers = H5Tget_nmembers(tid)) == 0) + goto error; + + /* iterate through the members and stop when params.data_item + * is found */ + for (j = 0; j < nmembers; j++) { + m_name = H5Tget_member_name(tid, (unsigned)j); + if (xstrcasecmp(params.data_item, m_name) == 0) { + free(m_name); + break; + } + free(m_name); } - ops = NULL; - nitem = 0; - series_data = _get_series_data(jgid_node, params.series, - &ops, &nitem); - if (series_data==NULL || nitem==0 || ops==NULL) { - if (ops != NULL) - xfree(ops); - continue; + + if (j == nmembers) { + error("Item %s not found in series %s", + params.data_item, path); + goto error; } - all_series[ndx] = ops->get_series_values( - params.data_item, series_data, nitem); - if (!all_series[ndx]) - fatal("No data item %s",params.data_item); - series_smp[ndx] = nitem; - if (ndx == 0) { - nsmp = nitem; - tod = ops->get_series_tod(series_data, nitem); - et = ops->get_series_values("time", - series_data, nitem); - } else { - if (nitem > nsmp) { - // new largest number of samples - _delete_string_list(tod, nsmp); - xfree(et); - nsmp = nitem; - tod = ops->get_series_tod(series_data, - nitem); - et = ops->get_series_values("time", - series_data, nitem); - } + + offsets[i] = H5Tget_member_offset(n_tid, (unsigned)j); + + /* get the member type */ + if ((m_tid = H5Tget_member_type(tid, (unsigned)j)) < 0) + goto error; + if ((nm_tid = H5Tget_native_type(m_tid, H5T_DIR_DEFAULT)) < 0) + goto error; + + if (item_type == -1) { + item_type = nm_tid; + } else if (nm_tid != item_type) { + error("Malformed file: fields with the same name in " + "tables with the same name must have the same " + "types"); + goto error; + } + + H5Tclose(nm_tid); + H5Tclose(m_tid); + + H5Tclose(n_tid); + H5Tclose(tid); + H5Dclose(did); + + /* 
open the table */ + if ((tables_id[i] = H5PTopen(fid_job, path)) < 0) { + error("Failed to open the series %s", path); + goto error; } - xfree(ops); - xfree(series_data); - H5Gclose(jgid_node); + H5PTget_num_packets(tables_id[i], &nb_records[i]); + + ++i; + } + + if (first) { + /* complete header */ + first = false; + list_iterator_reset(it); + while ((t = (table_t *)list_next(it))) { + if (group_mode) + fprintf(output_file, ",%s", t->node); + else + fprintf(output_file, ",%s %s", t->name, t->node); + } + fputc('\n', output_file); } - if (nsmp == 0) { - // May be bad series name - info("No values %s for series %s found in step %d", - params.data_item,params.series, - stepx); + + list_iterator_destroy(it); + + if (H5Tequal(item_type, H5T_NATIVE_UINT64)) { + _item_analysis_uint(nb_tables, tables_id, nb_records, buf_size, + offsets, names, nodes, step_name); + } else if (H5Tequal(item_type, H5T_NATIVE_DOUBLE)) { + _item_analysis_double(nb_tables, tables_id, nb_records, buf_size, + offsets, names, nodes, step_name); } else { - _series_analysis(fp, hd, stepx, nnodes, nsmp, - node_name, tod, et, all_series, series_smp); + error("Unknown type"); + goto error; } - for (ndx=0; ndx<nnodes; ndx++) { - xfree(node_name[ndx]); - xfree(all_series[ndx]); + + /* clean up */ + for (i = 0; i < nb_tables; ++i) { + H5PTclose(tables_id[i]); } - xfree(node_name); - xfree(all_series); - xfree(series_smp); - _delete_string_list(tod, nsmp); - xfree(et); + list_destroy(tables); - H5Gclose(jgid_nodes); + return 0; +error: + if (did >= 0) H5Dclose(did); + if (tid >= 0) H5Tclose(tid); + if (n_tid >= 0) H5Tclose(n_tid); + if (m_tid >= 0) H5Tclose(m_tid); + if (nm_tid >= 0) H5Tclose(nm_tid); + if (tables) list_destroy(tables); + for (i = 0; i < nb_tables; ++i) { + if (tables_id[i] >= 0) + H5PTclose(tables_id[i]); + } + return -1; } -static void _get_all_task_series(FILE *fp, bool hd, hid_t jgid_step, int stepx) +static int _extract_item(void) { + hid_t fid_job; + herr_t err; - hid_t jgid_tasks, jgid_task = 0, jgid_nodes, jgid_node; - H5G_info_t group_info; - int ntasks,itx, tid; - uint64_t *task_id; - char **task_node_name; /* Node Name for each task */ - char **tod = NULL; /* Date time at each sample */ - char **series_name; /* Node Names */ - double **all_series; /* Pointers to all sampled for each node */ - double *et = NULL; /* Elapsed time at each sample */ - uint64_t *series_smp; /* Number of samples in this series */ - int nnodes, ndx, len, nsmp = 0, nitem = -1; - char jgrp_node_name[MAX_GROUP_NAME+1]; - char jgrp_task_name[MAX_GROUP_NAME+1]; - char buf[MAX_GROUP_NAME+1]; - void* series_data = NULL; - hdf5_api_ops_t* ops; - - jgid_nodes = get_group(jgid_step, GRP_NODES); - if (jgid_nodes < 0) - fatal("Failed to open group %s", GRP_NODES); - jgid_tasks = get_group(jgid_step, GRP_TASKS); - if (jgid_tasks < 0) - fatal("No tasks in step %d", stepx); - H5Gget_info(jgid_tasks, &group_info); - ntasks = (int) group_info.nlinks; - if (ntasks <= 0) - fatal("No tasks in step %d", stepx); - task_id = xmalloc(ntasks*sizeof(uint64_t)); - if (task_id == NULL) - fatal("Failed to get memory for task_ids"); - task_node_name = xmalloc(ntasks*sizeof(char*)); - if (task_node_name == NULL) - fatal("Failed to get memory for task_node_names"); - - for (itx = 0; itx<ntasks; itx++) { - // Get the name of the group. 
- len = H5Lget_name_by_idx(jgid_tasks, ".", H5_INDEX_NAME, - H5_ITER_INC, itx, buf, MAX_GROUP_NAME, - H5P_DEFAULT); - if ((len > 0) && (len < MAX_GROUP_NAME)) { - jgid_task = H5Gopen(jgid_tasks, buf, H5P_DEFAULT); - if (jgid_task < 0) - fatal("Failed to open %s", buf); - } else - fatal("Illegal task name %s",buf); - task_id[itx] = get_int_attribute(jgid_task, ATTR_TASKID); - task_node_name[itx] = get_string_attribute(jgid_task, - ATTR_NODENAME); - H5Gclose(jgid_task); - } - H5Gclose(jgid_tasks); - - nnodes = get_int_attribute(jgid_step, ATTR_NNODES); - // allocate node arrays - series_smp = (uint64_t*) xmalloc(ntasks*(sizeof(uint64_t))); - if (series_smp == NULL) { - fatal("Failed to get memory for node_samples"); - return; /* Fix for CLANG false positive */ + output_file = fopen(params.output, "w"); + if (output_file == NULL) { + error("Failed to create output file %s -- %m", + params.output); } - series_name = (char**) xmalloc(ntasks*(sizeof(char*))); - if (series_name == NULL) { - fatal("Failed to get memory for series_name"); - return; /* Fix for CLANG false positive */ + + fid_job = H5Fopen(params.input, H5F_ACC_RDONLY, H5P_DEFAULT); + if (fid_job < 0) { + fclose(output_file); + error("Failed to open %s", params.input); + return SLURM_ERROR; } - all_series = (double**) xmalloc(ntasks*(sizeof(double*))); - if (all_series == NULL) { - fatal("Failed to get memory for all_series"); - return; /* Fix for CLANG false positive */ + + /* csv header */ + fputs("Step,ElaspedTime,MinNode,MinValue,MaxNode,MaxValue,Sum,Avg," + "NumNodes", output_file); + + err = H5Literate_by_name(fid_job, "/" GRP_STEPS, H5_INDEX_NAME, + H5_ITER_INC, NULL, _extract_item_step, + (void *)(&fid_job), H5P_DEFAULT); + if (err < 0) { + debug("hnere Failed to iterate through group /" GRP_STEPS); + H5Fclose(fid_job); + fclose(output_file); + return SLURM_PROTOCOL_VERSION_ERROR; } - for (ndx=0; ndx<nnodes; ndx++) { + H5Fclose(fid_job); + fclose(output_file); - len = H5Lget_name_by_idx(jgid_nodes, ".", H5_INDEX_NAME, - H5_ITER_INC, ndx, jgrp_node_name, - MAX_GROUP_NAME, H5P_DEFAULT); - if ((len < 0) || (len > MAX_GROUP_NAME)) - fatal("Invalid node name=%s", jgrp_node_name); - jgid_node = get_group(jgid_nodes, jgrp_node_name); + return SLURM_SUCCESS; +} - if (jgid_node < 0) - fatal("Failed to open group %s", jgrp_node_name); - for (itx = 0; itx<ntasks; itx++) { - if (strcmp(jgrp_node_name, task_node_name[itx]) != 0) - continue; - tid = task_id[itx]; - series_name[itx] = xstrdup_printf("%s_%d %s", - GRP_TASK,tid,jgrp_node_name); - sprintf(jgrp_task_name,"%s_%d",GRP_TASK, tid); - - ops = NULL; - nitem = 0; - series_data = _get_series_data(jgid_node, - jgrp_task_name, &ops, &nitem); - if (series_data==NULL || nitem==0 || ops==NULL) { - if (ops != NULL) - xfree(ops); - continue; +static int _fields_intersection(hid_t fid_job, List tables, List fields) +{ + hid_t jgid_table = -1; + hid_t tid = -1; + hssize_t nb_fields; + size_t i; + char *field; + ListIterator it1, it2; + bool found; + char path[MAX_PROFILE_PATH]; + table_t *t; + bool first = true; + + if (fields == NULL || tables == NULL) + return SLURM_ERROR; + + it1 = list_iterator_create(tables); + while ((t = (table_t *)list_next(it1))) { + _table_path(t, path); + jgid_table = H5Dopen(fid_job, path, H5P_DEFAULT); + if (jgid_table < 0) { + error("Failed to open table %s", path); + return SLURM_ERROR; + } + + tid = H5Dget_type(jgid_table); + nb_fields = H5Tget_nmembers(tid); + + if (first) { + first = false; + /* nothing to intersect yet, copy all the fields */ + for (i = 0; i 
< nb_fields; i++) { + field = H5Tget_member_name(tid, i); + list_append(fields, xstrdup(field)); + free(field); + } + } else { + /* gather fields */ + char *l_fields[nb_fields]; + for (i = 0; i < nb_fields; i++) { + l_fields[i] = H5Tget_member_name(tid, i); } - all_series[itx] = ops->get_series_values( - params.data_item, series_data, nitem); - if (!all_series[ndx]) - fatal("No data item %s",params.data_item); - series_smp[itx] = nitem; - if (nsmp == 0) { - nsmp = nitem; - tod = ops->get_series_tod(series_data, nitem); - et = ops->get_series_values("time", - series_data, nitem); - } else { - if (nitem > nsmp) { - // new largest number of samples - _delete_string_list(tod, nsmp); - xfree(et); - nsmp = nitem; - tod = ops->get_series_tod(series_data, - nitem); - et = ops->get_series_values("time", - series_data, nitem); + /* remove fields that are not in current table */ + it2 = list_iterator_create(fields); + while ((field = list_next(it2))) { + found = false; + for (i = 0; i < nb_fields; i++) { + if (xstrcmp(field, l_fields[i]) == 0) { + found = true; + break; + } + } + if (!found) { + list_delete_item(it2); } } - xfree(ops); - xfree(series_data); + list_iterator_destroy(it2); + /* clean up fields */ + for (i = 0; i < nb_fields; i++) + free(l_fields[i]); } - H5Gclose(jgid_node); - } - if (nsmp == 0) { - // May be bad series name - info("No values %s for series %s found in step %d", - params.data_item,params.series, - stepx); - } else { - _series_analysis(fp, hd, stepx, ntasks, nsmp, - series_name, tod, et, all_series, series_smp); - } - for (itx=0; itx<ntasks; itx++) { - xfree(all_series[itx]); + + H5Tclose(tid); + H5Dclose(jgid_table); } - xfree(series_name); - xfree(all_series); - xfree(series_smp); - _delete_string_list(tod, nsmp); - xfree(et); - _delete_string_list(task_node_name, ntasks); - xfree(task_id); - - H5Gclose(jgid_nodes); + list_iterator_destroy(it1); + + return SLURM_SUCCESS; } -static int _series_data(void) +/* List the intersection of the items of all tables with the same name, for all + * table names. 
The list is printed on the standard output */ +static int _list_items(void) { - FILE *fp; - bool hd = false; - hid_t fid_job; - hid_t jgid_root; - hid_t jgid_step; - int nsteps; - int stepx; - char jgrp_step_name[MAX_GROUP_NAME + 1]; - - fp = fopen(params.output, "w"); - if (fp == NULL) { - error("Failed open file %s -- %m", params.output); - return -1; - } + hid_t fid_job = -1; + List fields; + ListIterator it; + const char *field; + int rc = SLURM_ERROR; + List tables; + /* get series names */ fid_job = H5Fopen(params.input, H5F_ACC_RDONLY, H5P_DEFAULT); if (fid_job < 0) { - fclose(fp); error("Failed to open %s", params.input); - return -1; + return SLURM_ERROR; } - jgid_root = H5Gopen(fid_job, "/", H5P_DEFAULT); - if (jgid_root < 0) { - fclose(fp); + /* Find the list of tables to be extracted */ + tables = list_create(_table_free); + if ((rc = _tables_list(fid_job, tables)) != SLURM_SUCCESS) { + debug("Failed to list tables %s", params.series); H5Fclose(fid_job); - error("Failed to open root"); - return -1; + list_destroy(tables); + return rc; } - nsteps = get_int_attribute(jgid_root, ATTR_NSTEPS); - for (stepx = 0; stepx < nsteps; stepx++) { - - if ((params.step_id != -1) && (stepx != params.step_id)) - continue; - - sprintf(jgrp_step_name, "%s_%d", GRP_STEP, stepx); - jgid_step = get_group(jgid_root, jgrp_step_name); - if (jgid_step < 0) { - error("Failed to open group %s", jgrp_step_name); - return -1; - } - - if (strncmp(params.series,GRP_TASK,strlen(GRP_TASK)) == 0) - _get_all_task_series(fp,hd,jgid_step, stepx); - else - _get_all_node_series(fp,hd,jgid_step, stepx); + fields = list_create(_void_free); + if ((rc = _fields_intersection(fid_job, tables, fields)) + != SLURM_SUCCESS) { + error("Failed to intersect fields"); + H5Fclose(fid_job); + list_destroy(tables); + list_destroy(fields); + return rc; + } - hd = true; - H5Gclose(jgid_step); + it = list_iterator_create(fields); + while ((field = list_next(it))) { + printf("%s\n", field); } + list_iterator_destroy(it); + + list_destroy(tables); + list_destroy(fields); - H5Gclose(jgid_root); H5Fclose(fid_job); - fclose(fp); - return 0; + return SLURM_SUCCESS; } diff --git a/src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.h b/src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.h new file mode 100644 index 0000000000000000000000000000000000000000..8f111df851a69923cea4946fd951da34a6cf972f --- /dev/null +++ b/src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.h @@ -0,0 +1,70 @@ +/*****************************************************************************\ + * sh5util.h - slurm profile accounting plugin for io and energy using hdf5. + * - Utility to merge node-step files into a job file + * - or extract data from an job file + ***************************************************************************** + * Copyright (C) 2015 SchedMD LLC + * + * Written by Danny Auble <da@schedmd.com> @ SchedMD. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.schedmd.com/slurmdocs/>. + * Please also read the included file: DISCLAIMER. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * +\*****************************************************************************/ + +#ifndef __ACCT_SH5UTIL_H__ +#define __ACCT_SH5UTIL_H__ + +typedef enum { + SH5UTIL_MODE_MERGE, + SH5UTIL_MODE_EXTRACT, + SH5UTIL_MODE_ITEM_EXTRACT, + SH5UTIL_MODE_ITEM_LIST, +} sh5util_mode_t; + +typedef struct { + char *dir; + int help; + char *input; + int job_id; + bool keepfiles; + char *level; + sh5util_mode_t mode; + char *node; + char *output; + char *series; + char *data_item; + int step_id; + char *user; + int verbose; +} sh5util_opts_t; + +extern sh5util_opts_t params; + +#endif // __ACCT_SH5UTIL_H__ diff --git a/src/plugins/acct_gather_profile/none/acct_gather_profile_none.c b/src/plugins/acct_gather_profile/none/acct_gather_profile_none.c index 212d5d5cec8e47b0ec92536fbf0baa5565c77e60..6a093cef2dd8fbb1c940cb549922dc3b7f07658e 100644 --- a/src/plugins/acct_gather_profile/none/acct_gather_profile_none.c +++ b/src/plugins/acct_gather_profile/none/acct_gather_profile_none.c @@ -47,6 +47,7 @@ #include "src/common/slurm_protocol_api.h" #include "src/common/slurm_protocol_defs.h" #include "src/slurmd/common/proctrack.h" +#include "src/common/slurm_acct_gather_profile.h" #include <fcntl.h> #include <signal.h> @@ -151,7 +152,19 @@ extern int acct_gather_profile_p_task_end(pid_t taskpid) return SLURM_SUCCESS; } -extern int acct_gather_profile_p_add_sample_data(uint32_t type, void* data) +extern int acct_gather_profile_p_create_group(const char* name) +{ + return SLURM_SUCCESS; +} + +extern int acct_gather_profile_p_create_dataset( + const char* name, int parent, acct_gather_profile_dataset_t *dataset) +{ + return SLURM_SUCCESS; +} + +extern int acct_gather_profile_p_add_sample_data(int dataset_id, void* data, + time_t sample_time) { return SLURM_SUCCESS; } @@ -160,3 +173,9 @@ extern void acct_gather_profile_p_conf_values(List *data) { return; } + +extern bool acct_gather_profile_p_is_active(uint32_t type) +{ + return false; +} + diff --git a/src/plugins/jobacct_gather/aix/jobacct_gather_aix.c b/src/plugins/jobacct_gather/aix/jobacct_gather_aix.c index 6995801c27b00eff052dd4e29cc23d233a8f6ea2..a90d5bbf6481f755ae95faa69b8bdd92443eadfd 100644 --- a/src/plugins/jobacct_gather/aix/jobacct_gather_aix.c +++ b/src/plugins/jobacct_gather/aix/jobacct_gather_aix.c @@ -236,7 +236,7 @@ static List _get_precs(List task_list, bool 
pgid_plugin, uint64_t cont_id, * wrong. */ extern void jobacct_gather_p_poll_data( - List task_list, bool pgid_plugin, uint64_t cont_id) + List task_list, bool pgid_plugin, uint64_t cont_id, bool profile) { static jag_callbacks_t callbacks; static bool first = 1; @@ -248,7 +248,8 @@ extern void jobacct_gather_p_poll_data( callbacks.get_offspring_data = _get_offspring_data; } - jag_common_poll_data(task_list, pgid_plugin, cont_id, &callbacks); + jag_common_poll_data(task_list, pgid_plugin, cont_id, &callbacks, + profile); return; } diff --git a/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.c b/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.c index 0bef3bd7b29fba24eee33f9e1c2f44255ea6c068..61efb05059bb01ca00e0ef16869d67d0c940a812 100644 --- a/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.c +++ b/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.c @@ -282,7 +282,7 @@ extern int fini (void) * wrong. */ extern void jobacct_gather_p_poll_data( - List task_list, bool pgid_plugin, uint64_t cont_id) + List task_list, bool pgid_plugin, uint64_t cont_id, bool profile) { static jag_callbacks_t callbacks; static bool first = 1; @@ -293,7 +293,8 @@ extern void jobacct_gather_p_poll_data( callbacks.prec_extra = _prec_extra; } - jag_common_poll_data(task_list, pgid_plugin, cont_id, &callbacks); + jag_common_poll_data(task_list, pgid_plugin, cont_id, &callbacks, + profile); return; } diff --git a/src/plugins/jobacct_gather/common/common_jag.c b/src/plugins/jobacct_gather/common/common_jag.c index 8e5fa60511393efbfa83b39e08d4d3e98681145b..4d983247eebf4baa2421825f87c7014287d0172d 100644 --- a/src/plugins/jobacct_gather/common/common_jag.c +++ b/src/plugins/jobacct_gather/common/common_jag.c @@ -41,6 +41,7 @@ #include <dirent.h> #include <fcntl.h> #include <signal.h> +#include <time.h> #include <ctype.h> #include "src/common/slurm_xlator.h" @@ -59,6 +60,18 @@ static long hertz = 0; static int my_pagesize = 0; static DIR *slash_proc = NULL; static int energy_profile = ENERGY_DATA_JOULES_TASK; +static uint64_t debug_flags = 0; + +static int _find_prec(void *x, void *key) +{ + jag_prec_t *prec = (jag_prec_t *) x; + struct jobacctinfo *jobacct = (struct jobacctinfo *) key; + + if (prec->pid == jobacct->pid) + return 1; + + return 0; +} /* return weighted frequency in mhz */ static uint32_t _update_weighted_freq(struct jobacctinfo *jobacct, @@ -506,16 +519,14 @@ static List _get_precs(List task_list, bool pgid_plugin, uint64_t cont_id, proctrack_g_get_pids(cont_id, &pids, &npids); if (!npids) { /* update consumed energy even if pids do not exist */ - ListIterator itr = list_iterator_create(task_list); struct jobacctinfo *jobacct = NULL; - if ((jobacct = list_next(itr))) { + if ((jobacct = list_peek(task_list))) { acct_gather_energy_g_get_data( energy_profile, &jobacct->energy); debug2("getjoules_task energy = %u", jobacct->energy.consumed_energy); } - list_iterator_destroy(itr); debug4("no pids in this container %"PRIu64"", cont_id); goto finished; @@ -623,17 +634,112 @@ finished: return prec_list; } +static void _record_profile(struct jobacctinfo *jobacct) +{ + enum { + FIELD_CPUFREQ, + FIELD_CPUTIME, + FIELD_CPUUTIL, + FIELD_RSS, + FIELD_VMSIZE, + FIELD_PAGES, + FIELD_READ, + FIELD_WRITE, + FIELD_CNT + }; + + acct_gather_profile_dataset_t dataset[] = { + { "CPUFrequency", PROFILE_FIELD_UINT64 }, + { "CPUTime", PROFILE_FIELD_UINT64 }, + { "CPUUtilization", PROFILE_FIELD_DOUBLE }, + { "RSS", PROFILE_FIELD_UINT64 }, + { "VMSize", PROFILE_FIELD_UINT64 }, + { "Pages", 
PROFILE_FIELD_UINT64 }, + { "ReadMB", PROFILE_FIELD_DOUBLE }, + { "WriteMB", PROFILE_FIELD_DOUBLE }, + { NULL, PROFILE_FIELD_NOT_SET } + }; + + static int profile_gid = -1; + double et; + union { + double d; + uint64_t u64; + } data[FIELD_CNT]; + + if (profile_gid == -1) + profile_gid = acct_gather_profile_g_create_group("Tasks"); + + /* Create the dataset first */ + if (jobacct->dataset_id < 0) { + char ds_name[32]; + snprintf(ds_name, sizeof(ds_name), "%u", jobacct->id.taskid); + + jobacct->dataset_id = acct_gather_profile_g_create_dataset( + ds_name, profile_gid, dataset); + if (jobacct->dataset_id == SLURM_ERROR) { + error("JobAcct: Failed to create the dataset for " + "task %d", + jobacct->pid); + return; + } + } + + if (jobacct->dataset_id < 0) + return; + + data[FIELD_CPUFREQ].u64 = jobacct->act_cpufreq; + data[FIELD_RSS].u64 = jobacct->tot_rss; + data[FIELD_VMSIZE].u64 = jobacct->tot_vsize; + data[FIELD_PAGES].u64 = jobacct->tot_pages; + + /* delta from last snapshot */ + if (!jobacct->last_time) { + et = 0; + data[FIELD_CPUTIME].u64 = 0; + data[FIELD_CPUUTIL].d = 0.0; + data[FIELD_READ].d = 0.0; + data[FIELD_WRITE].d = 0.0; + } else { + data[FIELD_CPUTIME].u64 = + jobacct->tot_cpu - jobacct->last_total_cputime; + et = (jobacct->cur_time - jobacct->last_time); + if (!et) + data[FIELD_CPUUTIL].d = 0.0; + else + data[FIELD_CPUUTIL].d = + (100.0 * (double)data[FIELD_CPUTIME].u64) / + ((double) et); + + data[FIELD_READ].d = jobacct->tot_disk_read - + jobacct->last_tot_disk_read; + + data[FIELD_WRITE].d = jobacct->tot_disk_write - + jobacct->last_tot_disk_write; + } + + if (debug_flags & DEBUG_FLAG_PROFILE) { + char str[256]; + info("PROFILE-Task: %s", acct_gather_profile_dataset_str( + dataset, data, str, sizeof(str))); + } + acct_gather_profile_g_add_sample_data(jobacct->dataset_id, + (void *)data, jobacct->cur_time); +} + extern void jag_common_init(long in_hertz) { uint32_t profile_opt; + debug_flags = slurm_get_debug_flags(); + acct_gather_profile_g_get(ACCT_GATHER_PROFILE_RUNNING, &profile_opt); /* If we are profiling energy it will be checked at a different rate, so just grab the last one. 
*/ if (profile_opt & ACCT_GATHER_PROFILE_ENERGY) - energy_profile = ENERGY_DATA_STRUCT; + energy_profile = ENERGY_DATA_NODE_ENERGY; if (in_hertz) { hertz = in_hertz; @@ -677,19 +783,18 @@ extern void print_jag_prec(jag_prec_t *prec) extern void jag_common_poll_data( List task_list, bool pgid_plugin, uint64_t cont_id, - jag_callbacks_t *callbacks) + jag_callbacks_t *callbacks, bool profile) { /* Update the data */ List prec_list = NULL; uint64_t total_job_mem = 0, total_job_vsize = 0; ListIterator itr; - ListIterator itr2; jag_prec_t *prec = NULL; struct jobacctinfo *jobacct = NULL; static int processing = 0; char sbuf[72]; int energy_counted = 0; - static int first = 1; + time_t ct; static int no_over_memory_kill = -1; xassert(callbacks); @@ -717,6 +822,7 @@ extern void jag_common_poll_data( if (!callbacks->get_precs) callbacks->get_precs = _get_precs; + ct = time(NULL); prec_list = (*(callbacks->get_precs))(task_list, pgid_plugin, cont_id, callbacks); @@ -725,99 +831,104 @@ extern void jag_common_poll_data( itr = list_iterator_create(task_list); while ((jobacct = list_next(itr))) { - itr2 = list_iterator_create(prec_list); - while ((prec = list_next(itr2))) { - if (prec->pid == jobacct->pid) { - uint32_t cpu_calc; + uint32_t cpu_calc; + uint32_t last_total_cputime; + if (!(prec = list_find_first(prec_list, _find_prec, jobacct))) + continue; + #if _DEBUG - info("pid:%u ppid:%u rss:%d KB", - prec->pid, prec->ppid, prec->rss); + info("pid:%u ppid:%u rss:%d KB", + prec->pid, prec->ppid, prec->rss); #endif - /* find all my descendents */ - if (callbacks->get_offspring_data) - (*(callbacks->get_offspring_data)) - (prec_list, prec, prec->pid); - cpu_calc = (prec->ssec + prec->usec)/hertz; - /* tally their usage */ - jobacct->max_rss = - MAX(jobacct->max_rss, prec->rss); - jobacct->tot_rss = prec->rss; - total_job_mem += prec->rss; - jobacct->max_vsize = - MAX(jobacct->max_vsize, prec->vsize); - jobacct->tot_vsize = prec->vsize; - total_job_vsize += prec->vsize; - jobacct->max_pages = - MAX(jobacct->max_pages, prec->pages); - jobacct->tot_pages = prec->pages; - jobacct->max_disk_read = MAX( - jobacct->max_disk_read, - prec->disk_read); - jobacct->tot_disk_read = prec->disk_read; - jobacct->max_disk_write = MAX( - jobacct->max_disk_write, - prec->disk_write); - jobacct->tot_disk_write = prec->disk_write; - jobacct->min_cpu = - MAX(jobacct->min_cpu, cpu_calc); - jobacct->last_total_cputime = jobacct->tot_cpu; - /* Update the cpu times - */ - jobacct->tot_cpu = cpu_calc; - jobacct->user_cpu_sec = prec->usec/hertz; - jobacct->sys_cpu_sec = prec->ssec/hertz; - debug2("%s: %d mem size %"PRIu64" %"PRIu64" " - "time %u(%u+%u)", __func__, - jobacct->pid, jobacct->max_rss, - jobacct->max_vsize, jobacct->tot_cpu, - jobacct->user_cpu_sec, - jobacct->sys_cpu_sec); - /* compute frequency */ - jobacct->this_sampled_cputime = - cpu_calc - jobacct->last_total_cputime; - _get_sys_interface_freq_line( - prec->last_cpu, - "cpuinfo_cur_freq", sbuf); - jobacct->act_cpufreq = - _update_weighted_freq(jobacct, sbuf); - debug2("%s: Task average frequency = %u " - "pid %d mem size %"PRIu64" %"PRIu64" " - "time %u(%u+%u)", __func__, - jobacct->act_cpufreq, - jobacct->pid, jobacct->max_rss, - jobacct->max_vsize, jobacct->tot_cpu, - jobacct->user_cpu_sec, - jobacct->sys_cpu_sec); - /* get energy consumption - * only once is enough since we - * report per node energy consumption */ - debug2("energycounted = %d", energy_counted); - if (energy_counted == 0) { - acct_gather_energy_g_get_data( - energy_profile, - 
&jobacct->energy); - debug2("getjoules_task energy = %u", - jobacct->energy.consumed_energy); - energy_counted = 1; - } - /* We only profile on after the first poll. */ - if (!first) - acct_gather_profile_g_add_sample_data( - ACCT_GATHER_PROFILE_TASK, - jobacct); - break; - } + /* find all my descendents */ + if (callbacks->get_offspring_data) + (*(callbacks->get_offspring_data)) + (prec_list, prec, prec->pid); + + last_total_cputime = jobacct->tot_cpu; + + cpu_calc = (prec->ssec + prec->usec)/hertz; + /* tally their usage */ + jobacct->max_rss = + MAX(jobacct->max_rss, prec->rss); + jobacct->tot_rss = prec->rss; + total_job_mem += prec->rss; + jobacct->max_vsize = + MAX(jobacct->max_vsize, prec->vsize); + jobacct->tot_vsize = prec->vsize; + total_job_vsize += prec->vsize; + jobacct->max_pages = + MAX(jobacct->max_pages, prec->pages); + jobacct->tot_pages = prec->pages; + jobacct->max_disk_read = MAX( + jobacct->max_disk_read, + prec->disk_read); + jobacct->tot_disk_read = prec->disk_read; + jobacct->max_disk_write = MAX( + jobacct->max_disk_write, + prec->disk_write); + + jobacct->tot_disk_write = prec->disk_write; + jobacct->min_cpu = + MAX(jobacct->min_cpu, cpu_calc); + + /* Update the cpu times + */ + jobacct->tot_cpu = cpu_calc; + jobacct->user_cpu_sec = prec->usec/hertz; + jobacct->sys_cpu_sec = prec->ssec/hertz; + debug2("%s: %d mem size %"PRIu64" %"PRIu64" " + "time %u(%u+%u)", __func__, + jobacct->pid, jobacct->max_rss, + jobacct->max_vsize, jobacct->tot_cpu, + jobacct->user_cpu_sec, + jobacct->sys_cpu_sec); + /* compute frequency */ + jobacct->this_sampled_cputime = + cpu_calc - last_total_cputime; + _get_sys_interface_freq_line( + prec->last_cpu, + "cpuinfo_cur_freq", sbuf); + jobacct->act_cpufreq = + _update_weighted_freq(jobacct, sbuf); + debug("%s: Task average frequency = %u " + "pid %d mem size %"PRIu64" %"PRIu64" " + "time %u(%u+%u)", __func__, + jobacct->act_cpufreq, + jobacct->pid, jobacct->max_rss, + jobacct->max_vsize, jobacct->tot_cpu, + jobacct->user_cpu_sec, + jobacct->sys_cpu_sec); + /* get energy consumption + * only once is enough since we + * report per node energy consumption */ + debug2("energycounted = %d", energy_counted); + if (energy_counted == 0) { + acct_gather_energy_g_get_data( + energy_profile, + &jobacct->energy); + debug2("getjoules_task energy = %u", + jobacct->energy.consumed_energy); + energy_counted = 1; + } + if (profile && + acct_gather_profile_g_is_active(ACCT_GATHER_PROFILE_TASK)) { + jobacct->cur_time = ct; + + _record_profile(jobacct); + + jobacct->last_tot_disk_read = jobacct->tot_disk_read; + jobacct->last_tot_disk_write = jobacct->tot_disk_write; + jobacct->last_total_cputime = jobacct->tot_cpu; + jobacct->last_time = jobacct->cur_time; } - list_iterator_destroy(itr2); } list_iterator_destroy(itr); - if (! 
no_over_memory_kill) { + if (!no_over_memory_kill) jobacct_gather_handle_mem_limit(total_job_mem, total_job_vsize); - } finished: - list_destroy(prec_list); + FREE_NULL_LIST(prec_list); processing = 0; - first = 0; } diff --git a/src/plugins/jobacct_gather/common/common_jag.h b/src/plugins/jobacct_gather/common/common_jag.h index 40c21bb3887c2e218df78713d3353205a1889ebe..30d215ada1a6a701bcf151aca5c3f5341e4ce09e 100644 --- a/src/plugins/jobacct_gather/common/common_jag.h +++ b/src/plugins/jobacct_gather/common/common_jag.h @@ -72,6 +72,6 @@ extern void print_jag_prec(jag_prec_t *prec); extern void jag_common_poll_data( List task_list, bool pgid_plugin, uint64_t cont_id, - jag_callbacks_t *callbacks); + jag_callbacks_t *callbacks, bool profile); #endif diff --git a/src/plugins/jobacct_gather/linux/jobacct_gather_linux.c b/src/plugins/jobacct_gather/linux/jobacct_gather_linux.c index 90c2fc56b052fc28082815302af2ba282a04e053..b094882f90f69489860ca604584b3285fedb9d9a 100644 --- a/src/plugins/jobacct_gather/linux/jobacct_gather_linux.c +++ b/src/plugins/jobacct_gather/linux/jobacct_gather_linux.c @@ -185,7 +185,7 @@ extern int fini (void) * wrong. */ extern void jobacct_gather_p_poll_data( - List task_list, bool pgid_plugin, uint64_t cont_id) + List task_list, bool pgid_plugin, uint64_t cont_id, bool profile) { static jag_callbacks_t callbacks; static bool first = 1; @@ -198,7 +198,8 @@ extern void jobacct_gather_p_poll_data( callbacks.get_offspring_data = _get_offspring_data; } - jag_common_poll_data(task_list, pgid_plugin, cont_id, &callbacks); + jag_common_poll_data(task_list, pgid_plugin, cont_id, &callbacks, + profile); return; } diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c index d36df10f6c2df82491593c6d2356156e2a41aacb..8fe4aa70b833d1efb5fa6106079198cb19eafaf2 100644 --- a/src/slurmd/slurmd/req.c +++ b/src/slurmd/slurmd/req.c @@ -2535,9 +2535,9 @@ _rpc_acct_gather_update(slurm_msg_t *msg) memset(&acct_msg, 0, sizeof(acct_gather_node_resp_msg_t)); acct_msg.node_name = conf->node_name; - acct_msg.energy = acct_gather_energy_alloc(); + acct_msg.energy = acct_gather_energy_alloc(1); acct_gather_energy_g_get_data( - ENERGY_DATA_STRUCT, acct_msg.energy); + ENERGY_DATA_NODE_ENERGY, acct_msg.energy); slurm_msg_t_copy(&resp_msg, msg); resp_msg.msg_type = RESPONSE_ACCT_GATHER_UPDATE; @@ -2576,10 +2576,13 @@ _rpc_acct_gather_energy(slurm_msg_t *msg) acct_gather_node_resp_msg_t acct_msg; time_t now = time(NULL), last_poll = 0; int data_type = ENERGY_DATA_STRUCT; + uint16_t sensor_cnt; acct_gather_energy_req_msg_t *req = msg->data; acct_gather_energy_g_get_data(ENERGY_DATA_LAST_POLL, &last_poll); + acct_gather_energy_g_get_data(ENERGY_DATA_SENSOR_CNT, + &sensor_cnt); /* If we polled later than delta seconds then force a new poll. 
@@ -2588,7 +2591,9 @@ _rpc_acct_gather_energy(slurm_msg_t *msg)
 			data_type = ENERGY_DATA_JOULES_TASK;
 
 		memset(&acct_msg, 0, sizeof(acct_gather_node_resp_msg_t));
-		acct_msg.energy = acct_gather_energy_alloc();
+		acct_msg.sensor_cnt = sensor_cnt;
+		acct_msg.energy = acct_gather_energy_alloc(sensor_cnt);
+
 		acct_gather_energy_g_get_data(data_type, acct_msg.energy);
 
 		slurm_msg_t_copy(&resp_msg, msg);
diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c
index 4226f91d1deb6cdc0371d67ed36e56cf99351a11..66b2b996d47ca09dabbd6f9ad786a2a332aecdbc 100644
--- a/src/slurmd/slurmd/slurmd.c
+++ b/src/slurmd/slurmd/slurmd.c
@@ -756,8 +756,8 @@ _fill_registration_msg(slurm_node_registration_status_msg_t *msg)
 	list_destroy(steps);
 
 	if (!msg->energy)
-		msg->energy = acct_gather_energy_alloc();
-	acct_gather_energy_g_get_data(ENERGY_DATA_STRUCT, msg->energy);
+		msg->energy = acct_gather_energy_alloc(1);
+	acct_gather_energy_g_get_data(ENERGY_DATA_NODE_ENERGY, msg->energy);
 
 	msg->timestamp = time(NULL);
 
diff --git a/testsuite/expect/test12.6 b/testsuite/expect/test12.6
index 3ea2d40be9840f0688a9cec3bfb61df651289223..6c5ce29f312ac1b2f6c6ed7c0d1bc41ae5298969 100755
--- a/testsuite/expect/test12.6
+++ b/testsuite/expect/test12.6
@@ -38,52 +38,6 @@ set file_out "test$test_id.output"
 set file_prog "test$test_id.prog"
 set job_id 0
 
-proc get_jobacct_freq {} {
-	global scontrol
-	set task_freq 30
-	log_user 0
-	match_max -d 100000
-	spawn $scontrol show config
-	expect {
-		timeout {
-			send_user "\nCan't find JobAcctGatherFrequency in slurm.conf. Using $task_freq"
-			return $task_freq
-		}
-	}
-	wait
-	log_user 1
-	set opt ""
-	set lines [split $expect_out(buffer) "\n"]
-	set nl [llength $lines]
-	for {set lx 0} {$lx < $nl} {incr lx} {
-		set line [lindex $lines $lx]
-		if {[string first "JobAcctGatherFrequency" $line] != -1 } {
-			set poseq [string first "=" $line]
-			set opt [string range $line $poseq+2 end]
-			set opt [string tolower $opt]
-			break
-		}
-	}
-	if {$opt == ""} {
-		send_user "\nJobAcctGatherFrequency in slurm.conf. Using $task_freq"
-	}
-
-	set tokens [split $opt ","]
-	set nt [llength $tokens]
-	if {$nt == 1} {
-		if {[string first "task=" $opt] == -1} {
-			return $opt
-		}
-	}
-	for {set tx 0} {$tx < $nt} {incr tx} {
-		set opt [lindex $tokens $tx]
-		if {[string first "task=" $opt] != -1} {
-			return [string range $opt 5 end]
-		}
-	}
-	return task_freq
-}
-
 print_header $test_id
 
 if {[test_front_end]} {
@@ -111,7 +65,10 @@ if {$profile == 0} {
 send_user "\nacct_gather_profile/hdf5 plugin installed\n"
 send_user "Note: this test takes 3 minutes to run\n"
 
-set task_freq [get_jobacct_freq]
+set task_freq [get_job_acct_freq]
+if {$task_freq < 30} {
+	send_user "\nWarning: jobacct_gather_freq < 30 ($task_freq), results are unreliable\n"
+}
 
 #
 # Build a test program to put a known load on the system
@@ -122,12 +79,6 @@ exec $bin_chmod 700 $file_prog
 
 set timeout [expr $max_job_delay + 200]
 
-# Override task polling supplied by slurm.conf
-# I am not sure why we are doing this (but we are)
-# The original task_freq had a + 5 here. That will always fail with a bad
-# allocation if you are enforcing memory limits, so we changed it to - 5
-# instead. I don't think it really matters though.
-set task_freq [expr $task_freq - 5]
 set srun_pid [spawn $srun --acctg-freq=$task_freq --profile=task -t5 ./$file_prog]
 expect {
 	-re "SLURM_JobId=($number)" {
@@ -180,51 +131,70 @@ set lno 0
 set fd 0
 set last_et 0
 set fd [open $file_out "r"]
+set et_col -1
+set cpu_util_col -1
+set read_disk_col -1
 while {$fd > 0 && [gets $fd line] != -1} {
 	incr lno
-	if {$lno > 2} {
-		set tokens [split $line ","]
-		if {[llength $tokens] < 14} {
-			send_user "\nFAILURE: too few items on line $lno"
+
+	set tokens [split $line ","]
+
+	if {$lno == 1} {
+		set et_col [lsearch $tokens "ElapsedTime"]
+		set cpu_util_col [lsearch $tokens "CPUUtilization"]
+		set read_disk_col [lsearch $tokens "ReadMB"]
+
+		if {$et_col == -1} {
+			send_user "\nFAILURE: no ElapsedTime column found\n"
 			set exit_code 1
-			break;
 		}
-		set et [lindex $tokens 5]
-		set cur_et [expr $et - $last_et]
-		set last_et $et
-		set low_rd [expr 0.975 * 10 * $cur_et]
-		set hi_rd [expr 1.025 * 10 * $cur_et]
-		if {$lno == 2 && $et < 30} {
-			send_user "\nWarning: jobacct_gather_freq < 30, results are unreliable\n"
-		}
-		if {$lno == 2 && $et < $task_freq} {
-			send_user "\nFAILURE: sample $et is not --acctg-freq=$task_freq\n"
+		if {$cpu_util_col == -1} {
+			send_user "\nFAILURE: no CPUUtilization column found\n"
 			set exit_code 1
 		}
-		set cputil [lindex $tokens 8]
-		# The range on cpu utilization is pretty wide
-		# Linux accounting resolution is only to one second, so in a
-		# typical 30 interval an extra second is 3%. The burn loop
-		# consumes a bit more that asked for. There is additional type
-		# managing the I/O portion. Slurm and linux also consume some
-		# cpu.
-		if {$cputil < 38.0} {
-			send_user "\nWarning: CPU Busy $cputil not near 40% on line $lno\n"
-			incr nerr
-		}
-		if {$cputil > 47.0} {
-			send_user "\nWarning: CPU Busy $cputil not near 40% on line $lno\n"
-			incr nerr
-		}
-		set rdmb [lindex $tokens 12]
-		if {$rdmb < $low_rd} {
-			send_user "\nWarning: Read Megabytes $rdmb not near 100.0 on line $lno\n"
-			incr nerr
+		if {$read_disk_col == -1} {
+			send_user "\nFAILURE: no ReadMB column found\n"
 			set exit_code 1
 		}
-		if {$rdmb > $hi_rd} {
-			send_user "\nWarning: Read Megabytes $rdmb not near 100.0 on line $lno\n"
-			incr nerr
+
+		if {$exit_code} {
+			break;
 		}
+		continue;
+	}
+
+	set et [lindex $tokens $et_col]
+	set cur_et [expr $et - $last_et]
+	set last_et $et
+
+	if { $lno == 2 } {
+		continue;
+	}
+
+	if {$cur_et < $task_freq} {
+		send_user "\nWarning: Poll interval was only $cur_et instead of expected $task_freq on line $lno\n"
+		incr nerr
+	}
+
+	set cputil [lindex $tokens $cpu_util_col]
+	# The range on cpu utilization is pretty wide
+	# Linux accounting resolution is only to one second, so in a
+	# typical 30 second interval an extra second is 3%. The burn loop
+	# consumes a bit more than asked for. There is additional time
+	# managing the I/O portion. Slurm and linux also consume some
+	# cpu.
+	if {$cputil < 38.0 || $cputil > 47.0 } {
+		send_user "\nWarning: CPU Busy $cputil not near 40% on line $lno\n"
+		incr nerr
+	}
+
+	set rdmb [lindex $tokens $read_disk_col]
+	set low_rd [expr 0.975 * 10 * $cur_et]
+	set hi_rd [expr 1.025 * 10 * $cur_et]
+
+	if {$rdmb < $low_rd || $rdmb > $hi_rd } {
+		send_user "\nWarning: Read Megabytes $rdmb not near 100.0 on line $lno\n"
+		incr nerr
 	}
 }
 close $fd
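# Illustrative note, not part of the patch: the ReadMB bounds in the hunk
# above follow directly from low_rd = 0.975 * 10 * $cur_et and
# hi_rd = 1.025 * 10 * $cur_et, i.e. each sample is expected to report about
# 10 MB read per second of elapsed time (10 * $cur_et MB), with a +/- 2.5%
# tolerance around that value.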