diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1 index 68f187fb888560bb610ae493575db582aaf27b8b..286daa80652649719ba8845c770e6517732b5cf4 100644 --- a/doc/man/man1/scontrol.1 +++ b/doc/man/man1/scontrol.1 @@ -1298,13 +1298,18 @@ energy accounting sample, in watts. .TP \fILowestJoules\fP The energy consumed by the node between the last time it was powered on and -the last time it was registered by the slurmd daemon, in joules. +the last time it was registered by slurmd, in joules. .TP \fIConsumedJoules\fP The energy consumed by the node between the last time it was registered by the slurmd daemon and the last node energy accounting sample, in joules. +.PP +If the reported value is "n/s" (not supported), the node does not support the +configured \fBAcctGatherEnergyType\fR plugin. If the reported value is zero, energy +accounting for nodes is disabled. + .SH "ENVIRONMENT VARIABLES" .PP Some \fBscontrol\fR options may diff --git a/src/api/node_info.c b/src/api/node_info.c index b0a112eb078e1dbc2d0fd2c7c2783c5c7a30643d..47082ff782d2ca51cc760d29f668c0031bfd1035 100644 --- a/src/api/node_info.c +++ b/src/api/node_info.c @@ -293,11 +293,15 @@ slurm_sprint_node_table (node_info_t * node_ptr, xstrcat(out, "\n "); /****** power Line ******/ - snprintf(tmp_line, sizeof(tmp_line), "CurrentWatts=%u LowestJoules=%u " - "ConsumedJoules=%u", - node_ptr->energy->current_watts, node_ptr->energy->base_watts, + if (node_ptr->energy->current_watts == NO_VAL) + snprintf(tmp_line, sizeof(tmp_line), "CurrentWatts=n/s " + "LowestJoules=n/s ConsumedJoules=n/s"); + else + snprintf(tmp_line, sizeof(tmp_line), "CurrentWatts=%u " + "LowestJoules=%u ConsumedJoules=%u", + node_ptr->energy->current_watts, + node_ptr->energy->base_watts, node_ptr->energy->consumed_energy); - xstrcat(out, tmp_line); if (one_liner) diff --git a/src/common/slurm_jobacct_gather.c b/src/common/slurm_jobacct_gather.c index 5a334d897a3eafdb2c88cfe8e496054e45bd0626..ea6fd699837993780af2c8bb44d80ed02b71ae8b 100644 --- a/src/common/slurm_jobacct_gather.c +++ b/src/common/slurm_jobacct_gather.c @@ -1031,6 +1031,9 @@ extern void jobacctinfo_aggregate(jobacctinfo_t *dest, jobacctinfo_t *from) dest->sys_cpu_usec -= 1E6; } dest->act_cpufreq += from->act_cpufreq; + if (from->energy.consumed_energy == NO_VAL) + dest->energy.consumed_energy = NO_VAL; + else dest->energy.consumed_energy += from->energy.consumed_energy; } @@ -1056,5 +1059,8 @@ extern void jobacctinfo_2_stats(slurmdb_stats_t *stats, jobacctinfo_t *jobacct) stats->cpu_min_taskid = jobacct->min_cpu_id.taskid; stats->cpu_ave = (double)jobacct->tot_cpu; stats->act_cpufreq = (double)jobacct->act_cpufreq; + if (jobacct->energy.consumed_energy == NO_VAL) + stats->consumed_energy = NO_VAL; + else stats->consumed_energy = (double)jobacct->energy.consumed_energy; } diff --git a/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c b/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c index 42e376d4d375011a84b48d9dd8d00adb55e00c1c..371a400932b51494f6c115d37ade1071e0f1c378 100644 --- a/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c +++ b/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c @@ -145,7 +145,7 @@ static uint64_t _read_msr(int fd, int which) "this can be common. Check your system " "if you think this is in error."); } else { - error("Check if your CPU has RAPL support for %s: %m", + debug("Check if your CPU has RAPL support for %s: %m", _msr_string(which)); } } @@ -253,6 +253,8 @@ extern int acct_gather_energy_p_update_node_energy(void) uint64_t result; double ret; + if (local_energy->current_watts == NO_VAL) + return rc; acct_gather_energy_shutdown = false; if (!acct_gather_energy_shutdown) { uint32_t node_current_energy; @@ -372,11 +374,18 @@ static void _get_joules_task(acct_gather_energy_t *energy) extern int init(void) { int i; + uint64_t result; + _hardware(); for (i = 0; i < nb_pkg; i++) pkg_fd[i] = _open_msr(pkg2cpu[i]); local_energy = acct_gather_energy_alloc(); + + result = _read_msr(pkg_fd[0], MSR_RAPL_POWER_UNIT); + if (result == 0) + local_energy->current_watts = NO_VAL; + debug_flags = slurm_get_debug_flags(); verbose("%s loaded", plugin_name); return SLURM_SUCCESS; @@ -404,6 +413,9 @@ extern int acct_gather_energy_p_get_data(enum acct_energy_type data_type, int rc = SLURM_SUCCESS; switch (data_type) { case ENERGY_DATA_JOULES_TASK: + if (local_energy->current_watts == NO_VAL) + energy->consumed_energy = NO_VAL; + else _get_joules_task(energy); break; case ENERGY_DATA_STRUCT: diff --git a/src/sacct/process.c b/src/sacct/process.c index 1e74544795a3c19a3f36e944b1b400ddae1092f8..50302e285d4f651ab28237a28cc7f4de9586752a 100644 --- a/src/sacct/process.c +++ b/src/sacct/process.c @@ -92,6 +92,9 @@ void aggregate_stats(slurmdb_stats_t *dest, slurmdb_stats_t *from) dest->cpu_min_taskid = from->cpu_min_taskid; } dest->cpu_ave += from->cpu_ave; + if ((from->consumed_energy == NO_VAL) || (dest->consumed_energy == NO_VAL)) + dest->consumed_energy = NO_VAL; + else dest->consumed_energy += from->consumed_energy; dest->act_cpufreq += from->act_cpufreq; } diff --git a/src/sstat/print.c b/src/sstat/print.c index 10377d5947f69a9c411b2036951afc21ec7703dc..92bcaa9076e8a215a6f54d388fe791a9c7d31918 100644 --- a/src/sstat/print.c +++ b/src/sstat/print.c @@ -90,6 +90,7 @@ void print_fields(slurmdb_step_rec_t *step) while ((field = list_next(print_fields_itr))) { char *tmp_char = NULL; + memset(&outbuf, 0, sizeof(outbuf)); switch(field->type) { case PRINT_AVECPU: @@ -111,8 +112,9 @@ void print_fields(slurmdb_step_rec_t *step) (curr_inx == field_count)); break; case PRINT_CONSUMED_ENERGY: - - convert_num_unit((float)step->stats.consumed_energy, + if (!fuzzy_equal(step->stats.consumed_energy, NO_VAL)) + convert_num_unit((float) + step->stats.consumed_energy, outbuf, sizeof(outbuf), UNIT_NONE); diff --git a/src/sstat/process.c b/src/sstat/process.c index 9f11b0ddf927a7ab64fe092b0572ea82aea94778..d3232ab8dedafaf4fd37974610df0e1e36f27603 100644 --- a/src/sstat/process.c +++ b/src/sstat/process.c @@ -89,6 +89,9 @@ void aggregate_stats(slurmdb_stats_t *dest, slurmdb_stats_t *from) dest->cpu_min_taskid = from->cpu_min_taskid; } dest->cpu_ave += from->cpu_ave; + if ((from->consumed_energy == NO_VAL) || (dest->consumed_energy == NO_VAL)) + dest->consumed_energy = NO_VAL; + else dest->consumed_energy += from->consumed_energy; dest->act_cpufreq += from->act_cpufreq; }