diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 9c45ec01f2801018a4daf2828659a6724a28e9ef..c9e631fd4a3aad22b91880b6e149bd272448271f 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -1392,7 +1392,14 @@ Options current supported by any plugins are listed below. .TP \fBbalance_interval=#\fR Specifies the time interval, in seconds, between attempts to rebalance power -caps across the nodes. Supported by the power/cray plugin. +caps across the nodes. +The default value is 30 seconds. +Supported by the power/cray plugin. +.TP +\fBcapmc_path=\fR +Specifies the absolute path of the capmc command. +The default value is "/opt/cray/capmc/default/bin/capmc". +Supported by the power/cray plugin. .RE .TP diff --git a/src/plugins/power/common/power_common.h b/src/plugins/power/common/power_common.h index 2a2d1054e0e00875c342b2871e8da761702c936e..0b029e8de76a5b5c73cba5ff926e8730c9f62081 100644 --- a/src/plugins/power/common/power_common.h +++ b/src/plugins/power/common/power_common.h @@ -53,6 +53,12 @@ typedef struct power_by_job { uint32_t used_watts; /* Recent power use rate, in watts */ } power_by_job_t; +typedef struct power_by_nodes { + uint32_t alloc_watts; /* Currently allocated power, in watts */ + bool increase_power; /* Set if node's power allocation increasing */ + char *nodes; /* Node names (nid range list values on Cray) */ +} power_by_nodes_t; + /* For all nodes in a cluster, return global power allocation/use information */ extern void get_cluster_power(struct node_record *node_record_table_ptr, int node_record_count, diff --git a/src/plugins/power/cray/Makefile.am b/src/plugins/power/cray/Makefile.am index db17c696e3467d4dfa8fe0ca16d6e4b2cb9fd037..5a84d0dd1db923c277738d0905551fdf31f3f716 100644 --- a/src/plugins/power/cray/Makefile.am +++ b/src/plugins/power/cray/Makefile.am @@ -4,7 +4,7 @@ AUTOMAKE_OPTIONS = foreign PLUGIN_FLAGS = -module -avoid-version --export-dynamic -# if HAVE_NATIVE_CRAY // FIXME: FUTURE +if 
WITH_JSON_PARSER AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/src/common $(JSON_CPPFLAGS) @@ -17,6 +17,6 @@ force: $(power_cray_la_LIBADD) : force @cd `dirname $@` && $(MAKE) `basename $@` -# else -# EXTRA_power_cray_la_SOURCES = power_cray.c -# endif +else +EXTRA_power_cray_la_SOURCES = power_cray.c +endif diff --git a/src/plugins/power/cray/Makefile.in b/src/plugins/power/cray/Makefile.in index 9826fce35df9e2b4b9e085ac4106f65045939e95..bd236f4b5081a4b274d9466fcbf24cd22a0a3353 100644 --- a/src/plugins/power/cray/Makefile.in +++ b/src/plugins/power/cray/Makefile.in @@ -162,8 +162,11 @@ am__uninstall_files_from_dir = { \ } am__installdirs = "$(DESTDIR)$(pkglibdir)" LTLIBRARIES = $(pkglib_LTLIBRARIES) -power_cray_la_DEPENDENCIES = ../common/libpower_common.la -am_power_cray_la_OBJECTS = power_cray.lo +@WITH_JSON_PARSER_TRUE@power_cray_la_DEPENDENCIES = \ +@WITH_JSON_PARSER_TRUE@ ../common/libpower_common.la +am__power_cray_la_SOURCES_DIST = power_cray.c +@WITH_JSON_PARSER_TRUE@am_power_cray_la_OBJECTS = power_cray.lo +am__EXTRA_power_cray_la_SOURCES_DIST = power_cray.c power_cray_la_OBJECTS = $(am_power_cray_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) @@ -172,6 +175,7 @@ am__v_lt_1 = power_cray_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(power_cray_la_LDFLAGS) $(LDFLAGS) -o $@ +@WITH_JSON_PARSER_TRUE@am_power_cray_la_rpath = -rpath $(pkglibdir) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false @@ -206,8 +210,9 @@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = -SOURCES = $(power_cray_la_SOURCES) -DIST_SOURCES = $(power_cray_la_SOURCES) +SOURCES = $(power_cray_la_SOURCES) $(EXTRA_power_cray_la_SOURCES) +DIST_SOURCES = $(am__power_cray_la_SOURCES_DIST) \ + $(am__EXTRA_power_cray_la_SOURCES_DIST) am__can_run_installinfo = \ case 
$$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ @@ -476,13 +481,12 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign PLUGIN_FLAGS = -module -avoid-version --export-dynamic - -# if HAVE_NATIVE_CRAY // FIXME: FUTURE -AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/src/common $(JSON_CPPFLAGS) -pkglib_LTLIBRARIES = power_cray.la -power_cray_la_SOURCES = power_cray.c -power_cray_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) $(JSON_LDFLAGS) -power_cray_la_LIBADD = ../common/libpower_common.la +@WITH_JSON_PARSER_TRUE@AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/src/common $(JSON_CPPFLAGS) +@WITH_JSON_PARSER_TRUE@pkglib_LTLIBRARIES = power_cray.la +@WITH_JSON_PARSER_TRUE@power_cray_la_SOURCES = power_cray.c +@WITH_JSON_PARSER_TRUE@power_cray_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) $(JSON_LDFLAGS) +@WITH_JSON_PARSER_TRUE@power_cray_la_LIBADD = ../common/libpower_common.la +@WITH_JSON_PARSER_FALSE@EXTRA_power_cray_la_SOURCES = power_cray.c all: all-am .SUFFIXES: @@ -554,7 +558,7 @@ clean-pkglibLTLIBRARIES: } power_cray.la: $(power_cray_la_OBJECTS) $(power_cray_la_DEPENDENCIES) $(EXTRA_power_cray_la_DEPENDENCIES) - $(AM_V_CCLD)$(power_cray_la_LINK) -rpath $(pkglibdir) $(power_cray_la_OBJECTS) $(power_cray_la_LIBADD) $(LIBS) + $(AM_V_CCLD)$(power_cray_la_LINK) $(am_power_cray_la_rpath) $(power_cray_la_OBJECTS) $(power_cray_la_LIBADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) @@ -799,13 +803,9 @@ uninstall-am: uninstall-pkglibLTLIBRARIES uninstall-pkglibLTLIBRARIES -force: -$(power_cray_la_LIBADD) : force - @cd `dirname $@` && $(MAKE) `basename $@` - -# else -# EXTRA_power_cray_la_SOURCES = power_cray.c -# endif +@WITH_JSON_PARSER_TRUE@force: +@WITH_JSON_PARSER_TRUE@$(power_cray_la_LIBADD) : force +@WITH_JSON_PARSER_TRUE@ @cd `dirname $@` && $(MAKE) `basename $@` # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. 
diff --git a/src/plugins/power/cray/power_cray.c b/src/plugins/power/cray/power_cray.c index 2671db288558a4069dced0b5f8b4c05b7c3c4d66..21f24c5a77e7bce3d86f4e57afe6f8533f80fd30 100644 --- a/src/plugins/power/cray/power_cray.c +++ b/src/plugins/power/cray/power_cray.c @@ -52,10 +52,12 @@ #include "src/common/log.h" #include "src/common/slurm_protocol_api.h" #include "src/common/xmalloc.h" +#include "src/common/xstring.h" #include "src/plugins/power/common/power_common.h" #include "src/slurmctld/locks.h" #define DEFAULT_BALANCE_INTERVAL 30 +#define DEFAULT_CAPMC_PATH "/opt/cray/capmc/default/bin/capmc" /* These are defined here so when we link with something other than * the slurmctld we will have these symbols defined. They will get @@ -105,6 +107,8 @@ const uint32_t plugin_version = 100; /*********************** local variables *********************/ static int balance_interval = DEFAULT_BALANCE_INTERVAL; +static char *capmc_path = NULL; +static uint64_t debug_flag = 0; static bool stop_power = false; static pthread_t power_thread = 0; static pthread_mutex_t thread_flag_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -114,6 +118,7 @@ static pthread_cond_t term_cond = PTHREAD_COND_INITIALIZER; /*********************** local functions *********************/ static void _load_config(void); extern void *_power_agent(void *args); +static void _set_power_caps(List node_power_list); static void _stop_power_agent(void); /* Parse PowerParameters configuration */ @@ -121,6 +126,7 @@ static void _load_config(void) { char *sched_params, *tmp_ptr; + debug_flag = slurm_get_debug_flags(); sched_params = slurm_get_power_parameters(); if (!sched_params) return; @@ -135,7 +141,21 @@ static void _load_config(void) } } + xfree(capmc_path); + if ((tmp_ptr = strstr(sched_params, "capmc_path="))) { + capmc_path = xstrdup(tmp_ptr + 11); + tmp_ptr = strchr(capmc_path, ','); + if (tmp_ptr) + tmp_ptr[0] = '\0'; + } else { + capmc_path = xstrdup(DEFAULT_CAPMC_PATH); + } + xfree(sched_params); + if 
(debug_flag & DEBUG_FLAG_POWER) { + info("%s configuration: balance_interval=%d capmc_path=%s", + __func__, balance_interval, capmc_path); + } } /* Periodically attempt to re-balance power caps across nodes */ @@ -147,7 +167,7 @@ extern void *_power_agent(void *args) /* Read jobs and nodes */ slurmctld_lock_t read_locks = { NO_LOCK, READ_LOCK, READ_LOCK, NO_LOCK }; - List job_power_list; + List job_power_list, node_power_list = NULL; uint32_t alloc_watts = 0, used_watts = 0; last_balance_time = time(NULL); @@ -162,11 +182,14 @@ extern void *_power_agent(void *args) continue; lock_slurmctld(read_locks); +//FIXME: On Cray/ALPS system use "capmc get_node_energy_counter" to get +// "raw accumulated-energy" and calculate power consumption from that get_cluster_power(node_record_table_ptr, node_record_count, &alloc_watts, &used_watts); job_power_list = get_job_power(job_list, node_record_table_ptr); //FIXME: power re-balancing decisions here FREE_NULL_LIST(job_power_list); + _set_power_caps(node_power_list); last_balance_time = time(NULL); unlock_slurmctld(read_locks); _load_config(); @@ -174,6 +197,73 @@ extern void *_power_agent(void *args) return NULL; } +static void _set_power_caps(List node_power_list) /* For each power_by_nodes_t in node_power_list, hand the argv { capmc_path, "set_power_cap", "--nids", nodes, "--watts", alloc_watts } to power_run_script(); returns immediately when the list is NULL */ +{ + ListIterator node_iterator; + power_by_nodes_t *node_power; + char *cmd_resp, *script_argv[7], watts[32]; + int status = 0; + + if (!node_power_list) + return; + + script_argv[0] = capmc_path; /* NOTE(review): capmc_path is only assigned inside _load_config(); confirm it cannot still be NULL here */ + script_argv[1] = "set_power_cap"; + script_argv[2] = "--nids"; + /* script_argv[3] = node_power->nodes, assigned per list entry below */ + script_argv[4] = "--watts"; + script_argv[5] = watts; /* watts[] is re-filled by snprintf() before every call */ + script_argv[6] = NULL; + + /* Pass 1, decrease power for select nodes (entries with increase_power clear); the first non-zero status is logged and aborts the whole function, so pass 2 is skipped. NOTE(review): presumably caps are lowered first so the total is never exceeded -- confirm */ + node_iterator = list_iterator_create(node_power_list); + while ((node_power = (power_by_nodes_t *) list_next(node_iterator))) { + if (node_power->increase_power) + continue; + script_argv[3] = node_power->nodes; + snprintf(watts, sizeof(watts), "%u", node_power->alloc_watts); + cmd_resp = power_run_script("capmc", capmc_path, script_argv, + 2000, 
&status); /* NOTE(review): 2000 is presumably a timeout in msec -- confirm against power_run_script() */ + if (status != 0) { + error("%s: capmc %s %s %s %s %s: %s", + __func__, script_argv[1], script_argv[2], + script_argv[3], script_argv[4], script_argv[5], + cmd_resp); + xfree(cmd_resp); + list_iterator_destroy(node_iterator); + return; + } else if (debug_flag & DEBUG_FLAG_POWER) { + info("%s: capmc %s %s %s %s %s", + __func__, script_argv[1], script_argv[2], + script_argv[3], script_argv[4], script_argv[5]); + } + xfree(cmd_resp); + } + + /* Pass 2, increase power for select nodes (entries with increase_power set); the same iterator is reset and reused, and a non-zero status is logged but the loop moves on to the next entry */ + list_iterator_reset(node_iterator); + while ((node_power = (power_by_nodes_t *) list_next(node_iterator))) { + if (!node_power->increase_power) + continue; + script_argv[3] = node_power->nodes; + snprintf(watts, sizeof(watts), "%u", node_power->alloc_watts); + cmd_resp = power_run_script("capmc", capmc_path, script_argv, + 2000, &status); + if (status != 0) { + error("%s: capmc %s %s %s %s %s: %s", + __func__, script_argv[1], script_argv[2], + script_argv[3], script_argv[4], script_argv[5], + cmd_resp); + } else if (debug_flag & DEBUG_FLAG_POWER) { + info("%s: capmc %s %s %s %s %s", + __func__, script_argv[1], script_argv[2], + script_argv[3], script_argv[4], script_argv[5]); + } + xfree(cmd_resp); + } + list_iterator_destroy(node_iterator); +} + /* Terminate power thread */ static void _stop_power_agent(void) {