diff --git a/doc/man/man5/knl.conf.5 b/doc/man/man5/knl.conf.5 index 0eb0e17fc81ff9464a0e143c227353df832ce809..581a66c006e588fef19c61db9f3048849d639fce 100644 --- a/doc/man/man5/knl.conf.5 +++ b/doc/man/man5/knl.conf.5 @@ -1,4 +1,4 @@ -.TH "knl.conf" "5" "Slurm Configuration File" "May 2016" "Slurm Configuration File" +.TH "knl.conf" "5" "Slurm Configuration File" "December 2017" "Slurm Configuration File" .SH "NAME" knl.conf \- Slurm configuration file for Intel Knights Landing processor. @@ -135,6 +135,12 @@ The value can include one of the possible values identified with the \fBAllowNUMA\fR configuration parameter above. The default value is "a2a". +.TP +\fBForce\fR +If set to a non\-zero value then load the node_features/generic plugin even +on non\-KNL nodes. +Used primarily for testing purposes. + .TP \fBLogFile\fR Fully qualified path to a log file. @@ -205,7 +211,7 @@ LogFile=/var/tmp/slurm_node_feature.log SyscfgPath=/usr/sbin/syscfg .SH "COPYING" -Copyright (C) 2015-2016 SchedMD LLC. +Copyright (C) 2015-2017 SchedMD LLC. .LP This file is part of Slurm, a resource management program. For details, see <https://slurm.schedmd.com/>. diff --git a/src/plugins/node_features/knl_generic/node_features_knl_generic.c b/src/plugins/node_features/knl_generic/node_features_knl_generic.c index ada184f50298dd58df77c653013ba85c8c8d69c8..02933924db7aedaa3d8e96da6b38a44d65c9c24d 100644 --- a/src/plugins/node_features/knl_generic/node_features_knl_generic.c +++ b/src/plugins/node_features/knl_generic/node_features_knl_generic.c @@ -178,6 +178,8 @@ static knl_system_type_t knl_system_type = KNL_SYSTEM_TYPE_INTEL; static uint32_t ume_check_interval = 0; static pthread_mutex_t ume_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_t ume_thread = 0; +static uint32_t force_load = 0; +static int hw_is_knl = -1; /* Percentage of MCDRAM used for cache by type, updated from syscfg */ static int mcdram_pct[KNL_MCDRAM_CNT]; @@ -190,6 +192,7 @@ static s_p_options_t knl_conf_file_options[] = { {"BootTime", S_P_UINT32}, {"DefaultMCDRAM", S_P_STRING}, {"DefaultNUMA", S_P_STRING}, + {"Force", S_P_UINT32}, {"LogFile", S_P_STRING}, {"McPath", S_P_STRING}, {"SyscfgPath", S_P_STRING}, @@ -714,6 +717,9 @@ extern int init(void) s_p_hashtbl_t *tbl; struct stat stat_buf; int rc = SLURM_SUCCESS; + char* cpuinfo_path = "/proc/cpuinfo"; + FILE *cpu_info_file; + char buf[1024]; /* Set default values */ allow_mcdram = KNL_MCDRAM_FLAG; @@ -774,6 +780,7 @@ extern int init(void) } xfree(tmp_str); } + (void) s_p_get_uint32(&force_load, "Force", tbl); (void) s_p_get_string(&mc_path, "McPath", tbl); (void) s_p_get_string(&syscfg_path, "SyscfgPath", tbl); if (s_p_get_string(&tmp_str, "SystemType", tbl)) { @@ -802,6 +809,20 @@ extern int init(void) else syscfg_found = 0; + hw_is_knl = 0; + cpu_info_file = fopen(cpuinfo_path, "r"); + if (cpu_info_file == NULL) { + error("Error opening/reading %s: %m", cpuinfo_path); + } else { + while (fgets(buf, sizeof(buf), cpu_info_file)) { + if (strstr(buf, "Xeon Phi")) { + hw_is_knl = 1; + break; + } + } + fclose(cpu_info_file); + } + if ((resume_program = slurm_get_resume_program())) { error("Use of ResumeProgram with %s not currently supported", plugin_name); @@ -824,6 +845,7 @@ extern int init(void) info("BootTIme=%u", boot_time); info("DefaultMCDRAM=%s DefaultNUMA=%s", default_mcdram_str, default_numa_str); + info("Force=%u", force_load); info("McPath=%s", mc_path); info("SyscfgPath=%s", syscfg_path); info("SyscfgTimeout=%u msec", syscfg_timeout); @@ -911,11 +933,11 @@ extern void node_features_p_node_state(char **avail_modes, char **current_mode) if (!syscfg_path || !avail_modes || !current_mode) return; - if (syscfg_found == 0) { + if ((syscfg_found == 0) || (!hw_is_knl && !force_load)) { /* This node on cluster lacks syscfg; should not be KNL */ static bool log_event = true; if (log_event) { - info("%s: syscfg program not found, can not get KNL modes", + info("%s: syscfg program not found or node isn't KNL, can not get KNL modes", __func__); log_event = false; } @@ -1289,11 +1311,14 @@ extern int node_features_p_node_set(char *active_features) error("%s: SyscfgPath not configured", __func__); return SLURM_ERROR; } - if (syscfg_found == 0) { - /* This node on cluster lacks syscfg; should not be KNL. - * This code should never be reached */ - error("%s: syscfg program not found; can not set KNL modes", - __func__); + if ((syscfg_found == 0) || (!hw_is_knl && !force_load)) { + /* This node on cluster lacks syscfg; should not be KNL */ + static bool log_event = true; + if (log_event) { + error("%s: syscfg program not found or node isn't KNL; can not set KNL modes", + __func__); + log_event = false; + } return SLURM_ERROR; }