Skip to content
Snippets Groups Projects
Commit 412846ed authored by Dominik Bartkiewicz's avatar Dominik Bartkiewicz Committed by Morris Jette
Browse files

Avoid using knl_generic plugin except on real KNL nodes

Add "Force=1" to knl_generic.conf to override (for testing)
bug 4487
parent 5a23c48a
No related branches found
No related tags found
No related merge requests found
.TH "knl.conf" "5" "Slurm Configuration File" "May 2016" "Slurm Configuration File"
.TH "knl.conf" "5" "Slurm Configuration File" "December 2017" "Slurm Configuration File"
.SH "NAME"
knl.conf \- Slurm configuration file for Intel Knights Landing processor.
......@@ -135,6 +135,12 @@ The value can include one of the possible values identified with the
\fBAllowNUMA\fR configuration parameter above.
The default value is "a2a".
.TP
\fBForce\fR
If set to a non\-zero value then load the node_features/generic plugin even
on non\-KNL nodes.
Used primarily for testing purposes.
.TP
\fBLogFile\fR
Fully qualified path to a log file.
......@@ -205,7 +211,7 @@ LogFile=/var/tmp/slurm_node_feature.log
SyscfgPath=/usr/sbin/syscfg
.SH "COPYING"
Copyright (C) 2015-2016 SchedMD LLC.
Copyright (C) 2015-2017 SchedMD LLC.
.LP
This file is part of Slurm, a resource management program.
For details, see <https://slurm.schedmd.com/>.
......
......@@ -178,6 +178,8 @@ static knl_system_type_t knl_system_type = KNL_SYSTEM_TYPE_INTEL;
static uint32_t ume_check_interval = 0;
static pthread_mutex_t ume_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_t ume_thread = 0;
static uint32_t force_load = 0;
static int hw_is_knl = -1;
/* Percentage of MCDRAM used for cache by type, updated from syscfg */
static int mcdram_pct[KNL_MCDRAM_CNT];
......@@ -190,6 +192,7 @@ static s_p_options_t knl_conf_file_options[] = {
{"BootTime", S_P_UINT32},
{"DefaultMCDRAM", S_P_STRING},
{"DefaultNUMA", S_P_STRING},
{"Force", S_P_UINT32},
{"LogFile", S_P_STRING},
{"McPath", S_P_STRING},
{"SyscfgPath", S_P_STRING},
......@@ -714,6 +717,9 @@ extern int init(void)
s_p_hashtbl_t *tbl;
struct stat stat_buf;
int rc = SLURM_SUCCESS;
char* cpuinfo_path = "/proc/cpuinfo";
FILE *cpu_info_file;
char buf[1024];
/* Set default values */
allow_mcdram = KNL_MCDRAM_FLAG;
......@@ -774,6 +780,7 @@ extern int init(void)
}
xfree(tmp_str);
}
(void) s_p_get_uint32(&force_load, "Force", tbl);
(void) s_p_get_string(&mc_path, "McPath", tbl);
(void) s_p_get_string(&syscfg_path, "SyscfgPath", tbl);
if (s_p_get_string(&tmp_str, "SystemType", tbl)) {
......@@ -802,6 +809,20 @@ extern int init(void)
else
syscfg_found = 0;
hw_is_knl = 0;
cpu_info_file = fopen(cpuinfo_path, "r");
if (cpu_info_file == NULL) {
error("Error opening/reading %s: %m", cpuinfo_path);
} else {
while (fgets(buf, sizeof(buf), cpu_info_file)) {
if (strstr(buf, "Xeon Phi")) {
hw_is_knl = 1;
break;
}
}
fclose(cpu_info_file);
}
if ((resume_program = slurm_get_resume_program())) {
error("Use of ResumeProgram with %s not currently supported",
plugin_name);
......@@ -824,6 +845,7 @@ extern int init(void)
info("BootTIme=%u", boot_time);
info("DefaultMCDRAM=%s DefaultNUMA=%s",
default_mcdram_str, default_numa_str);
info("Force=%u", force_load);
info("McPath=%s", mc_path);
info("SyscfgPath=%s", syscfg_path);
info("SyscfgTimeout=%u msec", syscfg_timeout);
......@@ -911,11 +933,11 @@ extern void node_features_p_node_state(char **avail_modes, char **current_mode)
if (!syscfg_path || !avail_modes || !current_mode)
return;
if (syscfg_found == 0) {
if ((syscfg_found == 0) || (!hw_is_knl && !force_load)) {
/* This node on cluster lacks syscfg; should not be KNL */
static bool log_event = true;
if (log_event) {
info("%s: syscfg program not found, can not get KNL modes",
info("%s: syscfg program not found or node isn't KNL, can not get KNL modes",
__func__);
log_event = false;
}
......@@ -1289,11 +1311,14 @@ extern int node_features_p_node_set(char *active_features)
error("%s: SyscfgPath not configured", __func__);
return SLURM_ERROR;
}
if (syscfg_found == 0) {
/* This node on cluster lacks syscfg; should not be KNL.
* This code should never be reached */
error("%s: syscfg program not found; can not set KNL modes",
__func__);
if ((syscfg_found == 0) || (!hw_is_knl && !force_load)) {
/* This node on cluster lacks syscfg; should not be KNL */
static bool log_event = true;
if (log_event) {
error("%s: syscfg program not found or node isn't KNL; can not set KNL modes",
__func__);
log_event = false;
}
return SLURM_ERROR;
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment