From f686c8165ec4a24bea4c133565bd07f966a87233 Mon Sep 17 00:00:00 2001 From: "Christopher J. Morrone" <morrone2@llnl.gov> Date: Tue, 14 Mar 2006 20:05:50 +0000 Subject: [PATCH] svn merge -r7263:7499 https://eris.llnl.gov/svn/slurm/branches/config New general configuration file parser and slurm.conf handling code. Allows long lines to be continued on the next line by ending with a "\". Whitespace is allowed between the key and "=", and between the "=" and value. slurm.conf is now read only once, and resulting data structures can be shared between application and slurm library code. WARNING: A NodeName may now occur only once in a slurm.conf file. If you want to temporarily make nodes DOWN in the slurm.conf, use the new DownNodes keyword (see "man slurm.conf"). --- NEWS | 8 + doc/man/man5/slurm.conf.5 | 38 +- src/common/Makefile.am | 1 + src/common/parse_config.c | 1041 ++++++++++ src/common/parse_config.h | 306 +++ src/common/read_config.c | 2071 ++++++++++---------- src/common/read_config.h | 179 +- src/common/slurm_protocol_api.c | 284 +-- src/common/xstring.c | 27 + src/common/xstring.h | 5 + src/plugins/switch/federation/federation.c | 98 +- src/scontrol/scontrol.c | 56 +- src/slurmctld/controller.c | 21 +- src/slurmctld/job_mgr.c | 1 + src/slurmctld/node_mgr.c | 38 +- src/slurmctld/proc_req.c | 137 +- src/slurmctld/read_config.c | 788 +++----- src/slurmctld/slurmctld.h | 4 +- src/slurmd/common/slurmstepd_init.c | 4 +- src/slurmd/slurmd/req.c | 5 +- src/slurmd/slurmd/slurmd.c | 99 +- src/slurmd/slurmd/slurmd.h | 4 +- src/slurmd/slurmstepd/slurmstepd.c | 4 +- src/squeue/opts.c | 2 +- src/srun/srun_job.c | 6 +- 25 files changed, 3194 insertions(+), 2033 deletions(-) create mode 100644 src/common/parse_config.c create mode 100644 src/common/parse_config.h diff --git a/NEWS b/NEWS index ebaae9f1c2a..b0dae062d5a 100644 --- a/NEWS +++ b/NEWS @@ -4,6 +4,14 @@ documents those changes that are of interest to users and admins. 
* Changes in SLURM 1.1.0-pre3 ============================= -- Added framework for XCPU job launch support. + -- New general configuration file parser and slurm.conf handling code. + Allows long lines to be continued on the next line by ending with a "\". + Whitespace is allowed between the key and "=", and between the "=" and + value. + WARNING: A NodeName may now occur only once in a slurm.conf file. + If you want to temporarily make nodes DOWN in the slurm.conf, + use the new DownNodes keyword (see "man slurm.conf"). + * Changes in SLURM 1.1.0-pre2 ============================= diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 7fa222d08a1..cb0194fb1e4 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -564,11 +564,11 @@ of larger clusters. In order to support the concept of jobs requiring consecutive nodes on some architectures, node specifications should be place in this file in consecutive order. -If a specific node name is listed more than once in the configuration -file only its "State" and "Reason" fields may be reset. -This may be useful to record the state of nodes which are temporarily +No single node name may be listed more than once in the configuration +file. +Use "DownNodes=" to record the state of nodes which are temporarily in a DOWN or DRAINED state without altering permanent configuration -information as shown in the example. +information. A job step's tasks are allocated to nodes in order the nodes appear in the configuration file. There is presently no capability within SLURM to arbitarily order a job step's tasks. @@ -690,6 +690,34 @@ should be assigned to nodes with more processors, memory, disk space, higher processor speed, etc. Weight is an integer value with a default value of 1. .LP +The "DownNodes=" configuration permits you to mark certain nodes as in a +DOWN or DRAINED state without altering the permanent configuration +information listed under a "NodeName=" specification. 
+.TP +\fBDownNodes\fR +Any node name, or list of node names, from the "NodeName=" specifications. +.TP +\fBReason\fR +Identifies the reason for a node being in state "DOWN" or "DRAINED" +or "DRAINING". Use quotes to enclose a reason having more than one +word. +.TP +\fBState\fR +State of the node with respect to the initiation of user jobs. +Acceptable values are "BUSY", "DOWN", "DRAINED", "DRAINING", "IDLE", +and "UNKNOWN". "BUSY" indicates the node has been allocated work +and should not be used in the configuration file. +"DOWN" indicates the node failed and is unavailable to be allocated work. +"DRAINED" indicates the node was configured unavailable to be +allocated work and is presently not performing any work. +"DRAINING" indicates the node is unavailable to be allocated new +work, but is completing the processing of a job. +"IDLE" indicates the node is available to be allocated work, but +has none at present. +"UNKNOWN" indicates the node's state is undefined, but will be +established when the \fBslurmd\fR daemon on that node registers. +The default value is "UNKNOWN". +.LP The partition configuration permits you to establish different job limits or access controls for various groups (or partitions) of nodes. 
Nodes may be in more than one partition, making partitions serve @@ -874,7 +902,7 @@ NodeName=dev[0-25] NodeAddr=edev[0-25] Weight=16 .br # Update records for specific DOWN nodes .br -NodeName=dev20 State=DOWN Reason="power,ETA=Dec25" +DownNodes=dev20 State=DOWN Reason="power,ETA=Dec25" .br # .br diff --git a/src/common/Makefile.am b/src/common/Makefile.am index d2eba3367b0..899c7838740 100644 --- a/src/common/Makefile.am +++ b/src/common/Makefile.am @@ -36,6 +36,7 @@ libcommon_la_SOURCES = \ bitstring.c bitstring.h \ mpi.c mpi.h \ pack.c pack.h \ + parse_config.c parse_config.h \ parse_spec.c parse_spec.h \ plugin.c plugin.h \ plugrack.c plugrack.h \ diff --git a/src/common/parse_config.c b/src/common/parse_config.c new file mode 100644 index 00000000000..124e2c217d7 --- /dev/null +++ b/src/common/parse_config.c @@ -0,0 +1,1041 @@ +/*****************************************************************************\ + * parse_config.c - parse any slurm.conf-like configuration file + * + * NOTE: when you see the prefix "s_p_", think "slurm parser". + * + * $Id$ + ***************************************************************************** + * Copyright (C) 2006 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Christopher J. Morrone <morrone2@llnl.gov>. + * UCRL-CODE-217948. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. 
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+\*****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <errno.h>	/* errno, EINVAL, ERANGE used by the number handlers */
+#include <stdio.h>	/* FILE, fopen(), fgets(), fclose() */
+#include <string.h>
+#include <strings.h>	/* strcasecmp() */
+#include <sys/types.h>
+#include <regex.h>
+#include <stdint.h>
+#include <ctype.h>
+
+/* #include "src/common/slurm_protocol_defs.h" */
+#include "src/common/log.h"
+#include "src/common/macros.h"
+#include "src/common/xmalloc.h"
+#include "src/common/xstring.h"
+#include "src/common/xassert.h"
+/* #include "src/common/slurm_rlimits_info.h" */
+#include "src/common/parse_config.h"
+
+#include <slurm/slurm.h>
+
+/* Size of the line buffer used by s_p_parse_file() */
+#define BUFFER_SIZE 4096
+
+/* Number of buckets in an s_p_hashtbl_t */
+#define CONF_HASH_LEN 26
+
+/*
+ * Extended regular expression matching one "key = value" pair at the
+ * start of a string.  Capture groups:
+ *   1 - the key
+ *   2 - the whole value (including the quotes, if any)
+ *   3 - a quoted value, quotes included
+ *   4 - the contents of a quoted value
+ *   5 - an unquoted value
+ *   6 - the white-space (or end of string) terminating the pair
+ */
+static regex_t keyvalue_re;
+static char *keyvalue_pattern =
+	"^[[:space:]]*"
+	"([[:alpha:]]+)" /* key */
+	"[[:space:]]*=[[:space:]]*"
+	"((\"([^\"]*)\")|([^[:space:]]+))" /* value: quoted with whitespace,
+					    * or unquoted and no whitespace */
+	"([[:space:]]|$)";
+static bool keyvalue_initialized = false;
+
+/*
+ * One entry of the hash table: the description of a key taken from the
+ * s_p_options_t array plus whatever value(s) have been parsed for it.
+ */
+struct s_p_values {
+	char *key;		/* key name (private copy) */
+	int type;		/* one of the S_P_* types */
+	int data_count;		/* number of values stored in "data" */
+	void *data;		/* parsed value, or array of values */
+	int (*handler)(void **data, slurm_parser_enum_t type,
+		       const char *key, const char *value, const char *line);
+	void (*destroy)(void *data);
+	s_p_values_t *next;	/* next entry in the same bucket */
+};
+
+/*
+ * Compute the bucket index for "key": the sum of (at most) the first ten
+ * lower-cased characters, modulo CONF_HASH_LEN.
+ *
+ * NOTE - "key" is case insensitive.
+ */
+static int _conf_hashtbl_index(const char *key)
+{
+	int i;
+	int idx = 0;
+
+	xassert(key);
+	for (i = 0; i < 10; i++) {
+		if (key[i] == '\0')
+			break;
+		/* tolower() is only defined for unsigned char values (and
+		 * EOF); a plain, possibly negative, char could also drive
+		 * idx below zero and index outside of the table. */
+		idx += tolower((unsigned char)key[i]);
+	}
+	return idx % CONF_HASH_LEN;
+}
+
+/*
+ * Push "value" onto the front of the bucket selected by its key.
+ */
+static void _conf_hashtbl_insert(s_p_hashtbl_t *hashtbl,
+				 s_p_values_t *value)
+{
+	int idx;
+
+	xassert(value);
+	idx = _conf_hashtbl_index(value->key);
+	value->next = hashtbl[idx];
+	hashtbl[idx] = value;
+}
+
+/*
+ * NOTE - "key" is case insensitive.
+ */
+/*
+ * Return the table entry whose key matches "key" (ignoring case), or NULL
+ * if "hashtbl" is NULL or holds no such key.
+ */
+static s_p_values_t *_conf_hashtbl_lookup(
+	const s_p_hashtbl_t *hashtbl, const char *key)
+{
+	int idx;
+	s_p_values_t *p;
+
+	xassert(key);
+	if (hashtbl == NULL)
+		return NULL;
+
+	idx = _conf_hashtbl_index(key);
+	/* walk the collision chain of the selected bucket */
+	for (p = hashtbl[idx]; p != NULL; p = p->next) {
+		if (strcasecmp(p->key, key) == 0)
+			return p;
+	}
+	return NULL;
+}
+
+/*
+ * Build a hash table holding one (still value-less) entry per element of
+ * "options".  The array must be terminated by an element with a NULL key.
+ * A NULL "options" yields an empty table.
+ *
+ * The returned table must be released with s_p_hashtbl_destroy().
+ */
+s_p_hashtbl_t *s_p_hashtbl_create(
+	s_p_options_t options[])
+{
+	s_p_options_t *op = NULL;
+	s_p_values_t *value;
+	s_p_hashtbl_t *hashtbl;
+	int len;
+
+	len = CONF_HASH_LEN * sizeof(s_p_values_t *);
+	hashtbl = (s_p_hashtbl_t *)xmalloc(len);
+	memset(hashtbl, 0, len);
+
+	for (op = options; op != NULL && op->key != NULL; op++) {
+		value = xmalloc(sizeof(s_p_values_t));
+		value->key = xstrdup(op->key);
+		value->type = op->type;
+		value->data_count = 0;
+		value->data = NULL;
+		value->next = NULL;
+		value->handler = op->handler;
+		value->destroy = op->destroy;
+		_conf_hashtbl_insert(hashtbl, value);
+	}
+
+	return hashtbl;
+}
+
+/*
+ * Release one table entry together with the value(s) stored in it.
+ * Values are released with the entry's destroy function when one was
+ * supplied, and with xfree() otherwise.
+ */
+static void _conf_file_values_free(s_p_values_t *p)
+{
+	int i;
+
+	if (p->data_count > 0) {
+		switch(p->type) {
+		case S_P_ARRAY:
+			/* "data" is an array of data_count pointers */
+			for (i = 0; i < p->data_count; i++) {
+				void **ptr_array = (void **)p->data;
+				if (p->destroy != NULL) {
+					p->destroy(ptr_array[i]);
+				} else {
+					xfree(ptr_array[i]);
+				}
+			}
+			xfree(p->data);
+			break;
+		default:
+			if (p->destroy != NULL) {
+				p->destroy(p->data);
+			} else {
+				xfree(p->data);
+			}
+			break;
+		}
+	}
+	xfree(p->key);
+	xfree(p);
+}
+
+/*
+ * Release a table created by s_p_hashtbl_create() and everything stored
+ * in it.  Passing NULL is a no-op.
+ */
+void s_p_hashtbl_destroy(s_p_hashtbl_t *hashtbl)
+{
+	int i;
+	s_p_values_t *p, *next;
+
+	if (hashtbl == NULL)
+		return;
+
+	for (i = 0; i < CONF_HASH_LEN; i++) {
+		for (p = hashtbl[i]; p != NULL; p = next) {
+			/* grab the link before the entry is freed */
+			next = p->next;
+			_conf_file_values_free(p);
+		}
+	}
+	xfree(hashtbl);
+}
+
+/*
+ * Compile keyvalue_pattern into keyvalue_re the first time it is called.
+ */
+static void _keyvalue_regex_init(void)
+{
+	if (!keyvalue_initialized) {
+		if (regcomp(&keyvalue_re, keyvalue_pattern,
+			    REG_EXTENDED) != 0) {
+			/* Every later regexec() would operate on an
+			 * uncompiled regex_t, so there is no way to
+			 * carry on from here. */
+			fatal("keyvalue regex compilation failed");
+		}
+		keyvalue_initialized = true;
+	}
+}
+
+/*
+ * IN line - string
+ *           to be searched for a key=value pair
+ * OUT key - pointer to the key string (caller must free with xfree())
+ * OUT value - pointer to the value string (caller must free with xfree())
+ * OUT remaining - pointer into the "line" string denoting the start
+ *                 of the unsearched portion of the string
+ * Return 0 when a key-value pair is found, and -1 otherwise.
+ *
+ * When -1 is returned "key" and "value" are NULL and "remaining" points
+ * at the start of "line".
+ */
+static int _keyvalue_regex(const char *line,
+			   char **key, char **value, char **remaining)
+{
+	regmatch_t pmatch[8];
+	const size_t nmatch = sizeof(pmatch) / sizeof(pmatch[0]);
+
+	*key = NULL;
+	*value = NULL;
+	*remaining = (char *)line;
+	memset(pmatch, 0, sizeof(pmatch));
+	/* Any non-zero result, REG_NOMATCH or otherwise, means that
+	 * pmatch[] holds nothing usable. */
+	if (regexec(&keyvalue_re, line, nmatch, pmatch, 0) != 0)
+		return -1;
+
+	/* group 1: the key */
+	*key = xstrndup(line + pmatch[1].rm_so,
+			pmatch[1].rm_eo - pmatch[1].rm_so);
+
+	if (pmatch[4].rm_so != -1) {
+		/* group 4: contents of a quoted value, without the quotes */
+		*value = xstrndup(line + pmatch[4].rm_so,
+				  pmatch[4].rm_eo - pmatch[4].rm_so);
+	} else if (pmatch[5].rm_so != -1) {
+		/* group 5: unquoted value */
+		*value = xstrndup(line + pmatch[5].rm_so,
+				  pmatch[5].rm_eo - pmatch[5].rm_so);
+	} else {
+		*value = xstrdup("");
+	}
+
+	/* group 2 spans the whole value, closing quote included */
+	*remaining = (char *)(line + pmatch[2].rm_eo);
+
+	return 0;
+}
+
+/*
+ * Detect and remove a line continuation at the end of "buf".
+ *
+ * A continuation is an odd number of contiguous backslashes at the end of
+ * the line, optionally followed by white-space.  When one is found the
+ * string is terminated at the last of those backslashes and the new
+ * length is returned.  Otherwise "buf" is left alone and "len" is
+ * returned.
+ */
+static int _strip_continuation(char *buf, int len)
+{
+	int i;
+	int bs = 0;
+
+	/* Scan backwards by index so that no pointer before the start of
+	 * the buffer is ever formed. */
+	for (i = len - 1; i >= 0; i--) {
+		if (buf[i] == '\\')
+			bs++;
+		else if (isspace((unsigned char)buf[i]) && bs == 0)
+			continue;
+		else
+			break;
+	}
+	/* Check for an odd number of contiguous backslashes at
+	   the end of the line */
+	if (bs % 2 == 1) {
+		i += bs;	/* index of the last backslash of the run */
+		buf[i] = '\0';
+		return i;
+	} else {
+		return len; /* no continuation */
+	}
+}
+
+/*
+ * Strip out trailing carriage returns and newlines
+ */
+static void _strip_cr_nl(char *line)
+{
+	int i;
+
+	for (i = (int)strlen(line) - 1; i >= 0; i--) {
+		if (line[i] == '\r' || line[i] == '\n') {
+			line[i] = '\0';
+		} else {
+			return;
+		}
+	}
+}
+
+/* Strip comments from a line by terminating the string
+ * where the comment
+ * begins.
+ * Everything after a non-escaped "#" is a comment.
+ */
+static void _strip_comments(char *line)
+{
+	int i;
+	int len = strlen(line);
+	int bs_count = 0;
+
+	for (i = 0; i < len; i++) {
+		/* if # character is preceded by an even number of
+		 * escape characters '\' */
+		if (line[i] == '#' && (bs_count%2) == 0) {
+			line[i] = '\0';
+			break;
+		} else if (line[i] == '\\') {
+			bs_count++;
+		} else {
+			bs_count = 0;
+		}
+	}
+}
+
+/*
+ * Strips any escape characters, "\".  If you WANT a back-slash,
+ * it must be escaped, "\\".
+ */
+static void _strip_escapes(char *line)
+{
+	int i, j;
+	int len = strlen(line);
+
+	/* i < len+1 so that the terminating '\0' is copied as well */
+	for (i = 0, j = 0; i < len+1; i++, j++) {
+		if (line[i] == '\\')
+			i++;
+		line[j] = line[i];
+	}
+}
+
+/*
+ * Reads the next line from the "file" into buffer "buf".
+ *
+ * Concatenates together lines that are continued on
+ * the next line by a trailing "\".  Strips out comments,
+ * replaces escaped "\#" with "#", and replaces "\\" with "\".
+ *
+ * Returns 1 if anything was read, and 0 at end of file (in which case
+ * "buf" holds an empty string).
+ */
+static int _get_next_line(char *buf, int buf_size, FILE *file)
+{
+	char *ptr = buf;
+	int leftover = buf_size;
+	int read_size, new_size;
+	int eof = 1;
+
+	/* fgets() does not touch the buffer when it hits end of file
+	 * straight away; make sure _strip_escapes() below never runs on
+	 * uninitialized memory. */
+	if (buf_size > 0)
+		buf[0] = '\0';
+
+	while (fgets(ptr, leftover, file)) {
+		eof = 0;
+		_strip_comments(ptr);
+		read_size = strlen(ptr);
+		new_size = _strip_continuation(ptr, read_size);
+		if (new_size < read_size) {
+			/* continued: append the next line right here */
+			ptr += new_size;
+			leftover -= new_size;
+		} else { /* no continuation */
+			break;
+		}
+	}
+	/* _strip_cr_nl(buf); */ /* not necessary */
+	_strip_escapes(buf);
+
+	return !eof;
+}
+
+/*
+ * The _handle_<type>() functions below store one parsed "value" in "v".
+ *
+ * All of them return 1 when a value was stored, 0 when the key's handler
+ * function returned 0 (nothing stored), and -1 on error.  A handler
+ * function is expected to return 1 when it has set its "data" argument.
+ */
+static int _handle_string(s_p_values_t *v,
+			  const char *value, const char *line)
+{
+	if (v->data_count != 0) {
+		error("%s specified more than once", v->key);
+		return -1;
+	}
+
+	if (v->handler != NULL) {
+		/* call the handler function */
+		int rc;
+		rc = v->handler(&v->data, v->type, v->key, value, line);
+		if (rc != 1)
+			return rc == 0 ? 0 : -1;
+	} else {
+		v->data = xstrdup(value);
+	}
+
+	v->data_count = 1;
+	return 1;
+}
+
+/*
+ * Store a long.  "INFINITE" (any case) is stored as -1.
+ */
+static int _handle_long(s_p_values_t *v,
+			const char *value, const char *line)
+{
+	if (v->data_count != 0) {
+		error("%s specified more than once", v->key);
+		return -1;
+	}
+
+	if (v->handler != NULL) {
+		/* call the handler function */
+		int rc;
+		rc = v->handler(&v->data, v->type, v->key, value, line);
+		if (rc != 1)
+			return rc == 0 ? 0 : -1;
+	} else {
+		char *endptr;
+		long num;
+		errno = 0;
+		num = strtol(value, &endptr, 0);
+		/* endptr == value: no digits at all (e.g. an empty
+		 * quoted value), which strtol() reports as 0 */
+		if ((endptr == value)
+		    || (num == 0 && errno == EINVAL)
+		    || (*endptr != '\0')) {
+			if (strcasecmp(value, "INFINITE") == 0) {
+				num = (long)-1;
+			} else {
+				error("\"%s\" is not a valid number", value);
+				return -1;
+			}
+		} else if (errno == ERANGE) {
+			error("\"%s\" is out of range", value);
+			return -1;
+		}
+		v->data = xmalloc(sizeof(long));
+		*(long *)v->data = num;
+	}
+
+	v->data_count = 1;
+	return 1;
+}
+
+/*
+ * Return 1 if the first non-white-space character of "value" is a minus
+ * sign.  strtoul() accepts such input and silently wraps the result
+ * around, so the sign has to be looked for separately.
+ */
+static int _has_minus_sign(const char *value)
+{
+	while (isspace((unsigned char)*value))
+		value++;
+
+	return *value == '-';
+}
+
+/*
+ * Store a uint16_t.  "INFINITE" (any case) is stored as 0xffff.
+ */
+static int _handle_uint16(s_p_values_t *v,
+			  const char *value, const char *line)
+{
+	if (v->data_count != 0) {
+		error("%s specified more than once", v->key);
+		return -1;
+	}
+
+	if (v->handler != NULL) {
+		/* call the handler function */
+		int rc;
+		rc = v->handler(&v->data, v->type, v->key, value, line);
+		if (rc != 1)
+			return rc == 0 ? 0 : -1;
+	} else {
+		char *endptr;
+		unsigned long num;
+
+		errno = 0;
+		num = strtoul(value, &endptr, 0);
+		if ((endptr == value)
+		    || (num == 0 && errno == EINVAL)
+		    || (*endptr != '\0')) {
+			if (strcasecmp(value, "INFINITE") == 0) {
+				num = (uint16_t)-1;
+			} else {
+				error("\"%s\" is not a valid number", value);
+				return -1;
+			}
+		} else if (errno == ERANGE) {
+			error("\"%s\" is out of range", value);
+			return -1;
+		} else if (num != 0 && _has_minus_sign(value)) {
+			/* "num" is unsigned, so it cannot be compared
+			 * against zero to find this out */
+			error("\"%s\" is less than zero", value);
+			return -1;
+		} else if (num > 0xffff) {
+			error("\"%s\" is greater than 65535", value);
+			return -1;
+		}
+		v->data = xmalloc(sizeof(uint16_t));
+		*(uint16_t *)v->data = (uint16_t)num;
+	}
+
+	v->data_count = 1;
+	return 1;
+}
+
+/*
+ * Store a uint32_t.  "INFINITE" (any case) is stored as 0xffffffff.
+ */
+static int _handle_uint32(s_p_values_t *v,
+			  const char *value, const char *line)
+{
+	if (v->data_count != 0) {
+		error("%s specified more than once", v->key);
+		return -1;
+	}
+
+	if (v->handler != NULL) {
+		/* call the handler function */
+		int rc;
+		rc = v->handler(&v->data, v->type, v->key, value, line);
+		if (rc != 1)
+			return rc == 0 ? 0 : -1;
+	} else {
+		char *endptr;
+		unsigned long num;
+
+		errno = 0;
+		num = strtoul(value, &endptr, 0);
+		if ((endptr == value)
+		    || (num == 0 && errno == EINVAL)
+		    || (*endptr != '\0')) {
+			if (strcasecmp(value, "INFINITE") == 0) {
+				num = (uint32_t)-1;
+			} else {
+				error("\"%s\" is not a valid number", value);
+				return -1;
+			}
+		} else if (errno == ERANGE) {
+			error("\"%s\" is out of range", value);
+			return -1;
+		} else if (num != 0 && _has_minus_sign(value)) {
+			/* without this a negative value would wrap around
+			 * and be accepted where unsigned long is 32 bits */
+			error("\"%s\" is less than zero", value);
+			return -1;
+		} else if (num > 0xffffffff) {
+			error("\"%s\" is greater than 4294967295", value);
+			return -1;
+		}
+		v->data = xmalloc(sizeof(uint32_t));
+		*(uint32_t *)v->data = (uint32_t)num;
+	}
+
+	v->data_count = 1;
+	return 1;
+}
+
+/*
+ * Store an opaque pointer.  Without a handler function the value string
+ * itself is duplicated and stored.
+ */
+static int _handle_pointer(s_p_values_t *v,
+			   const char *value, const char *line)
+{
+	if (v->data_count != 0) {
+		error("%s specified more than once", v->key);
+		return -1;
+	}
+
+	if (v->handler != NULL) {
+		/* call the handler function */
+		int rc;
+		rc = v->handler(&v->data, v->type, v->key, value, line);
+		if (rc != 1)
+			return rc == 0 ? 0 : -1;
+	} else {
+		v->data = xstrdup(value);
+	}
+
+	v->data_count = 1;
+	return 1;
+}
+
+/*
+ * Append one more element to the pointer array of "v".  Unlike the other
+ * types, a key of type S_P_ARRAY may appear any number of times.
+ */
+static int _handle_array(s_p_values_t *v,
+			 const char *value, const char *line)
+{
+	void *new_ptr;
+	void **data;
+
+	if (v->handler != NULL) {
+		/* call the handler function */
+		int rc;
+		rc = v->handler(&new_ptr, v->type, v->key, value, line);
+		if (rc != 1)
+			return rc == 0 ? 0 : -1;
+	} else {
+		new_ptr = xstrdup(value);
+	}
+	v->data_count += 1;
+	v->data = xrealloc(v->data, (v->data_count)*sizeof(void *));
+	data = &((void**)v->data)[v->data_count-1];
+	*data = new_ptr;
+
+	return 1;
+}
+
+/*
+ * Store a bool.  "yes", "up" and "1" mean true; "no", "down" and "0"
+ * mean false (all case insensitive).
+ */
+static int _handle_boolean(s_p_values_t *v,
+			   const char *value, const char *line)
+{
+	if (v->data_count != 0) {
+		error("%s specified more than once", v->key);
+		return -1;
+	}
+
+	if (v->handler != NULL) {
+		/* call the handler function */
+		int rc;
+		rc = v->handler(&v->data, v->type, v->key, value, line);
+		if (rc != 1)
+			return rc == 0 ? 0 : -1;
+	} else {
+		bool flag;
+
+		if (!strcasecmp(value, "yes")
+		    || !strcasecmp(value, "up")
+		    || !strcasecmp(value, "1")) {
+			flag = true;
+		} else if (!strcasecmp(value, "no")
+			   || !strcasecmp(value, "down")
+			   || !strcasecmp(value, "0")) {
+			flag = false;
+		} else {
+			error("\"%s\" is not a valid option for \"%s\"",
+			      value, v->key);
+			return -1;
+		}
+
+		v->data = xmalloc(sizeof(bool));
+		*(bool *)v->data = flag;
+	}
+
+	v->data_count = 1;
+	return 1;
+}
+
+/*
+ * Dispatch "value" to the _handle_<type>() function matching the type of
+ * "v".  The result of that function is not propagated.
+ */
+static void _handle_keyvalue_match(s_p_values_t *v,
+				   const char *value, const char *line)
+{
+	/* debug3("key = %s, value = %s, line = \"%s\"", */
+	/*        v->key, value, line); */
+	switch (v->type) {
+	case S_P_IGNORE:
+		/* do nothing */
+		break;
+	case S_P_STRING:
+		_handle_string(v, value, line);
+		break;
+	case S_P_LONG:
+		_handle_long(v, value, line);
+		break;
+	case S_P_UINT16:
+		_handle_uint16(v, value, line);
+		break;
+	case S_P_UINT32:
+		_handle_uint32(v, value, line);
+		break;
+	case S_P_POINTER:
+		_handle_pointer(v, value, line);
+		break;
+	case S_P_ARRAY:
+		_handle_array(v, value, line);
+		break;
+	case S_P_BOOLEAN:
+		_handle_boolean(v, value, line);
+		break;
+	}
+}
+
+/*
+ * Return 1 if all characters in "line" are white-space characters,
+ * otherwise return 0.
+ */
+static int _line_is_space(const char *line)
+{
+	int len = strlen(line);
+	int i;
+
+	for (i = 0; i < len; i++) {
+		if (!isspace((unsigned char)line[i]))
+			return 0;
+	}
+
+	return 1;
+}
+
+
+/*
+ * Returns 1 if the line is parsed cleanly, and 0 otherwise.
+ */
+/*
+ * s_p_parse_line
+ *
+ * Parse every "key = value" pair found in "line" and store the values in
+ * "hashtbl".  Parsing stops at the first key that is not in the table,
+ * and anything other than white-space left over after the last pair is
+ * reported as an error.
+ */
+int s_p_parse_line(s_p_hashtbl_t *hashtbl, const char *line)
+{
+	char *key, *value;
+	const char *ptr = line;
+	char *leftover = (char *)line;
+	s_p_values_t *p;
+
+	_keyvalue_regex_init();
+
+	while (_keyvalue_regex(ptr, &key, &value, &leftover) == 0) {
+		p = _conf_hashtbl_lookup(hashtbl, key);
+		if (p == NULL) {
+			error("Parsing failed at unrecognized key: %s", key);
+			xfree(key);
+			xfree(value);
+			return 0;
+		}
+		_handle_keyvalue_match(p, value, line);
+		/* continue right behind the value just consumed */
+		ptr = leftover;
+		xfree(key);
+		xfree(value);
+	}
+
+	if (!_line_is_space(leftover)) {
+		/* report the unparsed tail without its line ending */
+		char *tail = xstrdup(leftover);
+		_strip_cr_nl(tail);
+		error("Parsing failed at: \"%s\"", tail);
+		xfree(tail);
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * s_p_parse_file
+ *
+ * Read "filename" line by line (after continuation lines have been joined
+ * and comments removed) and hand every non-empty line to
+ * s_p_parse_line().  A file that cannot be opened is reported with
+ * error() and otherwise ignored; the result of parsing the individual
+ * lines is not reported back to the caller.
+ */
+void s_p_parse_file(s_p_hashtbl_t *hashtbl, char *filename)
+{
+	FILE *f;
+	char line[BUFFER_SIZE];
+
+	if (filename == NULL) {
+		error("s_p_parse_file: no file name given");
+		return;
+	}
+
+	_keyvalue_regex_init();
+
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		/* fgets() and fclose() must not be given a NULL stream */
+		error("s_p_parse_file: unable to open file \"%s\"",
+		      filename);
+		return;
+	}
+
+	/* _get_next_line() post-processes the buffer even when nothing
+	 * could be read, so it has to hold a valid string from the start */
+	line[0] = '\0';
+
+	while (_get_next_line(line, BUFFER_SIZE, f)) {
+		/* skip empty lines */
+		if (line[0] == '\0')
+			continue;
+
+		s_p_parse_line(hashtbl, line);
+	}
+
+	fclose(f);
+}
+
+/*
+ * s_p_get_string
+ *
+ * Search for a key in a s_p_hashtbl_t with value of type
+ * string.  If the key is found and has a set value, the
+ * value is returned in "str".
+ *
+ * OUT str - pointer to a copy of the string value
+ *           (caller is responsible for freeing str with xfree())
+ * IN key - hash table key.
+ * IN hashtbl - hash table created by s_p_hashtbl_create()
+ *
+ * Returns 1 when a value was set for "key" during parsing and "str"
+ * was successfully set, otherwise returns 0;
+ *
+ * NOTE: Caller is responsible for freeing the returned string with xfree!
+ */
+int s_p_get_string(char **str, const char *key, const s_p_hashtbl_t *hashtbl)
+{
+	s_p_values_t *entry = _conf_hashtbl_lookup(hashtbl, key);
+	int found = 0;
+
+	if (entry == NULL) {
+		error("Invalid key \"%s\"", key);
+	} else if (entry->type != S_P_STRING) {
+		error("Key \"%s\" is not a string\n", key);
+	} else if (entry->data_count != 0) {
+		/* hand out a private copy, the table keeps its own */
+		*str = xstrdup((char *)entry->data);
+		found = 1;
+	}
+
+	return found;
+}
+
+/*
+ * s_p_get_long
+ *
+ * Look up "key", which must have been declared with type S_P_LONG, and
+ * copy its value into "num" if one was set while parsing.
+ *
+ * OUT num - pointer to a long where the value is returned
+ * IN key - hash table key
+ * IN hashtbl - hash table created by s_p_hashtbl_create()
+ *
+ * Returns 1 when a value was set for "key" during parsing and "num"
+ * was successfully set, otherwise returns 0 ("num" is then untouched).
+ */
+int s_p_get_long(long *num, const char *key, const s_p_hashtbl_t *hashtbl)
+{
+	s_p_values_t *entry = _conf_hashtbl_lookup(hashtbl, key);
+	int found = 0;
+
+	if (entry == NULL) {
+		error("Invalid key \"%s\"", key);
+	} else if (entry->type != S_P_LONG) {
+		error("Key \"%s\" is not a long\n", key);
+	} else if (entry->data_count != 0) {
+		*num = *(long *)entry->data;
+		found = 1;
+	}
+
+	return found;
+}
+
+/*
+ * s_p_get_uint16
+ *
+ * Search for a key in a s_p_hashtbl_t with value of type
+ * uint16.  If the key is found and has a set value, the
+ * value is returned in "num".
+ *
+ * OUT num - pointer to a uint16_t where the value is returned
+ * IN key - hash table key
+ * IN hashtbl - hash table created by s_p_hashtbl_create()
+ *
+ * Returns 1 when a value was set for "key" during parsing and "num"
+ * was successfully set, otherwise returns 0;
+ */
+int s_p_get_uint16(uint16_t *num, const char *key,
+		   const s_p_hashtbl_t *hashtbl)
+{
+	s_p_values_t *entry = _conf_hashtbl_lookup(hashtbl, key);
+	int found = 0;
+
+	if (entry == NULL) {
+		error("Invalid key \"%s\"", key);
+	} else if (entry->type != S_P_UINT16) {
+		error("Key \"%s\" is not a uint16_t\n", key);
+	} else if (entry->data_count != 0) {
+		*num = *(uint16_t *)entry->data;
+		found = 1;
+	}
+
+	return found;
+}
+
+/*
+ * s_p_get_uint32
+ *
+ * Look up "key", which must have been declared with type S_P_UINT32, and
+ * copy its value into "num" if one was set while parsing.
+ *
+ * OUT num - pointer to a uint32_t where the value is returned
+ * IN key - hash table key
+ * IN hashtbl - hash table created by s_p_hashtbl_create()
+ *
+ * Returns 1 when a value was set for "key" during parsing and "num"
+ * was successfully set, otherwise returns 0 ("num" is then untouched).
+ */
+int s_p_get_uint32(uint32_t *num, const char *key,
+		   const s_p_hashtbl_t *hashtbl)
+{
+	s_p_values_t *entry = _conf_hashtbl_lookup(hashtbl, key);
+	int found = 0;
+
+	if (entry == NULL) {
+		error("Invalid key \"%s\"", key);
+	} else if (entry->type != S_P_UINT32) {
+		error("Key \"%s\" is not a uint32_t\n", key);
+	} else if (entry->data_count != 0) {
+		*num = *(uint32_t *)entry->data;
+		found = 1;
+	}
+
+	return found;
+}
+
+/*
+ * s_p_get_pointer
+ *
+ * Search for a key in a s_p_hashtbl_t with value of type
+ * pointer.  If the key is found and has a set value, the
+ * value is returned in "ptr".
+ *
+ * OUT ptr - pointer to a void pointer where the value is returned
+ * IN key - hash table key
+ * IN hashtbl - hash table created by s_p_hashtbl_create()
+ *
+ * Returns 1 when a value was set for "key" during parsing and "ptr"
+ * was successfully set, otherwise returns 0;
+ */
+int s_p_get_pointer(void **ptr, const char *key, const s_p_hashtbl_t *hashtbl)
+{
+	s_p_values_t *entry = _conf_hashtbl_lookup(hashtbl, key);
+	int found = 0;
+
+	if (entry == NULL) {
+		error("Invalid key \"%s\"", key);
+	} else if (entry->type != S_P_POINTER) {
+		error("Key \"%s\" is not a pointer\n", key);
+	} else if (entry->data_count != 0) {
+		/* the stored pointer itself is handed out, not a copy */
+		*ptr = entry->data;
+		found = 1;
+	}
+
+	return found;
+}
+
+
+/*
+ * s_p_get_array
+ *
+ * Most s_p_ data types allow a key to appear only once in a file
+ * (s_p_parse_file) or line (s_p_parse_line).  S_P_ARRAY is the exception.
+ *
+ * S_P_ARRAY allows a key to appear any number of times.  Each time
+ * a particular key is found the value array grows by one element, and
+ * that element contains a pointer to the newly parsed value.  You can
+ * think of this as being an array of S_P_POINTER types.
+ *
+ * OUT ptr_array - pointer to a void pointer-pointer where the value is returned
+ * OUT count - length of ptr_array
+ * IN key - hash table key
+ * IN hashtbl - hash table created by s_p_hashtbl_create()
+ *
+ * Returns 1 when a value was set for "key" during parsing and both
+ * "ptr_array" and "count" were successfully set, otherwise returns 0.
+ */
+int s_p_get_array(void **ptr_array[], int *count,
+		  const char *key, const s_p_hashtbl_t *hashtbl)
+{
+	s_p_values_t *entry = _conf_hashtbl_lookup(hashtbl, key);
+	int found = 0;
+
+	if (entry == NULL) {
+		error("Invalid key \"%s\"", key);
+	} else if (entry->type != S_P_ARRAY) {
+		error("Key \"%s\" is not an array\n", key);
+	} else if (entry->data_count != 0) {
+		/* the table's own array is handed out, not a copy */
+		*ptr_array = (void **)entry->data;
+		*count = entry->data_count;
+		found = 1;
+	}
+
+	return found;
+}
+
+/*
+ * s_p_get_boolean
+ *
+ * Look up "key", which must have been declared with type S_P_BOOLEAN, and
+ * copy its value into "flag" if one was set while parsing.
+ *
+ * OUT flag - pointer to a bool where the value is returned
+ * IN key - hash table key
+ * IN hashtbl - hash table created by s_p_hashtbl_create()
+ *
+ * Returns 1 when a value was set for "key" during parsing and "flag"
+ * was successfully set, otherwise returns 0 ("flag" is then untouched).
+ */
+int s_p_get_boolean(bool *flag, const char *key, const s_p_hashtbl_t *hashtbl)
+{
+	s_p_values_t *entry = _conf_hashtbl_lookup(hashtbl, key);
+	int found = 0;
+
+	if (entry == NULL) {
+		error("Invalid key \"%s\"", key);
+	} else if (entry->type != S_P_BOOLEAN) {
+		error("Key \"%s\" is not a boolean\n", key);
+	} else if (entry->data_count != 0) {
+		*flag = *(bool *)entry->data;
+		found = 1;
+	}
+
+	return found;
+}
+
+
+/*
+ * Given an "options" array, print the current values of all
+ * options in supplied hash table "hashtbl".
+ *
+ * Primarily for debugging purposes.
+ *
+ * Every option is reported with verbose(): "key = value" when a value
+ * was set during parsing, and just "key" otherwise.  For S_P_ARRAY keys
+ * only the number of elements is reported, and S_P_IGNORE keys (which
+ * never hold a value) are skipped.
+ */
+void s_p_dump_values(const s_p_hashtbl_t *hashtbl,
+		     const s_p_options_t options[])
+{
+	const s_p_options_t *op = NULL;
+	long num;
+	uint16_t num16;
+	uint32_t num32;
+	char *str;
+	void *ptr;
+	void **ptr_array;
+	int count;
+	bool flag;
+
+	for (op = options; op->key != NULL; op++) {
+		switch(op->type) {
+		case S_P_STRING:
+			if (s_p_get_string(&str, op->key, hashtbl)) {
+				verbose("%s = %s", op->key, str);
+				/* s_p_get_string() returned a copy */
+				xfree(str);
+			} else {
+				verbose("%s", op->key);
+			}
+			break;
+		case S_P_LONG:
+			if (s_p_get_long(&num, op->key, hashtbl))
+				verbose("%s = %ld", op->key, num);
+			else
+				verbose("%s", op->key);
+			break;
+		case S_P_UINT16:
+			if (s_p_get_uint16(&num16, op->key, hashtbl))
+				verbose("%s = %hu", op->key, num16);
+			else
+				verbose("%s", op->key);
+			break;
+		case S_P_UINT32:
+			if (s_p_get_uint32(&num32, op->key, hashtbl))
+				verbose("%s = %u", op->key, num32);
+			else
+				verbose("%s", op->key);
+			break;
+		case S_P_POINTER:
+			/* %p is the only conversion defined for a pointer
+			 * argument; %x expects an unsigned int */
+			if (s_p_get_pointer(&ptr, op->key, hashtbl))
+				verbose("%s = %p", op->key, ptr);
+			else
+				verbose("%s", op->key);
+			break;
+		case S_P_ARRAY:
+			if (s_p_get_array(&ptr_array, &count,
+					  op->key, hashtbl)) {
+				verbose("%s, count = %d", op->key, count);
+			} else {
+				verbose("%s", op->key);
+			}
+			break;
+		case S_P_BOOLEAN:
+			if (s_p_get_boolean(&flag, op->key, hashtbl))
+				verbose("%s = %s", op->key,
+					flag ? "TRUE" : "FALSE");
+			else
+				verbose("%s", op->key);
+			break;
+		case S_P_IGNORE:
+		default:
+			/* nothing is ever stored for these */
+			break;
+		}
+	}
+}
diff --git a/src/common/parse_config.h b/src/common/parse_config.h
new file mode 100644
index 00000000000..8fa04c37f2d
--- /dev/null
+++ b/src/common/parse_config.h
@@ -0,0 +1,306 @@
+/*****************************************************************************
+ * parse_config.h - parse any slurm.conf-like configuration file
+ *
+ * NOTE: when you see the prefix "s_p_", think "slurm parser".
+ *
+ * $Id$
+ *****************************************************************************
+ * Copyright (C) 2006 The Regents of the University of California.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Written by Christopher J. Morrone <morrone2@llnl.gov>.
+ * UCRL-CODE-217948.
+ * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +\*****************************************************************************/ + +#ifndef _PARSE_CONFIG_H +#define _PARSE_CONFIG_H + +#include <stdint.h> + +/* + * This slurm file parser provides a method for parsing a file + * for key-value pairs of the form "key = value". This parser can be used + * for any slurm-like configuration file, not just slurm.conf. If you are + * looking for code specific to slurm.conf, look in + * src/common/slurm_conf.[hc]. + * + * In the parsed file, any amount of white-space is allowed between the + * key, equal-sign, and value. The parser handles comments, line + * continuations, and escaped characters automatically. Double-quotes can + * be used to surround an entire value if white-space is needed within + * a value string. + * + * A comment begins with a "#" and ends at the end of the line. A line + * continuation is a "\" character at the end of the line (only white-space + * may follow the "\"). A line continuation tells the parser to + * concatenate the following line with the current line. + * + * To include a literal "\" or "#" character in a file, it can be escaped + * by a preceding "\". 
+ * + * Double-quotes CANNOT be escaped, and they must surround the entire value + * string; they cannot be used within some substring of a value string. + * An empty string can be specified with double quotes: Apple="". + * + * To use this parser, first construct an array of s_p_options_t structures. + * Only the "key" string needs to be non-zero. Zero or NULL are valid + * defaults for type, handler, and destroy, which conveniently allows + * them to be left out in any static initializations of options arrays. For + * instance: + * + * s_p_options_t options[] = {{"Apples", S_P_UINT16}, + * {"Oranges"}, + * {NULL}}; + * + * In this example, the handler and destroy functions for the "Apples" key + * are NULL pointers, and for key "Oranges" even the type is zero. A zero + * type is equivalent to specifying type S_P_IGNORE. + * + * Once an s_p_options_t array is defined, it is converted into a slurm + * parser hash table structure with the s_p_hashtbl_create() function. + * The s_p_hashtbl_t thus returned can be given to the s_p_parse_file() + * function to parse a file, and fill in the s_p_hashtbl_t structure with + * the values found in the file. Values for keys can then be retrieved + * from the s_p_hashtbl_t with the functions with names beginning with + * "s_p_get_", e.g. s_p_get_boolean(), s_p_get_string(), s_p_get_uint16(), + * etc. + * + * Valid types + * ----------- + * + * S_P_IGNORE - Any instance of specified key and associated value in a file + * will be allowed, but the value will not be stored and will not + * be retrievable from the s_p_hashtbl_t. + * S_P_STRING - The value for a given key will be saved in string form, no + * conversions will be performed on the value. + * S_P_LONG - The value for a given key must be a valid + * string representation of a long integer (as determined by strtol()), + * otherwise an error will be raised. + * S_P_UINT16 - The value for a given key must be a valid + * string representation of an unsigned 16-bit integer. 
+ * S_P_UINT32 - The value for a given key must be a valid + * string representation of an unsigned 32-bit integer. + * S_P_POINTER - The parser makes no assumption about the type of the value. + * The s_p_get_pointer() function will return a pointer to the + * s_p_hashtbl_t's internal copy of the value. By default, the value + * will simply be the string representation of the value found in the file. + * This differs from S_P_STRING in that s_p_get_string() returns a COPY + * of the value which must be xfree'ed by the user. The pointer + * returned by s_p_get_pointer() must NOT be freed by the user. + * It is intended that normally S_P_POINTER will be used in conjunction + * with "handler" and "destroy" functions to implement a custom type. + * S_P_ARRAY - This (and S_P_IGNORE, which does not record the fact that it + * has seen the key previously) is the only type which allows its key to + * appear multiple times in a file. With any other type (except + * S_P_IGNORE), an error will be raised when a key is seen more than + * once in a file. + * S_P_ARRAY works mostly the same as S_P_POINTER, except that it builds + * an array of pointers to the found values. + * + * Handlers and destructors + * ------------------------ + * + * Any key specified in an s_p_options_t array can have function callbacks for + * a "handler" function and a "destroy" function. The prototypes for each + * are available below in the typedef of s_p_options_t. + * + * A handler function is given the "key" string, "value" string, and a + * pointer to the entire "line" on which the key-value pair was found (this is + * the line after the parser has removed comments and concatenated continued + * lines). The handler can transform the value any way it desires, and then + * return a pointer to the newly allocated value data in the "data" pointer. 
+ * The return code from "handler" must be -1 if the value is invalid, 0 if + * the value is valid but no value will be set for "data" (the parser will not + * flag this key as already seen, and the destroy() function will not be + * called during s_p_hashtbl_destroy()), and 1 if "data" is set. + * + * If the "destroy" function is set for a key, and the parser marked a key as + * "seen" during parsing, then it will pass the pointer to the value data + * to the "destroy" function when s_p_hashtbl_destroy() is called. If + * a key was "seen" during parsing, but the "destroy" function is NULL, + * s_p_hashtbl_destroy() will call xfree() on the data pointer. + */ + +typedef struct s_p_values s_p_values_t; +typedef s_p_values_t * s_p_hashtbl_t; + +typedef enum slurm_parser_enum { + S_P_IGNORE = 0, + S_P_STRING, + S_P_LONG, + S_P_UINT16, + S_P_UINT32, + S_P_POINTER, + S_P_ARRAY, + S_P_BOOLEAN +} slurm_parser_enum_t; + +typedef struct conf_file_options { + char *key; + slurm_parser_enum_t type; + int (*handler)(void **data, slurm_parser_enum_t type, + const char *key, const char *value, const char *line); + void (*destroy)(void *data); +} s_p_options_t; + + +s_p_hashtbl_t *s_p_hashtbl_create(struct conf_file_options options[]); +void s_p_hashtbl_destroy(s_p_hashtbl_t *hashtbl); + + +void s_p_parse_file(s_p_hashtbl_t *hashtbl, char *filename); + +/* + * Returns 1 if the line is parsed cleanly, and 0 otherwise. + */ +int s_p_parse_line(s_p_hashtbl_t *hashtbl, const char *line); + +/* + * s_p_get_string + * + * Search for a key in a s_p_hashtbl_t with value of type + * string. If the key is found and has a set value, the + * value is returned in "str". + * + * OUT str - pointer to a copy of the string value + * (caller is responsible for freeing str with xfree()) + * IN key - hash table key. 
+ * IN hashtbl - hash table created by s_p_hashtbl_create() + * + * Returns 1 when a value was set for "key" during parsing and "str" + * was successfully set, otherwise returns 0; + * + * NOTE: Caller is responsible for freeing the returned string with xfree! + */ +int s_p_get_string(char **str, const char *key, const s_p_hashtbl_t *hashtbl); + +/* + * s_p_get_long + * + * Search for a key in a s_p_hashtbl_t with value of type + * long. If the key is found and has a set value, the + * value is returned in "num". + * + * OUT num - pointer to a long where the value is returned + * IN key - hash table key + * IN hashtbl - hash table created by s_p_hashtbl_create() + * + * Returns 1 when a value was set for "key" during parsing and "num" + * was successfully set, otherwise returns 0; + */ +int s_p_get_long(long *num, const char *key, const s_p_hashtbl_t *hashtbl); + +/* + * s_p_get_uint16 + * + * Search for a key in a s_p_hashtbl_t with value of type + * uint16. If the key is found and has a set value, the + * value is returned in "num". + * + * OUT num - pointer to a uint16_t where the value is returned + * IN key - hash table key + * IN hashtbl - hash table created by s_p_hashtbl_create() + * + * Returns 1 when a value was set for "key" during parsing and "num" + * was successfully set, otherwise returns 0; + */ +int s_p_get_uint16(uint16_t *num, const char *key, + const s_p_hashtbl_t *hashtbl); + +/* + * s_p_get_uint32 + * + * Search for a key in a s_p_hashtbl_t with value of type + * uint32. If the key is found and has a set value, the + * value is returned in "num". 
+ * + * OUT num - pointer to a uint32_t where the value is returned + * IN key - hash table key + * IN hashtbl - hash table created by s_p_hashtbl_create() + * + * Returns 1 when a value was set for "key" during parsing and "num" + * was successfully set, otherwise returns 0; + */ +int s_p_get_uint32(uint32_t *num, const char *key, + const s_p_hashtbl_t *hashtbl); + +/* + * s_p_get_pointer + * + * Search for a key in a s_p_hashtbl_t with value of type + * pointer. If the key is found and has a set value, the + * value is returned in "ptr". + * + * OUT ptr - pointer to a void pointer where the value is returned + * IN key - hash table key + * IN hashtbl - hash table created by s_p_hashtbl_create() + * + * Returns 1 when a value was set for "key" during parsing and "ptr" + * was successfully set, otherwise returns 0; + */ +int s_p_get_pointer(void **ptr, const char *key, const s_p_hashtbl_t *hashtbl); + +/* + * s_p_get_array + * + * Most s_p_ data types allow a key to appear only once in a file + * (s_p_parse_file) or line (s_p_parse_line). S_P_ARRAY is the exception. + * + * S_P_ARRAY allows a key to appear any number of times. Each time + * a particular key is found the value array grows by one element, and + * that element contains a pointer to the newly parsed value. You can + * think of this as being an array of S_P_POINTER types. + * + * OUT ptr_array - pointer to a void pointer-pointer where the array is returned + * IN key - hash table key + * IN hashtbl - hash table created by s_p_hashtbl_create() + * + * Returns 1 when a value was set for "key" during parsing and "ptr_array" + * and "count" were successfully set, otherwise returns 0; + */ +int s_p_get_array(void **ptr_array[], int *count, + const char *key, const s_p_hashtbl_t *hashtbl); + +/* + * s_p_get_boolean + * + * Search for a key in a s_p_hashtbl_t with value of type + * boolean. If the key is found and has a set value, the + * value is returned in "flag". 
+ * + * OUT flag - pointer to a bool where the value is returned + * IN key - hash table key + * IN hashtbl - hash table created by s_p_hashtbl_create() + * + * Returns 1 when a value was set for "key" during parsing and "flag" + * was successfully set, otherwise returns 0; + */ +int s_p_get_boolean(bool *flag, const char *key, const s_p_hashtbl_t *hashtbl); + +/* + * Given an "options" array, print the current values of all + * options in supplied hash table "hashtbl". + * + * Primarily for debugging purposes. + */ +void s_p_dump_values(const s_p_hashtbl_t *hashtbl, + const s_p_options_t options[]); + + +#endif /* !_PARSE_CONFIG_H */ diff --git a/src/common/read_config.c b/src/common/read_config.c index ce54c442b0b..60e55a7672f 100644 --- a/src/common/read_config.c +++ b/src/common/read_config.c @@ -40,6 +40,7 @@ #include <sys/types.h> #include <time.h> #include <unistd.h> +#include <pthread.h> #include <slurm/slurm.h> @@ -52,50 +53,543 @@ #include "src/common/xmalloc.h" #include "src/common/xstring.h" #include "src/common/slurm_rlimits_info.h" +#include "src/common/parse_config.h" -#define BUFFER_SIZE 1024 -#define MULTIPLE_VALUE_MSG "Multiple values for %s, latest one used" +/* Instantiation of the "extern slurm_ctl_conf_t slurmctld_conf" + * found in slurmctld.h */ +slurm_ctl_conf_t slurmctld_conf; + +static pthread_mutex_t conf_lock = PTHREAD_MUTEX_INITIALIZER; +static s_p_hashtbl_t *conf_hashtbl; +static slurm_ctl_conf_t *conf_ptr = &slurmctld_conf; +static bool conf_initialized = false; + +/* + * FIXME - If we eliminate the SlurmdPort option altogether, then + * default_slurmd_port and parse_slurmd_port can + * be removed. 
+ */ +static uint16_t default_slurmd_port; +static int parse_slurmd_port(void **dest, slurm_parser_enum_t type, + const char *key, const char *value, + const char *line); + +static s_p_hashtbl_t *default_nodename_tbl; +static s_p_hashtbl_t *default_partition_tbl; inline static void _normalize_debug_level(uint16_t *level); -static int _parse_node_spec (char *in_line, bool slurmd_hosts); -static int _parse_part_spec (char *in_line); +/* data structures for looking up slurmd port numbers */ +struct slurmd_port { + hostset_t aliases; /* NodeName */ + uint16_t port; +}; +static struct slurmd_port *slurmd_port_array; +static int slurmd_port_array_count; +#define NAME_HASH_LEN 512 typedef struct names_ll_s { - char *node_hostname; - char *node_name; - struct names_ll_s *next; + char *alias; /* NodeName */ + char *hostname; /* NodeHostname */ + char *address; /* NodeAddr */ + uint16_t port; + slurm_addr addr; + bool addr_initialized; + struct names_ll_s *next_alias; + struct names_ll_s *next_hostname; } names_ll_t; -bool all_slurmd_hosts = false; -#define NAME_HASH_LEN 512 +bool nodehash_initialized = false; static names_ll_t *host_to_node_hashtbl[NAME_HASH_LEN] = {NULL}; static names_ll_t *node_to_host_hashtbl[NAME_HASH_LEN] = {NULL}; static char *this_hostname = NULL; +static int parse_nodename(void **dest, slurm_parser_enum_t type, + const char *key, const char *value, + const char *line); +static void destroy_nodename(void *ptr); +static int parse_partitionname(void **dest, slurm_parser_enum_t type, + const char *key, const char *value, + const char *line); +static void destroy_partitionname(void *ptr); +static int parse_downnodes(void **dest, slurm_parser_enum_t type, + const char *key, const char *value, + const char *line); +static void destroy_downnodes(void *ptr); +static int defunct_option(void **dest, slurm_parser_enum_t type, + const char *key, const char *value, + const char *line); +static void validate_and_set_defaults(slurm_ctl_conf_t *conf, + s_p_hashtbl_t 
*hashtbl); + +s_p_options_t slurm_conf_options[] = { + {"AuthType", S_P_STRING}, + {"CheckpointType", S_P_STRING}, + {"CacheGroups", S_P_UINT16}, + {"BackupAddr", S_P_STRING}, + {"BackupController", S_P_STRING}, + {"ControlAddr", S_P_STRING}, + {"ControlMachine", S_P_STRING}, + {"Epilog", S_P_STRING}, + {"FastSchedule", S_P_UINT16}, + {"FirstJobId", S_P_UINT32}, + {"HashBase", S_P_LONG, defunct_option}, + {"HeartbeatInterval", S_P_LONG, defunct_option}, + {"InactiveLimit", S_P_UINT16}, + {"JobAcctloc", S_P_STRING}, + {"JobAcctParameters", S_P_STRING}, + {"JobAcctType", S_P_STRING}, + {"JobCompLoc", S_P_STRING}, + {"JobCompType", S_P_STRING}, + {"JobCredentialPrivateKey", S_P_STRING}, + {"JobCredentialPublicCertificate", S_P_STRING}, + {"KillTree", S_P_UINT16, defunct_option}, + {"KillWait", S_P_UINT16}, + {"MaxJobCount", S_P_UINT16}, + {"MinJobAge", S_P_UINT16}, + {"MpichGmDirectSupport", S_P_LONG}, + {"MpiDefault", S_P_STRING}, + {"PluginDir", S_P_STRING}, + {"ProctrackType", S_P_STRING}, + {"Prolog", S_P_STRING}, + {"PropagateResourceLimitsExcept", S_P_STRING}, + {"PropagateResourceLimits", S_P_STRING}, + {"ReturnToService", S_P_UINT16}, + {"SchedulerAuth", S_P_STRING}, + {"SchedulerPort", S_P_UINT16}, + {"SchedulerRootFilter", S_P_UINT16}, + {"SchedulerType", S_P_STRING}, + {"SelectType", S_P_STRING}, + {"SlurmUser", S_P_STRING}, + {"SlurmctldDebug", S_P_UINT16}, + {"SlurmctldLogFile", S_P_STRING}, + {"SlurmctldPidFile", S_P_STRING}, + {"SlurmctldPort", S_P_UINT32}, + {"SlurmctldTimeout", S_P_UINT16}, + {"SlurmdDebug", S_P_UINT16}, + {"SlurmdLogFile", S_P_STRING}, + {"SlurmdPidFile", S_P_STRING}, + {"SlurmdPort", S_P_UINT32, parse_slurmd_port}, + {"SlurmdSpoolDir", S_P_STRING}, + {"SlurmdTimeout", S_P_UINT16}, + {"SrunEpilog", S_P_STRING}, + {"SrunProlog", S_P_STRING}, + {"StateSaveLocation", S_P_STRING}, + {"SwitchType", S_P_STRING}, + {"TaskEpilog", S_P_STRING}, + {"TaskProlog", S_P_STRING}, + {"TaskPlugin", S_P_STRING}, + {"TmpFS", S_P_STRING}, + 
{"TreeWidth", S_P_UINT16}, + {"WaitTime", S_P_UINT16}, + + {"NodeName", S_P_ARRAY, parse_nodename, destroy_nodename}, + /* The following keywords are ignored by this parser and handled + by the NodeName handler */ + {"NodeHostname"}, + {"NodeAddr"}, + {"Feature"}, + {"Port"}, + {"Procs"}, + {"RealMemory"}, + {"Reason"}, + {"State"}, + {"TmpDisk"}, + {"Weight"}, + + {"PartitionName", S_P_ARRAY, parse_partitionname, destroy_partitionname}, + /* The following keywords are ignored by this parser and handled + by the PartitionName handler */ + {"AllowGroups"}, + {"Default"}, + {"Hidden"}, + {"MaxTime"}, + {"MaxNodes"}, + {"MinNodes"}, + {"Nodes"}, + {"RootOnly"}, + {"Shared"}, + {"State"}, + + {"DownNodes", S_P_ARRAY, parse_downnodes, destroy_downnodes}, + /* "State" and "Reason" are already ignored */ + + {NULL} +}; + +static s_p_options_t _nodename_options[] = { + {"NodeName", S_P_STRING}, + {"NodeHostname", S_P_STRING}, + {"NodeAddr", S_P_STRING}, + {"Feature", S_P_STRING}, + {"Port", S_P_UINT16}, + {"Procs", S_P_UINT32}, + {"RealMemory", S_P_UINT32}, + {"Reason", S_P_STRING}, + {"State", S_P_STRING}, + {"TmpDisk", S_P_UINT32}, + {"Weight", S_P_UINT32}, + {NULL} +}; + +static s_p_options_t _partition_options[] = { + {"PartitionName", S_P_STRING}, + {"AllowGroups", S_P_STRING}, + {"Default", S_P_BOOLEAN}, /* YES or NO */ + {"Hidden", S_P_BOOLEAN}, /* YES or NO */ + {"MaxTime", S_P_UINT32}, /* INFINITE or a number */ + {"MaxNodes", S_P_UINT32}, /* INFINITE or a number */ + {"MinNodes", S_P_UINT32}, + {"Nodes", S_P_STRING}, + {"RootOnly", S_P_BOOLEAN}, /* YES or NO */ + {"Shared", S_P_STRING}, /* YES, NO, or FORCE */ + {"State", S_P_BOOLEAN}, /* UP or DOWN */ + {NULL} +}; + +static s_p_options_t _downnodes_options[] = { + {"DownNodes", S_P_STRING}, + {"Reason", S_P_STRING}, + {"State", S_P_STRING}, + {NULL} +}; +/* + * This function works almost exactly the same as the + * default S_P_UINT32 handler, except that it also sets the + * global variable default_slurmd_port. 
+ */ +static int parse_slurmd_port(void **dest, slurm_parser_enum_t type, + const char *key, const char *value, + const char *line) +{ + char *endptr; + unsigned long num; + uint32_t *ptr; + + errno = 0; + num = strtoul(value, &endptr, 0); + if ((num == 0 && errno == EINVAL) + || (*endptr != '\0')) { + error("\"%s\" is not a valid number", value); + return -1; + } else if (errno == ERANGE) { + error("\"%s\" is out of range", value); + return -1; + } else if (num < 0) { + error("\"%s\" is less than zero", value); + return -1; + } else if (num > 0xffffffff) { + error("\"%s\" is greater than 4294967295", value); + return -1; + } + + default_slurmd_port = (uint32_t)num; + + ptr = (uint32_t *)xmalloc(sizeof(uint32_t)); + *ptr = (uint32_t)num; + *dest = (void *)ptr; + + return 1; +} + +static int defunct_option(void **dest, slurm_parser_enum_t type, + const char *key, const char *value, + const char *line) +{ + error("The option \"%s\" is defunct, see man slurm.conf.", key); + return 0; +} + +static int parse_nodename(void **dest, slurm_parser_enum_t type, + const char *key, const char *value, const char *line) +{ + s_p_hashtbl_t *tbl, *dflt; + slurm_conf_node_t *n; + + tbl = s_p_hashtbl_create(_nodename_options); + s_p_parse_line(tbl, line); + /* s_p_dump_values(tbl, _nodename_options); */ + + if (strcasecmp(value, "DEFAULT") == 0) { + char *tmp; + if (s_p_get_string(&tmp, "NodeHostname", tbl)) { + error("NodeHostname not allowed with NodeName=DEFAULT"); + xfree(tmp); + return -1; + } + if (s_p_get_string(&tmp, "NodeAddr", tbl)) { + error("NodeAddr not allowed with NodeName=DEFAULT"); + xfree(tmp); + return -1; + } + + if (default_nodename_tbl != NULL) + s_p_hashtbl_destroy(default_nodename_tbl); + default_nodename_tbl = tbl; + + return 0; + } else { + n = xmalloc(sizeof(slurm_conf_node_t)); + dflt = default_nodename_tbl; + + s_p_get_string(&n->nodenames, "NodeName", tbl); + if (!s_p_get_string(&n->hostnames, "NodeHostname", tbl)) + n->hostnames = xstrdup(n->nodenames); 
+ if (!s_p_get_string(&n->addresses, "NodeAddr", tbl)) + n->addresses = xstrdup(n->hostnames); + + if (!s_p_get_string(&n->feature, "Feature", tbl)) + s_p_get_string(&n->feature, "Feature", dflt); + + if (!s_p_get_uint16(&n->port, "Port", tbl) + && !s_p_get_uint16(&n->port, "Port", dflt)) { + if (default_slurmd_port != 0) + n->port = default_slurmd_port; + else + n->port = SLURMD_PORT; + } + + if (!s_p_get_uint32(&n->cpus, "Procs", tbl) + && !s_p_get_uint32(&n->cpus, "Procs", dflt)) + n->cpus = 1; + + if (!s_p_get_uint32(&n->real_memory, "RealMemory", tbl) + && !s_p_get_uint32(&n->real_memory, "RealMemory", dflt)) + n->real_memory = 1; + + if (!s_p_get_string(&n->reason, "Reason", tbl)) + s_p_get_string(&n->reason, "Reason", dflt); + + if (!s_p_get_string(&n->state, "State", tbl) + && !s_p_get_string(&n->state, "State", dflt)) + n->state = NULL; + + if (!s_p_get_uint32(&n->tmp_disk, "TmpDisk", tbl) + && !s_p_get_uint32(&n->tmp_disk, "TmpDisk", dflt)) + n->tmp_disk = 1; + + if (!s_p_get_uint32(&n->weight, "Weight", tbl) + && !s_p_get_uint32(&n->weight, "Weight", dflt)) + n->weight = 1; + + s_p_hashtbl_destroy(tbl); + + *dest = (void *)n; + + return 1; + } + + /* should not get here */ +} + +static void destroy_nodename(void *ptr) +{ + slurm_conf_node_t *n = (slurm_conf_node_t *)ptr; + xfree(n->nodenames); + xfree(n->hostnames); + xfree(n->addresses); + xfree(n->feature); + xfree(n->reason); + xfree(n->state); + xfree(ptr); +} + +int slurm_conf_nodename_array(slurm_conf_node_t **ptr_array[]) +{ + int count; + slurm_conf_node_t **ptr; + + if (s_p_get_array((void ***)&ptr, &count, "NodeName", conf_hashtbl)) { + *ptr_array = ptr; + return count; + } else { + *ptr_array = NULL; + return 0; + } +} + +static int parse_partitionname(void **dest, slurm_parser_enum_t type, + const char *key, const char *value, const char *line) +{ + s_p_hashtbl_t *tbl, *dflt; + slurm_conf_partition_t *p; + char *tmp = NULL; + + tbl = s_p_hashtbl_create(_partition_options); + 
s_p_parse_line(tbl, line); + /* s_p_dump_values(tbl, _partition_options); */ + + if (strcasecmp(value, "DEFAULT") == 0) { + if (default_partition_tbl != NULL) + s_p_hashtbl_destroy(default_partition_tbl); + default_partition_tbl = tbl; + + return 0; + } else { + p = xmalloc(sizeof(slurm_conf_partition_t)); + dflt = default_partition_tbl; + + s_p_get_string(&p->name, "PartitionName", tbl); + + if (!s_p_get_string(&p->allow_groups, "AllowGroups", tbl)) + s_p_get_string(&p->allow_groups, "AllowGroups", dflt); + if (p->allow_groups && strcasecmp(p->allow_groups, "ALL")==0) { + xfree(p->allow_groups); + p->allow_groups = NULL; /* NULL means allow all */ + } + + if (!s_p_get_boolean(&p->default_flag, "Default", tbl) + && !s_p_get_boolean(&p->default_flag, "Default", dflt)) + p->default_flag = false; + + if (!s_p_get_boolean(&p->hidden_flag, "Hidden", tbl) + && !s_p_get_boolean(&p->hidden_flag, "Hidden", dflt)) + p->hidden_flag = false; + + if (!s_p_get_uint32(&p->max_time, "MaxTime", tbl) + && !s_p_get_uint32(&p->max_time, "MaxTime", dflt)) + p->max_time = INFINITE; + + if (!s_p_get_uint32(&p->max_nodes, "MaxNodes", tbl) + && !s_p_get_uint32(&p->max_nodes, "MaxNodes", dflt)) + p->max_nodes = INFINITE; + + if (!s_p_get_uint32(&p->min_nodes, "MinNodes", tbl) + && !s_p_get_uint32(&p->min_nodes, "MinNodes", dflt)) + p->min_nodes = 1; + + if (!s_p_get_string(&p->nodes, "Nodes", tbl) + && !s_p_get_string(&p->nodes, "Nodes", dflt)) + p->nodes = NULL; + + if (!s_p_get_boolean(&p->root_only_flag, "RootOnly", tbl) + && !s_p_get_boolean(&p->root_only_flag, "RootOnly", dflt)) + p->root_only_flag = false; + + if (!s_p_get_string(&tmp, "Shared", tbl) + && !s_p_get_string(&tmp, "Shared", dflt)) { + p->shared = SHARED_NO; + } else { + if (strcasecmp(tmp, "YES") == 0) + p->shared = SHARED_YES; + else if (strcasecmp(tmp, "NO") == 0) + p->shared = SHARED_NO; + else if (strcasecmp(tmp, "FORCE") == 0) + p->shared = SHARED_FORCE; + else { + error("Bad value \"%s\" for Shared", tmp); + 
destroy_partitionname(p); + return -1; /* NOTE(review): tbl and tmp are still allocated on this path */ + } + } + + if (!s_p_get_boolean(&p->state_up_flag, "State", tbl) + && !s_p_get_boolean(&p->state_up_flag, "State", dflt)) + p->state_up_flag = true; + + s_p_hashtbl_destroy(tbl); + + *dest = (void *)p; + + return 1; + } + + /* should not get here */ +} + +static void destroy_partitionname(void *ptr) +{ + slurm_conf_partition_t *p = (slurm_conf_partition_t *)ptr; + + xfree(p->name); + xfree(p->nodes); + xfree(p->allow_groups); + xfree(ptr); +} + +int slurm_conf_partition_array(slurm_conf_partition_t **ptr_array[]) +{ + int count; + slurm_conf_partition_t **ptr; + + if (s_p_get_array((void ***)&ptr, &count, "PartitionName", + conf_hashtbl)) { + *ptr_array = ptr; + return count; + } else { + *ptr_array = NULL; + return 0; + } +} + +static int parse_downnodes(void **dest, slurm_parser_enum_t type, + const char *key, const char *value, + const char *line) +{ + s_p_hashtbl_t *tbl, *dflt; + slurm_conf_downnodes_t *n; + + tbl = s_p_hashtbl_create(_downnodes_options); + s_p_parse_line(tbl, line); + /* s_p_dump_values(tbl, _downnodes_options); */ + + n = xmalloc(sizeof(slurm_conf_downnodes_t)); + dflt = default_nodename_tbl; /* NOTE(review): dflt is not read in this function */ + + s_p_get_string(&n->nodenames, "DownNodes", tbl); + + if (!s_p_get_string(&n->reason, "Reason", tbl)) + n->reason = xstrdup("Set in slurm.conf"); + + if (!s_p_get_string(&n->state, "State", tbl)) + n->state = NULL; + + s_p_hashtbl_destroy(tbl); + + *dest = (void *)n; + + return 1; +} + +static void destroy_downnodes(void *ptr) +{ + slurm_conf_downnodes_t *n = (slurm_conf_downnodes_t *)ptr; + xfree(n->nodenames); + xfree(n->reason); + xfree(n->state); + xfree(ptr); +} + +int slurm_conf_downnodes_array(slurm_conf_downnodes_t **ptr_array[]) +{ + int count; + slurm_conf_downnodes_t **ptr; + + if (s_p_get_array((void ***)&ptr, &count, "DownNodes", conf_hashtbl)) { + *ptr_array = ptr; + return count; + } else { + *ptr_array = NULL; + return 0; + } +} + static void _free_name_hashtbl() { int i; names_ll_t *p, *q; for (i=0; 
i<NAME_HASH_LEN; i++) { - p = host_to_node_hashtbl[i]; - while (p) { - xfree(p->node_hostname); - xfree(p->node_name); - q = p->next; - xfree(p); - p = q; - } - host_to_node_hashtbl[i] = NULL; p = node_to_host_hashtbl[i]; while (p) { - xfree(p->node_hostname); - xfree(p->node_name); - q = p->next; + xfree(p->alias); + xfree(p->hostname); + xfree(p->address); + q = p->next_alias; xfree(p); p = q; } node_to_host_hashtbl[i] = NULL; + host_to_node_hashtbl[i] = NULL; } xfree(this_hostname); } @@ -105,7 +599,7 @@ static void _init_name_hashtbl() return; } -static int _get_hash_idx(char *s) +static int _get_hash_idx(const char *s) { int i; @@ -114,46 +608,49 @@ static int _get_hash_idx(char *s) return i % NAME_HASH_LEN; } -static void _push_to_hashtbl(char *node, char *host) +static void _push_to_hashtbls(char *alias, char *hostname, + char *address, uint16_t port) { - int idx; + int hostname_idx, alias_idx; names_ll_t *p, *new; - char *hh; - hh = host ? host : node; - idx = _get_hash_idx(hh); -#ifndef HAVE_FRONT_END /* Operate only on front-end */ - p = host_to_node_hashtbl[idx]; + alias_idx = _get_hash_idx(alias); + hostname_idx = _get_hash_idx(hostname); + +#if !defined(HAVE_FRONT_END) && !defined(MULTIPLE_SLURMD) + /* Ensure only one slurmd configured on each host */ + p = host_to_node_hashtbl[hostname_idx]; while (p) { - if (strcmp(p->node_hostname, hh)==0) { - fatal("Duplicated NodeHostname %s in the config file", - hh); + if (strcmp(p->hostname, hostname)==0) { + error("Duplicated NodeHostname %s in the config file", + hostname); return; } - p = p->next; + p = p->next_hostname; } #endif - new = (names_ll_t *)xmalloc(sizeof(*new)); - new->node_hostname = xstrdup(hh); - new->node_name = xstrdup(node); - new->next = host_to_node_hashtbl[idx]; - host_to_node_hashtbl[idx] = new; - - idx = _get_hash_idx(node); - p = node_to_host_hashtbl[idx]; + /* Ensure only one instance of each NodeName */ + p = node_to_host_hashtbl[alias_idx]; while (p) { - if (strcmp(p->node_name, 
node)==0) { + if (strcmp(p->alias, alias)==0) { fatal("Duplicated NodeName %s in the config file", - node); + p->alias); return; } - p = p->next; + p = p->next_alias; } + + /* Create the new data structure and link it into the hash tables */ new = (names_ll_t *)xmalloc(sizeof(*new)); - new->node_name = xstrdup(node); - new->node_hostname = xstrdup(hh); - new->next = node_to_host_hashtbl[idx]; - node_to_host_hashtbl[idx] = new; + new->alias = xstrdup(alias); + new->hostname = xstrdup(hostname); + new->address = xstrdup(address); + new->port = port; + new->addr_initialized = false; + new->next_hostname = host_to_node_hashtbl[hostname_idx]; + host_to_node_hashtbl[hostname_idx] = new; + new->next_alias = node_to_host_hashtbl[alias_idx]; + node_to_host_hashtbl[alias_idx] = new; } /* @@ -161,106 +658,208 @@ static void _push_to_hashtbl(char *node, char *host) * If node_hostname is NULL, only node_name will be used and * no lookup table record is created. */ -static void _register_conf_node_aliases(char *node_name, char *node_hostname) +static int _register_conf_node_aliases(slurm_conf_node_t *node_ptr) { - hostlist_t node_list = NULL, host_list = NULL; - char *hn = NULL, *nn; + hostlist_t alias_list = NULL; + hostlist_t hostname_list = NULL; + hostlist_t address_list = NULL; + char *alias = NULL; + char *hostname = NULL; + char *address = NULL; + int error_code = 0; /* initialized so "return error_code" never reads an indeterminate value */ - if (node_name == NULL || *node_name == '\0') - return; - if (strcasecmp(node_name, "DEFAULT") == 0) { - if (node_hostname) { - fatal("NodeHostname for NodeName=DEFAULT is illegal"); - } - return; + if (node_ptr->nodenames == NULL || *node_ptr->nodenames == '\0') + return -1; + + if ((alias_list = hostlist_create(node_ptr->nodenames)) == NULL) { + error("Unable to create NodeName list from %s", + node_ptr->nodenames); + error_code = errno; + goto cleanup; + } + if ((hostname_list = hostlist_create(node_ptr->hostnames)) == NULL) { + error("Unable to create NodeHostname list from %s", + node_ptr->hostnames); + 
error_code = errno; + goto cleanup; + } + if ((address_list = hostlist_create(node_ptr->addresses)) == NULL) { + error("Unable to create NodeAddr list from %s", + node_ptr->addresses); + error_code = errno; + goto cleanup; + } + + /* some sanity checks */ +#ifdef HAVE_FRONT_END + if (hostlist_count(hostname_list) != 1 + || hostlist_count(address_list) != 1) { + error("Only one hostname and address allowed " + "in FRONT_END mode"); + goto cleanup; + } + hostname = node_ptr->hostnames; + address = node_ptr->addresses; +#else + if (hostlist_count(hostname_list) < hostlist_count(alias_list)) { + error("At least as many NodeHostname are required " + "as NodeName"); + goto cleanup; } - if (!this_hostname) { - this_hostname = xmalloc(MAX_SLURM_NAME); - getnodename(this_hostname, MAX_SLURM_NAME); + if (hostlist_count(address_list) < hostlist_count(alias_list)) { + error("At least as many NodeAddr are required as NodeName"); + goto cleanup; } - if (strcasecmp(node_name, "localhost") == 0) - node_name = this_hostname; - if (node_hostname == NULL) - node_hostname = node_name; - if (strcasecmp(node_hostname, "localhost") == 0) - node_hostname = this_hostname; - - node_list = hostlist_create(node_name); -#ifdef HAVE_FRONT_END /* Common NodeHostname for all NodeName values */ - /* Expect one common node_hostname for all back-end nodes */ - hn = node_hostname; -#else - host_list = hostlist_create(node_hostname); - if (hostlist_count(node_list) != hostlist_count(host_list)) - fatal("NodeName and NodeHostname have different " - "number of records"); #endif - while ((nn = hostlist_shift(node_list))) { - if (host_list) - hn = hostlist_shift(host_list); - _push_to_hashtbl(nn, hn); - if (host_list) - free(hn); - free(nn); + + /* now build the individual node structures */ + while ((alias = hostlist_shift(alias_list))) { +#ifndef HAVE_FRONT_END + hostname = hostlist_shift(hostname_list); + address = hostlist_shift(address_list); +#endif + + _push_to_hashtbls(alias, hostname, address, 
node_ptr->port); + + free(alias); +#ifndef HAVE_FRONT_END + free(hostname); + free(address); +#endif } - hostlist_destroy(node_list); - if (host_list) - hostlist_destroy(host_list); - return; + /* free allocated storage */ +cleanup: + if (alias_list) + hostlist_destroy(alias_list); + if (hostname_list) + hostlist_destroy(hostname_list); + if (address_list) + hostlist_destroy(address_list); + return error_code; +} + +static void _init_slurmd_nodehash(void) +{ + slurm_conf_node_t **ptr_array; + int count; + int i; + + if (nodehash_initialized) + return; + else + nodehash_initialized = true; + + count = slurm_conf_nodename_array(&ptr_array); + if (count == 0) { + return; + } + + for (i = 0; i < count; i++) { + _register_conf_node_aliases(ptr_array[i]); + } +} + +extern void slurm_conf_nodehash_init(void) +{ + slurm_conf_lock(); + _init_slurmd_nodehash(); + slurm_conf_unlock(); } + /* - * get_conf_node_hostname - Return the NodeHostname for given NodeName + * slurm_conf_get_hostname - Return the NodeHostname for given NodeName */ -extern char *get_conf_node_hostname(char *node_name) +extern char *slurm_conf_get_hostname(const char *node_name) { int idx; names_ll_t *p; + _init_slurmd_nodehash(); + idx = _get_hash_idx(node_name); p = node_to_host_hashtbl[idx]; while (p) { - if (strcmp(p->node_name, node_name) == 0) { - return xstrdup(p->node_hostname); + if (strcmp(p->alias, node_name) == 0) { + return xstrdup(p->hostname); } - p = p->next; + p = p->next_alias; } - if (all_slurmd_hosts) - return NULL; - else { - /* Assume identical if we didn't explicitly save all pairs */ - return xstrdup(node_name); - } + return NULL; } /* - * get_conf_node_name - Return the NodeName for given NodeHostname + * slurm_conf_get_nodename - Return the NodeName for given NodeHostname */ -extern char *get_conf_node_name(char *node_hostname) +extern char *slurm_conf_get_nodename(const char *node_hostname) { int idx; names_ll_t *p; + _init_slurmd_nodehash(); + idx = 
_get_hash_idx(node_hostname); p = host_to_node_hashtbl[idx]; while (p) { - if (strcmp(p->node_hostname, node_hostname) == 0) { - return xstrdup(p->node_name); + if (strcmp(p->hostname, node_hostname) == 0) { + return xstrdup(p->alias); } - p = p->next; + p = p->next_hostname; } - if (all_slurmd_hosts) - return NULL; - else { - /* Assume identical if we didn't explicitly save all pairs */ - return xstrdup(node_hostname); + return NULL; +} + +/* + * slurm_conf_get_port - Return the port for a given NodeName + */ +extern uint16_t slurm_conf_get_port(const char *node_name) +{ + int idx; + names_ll_t *p; + + _init_slurmd_nodehash(); + + idx = _get_hash_idx(node_name); + p = node_to_host_hashtbl[idx]; + while (p) { + if (strcmp(p->alias, node_name) == 0) { + return p->port; + } + p = p->next_alias; } + + return 0; } +/* + * slurm_conf_get_addr - Return the slurm_addr for a given NodeName + */ +extern slurm_addr slurm_conf_get_addr(const char *node_name) +{ + int idx; + names_ll_t *p; + _init_slurmd_nodehash(); + + idx = _get_hash_idx(node_name); + p = node_to_host_hashtbl[idx]; + while (p) { + if (strcmp(p->alias, node_name) == 0) { + if (!p->addr_initialized) { + slurm_set_addr(&p->addr, p->port, p->address); + p->addr_initialized = true; + } + return p->addr; + } + p = p->next_alias; + } + + /* FIXME - needs to return a success/fail flag, and set address + through a parameter */ +} /* getnodename - equivalent to gethostname, but return only the first @@ -417,844 +1016,156 @@ init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr) return; } -/* - * parse_config_spec - parse the overall configuration specifications, update - * values - * IN/OUT in_line - input line, parsed info overwritten with white-space - * IN ctl_conf_ptr - pointer to data structure to be updated - * RET 0 if no error, otherwise an error code - * - * NOTE: slurmctld and slurmd ports are built thus: - * if SlurmctldPort/SlurmdPort are set then - * get the port number based upon a look-up in /etc/services - * 
if the lookup fails then translate SlurmctldPort/SlurmdPort - * into a number - * These port numbers are overridden if set in the configuration file - */ -int -parse_config_spec (char *in_line, slurm_ctl_conf_t *ctl_conf_ptr) +/* caller must lock conf_lock */ +static void +_init_slurm_conf(char *file_name) { - int error_code; - long fast_schedule = -1, hash_base, heartbeat_interval = -1; - long inactive_limit = -1, kill_wait = -1; - long ret2service = -1, slurmctld_timeout = -1, slurmd_timeout = -1; - long sched_port = -1, sched_rootfltr = -1; - long slurmctld_debug = -1, slurmd_debug = -1, tree_width = -1; - long max_job_cnt = -1, min_job_age = -1, wait_time = -1; - long slurmctld_port = -1, slurmd_port = -1; - long mpich_gm_dir = -1, kill_tree = -1, cache_groups = -1; - char *backup_addr = NULL, *backup_controller = NULL; - char *checkpoint_type = NULL, *control_addr = NULL; - char *control_machine = NULL, *epilog = NULL, *mpi_default = NULL; - char *proctrack_type = NULL, *prolog = NULL; - char *propagate_rlimits_except = NULL, *propagate_rlimits = NULL; - char *sched_type = NULL, *sched_auth = NULL; - char *select_type = NULL; - char *state_save_location = NULL, *tmp_fs = NULL; - char *slurm_user = NULL, *slurmctld_pidfile = NULL; - char *slurmctld_logfile = NULL; - char *slurmd_logfile = NULL; - char *slurmd_spooldir = NULL, *slurmd_pidfile = NULL; - char *plugindir = NULL, *auth_type = NULL, *switch_type = NULL; - char *job_acct_loc = NULL, *job_acct_parameters = NULL, - *job_acct_type = NULL; - char *job_comp_loc = NULL, *job_comp_type = NULL; - char *job_credential_private_key = NULL; - char *job_credential_public_certificate = NULL; - char *srun_prolog = NULL, *srun_epilog = NULL; - char *task_prolog = NULL, *task_epilog = NULL, *task_plugin = NULL; - long first_job_id = -1; - - error_code = slurm_parser (in_line, - "AuthType=", 's', &auth_type, - "CheckpointType=", 's', &checkpoint_type, - "CacheGroups=", 'l', &cache_groups, - "BackupAddr=", 's', 
&backup_addr, - "BackupController=", 's', &backup_controller, - "ControlAddr=", 's', &control_addr, - "ControlMachine=", 's', &control_machine, - /* SrunEpilog and TaskEpilog MUST come before Epilog */ - "SrunEpilog=", 's', &srun_epilog, - "TaskEpilog=", 's', &task_epilog, - "Epilog=", 's', &epilog, - "FastSchedule=", 'l', &fast_schedule, - "FirstJobId=", 'l', &first_job_id, - "HashBase=", 'l', &hash_base, /* defunct */ - "HeartbeatInterval=", 'l', &heartbeat_interval, - "InactiveLimit=", 'l', &inactive_limit, - "JobAcctloc=", 's', &job_acct_loc, - "JobAcctParameters=", 's', &job_acct_parameters, - "JobAcctType=", 's', &job_acct_type, - "JobCompLoc=", 's', &job_comp_loc, - "JobCompType=", 's', &job_comp_type, - "JobCredentialPrivateKey=", 's', &job_credential_private_key, - "JobCredentialPublicCertificate=", 's', - &job_credential_public_certificate, - "KillTree=", 'l', &kill_tree, - "KillWait=", 'l', &kill_wait, - "MaxJobCount=", 'l', &max_job_cnt, - "MinJobAge=", 'l', &min_job_age, - "MpichGmDirectSupport=", 'l', &mpich_gm_dir, - "MpiDefault=", 's', &mpi_default, - "PluginDir=", 's', &plugindir, - "ProctrackType=", 's', &proctrack_type, - /* SrunProlog and TaskProlog MUST come before Prolog */ - "SrunProlog=", 's', &srun_prolog, - "TaskProlog=", 's', &task_prolog, - "Prolog=", 's', &prolog, - "PropagateResourceLimitsExcept=", 's',&propagate_rlimits_except, - "PropagateResourceLimits=", 's',&propagate_rlimits, - "ReturnToService=", 'l', &ret2service, - "SchedulerAuth=", 's', &sched_auth, - "SchedulerPort=", 'l', &sched_port, - "SchedulerRootFilter=", 'l', &sched_rootfltr, - "SchedulerType=", 's', &sched_type, - "SelectType=", 's', &select_type, - "SlurmUser=", 's', &slurm_user, - "SlurmctldDebug=", 'l', &slurmctld_debug, - "SlurmctldLogFile=", 's', &slurmctld_logfile, - "SlurmctldPidFile=", 's', &slurmctld_pidfile, - "SlurmctldPort=", 'l', &slurmctld_port, - "SlurmctldTimeout=", 'l', &slurmctld_timeout, - "SlurmdDebug=", 'l', &slurmd_debug, - "SlurmdLogFile=", 
's', &slurmd_logfile, - "SlurmdPidFile=", 's', &slurmd_pidfile, - "SlurmdPort=", 'l', &slurmd_port, - "SlurmdSpoolDir=", 's', &slurmd_spooldir, - "SlurmdTimeout=", 'l', &slurmd_timeout, - "StateSaveLocation=", 's', &state_save_location, - "SwitchType=", 's', &switch_type, - "TaskPlugin=", 's', &task_plugin, - "TmpFS=", 's', &tmp_fs, - "WaitTime=", 'l', &wait_time, - "TreeWidth=", 'l', &tree_width, - "END"); + /* conf_ptr = (slurm_ctl_conf_t *)xmalloc(sizeof(slurm_ctl_conf_t)); */ + default_slurmd_port = 0; - if (error_code) - return error_code; - - if ( auth_type ) { - if ( ctl_conf_ptr->authtype ) { - error( MULTIPLE_VALUE_MSG, "AuthType" ); - xfree( ctl_conf_ptr->authtype ); - } - ctl_conf_ptr->authtype = auth_type; + if (file_name == NULL) { + file_name = getenv("SLURM_CONF"); + if (file_name == NULL) + file_name = SLURM_CONFIG_FILE; } - if ( cache_groups != -1) { - if ( ctl_conf_ptr->cache_groups != (uint16_t) NO_VAL) - error (MULTIPLE_VALUE_MSG, "CacheGroups"); - if ((cache_groups < 0) || (cache_groups > 0xffff)) - error("CacheGroups=%ld is invalid", cache_groups); - else - ctl_conf_ptr->cache_groups = cache_groups; - } - - if ( checkpoint_type ) { - if ( ctl_conf_ptr->checkpoint_type ) { - error( MULTIPLE_VALUE_MSG, "CheckpointType" ); - xfree( ctl_conf_ptr->checkpoint_type ); - } - ctl_conf_ptr->checkpoint_type = checkpoint_type; - } - - if ( backup_addr ) { - if ( ctl_conf_ptr->backup_addr ) { - error (MULTIPLE_VALUE_MSG, "BackupAddr"); - xfree (ctl_conf_ptr->backup_addr); - } - ctl_conf_ptr->backup_addr = backup_addr; - } - - if ( backup_controller ) { - if ( ctl_conf_ptr->backup_controller ) { - error (MULTIPLE_VALUE_MSG, "BackupController"); - xfree (ctl_conf_ptr->backup_controller); - } - ctl_conf_ptr->backup_controller = backup_controller; - } - - if ( control_addr ) { - if ( ctl_conf_ptr->control_addr ) { - error (MULTIPLE_VALUE_MSG, "ControlAddr"); - xfree (ctl_conf_ptr->control_addr); - } - ctl_conf_ptr->control_addr = control_addr; - } - - if ( 
control_machine ) { - if ( ctl_conf_ptr->control_machine ) { - error (MULTIPLE_VALUE_MSG, "ControlMachine"); - xfree (ctl_conf_ptr->control_machine); - } - ctl_conf_ptr->control_machine = control_machine; - } - - if ( epilog ) { - if ( ctl_conf_ptr->epilog ) { - error (MULTIPLE_VALUE_MSG, "Epilog"); - xfree (ctl_conf_ptr->epilog); - } - ctl_conf_ptr->epilog = epilog; - } - - if ( fast_schedule != -1 ) { - if ( ctl_conf_ptr->fast_schedule != (uint16_t) NO_VAL) - error (MULTIPLE_VALUE_MSG, "FastSchedule"); - if ((fast_schedule < 0) || (fast_schedule > 0xffff)) - error("FastSchedule=%ld is invalid", fast_schedule); - else - ctl_conf_ptr->fast_schedule = fast_schedule; - } - - if ( first_job_id != -1) { - if ( ctl_conf_ptr->first_job_id != (uint32_t) NO_VAL) - error (MULTIPLE_VALUE_MSG, "FirstJobId"); - if (first_job_id < 0) - error("FirstJobId=%ld is invalid", first_job_id); - else - ctl_conf_ptr->first_job_id = first_job_id; - } - - if ( heartbeat_interval != -1) - error("HeartbeatInterval is defunct, see man slurm.conf"); - - if ( inactive_limit != -1) { - if ( ctl_conf_ptr->inactive_limit != (uint16_t) NO_VAL) - error (MULTIPLE_VALUE_MSG, "InactiveLimit"); -#ifdef HAVE_BG /* Inactive limit must be zero on Blue Gene */ - if (inactive_limit) { - error("InactiveLimit=%ld is invalid on Blue Gene", - inactive_limit); - } - inactive_limit = 0; /* default value too */ -#endif - if ((inactive_limit < 0) || (inactive_limit > 0xffff)) - error("InactiveLimit=%ld is invalid", inactive_limit); - else - ctl_conf_ptr->inactive_limit = inactive_limit; - } - - if ( job_acct_loc ) { - if ( ctl_conf_ptr->job_acct_loc ) { - error( MULTIPLE_VALUE_MSG, "JobAcctLoc" ); - xfree( ctl_conf_ptr->job_acct_loc ); - } - ctl_conf_ptr->job_acct_loc = job_acct_loc; - } - - if ( job_acct_parameters ) { - if ( ctl_conf_ptr->job_acct_parameters ) { - error( MULTIPLE_VALUE_MSG, "JobAcctParameters" ); - xfree( ctl_conf_ptr->job_acct_parameters ); - } - ctl_conf_ptr->job_acct_parameters = 
job_acct_parameters; - } - - if ( job_acct_type ) { - if ( ctl_conf_ptr->job_acct_type ) { - error( MULTIPLE_VALUE_MSG, "JobAcctType" ); - xfree( ctl_conf_ptr->job_acct_type ); - } - ctl_conf_ptr->job_acct_type = job_acct_type; - } - - if ( job_comp_loc ) { - if ( ctl_conf_ptr->job_comp_loc ) { - error( MULTIPLE_VALUE_MSG, "JobCompLoc" ); - xfree( ctl_conf_ptr->job_comp_loc ); - } - ctl_conf_ptr->job_comp_loc = job_comp_loc; - } - - if ( job_comp_type ) { - if ( ctl_conf_ptr->job_comp_type ) { - error( MULTIPLE_VALUE_MSG, "JobCompType" ); - xfree( ctl_conf_ptr->job_comp_type ); - } - ctl_conf_ptr->job_comp_type = job_comp_type; - } - - if ( job_credential_private_key ) { - if ( ctl_conf_ptr->job_credential_private_key ) { - error (MULTIPLE_VALUE_MSG, "JobCredentialPrivateKey"); - xfree (ctl_conf_ptr->job_credential_private_key); - } - ctl_conf_ptr->job_credential_private_key = - job_credential_private_key; - } - - if ( job_credential_public_certificate ) { - if ( ctl_conf_ptr->job_credential_public_certificate ) { - error (MULTIPLE_VALUE_MSG, - "JobCredentialPublicCertificate"); - xfree (ctl_conf_ptr-> - job_credential_public_certificate); - } - ctl_conf_ptr->job_credential_public_certificate = - job_credential_public_certificate; - } - - if ( kill_tree != -1) { - verbose("KillTree configuration parameter is defunct"); - verbose(" mapping to ProctrackType=proctrack/linuxproc"); - xfree(proctrack_type); - proctrack_type = xstrdup("proctrack/linuxproc"); - } - - if ( kill_wait != -1) { - if ( ctl_conf_ptr->kill_wait != (uint16_t) NO_VAL) - error (MULTIPLE_VALUE_MSG, "KillWait"); - if ((kill_wait < 0) || (kill_wait > 0xffff)) - error("KillWait=%ld is invalid", kill_wait); - else - ctl_conf_ptr->kill_wait = kill_wait; - } - - if ( max_job_cnt != -1) { - if ( ctl_conf_ptr->max_job_cnt != (uint16_t) NO_VAL) - error (MULTIPLE_VALUE_MSG, "MaxJobCount"); - if ((max_job_cnt < 0) || (max_job_cnt > 0xffff)) - error("MaxJobCount=%ld is invalid", max_job_cnt); - else - 
ctl_conf_ptr->max_job_cnt = max_job_cnt; - } - - if ( min_job_age != -1) { - if ( ctl_conf_ptr->min_job_age != (uint16_t) NO_VAL) - error (MULTIPLE_VALUE_MSG, "MinJobAge"); - if ((min_job_age < 0) || (min_job_age > 0xffff)) - error("MinJobAge=%ld is invalid", min_job_age); - else - ctl_conf_ptr->min_job_age = min_job_age; - } - - if ( mpich_gm_dir != -1) { - verbose("MpichGmDirectSupport configuration parameter is defunct"); - verbose(" mapping to ProctrackType=proctrack/linuxproc"); - xfree(proctrack_type); - proctrack_type = xstrdup("proctrack/linuxproc"); - } - - if (mpi_default) { - if ( ctl_conf_ptr->mpi_default ) { - error( MULTIPLE_VALUE_MSG, "MpiDefault" ); - xfree( ctl_conf_ptr->mpi_default ); - } - ctl_conf_ptr->mpi_default = mpi_default; - } - - if ( plugindir ) { - if ( ctl_conf_ptr->plugindir ) { - error( MULTIPLE_VALUE_MSG, "PluginDir" ); - xfree( ctl_conf_ptr->plugindir ); - } - ctl_conf_ptr->plugindir = plugindir; - } - - if ( proctrack_type ) { - if ( ctl_conf_ptr->proctrack_type ) { - error( MULTIPLE_VALUE_MSG, "ProctrackType" ); - xfree( ctl_conf_ptr->proctrack_type ); - } - ctl_conf_ptr->proctrack_type = proctrack_type; - } - - if ( prolog ) { - if ( ctl_conf_ptr->prolog ) { - error (MULTIPLE_VALUE_MSG, "Prolog"); - xfree (ctl_conf_ptr->prolog); - } - ctl_conf_ptr->prolog = prolog; - } - - if ( propagate_rlimits ) { - if ( ctl_conf_ptr->propagate_rlimits ) { - error( MULTIPLE_VALUE_MSG, - "PropagateResourceLimits" ); - xfree( ctl_conf_ptr->propagate_rlimits ); - } - else if ( ctl_conf_ptr->propagate_rlimits_except ) { - error( "%s keyword conflicts with %s, using latter.", - "PropagateResourceLimitsExcept", - "PropagateResourceLimits"); - xfree( ctl_conf_ptr->propagate_rlimits_except ); - } - ctl_conf_ptr->propagate_rlimits = propagate_rlimits; - } - if ( propagate_rlimits_except ) { - if ( ctl_conf_ptr->propagate_rlimits_except ) { - error( MULTIPLE_VALUE_MSG, - "PropagateResourceLimitsExcept" ); - xfree( ctl_conf_ptr->propagate_rlimits_except 
); - } - else if ( ctl_conf_ptr->propagate_rlimits ) { - error( "%s keyword conflicts with %s, using latter.", - "PropagateResourceLimits", - "PropagateResourceLimitsExcept"); - xfree( ctl_conf_ptr->propagate_rlimits ); - } - ctl_conf_ptr->propagate_rlimits_except = - propagate_rlimits_except; - } - - if ( ret2service != -1) { - if ( ctl_conf_ptr->ret2service != (uint16_t) NO_VAL) - error (MULTIPLE_VALUE_MSG, "ReturnToService"); - if ((ret2service < 0) || (ret2service > 0xffff)) - error("ReturnToService=%ld is invalid", ret2service); - else - ctl_conf_ptr->ret2service = ret2service; - } - - if ( sched_auth ) { - if ( ctl_conf_ptr->schedauth ) { - xfree( ctl_conf_ptr->schedauth ); - } - ctl_conf_ptr->schedauth = sched_auth; - } - - if ( sched_port != -1 ) { - if (ctl_conf_ptr->schedport != (uint16_t) NO_VAL) - error (MULTIPLE_VALUE_MSG, "SchedPort"); - if (( sched_port < 1 ) || (sched_port > 0xffff)) - error( "SchedPort=%ld is invalid", sched_port ); - else - ctl_conf_ptr->schedport = (uint16_t) sched_port; - } - - if ( sched_rootfltr != -1 ) { - if ( ctl_conf_ptr->schedrootfltr != (uint16_t) NO_VAL) - error (MULTIPLE_VALUE_MSG, "SchedulerRootFilter"); - if ((sched_rootfltr < 0) || (sched_rootfltr > 0xffff)) - error("SchedulerRootFilter=%ld is invalid"); - else - ctl_conf_ptr->schedrootfltr = (uint16_t) sched_rootfltr; - } - - if ( sched_type ) { - if ( ctl_conf_ptr->schedtype ) { - xfree( ctl_conf_ptr->schedtype ); - } - ctl_conf_ptr->schedtype = sched_type; - } - - if ( select_type ) { - if ( ctl_conf_ptr->select_type ) { - xfree( ctl_conf_ptr->select_type ); - } - ctl_conf_ptr->select_type = select_type; - } - - if ( slurm_user ) { - struct passwd *slurm_passwd; - slurm_passwd = getpwnam(slurm_user); - if (slurm_passwd == NULL) { - error ("Invalid user for SlurmUser %s, ignored", - slurm_user); - } else { - if ( ctl_conf_ptr->slurm_user_name ) { - error (MULTIPLE_VALUE_MSG, "SlurmUser"); - xfree (ctl_conf_ptr->slurm_user_name); - } - ctl_conf_ptr->slurm_user_name 
= slurm_user; - if (slurm_passwd->pw_uid > 0xffff) - error("SlurmUser numberic overflow, will be fixed soon"); - else - ctl_conf_ptr->slurm_user_id = slurm_passwd->pw_uid; - } - } - - if ( slurmctld_debug != -1) { - if ( ctl_conf_ptr->slurmctld_debug != (uint16_t) NO_VAL) - error (MULTIPLE_VALUE_MSG, "SlurmctldDebug"); - ctl_conf_ptr->slurmctld_debug = slurmctld_debug; - } - - if ( slurmctld_pidfile ) { - if ( ctl_conf_ptr->slurmctld_pidfile ) { - error (MULTIPLE_VALUE_MSG, "SlurmctldPidFile"); - xfree (ctl_conf_ptr->slurmctld_pidfile); - } - ctl_conf_ptr->slurmctld_pidfile = slurmctld_pidfile; - } - - if ( slurmctld_logfile ) { - if ( ctl_conf_ptr->slurmctld_logfile ) { - error (MULTIPLE_VALUE_MSG, "SlurmctldLogFile"); - xfree (ctl_conf_ptr->slurmctld_logfile); - } - ctl_conf_ptr->slurmctld_logfile = slurmctld_logfile; - } - - if ( slurmctld_port != -1) { - if ( ctl_conf_ptr->slurmctld_port != (uint32_t) NO_VAL) - error (MULTIPLE_VALUE_MSG, "SlurmctldPort"); - else if (slurmctld_port < 0) - error ("SlurmctldPort=%ld is invalid", - slurmctld_port); - else - ctl_conf_ptr->slurmctld_port = slurmctld_port; - } + conf_hashtbl = s_p_hashtbl_create(slurm_conf_options); + conf_ptr->last_update = time(NULL); + s_p_parse_file(conf_hashtbl, file_name); + /* s_p_dump_values(conf_hashtbl, slurm_conf_options); */ + validate_and_set_defaults(conf_ptr, conf_hashtbl); + conf_ptr->slurm_conf = xstrdup(file_name); +} - if ( slurmctld_timeout != -1) { - if ( ctl_conf_ptr->slurmctld_timeout != (uint16_t) NO_VAL) - error (MULTIPLE_VALUE_MSG, "SlurmctldTimeout"); - if ((slurmctld_timeout < 0) || (slurmctld_timeout > 0xffff)) - error("SlurmctldTimeout=%ld is invalid", - slurmctld_timeout); - else - ctl_conf_ptr->slurmctld_timeout = slurmctld_timeout; +/* caller must lock conf_lock */ +static void +_destroy_slurm_conf() +{ + s_p_hashtbl_destroy(conf_hashtbl); + if (default_nodename_tbl != NULL) { + s_p_hashtbl_destroy(default_nodename_tbl); + default_nodename_tbl = NULL; } - - if ( 
slurmd_debug != -1) { - if ( ctl_conf_ptr->slurmd_debug != (uint16_t) NO_VAL) - error (MULTIPLE_VALUE_MSG, "SlurmdDebug"); - if ((slurmd_debug < 0) || (slurmd_debug > 0xffff)) - error("SlurmdDebug=%ld is invalid", slurmd_debug); - else - ctl_conf_ptr->slurmd_debug = slurmd_debug; - } - - if ( slurmd_logfile ) { - if ( ctl_conf_ptr->slurmd_logfile ) { - error (MULTIPLE_VALUE_MSG, "SlurmdLogFile"); - xfree (ctl_conf_ptr->slurmd_logfile); - } - ctl_conf_ptr->slurmd_logfile = slurmd_logfile; + if (default_partition_tbl != NULL) { + s_p_hashtbl_destroy(default_partition_tbl); + default_partition_tbl = NULL; } + free_slurm_conf(conf_ptr); + /* xfree(conf_ptr); */ +} -#ifndef MULTIPLE_SLURMD - if ( slurmd_port != -1) { - if ( ctl_conf_ptr->slurmd_port != (uint32_t) NO_VAL) - error (MULTIPLE_VALUE_MSG, "SlurmdPort"); - else if (slurmd_port < 0) - error ("SlurmdPort=%ld is invalid", slurmd_port); - else - ctl_conf_ptr->slurmd_port = slurmd_port; - } -#endif +/* + * slurm_conf_init - load the slurm configuration from a file. + * IN file_name - name of the slurm configuration file to be read + * If file_name is NULL, then this routine tries to use + * the value in the SLURM_CONF env variable. Failing that, + * it uses the compiled-in default file name. + * If the conf structures have already been initialized by a call to + * slurm_conf_init, any subsequent calls will do nothing until + * slurm_conf_destroy is called. + * RET SLURM_SUCCESS if conf file is initialized. If the slurm conf + * was already initialized, return SLURM_ERROR. 
+ */ +extern int +slurm_conf_init(char *file_name) +{ + pthread_mutex_lock(&conf_lock); - if ( slurmd_spooldir ) { - if ( ctl_conf_ptr->slurmd_spooldir ) { - error (MULTIPLE_VALUE_MSG, "SlurmdSpoolDir"); - xfree (ctl_conf_ptr->slurmd_spooldir); - } - ctl_conf_ptr->slurmd_spooldir = slurmd_spooldir; + if (conf_initialized) { + pthread_mutex_unlock(&conf_lock); + return SLURM_ERROR; } - if ( slurmd_pidfile ) { - if ( ctl_conf_ptr->slurmd_pidfile ) { - error (MULTIPLE_VALUE_MSG, "SlurmdPidFile"); - xfree (ctl_conf_ptr->slurmd_pidfile); - } - ctl_conf_ptr->slurmd_pidfile = slurmd_pidfile; - } + _init_slurm_conf(file_name); + conf_initialized = true; - if ( slurmd_timeout != -1) { - if ( ctl_conf_ptr->slurmd_timeout != (uint16_t) NO_VAL) - error (MULTIPLE_VALUE_MSG, "SlurmdTimeout"); - if ((slurmd_timeout < 0) || (slurmd_timeout > 0xffff)) - error("SlurmdTimeout=%ld is invalid", slurmd_timeout); - else - ctl_conf_ptr->slurmd_timeout = slurmd_timeout; - } + pthread_mutex_unlock(&conf_lock); + return SLURM_SUCCESS; +} - if ( srun_prolog ) { - if ( ctl_conf_ptr->srun_prolog ) { - error (MULTIPLE_VALUE_MSG, "SrunProlog"); - xfree (ctl_conf_ptr->srun_prolog); - } - ctl_conf_ptr->srun_prolog = srun_prolog; - } +/* + * slurm_conf_reinit - reload the slurm configuration from a file. + * IN file_name - name of the slurm configuration file to be read + * If file_name is NULL, then this routine tries to use + * the value in the SLURM_CONF env variable. Failing that, + * it uses the compiled-in default file name. + * Unlike slurm_conf_init, slurm_conf_reinit will always reread the + * file and reinitialize the configuration structures. + * RET SLURM_SUCCESS if conf file is reinitialized, otherwise SLURM_ERROR. 
+ */ +extern int +slurm_conf_reinit(char *file_name) +{ + pthread_mutex_lock(&conf_lock); - if ( srun_epilog ) { - if ( ctl_conf_ptr->srun_epilog ) { - error (MULTIPLE_VALUE_MSG, "SrunEpilog"); - xfree (ctl_conf_ptr->srun_epilog); - } - ctl_conf_ptr->srun_epilog = srun_epilog; + if (file_name == NULL) { + file_name = getenv("SLURM_CONF"); + if (file_name == NULL) + file_name = SLURM_CONFIG_FILE; } - if ( state_save_location ) { - if ( ctl_conf_ptr->state_save_location ) { - error (MULTIPLE_VALUE_MSG, "StateSaveLocation"); - xfree (ctl_conf_ptr->state_save_location); - } - ctl_conf_ptr->state_save_location = state_save_location; + if (conf_initialized) { + /* could check modified time on slurm.conf here */ + _destroy_slurm_conf(); } - if ( switch_type ) { - if ( ctl_conf_ptr->switch_type ) { - error (MULTIPLE_VALUE_MSG, "SwitchType"); - xfree (ctl_conf_ptr->switch_type); - } - ctl_conf_ptr->switch_type = switch_type; - } + _init_slurm_conf(file_name); + conf_initialized = true; - if ( task_epilog ) { - if ( ctl_conf_ptr->task_epilog ) { - error (MULTIPLE_VALUE_MSG, "TaskEpilog"); - xfree (ctl_conf_ptr->task_epilog); - } - ctl_conf_ptr->task_epilog = task_epilog; - } + pthread_mutex_unlock(&conf_lock); + return SLURM_SUCCESS; - if ( task_prolog ) { - if ( ctl_conf_ptr->task_prolog ) { - error (MULTIPLE_VALUE_MSG, "TaskProlog"); - xfree (ctl_conf_ptr->task_prolog); - } - ctl_conf_ptr->task_prolog = task_prolog; - } +} - if ( task_plugin ) { - if ( ctl_conf_ptr->task_plugin ) { - error (MULTIPLE_VALUE_MSG, "TaskPlugin"); - xfree (ctl_conf_ptr->task_plugin); - } - ctl_conf_ptr->task_plugin = task_plugin; - } +extern int +slurm_conf_destroy(void) +{ + pthread_mutex_lock(&conf_lock); - if ( tmp_fs ) { - if ( ctl_conf_ptr->tmp_fs ) { - error (MULTIPLE_VALUE_MSG, "TmpFS"); - xfree (ctl_conf_ptr->tmp_fs); - } - ctl_conf_ptr->tmp_fs = tmp_fs; + if (!conf_initialized) { + pthread_mutex_unlock(&conf_lock); + return SLURM_SUCCESS; } - if ( wait_time != -1) { - if ( 
ctl_conf_ptr->wait_time != (uint16_t) NO_VAL) - error (MULTIPLE_VALUE_MSG, "WaitTime"); - if ((wait_time < 0) || (wait_time > 0xffff)) - error("WaitTime=%ld is invalid", wait_time); - else - ctl_conf_ptr->wait_time = wait_time; - } + _destroy_slurm_conf(); - if ( tree_width != -1) { - if ( ctl_conf_ptr->tree_width != (uint16_t) NO_VAL) - error (MULTIPLE_VALUE_MSG, "TreeWidth"); - if ((tree_width < 1) || (tree_width > 0xffff)) - error("TreeWidth=%ld is invalid", tree_width); - else - ctl_conf_ptr->tree_width = tree_width; - } + conf_initialized = false; + pthread_mutex_unlock(&conf_lock); - return 0; + return SLURM_SUCCESS; } -/* - * _parse_node_spec - just overwrite node specifications (toss the results) - * IN/OUT in_line - input line, parsed info overwritten with white-space - * IN slurmd_hosts - if true then build a list of hosts on which slurmd runs, - * only useful for "scontrol show daemons" command - * RET 0 if no error, otherwise an error code - */ -static int -_parse_node_spec (char *in_line, bool slurmd_hosts) +extern slurm_ctl_conf_t * +slurm_conf_lock(void) { - int error_code; - char *feature = NULL, *node_addr = NULL, *node_name = NULL; - char *state = NULL, *reason=NULL; - char *node_hostname = NULL; - int cpus_val, real_memory_val, tmp_disk_val, weight_val; - int port; - - error_code = slurm_parser (in_line, - "Feature=", 's', &feature, - "NodeAddr=", 's', &node_addr, - "NodeName=", 's', &node_name, - "NodeHostname=", 's', &node_hostname, - "Port=", 'd', &port, - "Procs=", 'd', &cpus_val, - "RealMemory=", 'd', &real_memory_val, - "Reason=", 's', &reason, - "State=", 's', &state, - "TmpDisk=", 'd', &tmp_disk_val, - "Weight=", 'd', &weight_val, - "END"); - - if (error_code) - return error_code; + pthread_mutex_lock(&conf_lock); - if (node_name - && (node_hostname || slurmd_hosts)) { - all_slurmd_hosts = true; - _register_conf_node_aliases(node_name, node_hostname); + if (!conf_initialized) { + _init_slurm_conf(NULL); + conf_initialized = true; } - 
xfree(feature); - xfree(node_addr); - xfree(node_name); - xfree(node_hostname); - xfree(reason); - xfree(state); - - return error_code; + return conf_ptr; } -/* - * _parse_part_spec - just overwrite partition specifications (toss the - * results) - * IN/OUT in_line - input line, parsed info overwritten with white-space - * RET 0 if no error, otherwise an error code - */ -static int -_parse_part_spec (char *in_line) +extern void +slurm_conf_unlock(void) { - int error_code; - char *allow_groups = NULL, *default_str = NULL, *hidden_str = NULL; - char *partition = NULL, *max_time_str = NULL, *root_str = NULL; - char *nodes = NULL, *shared_str = NULL, *state_str = NULL; - int max_nodes_val, min_nodes_val; - - error_code = slurm_parser (in_line, - "AllowGroups=", 's', &allow_groups, - "Default=", 's', &default_str, - "Hidden=", 's', &hidden_str, - "PartitionName=", 's', &partition, - "RootOnly=", 's', &root_str, - "MaxTime=", 's', &max_time_str, - "MaxNodes=", 'd', &max_nodes_val, - "MinNodes=", 'd', &min_nodes_val, - "Nodes=", 's', &nodes, - "Shared=", 's', &shared_str, - "State=", 's', &state_str, - "END"); - - xfree(allow_groups); - xfree(default_str); - xfree(hidden_str); - xfree(partition); - xfree(max_time_str); - xfree(root_str); - xfree(nodes); - xfree(shared_str); - xfree(state_str); - - return error_code; + pthread_mutex_unlock(&conf_lock); } -/* - * read_slurm_conf_ctl - load the slurm configuration from the configured - * file. - * OUT ctl_conf_ptr - pointer to data structure to be filled - * IN slurmd_hosts - if true then build a list of hosts on which slurmd runs - * (only useful for "scontrol show daemons" command). Otherwise only - * record nodes in which NodeName and NodeHostname differ. 
- * RET 0 if no error, otherwise an error code - */ -extern int -read_slurm_conf_ctl (slurm_ctl_conf_t *ctl_conf_ptr, bool slurmd_hosts) +/* Normalize supplied debug level to be in range per log.h definitions */ +static void _normalize_debug_level(uint16_t *level) { - FILE *slurm_spec_file; /* pointer to input data file */ - int line_num; /* line number in input file */ - int line_size; /* bytes in current input line */ - char in_line[BUFFER_SIZE]; /* input line */ - int error_code, i, j; - - assert (ctl_conf_ptr); - init_slurm_conf (ctl_conf_ptr); - - if (ctl_conf_ptr->slurm_conf == NULL) { - char *val = getenv("SLURM_CONF"); - - if (val == NULL) { - val = SLURM_CONFIG_FILE; - } - ctl_conf_ptr->slurm_conf = xstrdup (val); - } - slurm_spec_file = fopen (ctl_conf_ptr->slurm_conf, "r"); - if (slurm_spec_file == NULL) { - fatal ("read_slurm_conf_ctl error opening file %s, %m", - ctl_conf_ptr->slurm_conf); - } - - /* process the data file */ - line_num = 0; - while (fgets (in_line, BUFFER_SIZE, slurm_spec_file) != NULL) { - line_num++; - line_size = strlen (in_line); - if (line_size >= (BUFFER_SIZE - 1)) { - error ("Line %d, of configuration file %s too long", - line_num, ctl_conf_ptr->slurm_conf); - fclose (slurm_spec_file); - return E2BIG; - break; - } - - /* everything after a non-escaped "#" is a comment */ - /* replace comment flag "#" with a `\0' (End of string) */ - /* an escaped value "\#" is translated to "#" */ - /* this permitted embedded "#" in node/partition names */ - for (i = 0; i < line_size; i++) { - if (in_line[i] == '\0') - break; - if (in_line[i] != '#') - continue; - if ((i > 0) && (in_line[i - 1] == '\\')) { - for (j = i; j < line_size; j++) { - in_line[j - 1] = in_line[j]; - } - line_size--; - continue; - } - in_line[i] = '\0'; - break; - } - - /* parse what is left */ - - /* overall configuration parameters */ - if ((error_code = parse_config_spec (in_line, ctl_conf_ptr))) { - fclose (slurm_spec_file); - return error_code; - } - - /* node 
configuration parameters */ - if ((error_code = _parse_node_spec (in_line, slurmd_hosts))) { - fclose (slurm_spec_file); - return error_code; - } - - /* partition configuration parameters */ - if ((error_code = _parse_part_spec (in_line))) { - fclose (slurm_spec_file); - return error_code; - } - - /* report any leftover strings on input line */ - report_leftover (in_line, line_num); + if (*level > LOG_LEVEL_DEBUG3) { + error("Normalizing debug level from %u to %d", + *level, LOG_LEVEL_DEBUG3); + *level = LOG_LEVEL_DEBUG3; } - - fclose (slurm_spec_file); - validate_config (ctl_conf_ptr); - return SLURM_SUCCESS; + /* level is uint16, always > LOG_LEVEL_QUIET(0), can't underflow */ } /* - * report_leftover - report any un-parsed (non-whitespace) characters on the - * configuration input line (we over-write parsed characters with whitespace). - * IN in_line - what is left of the configuration input line. - * IN line_num - line number of the configuration file. - */ -void -report_leftover (char *in_line, int line_num) -{ - int i; - - for (i = 0; i < strlen (in_line); i++) { - if (isspace ((int) in_line[i]) || (in_line[i] == '\n')) - continue; - error ("Ignored input on line %d of configuration: %s", - line_num, &in_line[i]); - break; - } -} - -/* validate configuration * * IN/OUT ctl_conf_ptr - a configuration as loaded by read_slurm_conf_ctl * @@ -1263,215 +1174,255 @@ report_leftover (char *in_line, int line_num) * NOTE: if backup_addr is NULL, it is over-written by backup_controller * NOTE: if control_addr is NULL, it is over-written by control_machine */ -void -validate_config (slurm_ctl_conf_t *ctl_conf_ptr) +static void +validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) { - if ((ctl_conf_ptr->backup_controller != NULL) && - (strcasecmp("localhost", ctl_conf_ptr->backup_controller) == 0)) { - xfree (ctl_conf_ptr->backup_controller); - ctl_conf_ptr->backup_controller = xmalloc (MAX_SLURM_NAME); - if ( getnodename 
(ctl_conf_ptr->backup_controller, - MAX_SLURM_NAME) ) - fatal ("getnodename: %m"); - } - - if ((ctl_conf_ptr->backup_addr == NULL) && - (ctl_conf_ptr->backup_controller != NULL)) - ctl_conf_ptr->backup_addr = - xstrdup (ctl_conf_ptr->backup_controller); - - if ((ctl_conf_ptr->backup_controller == NULL) && - (ctl_conf_ptr->backup_addr != NULL)) { - error ("BackupAddr specified without BackupController"); - xfree (ctl_conf_ptr->backup_addr); + if (s_p_get_string(&conf->backup_controller, "BackupController", + hashtbl) + && strcasecmp("localhost", conf->backup_controller) == 0) { + xfree(conf->backup_controller); + conf->backup_controller = xmalloc (MAX_SLURM_NAME); + if (getnodename(conf->backup_controller, MAX_SLURM_NAME)) + fatal("getnodename: %m"); + } + if (s_p_get_string(&conf->backup_addr, "BackupAddr", hashtbl)) { + if (conf->backup_controller == NULL) { + error("BackupAddr specified without BackupController"); + xfree(conf->backup_addr); + } + } else { + if (conf->backup_controller != NULL) + conf->backup_addr = xstrdup(conf->backup_controller); } - if (ctl_conf_ptr->control_machine == NULL) + if (!s_p_get_string(&conf->control_machine, "ControlMachine", hashtbl)) fatal ("validate_config: ControlMachine not specified."); - else if (strcasecmp("localhost", ctl_conf_ptr->control_machine) == 0) { - xfree (ctl_conf_ptr->control_machine); - ctl_conf_ptr->control_machine = xmalloc (MAX_SLURM_NAME); - if ( getnodename (ctl_conf_ptr->control_machine, - MAX_SLURM_NAME) ) - fatal ("getnodename: %m"); + else if (strcasecmp("localhost", conf->control_machine) == 0) { + xfree (conf->control_machine); + conf->control_machine = xmalloc(MAX_SLURM_NAME); + if (getnodename(conf->control_machine, MAX_SLURM_NAME)) + fatal("getnodename: %m"); } - if ((ctl_conf_ptr->control_addr == NULL) && - (ctl_conf_ptr->control_machine != NULL)) - ctl_conf_ptr->control_addr = - xstrdup (ctl_conf_ptr->control_machine); - - if ((ctl_conf_ptr->backup_controller != NULL) && - (strcmp 
(ctl_conf_ptr->backup_controller, - ctl_conf_ptr->control_machine) == 0)) { - error ("ControlMachine and BackupController identical"); - xfree (ctl_conf_ptr->backup_addr); - xfree (ctl_conf_ptr->backup_controller); + if (!s_p_get_string(&conf->control_addr, "ControlAddr", hashtbl) + && conf->control_machine != NULL) + conf->control_addr = xstrdup (conf->control_machine); + + if ((conf->backup_controller != NULL) + && (strcmp(conf->backup_controller, conf->control_machine) == 0)) { + error("ControlMachine and BackupController identical"); + xfree(conf->backup_addr); + xfree(conf->backup_controller); } - if (ctl_conf_ptr->job_credential_private_key == NULL) - fatal ("JobCredentialPrivateKey not set"); - if (ctl_conf_ptr->job_credential_public_certificate == NULL) - fatal ("JobCredentialPublicCertificate not set"); + if (!s_p_get_string(&conf->job_credential_private_key, + "JobCredentialPrivateKey", hashtbl)) + fatal("JobCredentialPrivateKey not set"); + if (!s_p_get_string(&conf->job_credential_public_certificate, + "JobCredentialPublicCertificate", hashtbl)) + fatal("JobCredentialPublicCertificate not set"); - if (ctl_conf_ptr->max_job_cnt < 1) - fatal ("MaxJobCount=%u, No jobs permitted", - ctl_conf_ptr->max_job_cnt); + if (s_p_get_uint16(&conf->max_job_cnt, "MaxJobCount", hashtbl) + && conf->max_job_cnt < 1) + fatal("MaxJobCount=%u, No jobs permitted", conf->max_job_cnt); - if (ctl_conf_ptr->authtype == NULL) - ctl_conf_ptr->authtype = xstrdup(DEFAULT_AUTH_TYPE); + if (!s_p_get_string(&conf->authtype, "AuthType", hashtbl)) + conf->authtype = xstrdup(DEFAULT_AUTH_TYPE); - if (ctl_conf_ptr->cache_groups == (uint16_t) NO_VAL) - ctl_conf_ptr->cache_groups = DEFAULT_CACHE_GROUPS; + if (!s_p_get_uint16(&conf->cache_groups, "CacheGroups", hashtbl)) + conf->cache_groups = DEFAULT_CACHE_GROUPS; - if (ctl_conf_ptr->checkpoint_type == NULL) - ctl_conf_ptr->checkpoint_type = - xstrdup(DEFAULT_CHECKPOINT_TYPE); + if (!s_p_get_string(&conf->checkpoint_type, "CheckpointType", 
hashtbl)) + conf->checkpoint_type = xstrdup(DEFAULT_CHECKPOINT_TYPE); - if (ctl_conf_ptr->fast_schedule == (uint16_t) NO_VAL) - ctl_conf_ptr->fast_schedule = DEFAULT_FAST_SCHEDULE; + s_p_get_string(&conf->epilog, "Epilog", hashtbl); - if (ctl_conf_ptr->first_job_id == (uint32_t) NO_VAL) - ctl_conf_ptr->first_job_id = DEFAULT_FIRST_JOB_ID; + if (!s_p_get_uint16(&conf->fast_schedule, "FastSchedule", hashtbl)) + conf->fast_schedule = DEFAULT_FAST_SCHEDULE; - if (ctl_conf_ptr->inactive_limit == (uint16_t) NO_VAL) - ctl_conf_ptr->inactive_limit = DEFAULT_INACTIVE_LIMIT; + if (!s_p_get_uint32(&conf->first_job_id, "FirstJobId", hashtbl)) + conf->first_job_id = DEFAULT_FIRST_JOB_ID; + + if (s_p_get_uint16(&conf->inactive_limit, "InactiveLimit", hashtbl)) { +#ifdef HAVE_BG + /* Inactive limit must be zero on Blue Gene */ + error("InactiveLimit=%u is invalid on Blue Gene", + conf->inactive_limit); + conf->inactive_limit = 0; /* default value too */ +#endif + } else { + conf->inactive_limit = DEFAULT_INACTIVE_LIMIT; + } - if (ctl_conf_ptr->job_acct_loc == NULL) - ctl_conf_ptr->job_acct_loc = xstrdup(DEFAULT_JOB_ACCT_LOC); + if (!s_p_get_string(&conf->job_acct_loc, "JobAcctLoc", hashtbl)) + conf->job_acct_loc = xstrdup(DEFAULT_JOB_ACCT_LOC); - if (ctl_conf_ptr->job_acct_parameters == NULL) - ctl_conf_ptr->job_acct_parameters = - xstrdup(DEFAULT_JOB_ACCT_PARAMETERS); + if (!s_p_get_string(&conf->job_acct_parameters, + "JobAcctParameters", hashtbl)) + conf->job_acct_parameters = + xstrdup(DEFAULT_JOB_ACCT_PARAMETERS); - if (ctl_conf_ptr->job_acct_type == NULL) - ctl_conf_ptr->job_acct_type = xstrdup(DEFAULT_JOB_ACCT_TYPE); + if (!s_p_get_string(&conf->job_acct_type, "JobAcctType", hashtbl)) + conf->job_acct_type = xstrdup(DEFAULT_JOB_ACCT_TYPE); - if (ctl_conf_ptr->job_comp_type == NULL) - ctl_conf_ptr->job_comp_type = xstrdup(DEFAULT_JOB_COMP_TYPE); + s_p_get_string(&conf->job_comp_loc, "JobCompLoc", hashtbl); - if (ctl_conf_ptr->kill_wait == (uint16_t) NO_VAL) - 
ctl_conf_ptr->kill_wait = DEFAULT_KILL_WAIT; + if (!s_p_get_string(&conf->job_comp_type, "JobCompType", hashtbl)) + conf->job_comp_type = xstrdup(DEFAULT_JOB_COMP_TYPE); - if (ctl_conf_ptr->max_job_cnt == (uint16_t) NO_VAL) - ctl_conf_ptr->max_job_cnt = DEFAULT_MAX_JOB_COUNT; + if (!s_p_get_uint16(&conf->kill_wait, "KillWait", hashtbl)) + conf->kill_wait = DEFAULT_KILL_WAIT; - if (ctl_conf_ptr->min_job_age == (uint16_t) NO_VAL) - ctl_conf_ptr->min_job_age = DEFAULT_MIN_JOB_AGE; + if (!s_p_get_uint16(&conf->max_job_cnt, "MaxJobCount", hashtbl)) + conf->max_job_cnt = DEFAULT_MAX_JOB_COUNT; - if (ctl_conf_ptr->mpi_default == NULL) - ctl_conf_ptr->mpi_default = xstrdup(DEFAULT_MPI_DEFAULT); - if (ctl_conf_ptr->plugindir == NULL) - ctl_conf_ptr->plugindir = xstrdup(SLURM_PLUGIN_PATH); + if (!s_p_get_uint16(&conf->min_job_age, "MinJobAge", hashtbl)) + conf->min_job_age = DEFAULT_MIN_JOB_AGE; - if (ctl_conf_ptr->switch_type == NULL) - ctl_conf_ptr->switch_type = xstrdup(DEFAULT_SWITCH_TYPE); + if (!s_p_get_string(&conf->mpi_default, "MpiDefault", hashtbl)) + conf->mpi_default = xstrdup(DEFAULT_MPI_DEFAULT); - if (ctl_conf_ptr->proctrack_type == NULL) { - if (!strcmp(ctl_conf_ptr->switch_type,"switch/elan")) - ctl_conf_ptr->proctrack_type = - xstrdup("proctrack/rms"); + if (!s_p_get_string(&conf->plugindir, "PluginDir", hashtbl)) + conf->plugindir = xstrdup(SLURM_PLUGIN_PATH); + + if (!s_p_get_string(&conf->switch_type, "SwitchType", hashtbl)) + conf->switch_type = xstrdup(DEFAULT_SWITCH_TYPE); + + if (!s_p_get_string(&conf->proctrack_type, "ProctrackType", hashtbl)) { + if (!strcmp(conf->switch_type,"switch/elan")) + conf->proctrack_type = xstrdup("proctrack/rms"); else - ctl_conf_ptr->proctrack_type = - xstrdup(DEFAULT_PROCTRACK_TYPE); + conf->proctrack_type = + xstrdup(DEFAULT_PROCTRACK_TYPE); } - if ((!strcmp(ctl_conf_ptr->switch_type, "switch/elan")) - && (!strcmp(ctl_conf_ptr->proctrack_type,"proctrack/linuxproc"))) + if ((!strcmp(conf->switch_type, "switch/elan")) + 
&& (!strcmp(conf->proctrack_type,"proctrack/linuxproc"))) fatal("proctrack/linuxproc is incompatable with switch/elan"); - if (ctl_conf_ptr->propagate_rlimits_except) { - if ((parse_rlimits( ctl_conf_ptr->propagate_rlimits_except, - NO_PROPAGATE_RLIMITS )) < 0) - fatal( "Bad PropagateResourceLimitsExcept: %s", - ctl_conf_ptr->propagate_rlimits_except ); - } - else { - if (ctl_conf_ptr->propagate_rlimits == NULL) - ctl_conf_ptr->propagate_rlimits = xstrdup( "ALL" ); - if ((parse_rlimits( ctl_conf_ptr->propagate_rlimits, + s_p_get_string(&conf->prolog, "Prolog", hashtbl); + + if (s_p_get_string(&conf->propagate_rlimits_except, + "PropagateResourceLimitsExcept", hashtbl)) { + if ((parse_rlimits(conf->propagate_rlimits_except, + NO_PROPAGATE_RLIMITS)) < 0) + fatal("Bad PropagateResourceLimitsExcept: %s", + conf->propagate_rlimits_except); + } else { + if (!s_p_get_string(&conf->propagate_rlimits, + "PropagateResourceLimits", hashtbl)) + conf->propagate_rlimits = xstrdup( "ALL" ); + if ((parse_rlimits(conf->propagate_rlimits, PROPAGATE_RLIMITS )) < 0) - fatal( "Bad PropagateResourceLimits: %s", - ctl_conf_ptr->propagate_rlimits ); + fatal("Bad PropagateResourceLimits: %s", + conf->propagate_rlimits); } - if (ctl_conf_ptr->ret2service == (uint16_t) NO_VAL) - ctl_conf_ptr->ret2service = DEFAULT_RETURN_TO_SERVICE; + if (!s_p_get_uint16(&conf->ret2service, "ReturnToService", hashtbl)) + conf->ret2service = DEFAULT_RETURN_TO_SERVICE; - if (ctl_conf_ptr->schedrootfltr == (uint16_t) NO_VAL) - ctl_conf_ptr->schedrootfltr = DEFAULT_SCHEDROOTFILTER; + s_p_get_string(&conf->schedauth, "SchedulerAuth", hashtbl); + + if (s_p_get_uint16(&conf->schedport, "SchedulerPort", hashtbl)) { + if (conf->schedport == 0) { + error("SchedulerPort=0 is invalid"); + conf->schedport = (uint16_t)NO_VAL; + } + } - if (ctl_conf_ptr->schedtype == NULL) - ctl_conf_ptr->schedtype = xstrdup(DEFAULT_SCHEDTYPE); + if (!s_p_get_uint16(&conf->schedrootfltr, + "SchedulerRootFilter", hashtbl)) + 
conf->schedrootfltr = DEFAULT_SCHEDROOTFILTER; - if (ctl_conf_ptr->select_type == NULL) - ctl_conf_ptr->select_type = xstrdup(DEFAULT_SELECT_TYPE); + if (!s_p_get_string(&conf->schedtype, "SchedulerType", hashtbl)) + conf->schedtype = xstrdup(DEFAULT_SCHEDTYPE); - if (ctl_conf_ptr->slurm_user_name == NULL) { - ctl_conf_ptr->slurm_user_name = xstrdup("root"); - ctl_conf_ptr->slurm_user_id = 0; + if (!s_p_get_string(&conf->select_type, "SelectType", hashtbl)) + conf->select_type = xstrdup(DEFAULT_SELECT_TYPE); + + if (!s_p_get_string( &conf->slurm_user_name, "SlurmUser", hashtbl)) { + conf->slurm_user_name = xstrdup("root"); + conf->slurm_user_id = 0; + } else { + struct passwd *slurm_passwd; + slurm_passwd = getpwnam(conf->slurm_user_name); + if (slurm_passwd == NULL) { + error ("Invalid user for SlurmUser %s, ignored", + conf->slurm_user_name); + xfree(conf->slurm_user_name); + } else { + if (slurm_passwd->pw_uid > 0xffff) + error("SlurmUser numeric overflow, " + "will be fixed soon"); + else + conf->slurm_user_id = slurm_passwd->pw_uid; + } } - if (ctl_conf_ptr->slurmctld_debug != (uint16_t) NO_VAL) - _normalize_debug_level(&ctl_conf_ptr->slurmctld_debug); + if (s_p_get_uint16(&conf->slurmctld_debug, "SlurmctldDebug", hashtbl)) + _normalize_debug_level(&conf->slurmctld_debug); else - ctl_conf_ptr->slurmctld_debug = LOG_LEVEL_INFO; + conf->slurmctld_debug = LOG_LEVEL_INFO; + + if (!s_p_get_string(&conf->slurmctld_pidfile, + "SlurmctldPidFile", hashtbl)) + conf->slurmctld_pidfile = xstrdup(DEFAULT_SLURMCTLD_PIDFILE); - if (ctl_conf_ptr->slurmctld_pidfile == NULL) - ctl_conf_ptr->slurmctld_pidfile = - xstrdup(DEFAULT_SLURMCTLD_PIDFILE); + s_p_get_string(&conf->slurmctld_logfile, "SlurmctldLogFile", hashtbl); - if (ctl_conf_ptr->slurmctld_port == (uint32_t) NO_VAL) - ctl_conf_ptr->slurmctld_port = SLURMCTLD_PORT; + if (!s_p_get_uint32(&conf->slurmctld_port, "SlurmctldPort", hashtbl)) + conf->slurmctld_port = SLURMCTLD_PORT; - if (ctl_conf_ptr->slurmctld_timeout == 
(uint16_t) NO_VAL) - ctl_conf_ptr->slurmctld_timeout = DEFAULT_SLURMCTLD_TIMEOUT; + if (!s_p_get_uint16(&conf->slurmctld_timeout, + "SlurmctldTimeout", hashtbl)) + conf->slurmctld_timeout = DEFAULT_SLURMCTLD_TIMEOUT; - if (ctl_conf_ptr->slurmd_debug != (uint16_t) NO_VAL) - _normalize_debug_level(&ctl_conf_ptr->slurmd_debug); + if (s_p_get_uint16(&conf->slurmd_debug, "SlurmdDebug", hashtbl)) + _normalize_debug_level(&conf->slurmd_debug); else - ctl_conf_ptr->slurmd_debug = LOG_LEVEL_INFO; + conf->slurmd_debug = LOG_LEVEL_INFO; - if (ctl_conf_ptr->slurmd_pidfile == NULL) - ctl_conf_ptr->slurmd_pidfile = xstrdup(DEFAULT_SLURMD_PIDFILE); + s_p_get_string(&conf->slurmd_logfile, "SlurmdLogFile", hashtbl); -#ifndef MULTIPLE_SLURMD - if (ctl_conf_ptr->slurmd_port == (uint32_t) NO_VAL) - ctl_conf_ptr->slurmd_port = SLURMD_PORT; -#endif + if (!s_p_get_string(&conf->slurmd_pidfile, "SlurmdPidFile", hashtbl)) + conf->slurmd_pidfile = xstrdup(DEFAULT_SLURMD_PIDFILE); - if (ctl_conf_ptr->slurmd_spooldir == NULL) - ctl_conf_ptr->slurmd_spooldir = xstrdup(DEFAULT_SPOOLDIR); + if (!s_p_get_uint32(&conf->slurmd_port, "SlurmdPort", hashtbl)) + conf->slurmd_port = SLURMD_PORT; - if (ctl_conf_ptr->slurmd_timeout == (uint16_t) NO_VAL) - ctl_conf_ptr->slurmd_timeout = DEFAULT_SLURMD_TIMEOUT; + if (!s_p_get_string(&conf->slurmd_spooldir, "SlurmdSpoolDir", hashtbl)) + conf->slurmd_spooldir = xstrdup(DEFAULT_SPOOLDIR); - if (ctl_conf_ptr->state_save_location == NULL) - ctl_conf_ptr->state_save_location = xstrdup( - DEFAULT_SAVE_STATE_LOC); + if (!s_p_get_uint16(&conf->slurmd_timeout, "SlurmdTimeout", hashtbl)) + conf->slurmd_timeout = DEFAULT_SLURMD_TIMEOUT; - /* see above for switch_type, order dependent */ + s_p_get_string(&conf->srun_prolog, "SrunProlog", hashtbl); + s_p_get_string(&conf->srun_epilog, "SrunEpilog", hashtbl); - if (ctl_conf_ptr->task_plugin == NULL) - ctl_conf_ptr->task_plugin = xstrdup(DEFAULT_TASK_PLUGIN); + if (!s_p_get_string(&conf->state_save_location, + 
"StateSaveLocation", hashtbl)) + conf->state_save_location = xstrdup(DEFAULT_SAVE_STATE_LOC); - if (ctl_conf_ptr->tmp_fs == NULL) - ctl_conf_ptr->tmp_fs = xstrdup(DEFAULT_TMP_FS); + /* see above for switch_type, order dependent */ - if (ctl_conf_ptr->wait_time == (uint16_t) NO_VAL) - ctl_conf_ptr->wait_time = DEFAULT_WAIT_TIME; - - if (ctl_conf_ptr->tree_width == (uint16_t) NO_VAL) - ctl_conf_ptr->tree_width = DEFAULT_TREE_WIDTH; + if (!s_p_get_string(&conf->task_plugin, "TaskPlugin", hashtbl)) + conf->task_plugin = xstrdup(DEFAULT_TASK_PLUGIN); -} + s_p_get_string(&conf->task_epilog, "TaskEpilog", hashtbl); + s_p_get_string(&conf->task_prolog, "TaskProlog", hashtbl); -/* Normalize supplied debug level to be in range per log.h definitions */ -static void _normalize_debug_level(uint16_t *level) -{ - if (*level > LOG_LEVEL_DEBUG3) { - error("Normalizing debug level from %u to %d", - *level, LOG_LEVEL_DEBUG3); - *level = LOG_LEVEL_DEBUG3; + if (!s_p_get_string(&conf->tmp_fs, "TmpFS", hashtbl)) + conf->tmp_fs = xstrdup(DEFAULT_TMP_FS); + + if (!s_p_get_uint16(&conf->wait_time, "WaitTime", hashtbl)) + conf->wait_time = DEFAULT_WAIT_TIME; + + if (s_p_get_uint16(&conf->tree_width, "TreeWidth", hashtbl)) { + if (conf->tree_width == 0) { + error("TreeWidth=0 is invalid"); + conf->tree_width = DEFAULT_TREE_WIDTH; /* default? 
*/ + } + } else { + conf->tree_width = DEFAULT_TREE_WIDTH; } - /* level is uint16, always > LOG_LEVEL_QUIET(0), can't underflow */ } + diff --git a/src/common/read_config.h b/src/common/read_config.h index b0d335c64a0..2990d4ec1cb 100644 --- a/src/common/read_config.h +++ b/src/common/read_config.h @@ -31,6 +31,9 @@ #define _READ_CONFIG_H #include "src/common/slurm_protocol_defs.h" +#include "src/common/parse_config.h" + +extern slurm_ctl_conf_t slurmctld_conf; #define DEFAULT_AUTH_TYPE "auth/none" #define DEFAULT_FAST_SCHEDULE 1 @@ -74,90 +77,144 @@ #define DEFAULT_WAIT_TIME 0 #define DEFAULT_TREE_WIDTH 50 -/* - * init_slurm_conf - initialize or re-initialize the slurm configuration - * values defaults (NULL or NO_VAL). Note that the configuration - * file pathname (slurm_conf) is not changed. - * IN/OUT ctl_conf_ptr - pointer to data structure to be initialized +typedef struct slurm_conf_node { + char *nodenames; + char *hostnames; + char *addresses; + char *feature; /* arbitrary list of features associated */ + uint16_t port; + uint32_t cpus; /* count of cpus running on the node */ + uint32_t real_memory; /* MB real memory on the node */ + char *reason; + char *state; + uint32_t tmp_disk; /* MB total storage in TMP_FS file system */ + uint32_t weight; /* arbitrary priority of node for + * scheduling work on */ +} slurm_conf_node_t; + +typedef struct slurm_conf_partition { + char *name; /* name of the partition */ + bool hidden_flag; /* 1 if hidden by default */ + uint32_t max_time; /* minutes or INFINITE */ + uint32_t max_nodes; /* per job or INFINITE */ + uint32_t min_nodes; /* per job */ + uint32_t total_nodes; /* total number of nodes in the partition */ + uint32_t total_cpus; /* total number of cpus in the partition */ + bool root_only_flag;/* 1 if allocate/submit RPC can only be + issued by user root */ + uint16_t shared; /* 1 if job can share a node, + 2 if sharing required */ + bool state_up_flag; /* 1 if state is up, 0 if down */ + char *nodes; /* comma 
delimited list names of nodes */ + char *allow_groups; /* comma delimited list of groups, + * NULL indicates all */ + bool default_flag; +} slurm_conf_partition_t; + +typedef struct slurm_conf_downnodes { + char *nodenames; + char *reason; + char *state; +} slurm_conf_downnodes_t; + +/* + * slurm_conf_init - load the slurm configuration from a file. + * IN file_name - name of the slurm configuration file to be read + * If file_name is NULL, then this routine tries to use + * the value in the SLURM_CONF env variable. Failing that, + * it uses the compiled-in default file name. + * If the conf structures have already been initialized by a call to + * slurm_conf_init, any subsequent calls will do nothing until + * slurm_conf_destroy is called. + * RET SLURM_SUCCESS if conf file is initialized. If the slurm conf + * was already initialized, return SLURM_ERROR. + * NOTE: Caller must NOT be holding slurm_conf_lock(). */ -extern void init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr); +extern int slurm_conf_init(char *file_name); -/* - * free_slurm_conf - free all storage associated with a slurm_ctl_conf_t. - * IN/OUT ctl_conf_ptr - pointer to data structure to be freed +/* + * slurm_conf_reinit - reload the slurm configuration from a file. + * IN file_name - name of the slurm configuration file to be read + * If file_name is NULL, then this routine tries to use + * the value in the SLURM_CONF env variable. Failing that, + * it uses the compiled-in default file name. + * Unlike slurm_conf_init, slurm_conf_reinit will always reread the + * file and reinitialize the configuration structures. + * RET SLURM_SUCCESS if conf file is reinitialized, otherwise SLURM_ERROR. + * NOTE: Caller must NOT be holding slurm_conf_lock(). */ -extern void free_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr); +extern int slurm_conf_reinit(char *file_name); /* - * getnodename - equivalent to gethostname(), but return only the first - * component of the fully qualified name (e.g. 
"linux123.foo.bar" - * becomes "linux123") - * NOTE: NodeName in the config may be different from real hostname. - * Use get_conf_node_name() to get the former. + * NOTE: Caller must NOT be holding slurm_conf_lock(). */ -extern int getnodename (char *name, size_t len); +extern int slurm_conf_destroy(void); + +extern slurm_ctl_conf_t *slurm_conf_lock(void); + +extern void slurm_conf_unlock(void); + +/* + * Set "ptr_array" with the pointer to an array of pointers to + * slurm_conf_node_t structures. + * + * Return value is the length of the array. + */ +extern int slurm_conf_nodename_array(slurm_conf_node_t **ptr_array[]); + +/* + * Set "ptr_array" with the pointer to an array of pointers to + * slurm_conf_partition_t structures. + * + * Return value is the length of the array. + */ +extern int slurm_conf_partition_array(slurm_conf_partition_t **ptr_array[]); + +/* + * Set "ptr_array" with the pointer to an array of pointers to + * slurm_conf_downnodes_t structures. + * + * Return value is the length of the array. 
+ */ +extern int slurm_conf_downnodes_array(slurm_conf_downnodes_t **ptr_array[]); /* * get_conf_node_hostname - Return the NodeHostname for given NodeName */ -extern char *get_conf_node_hostname(char *node_name); +extern char *slurm_conf_get_hostname(const char *node_name); /* * get_conf_node_name - Return the NodeName for given NodeHostname */ -extern char *get_conf_node_name(char *node_hostname); +extern char *slurm_conf_get_nodename(const char *node_hostname); /* - * parse_config_spec - parse the overall configuration specifications, update - * values - * IN/OUT in_line - input line, parsed info overwritten with white-space - * IN ctl_conf_ptr - pointer to data structure to be updated - * RET 0 if no error, otherwise an error code - * - * NOTE: slurmctld and slurmd ports are built thus: - * if SlurmctldPort/SlurmdPort are set then - * get the port number based upon a look-up in /etc/services - * if the lookup fails then translate SlurmctldPort/SlurmdPort - * into a number - * These port numbers are overridden if set in the configuration file + * slurm_conf_get_port - Return the port for a given NodeName */ -extern int parse_config_spec (char *in_line, slurm_ctl_conf_t *ctl_conf_ptr); +extern uint16_t slurm_conf_get_port(const char *node_name); -/* - * read_slurm_conf_ctl - load the slurm configuration from the configured - * file. - * OUT ctl_conf_ptr - pointer to data structure to be filled - * IN slurmd_hosts - if true then build a list of hosts on which slurmd runs - * (only useful for "scontrol show daemons" command). Otherwise only - * record nodes in which NodeName and NodeHostname differ. - * RET 0 if no error, otherwise an error code +/* + * init_slurm_conf - initialize or re-initialize the slurm configuration + * values defaults (NULL or NO_VAL). Note that the configuration + * file pathname (slurm_conf) is not changed. 
+ * IN/OUT ctl_conf_ptr - pointer to data structure to be initialized */ -extern int read_slurm_conf_ctl (slurm_ctl_conf_t *ctl_conf_ptr, - bool slurmd_hosts); +extern void init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr); /* - * report_leftover - report any un-parsed (non-whitespace) characters on the - * configuration input line (we over-write parsed characters with whitespace). - * IN in_line - what is left of the configuration input line. - * IN line_num - line number of the configuration file. + * free_slurm_conf - free all storage associated with a slurm_ctl_conf_t. + * IN/OUT ctl_conf_ptr - pointer to data structure to be freed */ -extern void report_leftover (char *in_line, int line_num); +extern void free_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr); -/* validate configuration - * - * IN/OUT ctl_conf_ptr - a configuration as loaded by read_slurm_conf_ctl - * - * NOTE: default slurmctld and slurmd ports are built thus: - * if SLURMCTLD_PORT/SLURMD_PORT are set then - * get the port number based upon a look-up in /etc/services - * if the lookup fails then translate SLURMCTLD_PORT/SLURMD_PORT - * into a number - * These port numbers are overridden if set in the configuration file - * NOTE: a backup_controller or control_machine of "localhost" are over-written - * with this machine's name. - * NOTE: if backup_addr is NULL, it is over-written by backup_controller - * NOTE: if control_addr is NULL, it is over-written by control_machine +/* + * getnodename - equivalent to gethostname(), but return only the first + * component of the fully qualified name (e.g. "linux123.foo.bar" + * becomes "linux123") + * NOTE: NodeName in the config may be different from real hostname. + * Use get_conf_node_name() to get the former. 
*/ -extern void validate_config (slurm_ctl_conf_t *ctl_conf_ptr); +extern int getnodename (char *name, size_t len); + #endif /* !_READ_CONFIG_H */ diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index d80393b11aa..74b0b0a352b 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -67,10 +67,10 @@ #define MAX_RETRIES 3 /* STATIC VARIABLES */ -static pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER; +/* static pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER; */ static slurm_protocol_config_t proto_conf_default; static slurm_protocol_config_t *proto_conf = &proto_conf_default; -static slurm_ctl_conf_t slurmctld_conf; +/* static slurm_ctl_conf_t slurmctld_conf; */ /* STATIC FUNCTIONS */ static void _remap_slurmctld_errno(void); @@ -106,10 +106,7 @@ slurm_protocol_config_t *slurm_get_api_config() */ extern void slurm_api_set_conf_file(char *pathname) { - if (pathname == NULL) - return; - xfree(slurmctld_conf.slurm_conf); - slurmctld_conf.slurm_conf = xstrdup(pathname); + slurm_conf_reinit(pathname); return; } @@ -121,56 +118,40 @@ extern void slurm_api_set_conf_file(char *pathname) int slurm_api_set_default_config() { int rc = SLURM_SUCCESS; - struct stat config_stat; - static time_t last_config_update = (time_t) 0; - - slurm_mutex_lock(&config_lock); - config_stat.st_mtime = 0; - if (slurmctld_conf.slurm_conf - && (stat(slurmctld_conf.slurm_conf, &config_stat) < 0)) { - error("Can't stat %s: %m", slurmctld_conf.slurm_conf); + slurm_ctl_conf_t *conf; + + /*slurm_conf_init(NULL);*/ + conf = slurm_conf_lock(); + + if (conf->control_addr == NULL) { + error("Unable to establish controller machine"); rc = SLURM_ERROR; goto cleanup; } - - if (last_config_update - && (slurmctld_conf.slurm_conf - && (last_config_update == config_stat.st_mtime)) - && slurmctld_conf.control_addr - && slurmctld_conf.slurmctld_port) - goto cleanup; - - init_slurm_conf(&slurmctld_conf); - 
read_slurm_conf_ctl(&slurmctld_conf, false); - if (!config_stat.st_mtime) - stat(slurmctld_conf.slurm_conf, &config_stat); - last_config_update = config_stat.st_mtime; - - if ((slurmctld_conf.control_addr == NULL) || - (slurmctld_conf.slurmctld_port == 0)) { - error("Unable to establish control machine or port"); + if (conf->slurmctld_port == 0) { + error("Unable to establish controller port"); rc = SLURM_ERROR; goto cleanup; } slurm_set_addr(&proto_conf_default.primary_controller, - slurmctld_conf.slurmctld_port, - slurmctld_conf.control_addr); + conf->slurmctld_port, + conf->control_addr); if (proto_conf_default.primary_controller.sin_port == 0) { error("Unable to establish control machine address"); rc = SLURM_ERROR; goto cleanup; } - if (slurmctld_conf.backup_addr) { + if (conf->backup_addr) { slurm_set_addr(&proto_conf_default.secondary_controller, - slurmctld_conf.slurmctld_port, - slurmctld_conf.backup_addr); + conf->slurmctld_port, + conf->backup_addr); } proto_conf = &proto_conf_default; cleanup: - slurm_mutex_unlock(&config_lock); + slurm_conf_unlock(); return rc; } @@ -178,19 +159,16 @@ int slurm_api_set_default_config() * execute this only at program termination to free all memory */ void slurm_api_clear_config(void) { - slurm_mutex_lock(&config_lock); - slurmctld_conf.slurmd_port = 0; - free_slurm_conf(&slurmctld_conf); - slurm_mutex_unlock(&config_lock); + slurm_conf_destroy(); } /* update internal configuration data structure as needed. 
* exit with lock set */ -static inline void _lock_update_config() -{ - slurm_api_set_default_config(); - slurm_mutex_lock(&config_lock); -} +/* static inline void _lock_update_config() */ +/* { */ +/* slurm_api_set_default_config(); */ +/* slurm_mutex_lock(&config_lock); */ +/* } */ /* slurm_get_mpi_default * get default mpi value from slurmctld_conf object @@ -199,10 +177,11 @@ static inline void _lock_update_config() char *slurm_get_mpi_default(void) { char *mpi_default; + slurm_ctl_conf_t *conf; - _lock_update_config(); - mpi_default = xstrdup(slurmctld_conf.mpi_default); - slurm_mutex_unlock(&config_lock); + conf = slurm_conf_lock(); + mpi_default = xstrdup(conf->mpi_default); + slurm_conf_unlock(); return mpi_default; } @@ -213,10 +192,11 @@ char *slurm_get_mpi_default(void) char *slurm_get_plugin_dir(void) { char *plugin_dir; + slurm_ctl_conf_t *conf; - _lock_update_config(); - plugin_dir = xstrdup(slurmctld_conf.plugindir); - slurm_mutex_unlock(&config_lock); + conf = slurm_conf_lock(); + plugin_dir = xstrdup(conf->plugindir); + slurm_conf_unlock(); return plugin_dir; } @@ -227,10 +207,11 @@ char *slurm_get_plugin_dir(void) char *slurm_get_auth_type(void) { char *auth_type; + slurm_ctl_conf_t *conf; - _lock_update_config(); - auth_type = xstrdup(slurmctld_conf.authtype); - slurm_mutex_unlock(&config_lock); + conf = slurm_conf_lock(); + auth_type = xstrdup(conf->authtype); + slurm_conf_unlock(); return auth_type; } @@ -240,10 +221,11 @@ char *slurm_get_auth_type(void) extern uint16_t slurm_get_fast_schedule(void) { uint16_t fast_val; + slurm_ctl_conf_t *conf; - _lock_update_config(); - fast_val = slurmctld_conf.fast_schedule; - slurm_mutex_unlock(&config_lock); + conf = slurm_conf_lock(); + fast_val = conf->fast_schedule; + slurm_conf_unlock(); return fast_val; } @@ -253,11 +235,15 @@ extern uint16_t slurm_get_fast_schedule(void) */ extern int slurm_set_tree_width(uint16_t tree_width) { + slurm_ctl_conf_t *conf; + + conf = slurm_conf_lock(); if (tree_width == 
0) { error("can't have span count of 0"); + slurm_conf_unlock(); /* do not leak the conf lock on the error path */ return SLURM_ERROR; } - slurmctld_conf.tree_width = tree_width; + conf->tree_width = tree_width; + slurm_conf_unlock(); return SLURM_SUCCESS; } /* slurm_get_tree_width @@ -266,10 +252,11 @@ extern int slurm_set_tree_width(uint16_t tree_width) extern uint16_t slurm_get_tree_width(void) { uint16_t tree_width; + slurm_ctl_conf_t *conf; - _lock_update_config(); - tree_width = slurmctld_conf.tree_width; - slurm_mutex_unlock(&config_lock); + conf = slurm_conf_lock(); + tree_width = conf->tree_width; + slurm_conf_unlock(); return tree_width; } @@ -280,10 +267,12 @@ extern uint16_t slurm_get_tree_width(void) */ extern int slurm_set_auth_type(char *auth_type) { - _lock_update_config(); - xfree(slurmctld_conf.authtype); - slurmctld_conf.authtype = xstrdup(auth_type); - slurm_mutex_unlock(&config_lock); + slurm_ctl_conf_t *conf; + + conf = slurm_conf_lock(); + xfree(conf->authtype); + conf->authtype = xstrdup(auth_type); + slurm_conf_unlock(); return 0; } @@ -294,10 +283,11 @@ extern int slurm_set_auth_type(char *auth_type) char *slurm_get_jobacct_loc(void) { char *jobacct_loc; + slurm_ctl_conf_t *conf; - _lock_update_config(); - jobacct_loc = xstrdup(slurmctld_conf.job_acct_loc); - slurm_mutex_unlock(&config_lock); + conf = slurm_conf_lock(); + jobacct_loc = xstrdup(conf->job_acct_loc); + slurm_conf_unlock(); return jobacct_loc; } @@ -308,10 +298,11 @@ char *slurm_get_jobacct_loc(void) char *slurm_get_jobacct_parameters(void) { char *jobacct_parameters; + slurm_ctl_conf_t *conf; - _lock_update_config(); - jobacct_parameters = xstrdup(slurmctld_conf.job_acct_parameters); - slurm_mutex_unlock(&config_lock); + conf = slurm_conf_lock(); + jobacct_parameters = xstrdup(conf->job_acct_parameters); + slurm_conf_unlock(); return jobacct_parameters; } @@ -322,10 +313,11 @@ char *slurm_get_jobacct_parameters(void) char *slurm_get_jobacct_type(void) { char *jobacct_type; + 
slurm_ctl_conf_t *conf; - _lock_update_config(); - jobacct_type = 
xstrdup(slurmctld_conf.job_acct_type); - slurm_mutex_unlock(&config_lock); + conf = slurm_conf_lock(); + jobacct_type = xstrdup(conf->job_acct_type); + slurm_conf_unlock(); return jobacct_type; } @@ -336,10 +328,11 @@ char *slurm_get_jobacct_type(void) char *slurm_get_jobcomp_type(void) { char *jobcomp_type; + slurm_ctl_conf_t *conf; - _lock_update_config(); - jobcomp_type = xstrdup(slurmctld_conf.job_comp_type); - slurm_mutex_unlock(&config_lock); + conf = slurm_conf_lock(); + jobcomp_type = xstrdup(conf->job_comp_type); + slurm_conf_unlock(); return jobcomp_type; } @@ -350,10 +343,11 @@ char *slurm_get_jobcomp_type(void) char *slurm_get_proctrack_type(void) { char *proctrack_type; + slurm_ctl_conf_t *conf; - _lock_update_config(); - proctrack_type = xstrdup(slurmctld_conf.proctrack_type); - slurm_mutex_unlock(&config_lock); + conf = slurm_conf_lock(); + proctrack_type = xstrdup(conf->proctrack_type); + slurm_conf_unlock(); return proctrack_type; } @@ -364,10 +358,11 @@ char *slurm_get_proctrack_type(void) uint16_t slurm_get_slurmd_port(void) { uint16_t slurmd_port; + slurm_ctl_conf_t *conf; - _lock_update_config(); - slurmd_port = slurmctld_conf.slurmd_port; - slurm_mutex_unlock(&config_lock); + conf = slurm_conf_lock(); + slurmd_port = conf->slurmd_port; + slurm_conf_unlock(); return slurmd_port; } @@ -378,10 +373,11 @@ uint16_t slurm_get_slurmd_port(void) uint32_t slurm_get_slurm_user_id(void) { uint32_t slurm_uid; + slurm_ctl_conf_t *conf; - _lock_update_config(); - slurm_uid = slurmctld_conf.slurm_user_id; - slurm_mutex_unlock(&config_lock); + conf = slurm_conf_lock(); + slurm_uid = conf->slurm_user_id; + slurm_conf_unlock(); return slurm_uid; } @@ -392,10 +388,11 @@ uint32_t slurm_get_slurm_user_id(void) char *slurm_get_sched_type(void) { char *sched_type; + slurm_ctl_conf_t *conf; - _lock_update_config(); - sched_type = xstrdup(slurmctld_conf.schedtype); - slurm_mutex_unlock(&config_lock); + conf = slurm_conf_lock(); + sched_type = xstrdup(conf->schedtype); 
+ slurm_conf_unlock(); return sched_type; } @@ -406,10 +403,11 @@ char *slurm_get_sched_type(void) char *slurm_get_select_type(void) { char *select_type; + slurm_ctl_conf_t *conf; - _lock_update_config(); - select_type = xstrdup(slurmctld_conf.select_type); - slurm_mutex_unlock(&config_lock); + conf = slurm_conf_lock(); + select_type = xstrdup(conf->select_type); + slurm_conf_unlock(); return select_type; } @@ -420,10 +418,11 @@ char *slurm_get_select_type(void) char *slurm_get_switch_type(void) { char *switch_type; + slurm_ctl_conf_t *conf; - _lock_update_config(); - switch_type = xstrdup(slurmctld_conf.switch_type); - slurm_mutex_unlock(&config_lock); + conf = slurm_conf_lock(); + switch_type = xstrdup(conf->switch_type); + slurm_conf_unlock(); return switch_type; } @@ -434,10 +433,11 @@ char *slurm_get_switch_type(void) uint16_t slurm_get_wait_time(void) { uint16_t wait_time; + slurm_ctl_conf_t *conf; - _lock_update_config(); - wait_time = slurmctld_conf.wait_time; - slurm_mutex_unlock(&config_lock); + conf = slurm_conf_lock(); + wait_time = conf->wait_time; + slurm_conf_unlock(); return wait_time; } @@ -448,10 +448,11 @@ uint16_t slurm_get_wait_time(void) char *slurm_get_srun_prolog(void) { char *prolog; + slurm_ctl_conf_t *conf; - _lock_update_config(); - prolog = xstrdup(slurmctld_conf.srun_prolog); - slurm_mutex_unlock(&config_lock); + conf = slurm_conf_lock(); + prolog = xstrdup(conf->srun_prolog); + slurm_conf_unlock(); return prolog; } @@ -462,10 +463,11 @@ char *slurm_get_srun_prolog(void) char *slurm_get_srun_epilog(void) { char *epilog; + slurm_ctl_conf_t *conf; - _lock_update_config(); - epilog = xstrdup(slurmctld_conf.srun_epilog); - slurm_mutex_unlock(&config_lock); + conf = slurm_conf_lock(); + epilog = xstrdup(conf->srun_epilog); + slurm_conf_unlock(); return epilog; } @@ -474,10 +476,11 @@ char *slurm_get_srun_epilog(void) char *slurm_get_task_epilog(void) { char *task_epilog; + slurm_ctl_conf_t *conf; - _lock_update_config(); - task_epilog = 
xstrdup(slurmctld_conf.task_epilog); - slurm_mutex_unlock(&config_lock); + conf = slurm_conf_lock(); + task_epilog = xstrdup(conf->task_epilog); + slurm_conf_unlock(); return task_epilog; } @@ -486,10 +489,11 @@ char *slurm_get_task_epilog(void) char *slurm_get_task_prolog(void) { char *task_prolog; - - _lock_update_config(); - task_prolog = xstrdup(slurmctld_conf.task_prolog); - slurm_mutex_unlock(&config_lock); + slurm_ctl_conf_t *conf; + + conf = slurm_conf_lock(); + task_prolog = xstrdup(conf->task_prolog); + slurm_conf_unlock(); return task_prolog; } @@ -498,10 +502,11 @@ char *slurm_get_task_prolog(void) char *slurm_get_task_plugin(void) { char *task_plugin; + slurm_ctl_conf_t *conf; - _lock_update_config(); - task_plugin = xstrdup(slurmctld_conf.task_plugin); - slurm_mutex_unlock(&config_lock); + conf = slurm_conf_lock(); + task_plugin = xstrdup(conf->task_plugin); + slurm_conf_unlock(); return task_plugin; } /* Change general slurm communication errors to slurmctld specific errors */ @@ -599,6 +604,7 @@ slurm_fd slurm_open_msg_conn(slurm_addr * slurm_address) slurm_fd slurm_open_controller_conn() { slurm_fd fd; + slurm_ctl_conf_t *conf; if (slurm_api_set_default_config() < 0) return SLURM_FAILURE; @@ -608,8 +614,12 @@ slurm_fd slurm_open_controller_conn() debug("Failed to contact primary controller: %m"); - if (!slurmctld_conf.backup_controller) + conf = slurm_conf_lock(); + if (!conf->backup_controller) { + slurm_conf_unlock(); goto fail; + } + slurm_conf_unlock(); if ((fd = slurm_open_msg_conn(&proto_conf->secondary_controller)) >= 0) return fd; @@ -1334,7 +1344,7 @@ _send_and_recv_msg(slurm_fd fd, slurm_msg_t *req, timeout = SLURM_MESSAGE_TIMEOUT_MSEC_STATIC; if(req->forward.cnt>0) { - steps = req->forward.cnt/slurmctld_conf.tree_width; + steps = req->forward.cnt/slurm_get_tree_width(); steps += 1; timeout += (req->forward.timeout*steps); } @@ -1375,6 +1385,9 @@ int slurm_send_recv_controller_msg(slurm_msg_t *req, slurm_msg_t *resp) time_t start_time = 
time(NULL); List ret_list = NULL; int retry = 1; + slurm_ctl_conf_t *conf; + bool backup_controller_flag; + uint16_t slurmctld_timeout; if ((fd = slurm_open_controller_conn()) < 0) { rc = SLURM_SOCKET_ERROR; @@ -1385,6 +1398,11 @@ int slurm_send_recv_controller_msg(slurm_msg_t *req, slurm_msg_t *resp) req->orig_addr.sin_addr.s_addr = 0; //info("here 2"); + conf = slurm_conf_lock(); + backup_controller_flag = conf->backup_controller ? true : false; + slurmctld_timeout = conf->slurmctld_timeout; + slurm_conf_unlock(); + while(retry) { retry = 0; /* If the backup controller is in the process of assuming @@ -1410,10 +1428,9 @@ int slurm_send_recv_controller_msg(slurm_msg_t *req, slurm_msg_t *resp) ((((return_code_msg_t *) resp->data)->return_code) == ESLURM_IN_STANDBY_MODE) && (req->msg_type != MESSAGE_NODE_REGISTRATION_STATUS) && - (slurmctld_conf.backup_controller) && + (backup_controller_flag) && (difftime(time(NULL), start_time) < - (slurmctld_conf.slurmctld_timeout + - (slurmctld_conf.slurmctld_timeout / 2)))) { + (slurmctld_timeout + (slurmctld_timeout / 2)))) { debug("Neither primary nor backup controller " "responding, sleep and retry"); slurm_free_return_code_msg(resp->data); @@ -1605,7 +1622,7 @@ List slurm_send_recv_rc_packed_msg(slurm_msg_t *msg, int timeout) timeout = SLURM_MESSAGE_TIMEOUT_MSEC_STATIC; if(msg->forward.cnt>0) { - steps = msg->forward.cnt/slurmctld_conf.tree_width; + steps = msg->forward.cnt/slurm_get_tree_width(); steps += 1; timeout += (msg->forward.timeout*steps); } @@ -1753,18 +1770,21 @@ int slurm_send_recv_controller_rc_msg(slurm_msg_t *req, int *rc) extern int *set_span(int total) { - int *span = xmalloc(sizeof(int)*slurmctld_conf.tree_width); + int *span; int left = total; int i = 0; - //info("span count = %d",slurmctld_conf.tree_width); - memset(span,0,slurmctld_conf.tree_width); - if(total <= slurmctld_conf.tree_width) { + uint16_t tree_width = slurm_get_tree_width(); + + span = xmalloc(sizeof(int) * tree_width); + //info("span 
count = %d", tree_width); + memset(span, 0, tree_width); + if(total <= tree_width) { return span; } - while(left>0) { - for(i=0; i<slurmctld_conf.tree_width; i++) { - if((slurmctld_conf.tree_width-i)>=left) { + while(left > 0) { + for(i = 0; i < tree_width; i++) { + if((tree_width-i) >= left) { if(span[i] == 0) { left = 0; break; @@ -1773,13 +1793,13 @@ extern int *set_span(int total) left = 0; break; } - } else if(left<=slurmctld_conf.tree_width) { - span[i]+=left; + } else if(left <= tree_width) { + span[i] += left; left = 0; break; } - span[i] += slurmctld_conf.tree_width; - left -= slurmctld_conf.tree_width; + span[i] += tree_width; + left -= tree_width; } } return span; diff --git a/src/common/xstring.c b/src/common/xstring.c index defd89accad..ecda3b74c2b 100644 --- a/src/common/xstring.c +++ b/src/common/xstring.c @@ -65,6 +65,7 @@ strong_alias(_xstrftimecat, slurm_xstrftimecat); strong_alias(_xstrfmtcat, slurm_xstrfmtcat); strong_alias(_xmemcat, slurm_xmemcat); strong_alias(xstrdup, slurm_xstrdup); +strong_alias(xstrndup, slurm_xstrndup); strong_alias(xbasename, slurm_xbasename); /* @@ -252,3 +253,29 @@ char * xstrdup(const char *str) return result; } + +/* + * Duplicate at most "n" characters of a string. 
+ * str (IN) string to duplicate + * n (IN) + * RETURN copy of string + */ +char * xstrndup(const char *str, size_t n) +{ + size_t siz, + rsiz; + char *result; + + if (str == NULL) + return NULL; + + siz = strlen(str); + if (n < siz) + siz = n; + siz++; + result = (char *)xmalloc(siz); + + rsiz = strlcpy(result, str, siz); + + return result; +} diff --git a/src/common/xstring.h b/src/common/xstring.h index d34f730d10f..71091c4ee8f 100644 --- a/src/common/xstring.h +++ b/src/common/xstring.h @@ -86,6 +86,11 @@ void _xmemcat(char **str, char *start, char *end); */ char *xstrdup(const char *str); +/* +** strndup which uses xmalloc routines +*/ +char *xstrndup(const char *str, size_t n); + /* ** replacement for libc basename */ diff --git a/src/plugins/switch/federation/federation.c b/src/plugins/switch/federation/federation.c index b4643699ab7..9f8ca04c962 100644 --- a/src/plugins/switch/federation/federation.c +++ b/src/plugins/switch/federation/federation.c @@ -42,6 +42,7 @@ #include <sys/stat.h> #include <slurm/slurm_errno.h> #include "src/common/slurm_xlator.h" +#include "src/common/read_config.h" #include "src/plugins/switch/federation/federation.h" #include "src/plugins/switch/federation/federation_keys.h" @@ -170,8 +171,6 @@ static fed_status_t fed_status_tab[]= { }; static void _hash_rebuild(fed_libstate_t *state); -static void _strip_cr_nl(char *line); -static void _strip_comments(char *line); static int _set_up_adapter(fed_adapter_t *fed_adapter, char *adapter_name); static int _parse_fed_file(hostlist_t *adapter_list); static void _init_adapter_cache(void); @@ -417,47 +416,6 @@ _get_lid_from_adapter(char *adapter_name) } -/* Explicitly strip out carriage-return and new-line */ -static void _strip_cr_nl(char *line) -{ - int len = strlen(line); - int i; - - for(i=0;i<len;i++) { - if(line[i]=='\r' || line[i]=='\n') { - line[i] = '\0'; - return; - } - } -} - -/* Strip comments from a line by terminating the string - * where the comment begins. 
- * Everything after a non-escaped "#" is a comment. - */ -static void _strip_comments(char *line) -{ - int i, j; - int len = strlen(line); - - /* replace comment flag "#" with an end of string (NULL) */ - /* escape sequence "\#" translated to "#" */ - for (i = 0; i < len; i++) { - if (line[i] == (char) NULL) - break; - if (line[i] != '#') - continue; - if ((i > 0) && (line[i - 1] == '\\')) { - for (j = i; j < len; j++) { - line[j - 1] = line[j]; - } - continue; - } - line[i] = (char) NULL; - break; - } -} - static int _set_up_adapter(fed_adapter_t *fed_adapter, char *adapter_name) { ADAPTER_RESOURCES res; @@ -534,52 +492,26 @@ static char *_get_fed_conf(void) static int _parse_fed_file(hostlist_t *adapter_list) { - FILE *fed_spec_file; /* pointer to input data file */ - int line_num; /* line number in input file */ - char in_line[BUFSIZE]; /* input line */ - char *adapter_name = NULL; - int i, j; - int error_code; + s_p_options_t options[] = {{"AdapterName", S_P_STRING}, {NULL}}; + s_p_hashtbl_t *tbl; + char *adapter_name; debug("Reading the federation.conf file"); if (!fed_conf) fed_conf = _get_fed_conf(); - fed_spec_file = fopen(fed_conf, "r"); - if (fed_spec_file == NULL) - fatal("_parse_fed_file error opening file %s, %m", - fed_conf); - line_num = 0; - while (fgets(in_line, BUFSIZE, fed_spec_file) != NULL) { - line_num++; - _strip_cr_nl(in_line); - _strip_comments(in_line); - if (strlen(in_line) >= (BUFSIZE - 1)) { - error("_parse_fed_file line %d, of input file %s " - "too long", line_num, fed_conf); - fclose(fed_spec_file); - xfree(fed_conf); - return E2BIG; - } - /* parse what is left, non-comments */ - /* partition adapter names */ - error_code = slurm_parser(in_line, - "AdapterName=", 's', &adapter_name, - "END"); - if(error_code == SLURM_ERROR) - error("There was an error code from slurm_parser"); - if (adapter_name) { - int rc; - rc = hostlist_push(*adapter_list, adapter_name); - if (rc == 0) - error("Adapter name format is incorrect."); - adapter_name 
= NULL; - } - /* report any leftover strings on input line */ - report_leftover(in_line, line_num); + tbl = s_p_hashtbl_create(options); + s_p_parse_file(tbl, fed_conf); + + if (s_p_get_string(&adapter_name, "AdapterName", tbl)) { + int rc; + rc = hostlist_push(*adapter_list, adapter_name); + if (rc == 0) + error("Adapter name format is incorrect."); + xfree(adapter_name); } - fclose(fed_spec_file); - xfree(fed_conf); + + s_p_hashtbl_destroy(tbl); return SLURM_SUCCESS; } diff --git a/src/scontrol/scontrol.c b/src/scontrol/scontrol.c index fe4d8d64aa1..42963c0b8d6 100644 --- a/src/scontrol/scontrol.c +++ b/src/scontrol/scontrol.c @@ -94,7 +94,7 @@ static int _load_nodes (node_info_msg_t ** node_buffer_pptr, uint16_t show_flags); static int _load_partitions (partition_info_msg_t **part_info_pptr); static void _pid_info(pid_t job_pid); -static void _ping_slurmctld(slurm_ctl_conf_info_msg_t *slurm_ctl_conf_ptr); +static void _ping_slurmctld(char *control_machine, char *backup_controller); static void _print_completing (void); static void _print_completing_job(job_info_t *job_ptr, node_info_msg_t *node_info_msg); @@ -616,28 +616,33 @@ _print_config (char *config_param) fprintf(stdout, "\n"); } if (slurm_ctl_conf_ptr) - _ping_slurmctld (slurm_ctl_conf_ptr); + _ping_slurmctld (slurm_ctl_conf_ptr->control_machine, + slurm_ctl_conf_ptr->backup_controller); } /* Print state of controllers only */ static void _print_ping (void) { - static slurm_ctl_conf_info_msg_t *slurm_conf_ptr = NULL; + slurm_ctl_conf_info_msg_t *conf; + char *primary, *secondary; - if (slurm_conf_ptr == NULL) { - slurm_conf_ptr = xmalloc(sizeof(slurm_ctl_conf_info_msg_t)); - init_slurm_conf(slurm_conf_ptr); - read_slurm_conf_ctl(slurm_conf_ptr, false); - validate_config(slurm_conf_ptr); - } + slurm_conf_init(NULL); + + conf = slurm_conf_lock(); + primary = xstrdup(conf->control_machine); + secondary = xstrdup(conf->backup_controller); + slurm_conf_unlock(); + + _ping_slurmctld (primary, secondary); - 
_ping_slurmctld (slurm_conf_ptr); + xfree(primary); + xfree(secondary); } /* Report if slurmctld daemons are responding */ static void -_ping_slurmctld(slurm_ctl_conf_info_msg_t *slurm_ctl_conf_ptr) +_ping_slurmctld(char *control_machine, char *backup_controller) { static char *state[2] = { "UP", "DOWN" }; int primary = 1, secondary = 1; @@ -647,16 +652,14 @@ _ping_slurmctld(slurm_ctl_conf_info_msg_t *slurm_ctl_conf_ptr) if (slurm_ping(2) == SLURM_SUCCESS) secondary = 0; fprintf(stdout, "Slurmctld(primary/backup) "); - if (slurm_ctl_conf_ptr) { + if (control_machine || backup_controller) { fprintf(stdout, "at "); - if (slurm_ctl_conf_ptr->control_machine) - fprintf(stdout, "%s/", - slurm_ctl_conf_ptr->control_machine); + if (control_machine) + fprintf(stdout, "%s/", control_machine); else fprintf(stdout, "(NULL)/"); - if (slurm_ctl_conf_ptr->backup_controller) - fprintf(stdout, "%s ", - slurm_ctl_conf_ptr->backup_controller); + if (backup_controller) + fprintf(stdout, "%s ", backup_controller); else fprintf(stdout, "(NULL) "); } @@ -670,31 +673,32 @@ _ping_slurmctld(slurm_ctl_conf_info_msg_t *slurm_ctl_conf_ptr) static void _print_daemons (void) { - slurm_ctl_conf_info_msg_t conf; + slurm_ctl_conf_info_msg_t *conf; char me[MAX_SLURM_NAME], *b, *c, *n; int actld = 0, ctld = 0, d = 0; char daemon_list[] = "slurmctld slurmd"; - bzero(&conf, sizeof(conf)); - if (read_slurm_conf_ctl(&conf, true) != SLURM_SUCCESS) - return; + slurm_conf_init(NULL); + conf = slurm_conf_lock(); + getnodename(me, MAX_SLURM_NAME); - if ((b = conf.backup_controller)) { + if ((b = conf->backup_controller)) { if ((strcmp(b, me) == 0) || (strcasecmp(b, "localhost") == 0)) ctld = 1; } - if ((c = conf.control_machine)) { + if ((c = conf->control_machine)) { actld = 1; if ((strcmp(c, me) == 0) || (strcasecmp(c, "localhost") == 0)) ctld = 1; } - if ((n = get_conf_node_name(me))) { + slurm_conf_unlock(); + + if ((n = slurm_conf_get_nodename(me))) { d = 1; xfree(n); } - free_slurm_conf(&conf); 
strcpy(daemon_list, ""); if (actld && ctld) diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index 2830d24e3b6..11760e1d989 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -106,7 +106,6 @@ log_options_t log_opts = LOG_OPTS_INITIALIZER; /* Global variables */ -slurm_ctl_conf_t slurmctld_conf; slurmctld_config_t slurmctld_config; int bg_recover = DEFAULT_RECOVER; @@ -119,6 +118,8 @@ static char node_name[MAX_SLURM_NAME]; static int recover = DEFAULT_RECOVER; static pthread_cond_t server_thread_cond = PTHREAD_COND_INITIALIZER; static pid_t slurmctld_pid; +static char *slurm_conf_filename; + /* * Static list of signals to block in this process * *Must be zero-terminated* @@ -134,8 +135,7 @@ inline static void _free_server_thread(void); static void _init_config(void); static void _init_pidfile(void); static void _kill_old_slurmctld(void); -static void _parse_commandline(int argc, char *argv[], - slurm_ctl_conf_t *); +static void _parse_commandline(int argc, char *argv[]); inline static int _report_locks_set(void); static void * _service_connection(void *arg); static int _shutdown_backup_controller(int wait_time); @@ -162,14 +162,10 @@ int main(int argc, char *argv[]) _init_config(); log_init(argv[0], log_opts, LOG_DAEMON, NULL); slurmctld_pid = getpid(); - _parse_commandline(argc, argv, &slurmctld_conf); + _parse_commandline(argc, argv); init_locks(); - slurm_api_set_conf_file(slurmctld_conf.slurm_conf); + slurm_conf_reinit(slurm_conf_filename); - /* Get SlurmctldPidFile for _kill_old_slurmctld */ - if ((error_code = read_slurm_conf_ctl (&slurmctld_conf, false))) - fatal("read_slurm_conf_ctl reading %s: %s", - slurmctld_conf.slurm_conf, slurm_strerror(error_code)); update_logging(); _kill_old_slurmctld(); @@ -358,7 +354,7 @@ int main(int argc, char *argv[]) /* purge remaining data structures */ slurm_cred_ctx_destroy(slurmctld_config.cred_ctx); - free_slurm_conf(&slurmctld_conf); + slurm_conf_destroy(); 
slurm_api_clear_config(); sleep(1); #endif @@ -918,8 +914,7 @@ extern int optind, opterr, optopt; * IN argv - the command line arguments * IN/OUT conf_ptr - pointer to current configuration, update as needed */ -static void _parse_commandline(int argc, char *argv[], - slurm_ctl_conf_t * conf_ptr) +static void _parse_commandline(int argc, char *argv[]) { int c = 0; @@ -937,7 +932,7 @@ static void _parse_commandline(int argc, char *argv[], daemonize = 0; break; case 'f': - slurmctld_conf.slurm_conf = xstrdup(optarg); + slurm_conf_filename = xstrdup(optarg); break; case 'h': _usage(argv[0]); diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index ab2faf3e7b1..477b43fbfda 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -3556,6 +3556,7 @@ static void _get_batch_job_dir_ids(List batch_dirs) uint32_t *job_id_ptr; char *endptr; + xassert(slurmctld_conf.state_save_location); f_dir = opendir(slurmctld_conf.state_save_location); if (!f_dir) { error("opendir(%s): %m", diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index b77c968cf5a..6eb3077b9dd 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -61,8 +61,6 @@ List config_list = NULL; /* list of config_record entries */ struct node_record *node_record_table_ptr = NULL; /* node records */ struct node_record **node_hash_table = NULL; /* node_record hash table */ -struct config_record default_config_record; -struct node_record default_node_record; time_t last_bitmap_update = (time_t) NULL; /* time of last node creation * or deletion */ time_t last_node_update = (time_t) NULL; /* time of last update to @@ -124,7 +122,6 @@ char * bitmap2node_name (bitstr_t *bitmap) * slurm.conf file and typically describes the configuration of a * large number of nodes * RET pointer to the config_record - * global: default_config_record - default configuration values * NOTE: memory allocated will remain in existence until * _delete_config_record() is called to delete all 
configuration records */ @@ -136,18 +133,9 @@ struct config_record * create_config_record (void) config_ptr = (struct config_record *) xmalloc (sizeof (struct config_record)); - /* set default values */ - config_ptr->cpus = default_config_record.cpus; - config_ptr->real_memory = default_config_record.real_memory; - config_ptr->tmp_disk = default_config_record.tmp_disk; - config_ptr->weight = default_config_record.weight; config_ptr->nodes = NULL; config_ptr->node_bitmap = NULL; xassert (config_ptr->magic = CONFIG_MAGIC); /* set value */ - if (default_config_record.feature) - config_ptr->feature = xstrdup(default_config_record.feature); - else - config_ptr->feature = NULL; if (list_append(config_list, config_ptr) == NULL) fatal ("create_config_record: unable to allocate memory"); @@ -161,9 +149,6 @@ struct config_record * create_config_record (void) * IN config_ptr - pointer to node's configuration information * IN node_name - name of the node * RET pointer to the record or NULL if error - * global: default_node_record - default node values - * NOTE: the record's values are initialized to those of default_node_record, - * node_name and config_ptr's cpus, real_memory, and tmp_disk values * NOTE: allocates memory at node_record_table_ptr that must be xfreed when * the global node table is no longer required */ @@ -194,9 +179,7 @@ create_node_record (struct config_record *config_ptr, char *node_name) node_ptr = node_record_table_ptr + (node_record_count++); strcpy (node_ptr->name, node_name); - node_ptr->node_state = default_node_record.node_state; - node_ptr->last_response = default_node_record.last_response; - node_ptr->port = default_node_record.port; + node_ptr->last_response = (time_t)0; node_ptr->config_ptr = config_ptr; node_ptr->part_cnt = 0; node_ptr->part_pptr = NULL; @@ -514,8 +497,6 @@ static int _hash_index (char *name) * entries. 
* RET 0 if no error, otherwise an error code * global: node_record_table_ptr - pointer to global node table - * default_node_record - default values for node records - * default_config_record - default values for configuration records * node_hash_table - table of hash indecies * last_node_update - time of last node table update */ @@ -527,23 +508,6 @@ int init_node_conf (void) xfree(node_record_table_ptr); xfree(node_hash_table); - strcpy (default_node_record.name, "DEFAULT"); - default_node_record.node_state = NODE_STATE_UNKNOWN; - default_node_record.last_response = (time_t) 0; - default_node_record.cpus = 1; - default_node_record.real_memory = 1; - default_node_record.tmp_disk = 1; - default_node_record.config_ptr = NULL; - default_node_record.part_cnt = 0; - default_node_record.part_pptr = NULL; - default_config_record.cpus = 1; - default_config_record.real_memory = 1; - default_config_record.tmp_disk = 1; - default_config_record.weight = 1; - xfree(default_config_record.feature); - xfree(default_config_record.nodes); - FREE_NULL_BITMAP (default_config_record.node_bitmap); - if (config_list) /* delete defunct configuration entries */ (void) _delete_config_record (); else { diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index f3fc1963ca0..ec09b82f4c2 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -278,83 +278,72 @@ void slurmctld_req (slurm_msg_t * msg) */ void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr) { + slurm_ctl_conf_t *conf = slurm_conf_lock(); + conf_ptr->last_update = time(NULL); - conf_ptr->authtype = xstrdup(slurmctld_conf.authtype); - conf_ptr->backup_addr = xstrdup(slurmctld_conf.backup_addr); - conf_ptr->backup_controller = xstrdup(slurmctld_conf. 
- backup_controller); - conf_ptr->cache_groups = slurmctld_conf.cache_groups; - conf_ptr->checkpoint_type = xstrdup(slurmctld_conf.checkpoint_type); - conf_ptr->control_addr = xstrdup(slurmctld_conf.control_addr); - conf_ptr->control_machine = xstrdup(slurmctld_conf. - control_machine); - conf_ptr->epilog = xstrdup(slurmctld_conf.epilog); - conf_ptr->fast_schedule = slurmctld_conf.fast_schedule; - conf_ptr->first_job_id = slurmctld_conf.first_job_id; - conf_ptr->inactive_limit = slurmctld_conf.inactive_limit; - conf_ptr->job_acct_loc = xstrdup(slurmctld_conf.job_acct_loc); - conf_ptr->job_acct_parameters = xstrdup(slurmctld_conf. - job_acct_parameters); - conf_ptr->job_acct_type = xstrdup(slurmctld_conf. - job_acct_type); - conf_ptr->job_comp_loc = xstrdup(slurmctld_conf.job_comp_loc); - conf_ptr->job_comp_type = xstrdup(slurmctld_conf. - job_comp_type); - conf_ptr->job_credential_private_key = xstrdup(slurmctld_conf. + conf_ptr->authtype = xstrdup(conf->authtype); + conf_ptr->backup_addr = xstrdup(conf->backup_addr); + conf_ptr->backup_controller = xstrdup(conf->backup_controller); + conf_ptr->cache_groups = conf->cache_groups; + conf_ptr->checkpoint_type = xstrdup(conf->checkpoint_type); + conf_ptr->control_addr = xstrdup(conf->control_addr); + conf_ptr->control_machine = xstrdup(conf->control_machine); + conf_ptr->epilog = xstrdup(conf->epilog); + conf_ptr->fast_schedule = conf->fast_schedule; + conf_ptr->first_job_id = conf->first_job_id; + conf_ptr->inactive_limit = conf->inactive_limit; + conf_ptr->job_acct_loc = xstrdup(conf->job_acct_loc); + conf_ptr->job_acct_parameters = xstrdup(conf->job_acct_parameters); + conf_ptr->job_acct_type = xstrdup(conf->job_acct_type); + conf_ptr->job_comp_loc = xstrdup(conf->job_comp_loc); + conf_ptr->job_comp_type = xstrdup(conf->job_comp_type); + conf_ptr->job_credential_private_key = xstrdup(conf-> job_credential_private_key); - conf_ptr->job_credential_public_certificate = xstrdup(slurmctld_conf. 
+ conf_ptr->job_credential_public_certificate = xstrdup(conf-> job_credential_public_certificate); - conf_ptr->kill_wait = slurmctld_conf.kill_wait; - conf_ptr->max_job_cnt = slurmctld_conf.max_job_cnt; - conf_ptr->min_job_age = slurmctld_conf.min_job_age; - conf_ptr->mpi_default = xstrdup(slurmctld_conf.mpi_default); - conf_ptr->plugindir = xstrdup(slurmctld_conf.plugindir); - conf_ptr->proctrack_type = xstrdup(slurmctld_conf.proctrack_type); - conf_ptr->prolog = xstrdup(slurmctld_conf.prolog); - conf_ptr->propagate_rlimits = xstrdup(slurmctld_conf. - propagate_rlimits); - conf_ptr->propagate_rlimits_except = xstrdup(slurmctld_conf. - propagate_rlimits_except); - conf_ptr->ret2service = slurmctld_conf.ret2service; - conf_ptr->schedauth = xstrdup(slurmctld_conf.schedauth); - conf_ptr->schedport = slurmctld_conf.schedport; - conf_ptr->schedrootfltr = slurmctld_conf.schedrootfltr; - conf_ptr->schedtype = xstrdup(slurmctld_conf.schedtype); - conf_ptr->select_type = xstrdup(slurmctld_conf.select_type); - conf_ptr->slurm_user_id = slurmctld_conf.slurm_user_id; - conf_ptr->slurm_user_name = xstrdup(slurmctld_conf. - slurm_user_name); - conf_ptr->slurmctld_debug = slurmctld_conf.slurmctld_debug; - conf_ptr->slurmctld_logfile = xstrdup(slurmctld_conf. - slurmctld_logfile); - conf_ptr->slurmctld_pidfile = xstrdup(slurmctld_conf. - slurmctld_pidfile); - conf_ptr->slurmctld_port = slurmctld_conf.slurmctld_port; - conf_ptr->slurmctld_timeout = slurmctld_conf.slurmctld_timeout; - conf_ptr->slurmd_debug = slurmctld_conf.slurmd_debug; - conf_ptr->slurmd_logfile = xstrdup(slurmctld_conf. - slurmd_logfile); - conf_ptr->slurmd_pidfile = xstrdup(slurmctld_conf. - slurmd_pidfile); -#ifndef MULTIPLE_SLURMD - conf_ptr->slurmd_port = slurmctld_conf.slurmd_port; -#endif - conf_ptr->slurmd_spooldir = xstrdup(slurmctld_conf. 
- slurmd_spooldir); - conf_ptr->slurmd_timeout = slurmctld_conf.slurmd_timeout; - conf_ptr->slurm_conf = xstrdup(slurmctld_conf.slurm_conf); - conf_ptr->state_save_location = xstrdup(slurmctld_conf. - state_save_location); - conf_ptr->switch_type = xstrdup(slurmctld_conf.switch_type); - conf_ptr->task_epilog = xstrdup(slurmctld_conf.task_epilog); - conf_ptr->task_prolog = xstrdup(slurmctld_conf.task_prolog); - conf_ptr->task_plugin = xstrdup(slurmctld_conf.task_plugin); - conf_ptr->tmp_fs = xstrdup(slurmctld_conf.tmp_fs); - conf_ptr->wait_time = slurmctld_conf.wait_time; - conf_ptr->srun_prolog = xstrdup(slurmctld_conf.srun_prolog); - conf_ptr->srun_epilog = xstrdup(slurmctld_conf.srun_epilog); - conf_ptr->node_prefix = xstrdup(slurmctld_conf.node_prefix); - conf_ptr->tree_width = slurmctld_conf.tree_width; + conf_ptr->kill_wait = conf->kill_wait; + conf_ptr->max_job_cnt = conf->max_job_cnt; + conf_ptr->min_job_age = conf->min_job_age; + conf_ptr->mpi_default = xstrdup(conf->mpi_default); + conf_ptr->plugindir = xstrdup(conf->plugindir); + conf_ptr->proctrack_type = xstrdup(conf->proctrack_type); + conf_ptr->prolog = xstrdup(conf->prolog); + conf_ptr->propagate_rlimits = xstrdup(conf->propagate_rlimits); + conf_ptr->propagate_rlimits_except = xstrdup(conf-> + propagate_rlimits_except); + conf_ptr->ret2service = conf->ret2service; + conf_ptr->schedauth = xstrdup(conf->schedauth); + conf_ptr->schedport = conf->schedport; + conf_ptr->schedrootfltr = conf->schedrootfltr; + conf_ptr->schedtype = xstrdup(conf->schedtype); + conf_ptr->select_type = xstrdup(conf->select_type); + conf_ptr->slurm_user_id = conf->slurm_user_id; + conf_ptr->slurm_user_name = xstrdup(conf->slurm_user_name); + conf_ptr->slurmctld_debug = conf->slurmctld_debug; + conf_ptr->slurmctld_logfile = xstrdup(conf->slurmctld_logfile); + conf_ptr->slurmctld_pidfile = xstrdup(conf->slurmctld_pidfile); + conf_ptr->slurmctld_port = conf->slurmctld_port; + conf_ptr->slurmctld_timeout = conf->slurmctld_timeout; 
+ conf_ptr->slurmd_debug = conf->slurmd_debug; + conf_ptr->slurmd_logfile = xstrdup(conf->slurmd_logfile); + conf_ptr->slurmd_pidfile = xstrdup(conf->slurmd_pidfile); + conf_ptr->slurmd_port = conf->slurmd_port; + conf_ptr->slurmd_spooldir = xstrdup(conf->slurmd_spooldir); + conf_ptr->slurmd_timeout = conf->slurmd_timeout; + conf_ptr->slurm_conf = xstrdup(conf->slurm_conf); + conf_ptr->state_save_location = xstrdup(conf->state_save_location); + conf_ptr->switch_type = xstrdup(conf->switch_type); + conf_ptr->task_epilog = xstrdup(conf->task_epilog); + conf_ptr->task_prolog = xstrdup(conf->task_prolog); + conf_ptr->task_plugin = xstrdup(conf->task_plugin); + conf_ptr->tmp_fs = xstrdup(conf->tmp_fs); + conf_ptr->wait_time = conf->wait_time; + conf_ptr->srun_prolog = xstrdup(conf->srun_prolog); + conf_ptr->srun_epilog = xstrdup(conf->srun_epilog); + conf_ptr->node_prefix = xstrdup(conf->node_prefix); + conf_ptr->tree_width = conf->tree_width; + + slurm_conf_unlock(); return; } diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index d26a32ae9bf..42b86b9e209 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -57,13 +57,12 @@ #include "src/slurmctld/read_config.h" #include "src/slurmctld/sched_plugin.h" #include "src/slurmctld/slurmctld.h" +#include "src/common/slurm_rlimits_info.h" #define BUFFER_SIZE 1024 static int _build_bitmaps(void); static int _init_all_slurm_conf(void); -static int _parse_node_spec(char *in_line); -static int _parse_part_spec(char *in_line); static void _purge_old_node_state(struct node_record *old_node_table_ptr, int old_node_record_count); static void _restore_node_state(struct node_record *old_node_table_ptr, @@ -82,6 +81,9 @@ static void _validate_node_proc_count(void); static char highest_node_name[MAX_SLURM_NAME] = ""; int node_record_count = 0; +/* FIXME - declarations for temporarily moved functions */ +#define MULTIPLE_VALUE_MSG "Multiple values for %s, latest one used" + /* * _build_bitmaps 
- build node bitmaps to define which nodes are in which @@ -248,7 +250,7 @@ static int _init_all_slurm_conf(void) { int error_code; - init_slurm_conf(&slurmctld_conf); + slurm_conf_init(NULL); if ((error_code = init_node_conf())) return error_code; @@ -263,488 +265,358 @@ static int _init_all_slurm_conf(void) return 0; } +static int _state_str2int(const char *state_str) +{ + int state_val = NO_VAL; + int i; + + for (i = 0; i <= NODE_STATE_END; i++) { + if (strcasecmp(node_state_string(i), "END") == 0) + break; + if (strcasecmp(node_state_string(i), state_str) == 0) { + state_val = i; + break; + } + } + if ((i == 0) && (strncasecmp("DRAIN", state_str, 5) == 0)) + state_val = NODE_STATE_IDLE | NODE_STATE_DRAIN; + if (state_val == NO_VAL) { + error("invalid state %s", state_str); + errno = EINVAL; + } + return state_val; +} + +/* Caller must be holding slurm_conf_lock() */ +static void _set_node_prefix(const char *nodenames, slurm_ctl_conf_t *conf) +{ + int i; + char *tmp; + + xassert(nodenames != NULL); + for (i = 1; nodenames[i] != '\0'; i++) { + if((nodenames[i-1] == '[') + || (nodenames[i-1] <= '9' + && nodenames[i-1] >= '0')) + break; + } + xfree(conf->node_prefix); + if(nodenames[i] == '\0') + conf->node_prefix = xstrdup(nodenames); + else { + tmp = xmalloc(sizeof(char)*i+1); + memset(tmp, 0, i+1); + snprintf(tmp, i, "%s", nodenames); + conf->node_prefix = tmp; + tmp = NULL; + } + debug3("Prefix is %s %s %d", conf->node_prefix, nodenames, i); +} /* - * _parse_node_spec - parse the node specification (per the configuration - * file format), build table and set values - * IN/OUT in_line - line from the configuration file, parsed keywords - * and values replaced by blanks + * _build_single_nodeline_info - from the slurm.conf reader, build table, + * and set values * RET 0 if no error, error code otherwise * Note: Operates on common variables - * global: default_config_record - default configuration values for - * group of nodes * default_node_record - default node
configuration values */ -static int _parse_node_spec(char *in_line) +static int _build_single_nodeline_info(slurm_conf_node_t *node_ptr, + struct config_record *config_ptr, + slurm_ctl_conf_t *conf) { - char *node_addr = NULL, *node_name = NULL, *state = NULL; - char *feature = NULL, *reason = NULL, *node_hostname = NULL; - int error_code, first, i; - int state_val, cpus_val, real_memory_val, tmp_disk_val, weight_val; - struct node_record *node_ptr; - struct config_record *config_ptr = NULL; - hostlist_t addr_list = NULL, host_list = NULL; - char *this_node_name; -#ifndef HAVE_FRONT_END /* Fake node addresses for front-end */ - char *this_node_addr; -#endif - int port; - - node_addr = node_name = state = feature = (char *) NULL; - cpus_val = real_memory_val = state_val = NO_VAL; - tmp_disk_val = weight_val = NO_VAL; - port = NO_VAL; - if ((error_code = load_string(&node_name, "NodeName=", in_line))) - return error_code; - if (node_name == NULL) - return 0; /* no node info */ - if (strcasecmp(node_name, "localhost") == 0) { - xfree(node_name); - node_name = xmalloc(MAX_SLURM_NAME); - getnodename(node_name, MAX_SLURM_NAME); - } - - error_code = slurm_parser(in_line, - "Feature=", 's', &feature, - "NodeAddr=", 's', &node_addr, - "NodeHostname=", 's', &node_hostname, -#ifdef MULTIPLE_SLURMD - "Port=", 'd', &port, -#endif - "Procs=", 'd', &cpus_val, - "RealMemory=", 'd', &real_memory_val, - "Reason=", 's', &reason, - "State=", 's', &state, - "TmpDisk=", 'd', &tmp_disk_val, - "Weight=", 'd', &weight_val, "END"); - - if (error_code) - goto cleanup; - - if (state != NULL) { - state_val = NO_VAL; - for (i = 0; i <= NODE_STATE_END; i++) { - if (strcasecmp(node_state_string(i), "END") == 0) - break; - if (strcasecmp(node_state_string(i), state) == 0) { - state_val = i; - break; - } - } - if ((i == 0) && (strncasecmp("DRAIN", state, 5) == 0)) - state_val = NODE_STATE_IDLE | NODE_STATE_DRAIN; - if (state_val == NO_VAL) { - error("_parse_node_spec: invalid initial state %s for " 
- "node %s", state, node_name); - error_code = EINVAL; + int error_code, i; + struct node_record *node_rec = NULL; + hostlist_t alias_list = NULL; + hostlist_t hostname_list = NULL; + hostlist_t address_list = NULL; + char *alias = NULL; + char *hostname = NULL; + char *address = NULL; + int state_val = NODE_STATE_UNKNOWN; + + if (node_ptr->state != NULL) { + state_val = _state_str2int(node_ptr->state); + if (state_val == NO_VAL) goto cleanup; - } - xfree(state); } -#ifndef HAVE_FRONT_END /* Support NodeAddr expression */ - if (node_addr && - ((addr_list = hostlist_create(node_addr)) == NULL)) { - error("hostlist_create error for %s: %m", node_addr); + if ((alias_list = hostlist_create(node_ptr->nodenames)) == NULL) { + error("Unable to create NodeName list from %s", + node_ptr->nodenames); error_code = errno; goto cleanup; } -#endif - if (strcasecmp(node_name, "DEFAULT") != 0) { - i=1; - while (node_name[i] != '\0') { - if((node_name[i-1] == '[') - || (node_name[i-1] < 58 - && node_name[i-1] > 47)) - break; - i++; - } - xfree(slurmctld_conf.node_prefix); - if(node_name[i] == '\0') - slurmctld_conf.node_prefix = xstrdup(node_name); - else { - this_node_name = xmalloc(sizeof(char)*i+1); - memset(this_node_name,0,i+1); - snprintf(this_node_name, i, "%s", node_name); - slurmctld_conf.node_prefix = xstrdup(this_node_name); - xfree(this_node_name); - } - debug3("Prefix is %s %s %d",slurmctld_conf.node_prefix, - node_name, i); + if ((hostname_list = hostlist_create(node_ptr->hostnames)) == NULL) { + error("Unable to create NodeHostname list from %s", + node_ptr->hostnames); + error_code = errno; + goto cleanup; } - - if ((host_list = hostlist_create(node_name)) == NULL) { - error("hostlist_create error for %s: %m", node_name); + if ((address_list = hostlist_create(node_ptr->addresses)) == NULL) { + error("Unable to create NodeAddr list from %s", + node_ptr->addresses); error_code = errno; goto cleanup; } - first = 1; - while ((this_node_name = hostlist_shift(host_list))) 
{ - if (strcasecmp(this_node_name, "DEFAULT") == 0) { - xfree(node_name); - if (cpus_val != NO_VAL) - default_config_record.cpus = cpus_val; - if (real_memory_val != NO_VAL) - default_config_record.real_memory = - real_memory_val; - if (tmp_disk_val != NO_VAL) - default_config_record.tmp_disk = - tmp_disk_val; - if (weight_val != NO_VAL) - default_config_record.weight = weight_val; - if (state_val != NO_VAL) - default_node_record.node_state = state_val; - if (feature) { - xfree(default_config_record.feature); - default_config_record.feature = feature; - feature = NULL; - } -#ifdef MULTIPLE_SLURMD - if (port != NO_VAL) - default_node_record.port = port; + _set_node_prefix(node_ptr->nodenames, conf); + + /* some sanity checks */ +#ifdef HAVE_FRONT_END + if (hostlist_count(hostname_list) != 1 + || hostlist_count(address_list) != 1) { + error("Only one hostname and address allowed " + "in FRONT_END mode"); + goto cleanup; + } + hostname = node_ptr->hostnames; + address = node_ptr->addresses; +#else + if (hostlist_count(hostname_list) < hostlist_count(alias_list)) { + error("At least as many NodeHostname are required " + "as NodeName"); + goto cleanup; + } + if (hostlist_count(address_list) < hostlist_count(alias_list)) { + error("At least as many NodeAddr are required as NodeName"); + goto cleanup; + } #endif - free(this_node_name); - break; - } - if (first == 1) { - first = 0; - config_ptr = create_config_record(); - config_ptr->nodes = node_name; - if (cpus_val != NO_VAL) - config_ptr->cpus = cpus_val; - if (real_memory_val != NO_VAL) - config_ptr->real_memory = - real_memory_val; - if (tmp_disk_val != NO_VAL) - config_ptr->tmp_disk = tmp_disk_val; - if (weight_val != NO_VAL) - config_ptr->weight = weight_val; - if (feature) { - xfree(config_ptr->feature); - config_ptr->feature = feature; - feature = NULL; - } - } + /* now build the individual node structures */ + while ((alias = hostlist_shift(alias_list))) { +#ifndef HAVE_FRONT_END + hostname = 
hostlist_shift(hostname_list); + address = hostlist_shift(address_list); +#endif - if (strcmp(this_node_name, highest_node_name) <= 0) - node_ptr = find_node_record(this_node_name); + if (strcmp(alias, highest_node_name) <= 0) + node_rec = find_node_record(alias); else { - strncpy(highest_node_name, this_node_name, - MAX_SLURM_NAME); - node_ptr = NULL; + strncpy(highest_node_name, alias, MAX_SLURM_NAME); + node_rec = NULL; } - if (node_ptr == NULL) { - node_ptr = create_node_record(config_ptr, - this_node_name); + if (node_rec == NULL) { + node_rec = create_node_record(config_ptr, alias); if ((state_val != NO_VAL) && (state_val != NODE_STATE_UNKNOWN)) - node_ptr->node_state = state_val; - node_ptr->last_response = (time_t) 0; -#ifdef HAVE_FRONT_END /* Permit NodeAddr value reuse for front-end */ - if (node_addr) - strncpy(node_ptr->comm_name, - node_addr, MAX_SLURM_NAME); - else if (node_hostname) - strncpy(node_ptr->comm_name, - node_hostname, MAX_SLURM_NAME); - else - strncpy(node_ptr->comm_name, - node_ptr->name, MAX_SLURM_NAME); -#else - if (node_addr) - this_node_addr = hostlist_shift(addr_list); - else - this_node_addr = NULL; - if (this_node_addr) { - strncpy(node_ptr->comm_name, - this_node_addr, MAX_SLURM_NAME); - free(this_node_addr); - } else - strncpy(node_ptr->comm_name, - node_ptr->name, MAX_SLURM_NAME); -#endif -#ifdef MULTIPLE_SLURMD - if (port != NO_VAL) - node_ptr->port = port; -#endif - node_ptr->reason = xstrdup(reason); + node_rec->node_state = state_val; + node_rec->last_response = (time_t) 0; + strncpy(node_rec->comm_name, address, MAX_SLURM_NAME); + + node_rec->port = node_ptr->port; + node_rec->reason = xstrdup(node_ptr->reason); } else { - error("_parse_node_spec: reconfiguration for node %s", - this_node_name); - if ((state_val != NO_VAL) && - (state_val != NODE_STATE_UNKNOWN)) - node_ptr->node_state = state_val; - if (reason) { - xfree(node_ptr->reason); - node_ptr->reason = xstrdup(reason); - } + /* FIXME - maybe should be fatal? 
*/ + error("reconfiguration for node %s, ignoring!", alias); } - free(this_node_name); + free(alias); +#ifndef HAVE_FRONT_END + free(hostname); + free(address); +#endif } /* free allocated storage */ - xfree(node_addr); - xfree(node_hostname); - xfree(reason); - if (addr_list) - hostlist_destroy(addr_list); - hostlist_destroy(host_list); +cleanup: + if (alias_list) + hostlist_destroy(alias_list); + if (hostname_list) + hostlist_destroy(hostname_list); + if (address_list) + hostlist_destroy(address_list); return error_code; - cleanup: - xfree(node_addr); - xfree(node_name); - xfree(node_hostname); - xfree(feature); - xfree(reason); - xfree(state); - return error_code; } - -/* - * _parse_part_spec - parse the partition specification, build table and - * set values - * IN/OUT in_line - line from the configuration file, parsed keywords - * and values replaced by blanks - * RET 0 if no error, error code otherwise - * Note: Operates on common variables - * global: part_list - global partition list pointer - * default_part - default parameters for a partition - */ -static int _parse_part_spec(char *in_line) +static int _handle_downnodes_line(slurm_conf_downnodes_t *down) { - char *allow_groups = NULL, *nodes = NULL, *partition_name = NULL; - char *max_time_str = NULL, *default_str = NULL, *root_str = NULL; - char *shared_str = NULL, *state_str = NULL, *hidden_str = NULL; - int max_time_val = NO_VAL, max_nodes_val = NO_VAL; - int min_nodes_val = NO_VAL, root_val = NO_VAL, default_val = NO_VAL; - int hidden_val = NO_VAL, state_val = NO_VAL, shared_val = NO_VAL; - int error_code; - struct part_record *part_ptr; - static int default_part_val = NO_VAL; - - if ((error_code = - load_string(&partition_name, "PartitionName=", in_line))) - return error_code; - if (partition_name == NULL) - return 0; /* no partition info */ - - if (strlen(partition_name) >= MAX_SLURM_NAME) { - error("_parse_part_spec: partition name %s too long", - partition_name); - xfree(partition_name); - return 
EINVAL; + int error_code = 0; + struct node_record *node_rec = NULL; + hostlist_t alias_list = NULL; + char *alias = NULL; + int state_val = NODE_STATE_DOWN; + + if (down->state != NULL) { + state_val = _state_str2int(down->state); + if (state_val == NO_VAL) { + error("Invalid State \"%s\"", down->state); + goto cleanup; + } } - allow_groups = default_str = root_str = nodes = NULL; - shared_str = state_str = NULL; - error_code = slurm_parser(in_line, - "AllowGroups=", 's', &allow_groups, - "Default=", 's', &default_str, - "Hidden=", 's', &hidden_str, - "RootOnly=", 's', &root_str, - "MaxTime=", 's', &max_time_str, - "MaxNodes=", 'd', &max_nodes_val, - "MinNodes=", 'd', &min_nodes_val, - "Nodes=", 's', &nodes, - "Shared=", 's', &shared_str, - "State=", 's', &state_str, "END"); - - if (error_code) + if ((alias_list = hostlist_create(down->nodenames)) == NULL) { + error("Unable to create NodeName list from %s", + down->nodenames); + error_code = errno; goto cleanup; - - if (default_str) { - if (strcasecmp(default_str, "YES") == 0) - default_val = 1; - else if (strcasecmp(default_str, "NO") == 0) - default_val = 0; - else { - error("_parse_part_spec: ignored partition %s update, " - "bad state %s", partition_name, default_str); - error_code = EINVAL; - goto cleanup; - } - xfree(default_str); - } else - default_val = default_part_val; - - if (hidden_str) { - if (strcasecmp(hidden_str, "YES") == 0) - hidden_val = 1; - else if (strcasecmp(hidden_str, "NO") == 0) - hidden_val = 0; - else { - error("_parse_part_spec: ignored partition %s update, " - "bad key %s", partition_name, hidden_str); - error_code = EINVAL; - goto cleanup; - } - xfree(hidden_str); } - if (root_str) { - if (strcasecmp(root_str, "YES") == 0) - root_val = 1; - else if (strcasecmp(root_str, "NO") == 0) - root_val = 0; - else { - error("_parse_part_spec ignored partition %s update, " - "bad key %s", partition_name, root_str); - error_code = EINVAL; - goto cleanup; + while ((alias = 
hostlist_shift(alias_list))) { + node_rec = find_node_record(alias); + if (node_rec == NULL) { + error("DownNode \"%s\" does not exist!", alias); + free(alias); + continue; } - xfree(root_str); - } - if (max_time_str) { - if (strcasecmp(max_time_str, "INFINITE") == 0) - max_time_val = INFINITE; - else { - char *end_ptr; - max_time_val = strtol(max_time_str, &end_ptr, 10); - if ((max_time_str[0] != '\0') && - (end_ptr[0] != '\0')) { - error_code = EINVAL; - goto cleanup; - } + if ((state_val != NO_VAL) && + (state_val != NODE_STATE_UNKNOWN)) + node_rec->node_state = state_val; + if (down->reason) { + xfree(node_rec->reason); + node_rec->reason = xstrdup(down->reason); } - xfree(max_time_str); + free(alias); } - if (shared_str) { - if (strcasecmp(shared_str, "YES") == 0) - shared_val = SHARED_YES; - else if (strcasecmp(shared_str, "NO") == 0) - shared_val = SHARED_NO; - else if (strcasecmp(shared_str, "FORCE") == 0) - shared_val = SHARED_FORCE; - else { - error("_parse_part_spec ignored partition %s update, " - "bad shared %s", partition_name, shared_str); - error_code = EINVAL; - goto cleanup; - } - xfree(shared_str); +cleanup: + if (alias_list) + hostlist_destroy(alias_list); + return error_code; +} + +static void _handle_all_downnodes() +{ + slurm_conf_downnodes_t *ptr, **ptr_array; + int count; + int i; + + count = slurm_conf_downnodes_array(&ptr_array); + if (count == 0) { + debug("No DownNodes"); + return; + } + + for (i = 0; i < count; i++) { + ptr = ptr_array[i]; + + _handle_downnodes_line(ptr); } +} - if (state_str) { - if (strcasecmp(state_str, "UP") == 0) - state_val = 1; - else if (strcasecmp(state_str, "DOWN") == 0) - state_val = 0; - else { - error("_parse_part_spec ignored partition %s update, " - "bad state %s", partition_name, state_str); - error_code = EINVAL; - goto cleanup; +/* + * _build_all_nodeline_info - get a array of slurm_conf_node_t structures + * from the slurm.conf reader, build table, and set values + * RET 0 if no error, error code 
otherwise + * Note: Operates on common variables + * default_node_record - default node configuration values + */ +static int _build_all_nodeline_info(slurm_ctl_conf_t *conf) +{ + slurm_conf_node_t *node, **ptr_array; + struct config_record *config_ptr = NULL; + int count; + int i; + + count = slurm_conf_nodename_array(&ptr_array); + if (count == 0) + fatal("No NodeName information available!"); + + error("count = %d", count); + + for (i = 0; i < count; i++) { + node = ptr_array[i]; + + config_ptr = create_config_record(); + config_ptr->nodes = xstrdup(node->nodenames); + config_ptr->cpus = node->cpus; + config_ptr->real_memory = node->real_memory; + config_ptr->tmp_disk = node->tmp_disk; + config_ptr->weight = node->weight; + if (node->feature) { + xfree(config_ptr->feature); + config_ptr->feature = xstrdup(node->feature); } - xfree(state_str); + + _build_single_nodeline_info(node, config_ptr, conf); } +} - if (strcasecmp(partition_name, "DEFAULT") == 0) { - xfree(partition_name); - if (default_val != NO_VAL) - default_part_val = default_val; - if (hidden_val != NO_VAL) - default_part.hidden = hidden_val; - if (max_time_val != NO_VAL) - default_part.max_time = max_time_val; - if (max_nodes_val != NO_VAL) - default_part.max_nodes = max_nodes_val; - if (min_nodes_val != NO_VAL) - default_part.min_nodes = min_nodes_val; - if (root_val != NO_VAL) - default_part.root_only = root_val; - if (state_val != NO_VAL) - default_part.state_up = state_val; - if (shared_val != NO_VAL) - default_part.shared = shared_val; - if (allow_groups) { - xfree(default_part.allow_groups); - if (strcasecmp(allow_groups, "ALL")) { - default_part.allow_groups = allow_groups; - allow_groups = NULL; - } - } - if (nodes) { - xfree(default_part.nodes); - default_part.nodes = nodes; - nodes = NULL; - } - return 0; +/* + * _build_single_partitionline_info - get a array of slurm_conf_partition_t + * structures from the slurm.conf reader, build table, and set values + * RET 0 if no error, error code 
otherwise + * Note: Operates on common variables + * global: part_list - global partition list pointer + * default_part - default parameters for a partition + */ +static int _build_single_partitionline_info(slurm_conf_partition_t *part) +{ + struct part_record *part_ptr; + + if (strlen(part->name) >= MAX_SLURM_NAME) { + error("_parse_part_spec: partition name %s too long", + part->name); + return EINVAL; } - part_ptr = list_find_first(part_list, &list_find_part, partition_name); + part_ptr = list_find_first(part_list, &list_find_part, part->name); if (part_ptr == NULL) { part_ptr = create_part_record(); - strcpy(part_ptr->name, partition_name); + strcpy(part_ptr->name, part->name); } else { - verbose("_parse_node_spec: duplicate entry for partition %s", - partition_name); + verbose("_parse_part_spec: duplicate entry for partition %s", + part->name); } - if (default_val == 1) { + + if (part->default_flag) { if ((strlen(default_part_name) > 0) - && strcmp(default_part_name,partition_name)) + && strcmp(default_part_name, part->name)) info("_parse_part_spec: changing default partition " "from %s to %s", - default_part_name, partition_name); - strcpy(default_part_name, partition_name); + default_part_name, part->name); + strcpy(default_part_name, part->name); default_part_loc = part_ptr; } - if (hidden_val != NO_VAL) - part_ptr->hidden = hidden_val; - if (max_time_val != NO_VAL) - part_ptr->max_time = max_time_val; - if (max_nodes_val != NO_VAL) - part_ptr->max_nodes = max_nodes_val; - if (min_nodes_val != NO_VAL) - part_ptr->min_nodes = min_nodes_val; - if (root_val != NO_VAL) - part_ptr->root_only = root_val; - if (state_val != NO_VAL) - part_ptr->state_up = state_val; - if (shared_val != NO_VAL) - part_ptr->shared = shared_val; - if (allow_groups) { + part_ptr->hidden = part->hidden_flag ? 1 : 0; + part_ptr->max_time = part->max_time; + part_ptr->max_nodes = part->max_nodes; + part_ptr->min_nodes = part->min_nodes; + part_ptr->root_only = part->root_only_flag ? 
1 : 0; + part_ptr->state_up = part->state_up_flag ? 1 : 0; + part_ptr->shared = part->shared; + if (part->allow_groups) { xfree(part_ptr->allow_groups); - part_ptr->allow_groups = allow_groups; - allow_groups = NULL; + part_ptr->allow_groups = xstrdup(part->allow_groups); } - if (nodes) { - if (strcasecmp(nodes, "localhost") == 0) { - xfree(nodes); - nodes = xmalloc(MAX_SLURM_NAME); - getnodename(nodes, MAX_SLURM_NAME); - } + if (part->nodes) { if (part_ptr->nodes) { xstrcat(part_ptr->nodes, ","); - xstrcat(part_ptr->nodes, nodes); - xfree(nodes); + xstrcat(part_ptr->nodes, part->nodes); } else { - part_ptr->nodes = nodes; - nodes = NULL; + part_ptr->nodes = xstrdup(part->nodes); } } - xfree(partition_name); - return 0; - cleanup: - xfree(allow_groups); - xfree(default_str); - xfree(hidden_str); - xfree(max_time_str); - xfree(root_str); - xfree(nodes); - xfree(partition_name); - xfree(shared_str); - xfree(state_str); - return error_code; + return 0; } +/* + * _build_all_partitionline_info - get a array of slurm_conf_partition_t + * structures from the slurm.conf reader, build table, and set values + * RET 0 if no error, error code otherwise + * Note: Operates on common variables + * global: part_list - global partition list pointer + * default_part - default parameters for a partition + */ +static int _build_all_partitionline_info() +{ + slurm_conf_partition_t *part, **ptr_array; + int count; + int i; + + count = slurm_conf_partition_array(&ptr_array); + if (count == 0) + fatal("No PartitionName information available!"); + + for (i = 0; i < count; i++) { + part = ptr_array[i]; + + _build_single_partitionline_info(part); + } +} /* * read_slurm_conf - load the slurm configuration from the configured file. 
@@ -761,9 +633,6 @@ static int _parse_part_spec(char *in_line) int read_slurm_conf(int recover) { DEF_TIMERS; - FILE *slurm_spec_file; /* pointer to input data file */ - int line_num; /* line number in input file */ - char in_line[BUFFER_SIZE]; /* input line */ int i, j, error_code; int old_node_record_count; struct node_record *old_node_table_ptr; @@ -772,6 +641,7 @@ int read_slurm_conf(int recover) char *old_sched_type = xstrdup(slurmctld_conf.schedtype); char *old_select_type = xstrdup(slurmctld_conf.select_type); char *old_switch_type = xstrdup(slurmctld_conf.switch_type); + slurm_ctl_conf_t *conf; /* initialization */ START_TIMER; @@ -784,76 +654,12 @@ int read_slurm_conf(int recover) return error_code; } - slurm_spec_file = fopen(slurmctld_conf.slurm_conf, "r"); - if (slurm_spec_file == NULL) - fatal("read_slurm_conf error opening file %s, %m", - slurmctld_conf.slurm_conf); - - info("read_slurm_conf: loading configuration from %s", - slurmctld_conf.slurm_conf); - - /* process the data file */ - line_num = 0; - while (fgets(in_line, BUFFER_SIZE, slurm_spec_file) != NULL) { - line_num++; - if (strlen(in_line) >= (BUFFER_SIZE - 1)) { - error("read_slurm_conf line %d, of input file %s " - "too long", - line_num, slurmctld_conf.slurm_conf); - xfree(old_node_table_ptr); - fclose(slurm_spec_file); - return E2BIG; - break; - } - - /* everything after a non-escaped "#" is a comment */ - /* replace comment flag "#" with an end of string (NULL) */ - /* escape sequence "\#" translated to "#" */ - for (i = 0; i < BUFFER_SIZE; i++) { - if (in_line[i] == (char) NULL) - break; - if (in_line[i] != '#') - continue; - if ((i > 0) && (in_line[i - 1] == '\\')) { - for (j = i; j < BUFFER_SIZE; j++) { - in_line[j - 1] = in_line[j]; - } - continue; - } - in_line[i] = (char) NULL; - break; - } - - /* parse what is left, non-comments */ - - /* overall configuration parameters */ - if ((error_code = - parse_config_spec(in_line, &slurmctld_conf))) { - fclose(slurm_spec_file); - 
xfree(old_node_table_ptr); - return error_code; - } + conf = slurm_conf_lock(); + _build_all_nodeline_info(conf); + _handle_all_downnodes(); + _build_all_partitionline_info(); + slurm_conf_unlock(); - /* node configuration parameters */ - if ((error_code = _parse_node_spec(in_line))) { - fclose(slurm_spec_file); - xfree(old_node_table_ptr); - return error_code; - } - - /* partition configuration parameters */ - if ((error_code = _parse_part_spec(in_line))) { - fclose(slurm_spec_file); - xfree(old_node_table_ptr); - return error_code; - } - - /* report any leftover strings on input line */ - report_leftover(in_line, line_num); - } - fclose(slurm_spec_file); - - validate_config(&slurmctld_conf); update_logging(); g_slurmctld_jobacct_init(slurmctld_conf.job_acct_loc, slurmctld_conf.job_acct_parameters); @@ -1166,3 +972,15 @@ static void _validate_node_proc_count(void) list_iterator_destroy(part_iterator); } #endif + +/* Normalize supplied debug level to be in range per log.h definitions */ +static void _normalize_debug_level(uint16_t *level) +{ + if (*level > LOG_LEVEL_DEBUG3) { + error("Normalizing debug level from %u to %d", + *level, LOG_LEVEL_DEBUG3); + *level = LOG_LEVEL_DEBUG3; + } + /* level is uint16, always > LOG_LEVEL_QUIET(0), can't underflow */ +} + diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index c75dd383336..c8fb4aea1c6 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -64,6 +64,7 @@ #include "src/common/slurm_protocol_defs.h" #include "src/common/switch.h" #include "src/common/xmalloc.h" +#include "src/common/read_config.h" /* location of slurmctld_conf */ #define FREE_NULL_BITMAP(_X) \ do { \ @@ -132,7 +133,6 @@ typedef struct slurmctld_config { } slurmctld_config_t; extern slurmctld_config_t slurmctld_config; -extern slurm_ctl_conf_t slurmctld_conf; extern int bg_recover; /* state recovery mode */ /*****************************************************************************\ @@ -190,8 +190,6 @@ extern 
bitstr_t *avail_node_bitmap; /* bitmap of available nodes, * not DOWN, DRAINED or DRAINING */ extern bitstr_t *idle_node_bitmap; /* bitmap of idle nodes */ extern bitstr_t *share_node_bitmap; /* bitmap of sharable nodes */ -extern struct config_record default_config_record; -extern struct node_record default_node_record; /*****************************************************************************\ * PARTITION parameters and data structures diff --git a/src/slurmd/common/slurmstepd_init.c b/src/slurmd/common/slurmstepd_init.c index b8f62f63c6b..462d9b93ddb 100644 --- a/src/slurmd/common/slurmstepd_init.c +++ b/src/slurmd/common/slurmstepd_init.c @@ -33,7 +33,7 @@ extern void pack_slurmd_conf_lite(slurmd_conf_t *conf, Buf buffer) packstr(conf->spooldir, buffer); packstr(conf->node_name, buffer); packstr(conf->logfile, buffer); - packstr(conf->cf.job_acct_parameters, buffer); + packstr(conf->job_acct_parameters, buffer); pack32(conf->debug_level, buffer); pack32(conf->daemonize, buffer); pack32((uint32_t)conf->slurm_user_id, buffer); @@ -47,7 +47,7 @@ extern int unpack_slurmd_conf_lite_no_alloc(slurmd_conf_t *conf, Buf buffer) safe_unpackstr_xmalloc(&conf->spooldir, &uint16_tmp, buffer); safe_unpackstr_xmalloc(&conf->node_name, &uint16_tmp, buffer); safe_unpackstr_xmalloc(&conf->logfile, &uint16_tmp, buffer); - safe_unpackstr_xmalloc(&conf->cf.job_acct_parameters, + safe_unpackstr_xmalloc(&conf->job_acct_parameters, &uint16_tmp, buffer); safe_unpack32(&uint32_tmp, buffer); conf->debug_level = uint32_tmp; diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c index 973ab024529..987532657b2 100644 --- a/src/slurmd/slurmd/req.c +++ b/src/slurmd/slurmd/req.c @@ -1734,6 +1734,7 @@ _rpc_terminate_job(slurm_msg_t *msg, slurm_addr *cli) int nsteps = 0; int delay; char *bg_part_id = NULL; + slurm_ctl_conf_t *cf; debug("_rpc_terminate_job, uid = %d", uid); /* @@ -1820,7 +1821,9 @@ _rpc_terminate_job(slurm_msg_t *msg, slurm_addr *cli) /* * Check for corpses */ - delay 
= MAX(conf->cf.kill_wait, 5); + cf = slurm_conf_lock(); + delay = MAX(cf->kill_wait, 5); + slurm_conf_unlock(); if ( !_pause_for_job_completion (req->job_id, delay) && _terminate_all_steps(req->job_id, true) ) { /* diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c index 0e63d6f0a9f..aade2ae4020 100644 --- a/src/slurmd/slurmd/slurmd.c +++ b/src/slurmd/slurmd/slurmd.c @@ -67,11 +67,7 @@ #include "src/slurmd/common/proctrack.h" #include "src/slurmd/common/task_plugin.h" -#ifdef MULTIPLE_SLURMD -#define GETOPT_ARGS "L:Dvhcf:MN:P:" -#else -#define GETOPT_ARGS "L:Dvhcf:M" -#endif +#define GETOPT_ARGS "L:Dvhcf:MN:" #ifndef MAXHOSTNAMELEN # define MAXHOSTNAMELEN 64 @@ -427,12 +423,15 @@ _fill_registration_msg(slurm_node_registration_status_msg_t *msg) ListIterator i; step_loc_t *stepd; int n; + slurm_ctl_conf_t *cf; msg->node_name = xstrdup (conf->node_name); get_procs(&msg->cpus); get_memory(&msg->real_memory_size); - get_tmp_disk(&msg->temporary_disk_space, conf->cf.tmp_fs); + cf = slurm_conf_lock(); + get_tmp_disk(&msg->temporary_disk_space, cf->tmp_fs); + slurm_conf_unlock(); debug3("Procs=%u RealMemory=%u, TmpDisk=%u", msg->cpus, msg->real_memory_size, msg->temporary_disk_space); @@ -504,61 +503,70 @@ static void _read_config() { char *path_pubkey; + slurm_ctl_conf_t *cf; - conf->cf.slurm_conf = xstrdup(conf->conffile); - - read_slurm_conf_ctl(&conf->cf, false); + slurm_conf_reinit(conf->conffile); + cf = slurm_conf_lock(); slurm_mutex_lock(&conf->config_mutex); if (conf->conffile == NULL) - conf->conffile = xstrdup(conf->cf.slurm_conf); + conf->conffile = xstrdup(cf->slurm_conf); -#ifndef MULTIPLE_SLURMD - conf->port = conf->cf.slurmd_port; -#endif - conf->slurm_user_id = conf->cf.slurm_user_id; + conf->slurm_user_id = cf->slurm_user_id; - path_pubkey = xstrdup(conf->cf.job_credential_public_certificate); + path_pubkey = xstrdup(cf->job_credential_public_certificate); if (!conf->logfile) - conf->logfile = xstrdup(conf->cf.slurmd_logfile); + 
conf->logfile = xstrdup(cf->slurmd_logfile); /* node_name may already be set from a command line parameter */ if (conf->node_name == NULL) - _free_and_set(&conf->node_name, - get_conf_node_name(conf->hostname)); - _free_and_set(&conf->epilog, xstrdup(conf->cf.epilog)); - _free_and_set(&conf->prolog, xstrdup(conf->cf.prolog)); - _free_and_set(&conf->tmpfs, xstrdup(conf->cf.tmp_fs)); - _free_and_set(&conf->spooldir, xstrdup(conf->cf.slurmd_spooldir)); + conf->node_name = slurm_conf_get_nodename(conf->hostname); + if (conf->node_name == NULL) + conf->node_name = slurm_conf_get_nodename("localhost"); + if (conf->node_name == NULL) + fatal("Unable to determine this slurmd's NodeName"); + + conf->port = slurm_conf_get_port(conf->node_name); + + _free_and_set(&conf->epilog, xstrdup(cf->epilog)); + _free_and_set(&conf->prolog, xstrdup(cf->prolog)); + _free_and_set(&conf->tmpfs, xstrdup(cf->tmp_fs)); + _free_and_set(&conf->spooldir, xstrdup(cf->slurmd_spooldir)); #ifdef MULTIPLE_SLURMD /* append the NodeName to the spooldir to make it unique */ xstrfmtcat(conf->spooldir, ".%s", conf->node_name); #endif - _free_and_set(&conf->pidfile, xstrdup(conf->cf.slurmd_pidfile)); + _free_and_set(&conf->pidfile, xstrdup(cf->slurmd_pidfile)); #ifdef MULTIPLE_SLURMD /* append the NodeName to the pidfile name to make it unique */ xstrfmtcat(conf->pidfile, ".%s", conf->node_name); #endif - _free_and_set(&conf->task_prolog, xstrdup(conf->cf.task_prolog)); - _free_and_set(&conf->task_epilog, xstrdup(conf->cf.task_epilog)); - _free_and_set(&conf->pubkey, path_pubkey); + _free_and_set(&conf->task_prolog, xstrdup(cf->task_prolog)); + _free_and_set(&conf->task_epilog, xstrdup(cf->task_epilog)); + _free_and_set(&conf->pubkey, path_pubkey); + _free_and_set(&conf->job_acct_parameters, + xstrdup(cf->job_acct_parameters)); if ( (conf->node_name == NULL) || (conf->node_name[0] == '\0') ) fatal("Node name lookup failure"); - if ( (conf->cf.control_addr == NULL) || - (conf->cf.slurmctld_port == 0) ) - 
fatal("Unable to establish control machine or port"); + if (cf->control_addr == NULL) + fatal("Unable to establish controller machine"); + if (cf->slurmctld_port == 0) + fatal("Unable to establish controller port"); slurm_mutex_unlock(&conf->config_mutex); + slurm_conf_unlock(); } static void _reconfigure(void) { + slurm_ctl_conf_t *cf; + _reconfig = 0; _read_config(); @@ -574,7 +582,9 @@ _reconfigure(void) /* * Reinitialize the groups cache */ - init_gids_cache(conf->cf.cache_groups); + cf = slurm_conf_lock(); + init_gids_cache(cf->cache_groups); + slurm_conf_unlock(); /* * XXX: reopen slurmd port? @@ -584,11 +594,14 @@ _reconfigure(void) static void _print_conf() { - debug3("CacheGroups = %d", conf->cf.cache_groups); + slurm_ctl_conf_t *cf; + + cf = slurm_conf_lock(); + debug3("CacheGroups = %d", cf->cache_groups); debug3("Confile = `%s'", conf->conffile); - debug3("Debug = %d", conf->cf.slurmd_debug); + debug3("Debug = %d", cf->slurmd_debug); debug3("Epilog = `%s'", conf->epilog); - debug3("Logfile = `%s'", conf->cf.slurmd_logfile); + debug3("Logfile = `%s'", cf->slurmd_logfile); debug3("Port = %u", conf->port); debug3("Prolog = `%s'", conf->prolog); debug3("TmpFS = `%s'", conf->tmpfs); @@ -598,6 +611,7 @@ _print_conf() debug3("Slurm UID = %u", conf->slurm_user_id); debug3("TaskProlog = `%s'", conf->task_prolog); debug3("TaskEpilog = `%s'", conf->task_epilog); + slurm_conf_unlock(); } static void @@ -664,14 +678,9 @@ _process_cmdline(int ac, char **av) case 'M': conf->mlock_pages = 1; break; -#ifdef MULTIPLE_SLURMD case 'N': conf->node_name = xstrdup(optarg); break; - case 'P': - conf->port = (uint16_t)atoi(optarg); - break; -#endif default: _usage(c); exit(1); @@ -705,6 +714,7 @@ static int _slurmd_init() { struct rlimit rlim; + slurm_ctl_conf_t *cf; /* * Process commandline arguments first, since one option may be @@ -774,13 +784,15 @@ _slurmd_init() /* * Set up the job accounting plugin */ - g_slurmd_jobacct_init(conf->cf.job_acct_parameters); + 
g_slurmd_jobacct_init(conf->job_acct_parameters); /* * Cache the group access list */ - init_gids_cache(conf->cf.cache_groups); + cf = slurm_conf_lock(); + init_gids_cache(cf->cache_groups); + slurm_conf_unlock(); return SLURM_SUCCESS; } @@ -970,13 +982,16 @@ _kill_old_slurmd(void) static void _update_logging(void) { log_options_t *o = &conf->log_opts; + slurm_ctl_conf_t *cf; /* * Initialize debug level if not already set */ + cf = slurm_conf_lock(); if ( (conf->debug_level == LOG_LEVEL_INFO) - && (conf->cf.slurmd_debug != (uint16_t) NO_VAL) ) - conf->debug_level = conf->cf.slurmd_debug; + && (cf->slurmd_debug != (uint16_t) NO_VAL) ) + conf->debug_level = cf->slurmd_debug; + slurm_conf_unlock(); o->stderr_level = conf->debug_level; o->logfile_level = conf->debug_level; diff --git a/src/slurmd/slurmd/slurmd.h b/src/slurmd/slurmd/slurmd.h index 9da432d3405..7899c25f139 100644 --- a/src/slurmd/slurmd/slurmd.h +++ b/src/slurmd/slurmd/slurmd.h @@ -58,9 +58,6 @@ extern pid_t getpgid(pid_t pid); * Global config type */ typedef struct slurmd_config { - - slurm_ctl_conf_t cf; /* slurm.conf configuration */ - char *prog; /* Program basename */ char ***argv; /* pointer to argument vector */ int *argc; /* pointer to argument count */ @@ -91,6 +88,7 @@ typedef struct slurmd_config { uid_t slurm_user_id; /* UID that slurmctld runs as */ pthread_mutex_t config_mutex; /* lock for slurmd_config access */ + char *job_acct_parameters; } slurmd_conf_t; slurmd_conf_t * conf; diff --git a/src/slurmd/slurmstepd/slurmstepd.c b/src/slurmd/slurmstepd/slurmstepd.c index 1a7b60099ce..3763c919787 100644 --- a/src/slurmd/slurmstepd/slurmstepd.c +++ b/src/slurmd/slurmstepd/slurmstepd.c @@ -109,7 +109,7 @@ main (int argc, char *argv[]) xfree(conf->spooldir); xfree(conf->node_name); xfree(conf->logfile); - xfree(conf->cf.job_acct_parameters); + xfree(conf->job_acct_parameters); xfree(conf); info("done with job"); return 0; @@ -194,7 +194,7 @@ _init_from_slurmd(int sock, char **argv, 
conf->log_opts.syslog_level = LOG_LEVEL_QUIET; log_init(argv[0],conf->log_opts, LOG_DAEMON, conf->logfile); - g_slurmd_jobacct_init(conf->cf.job_acct_parameters); + g_slurmd_jobacct_init(conf->job_acct_parameters); switch_g_slurmd_step_init(); /* receive cli from slurmd */ diff --git a/src/squeue/opts.c b/src/squeue/opts.c index 9f80960a757..0b57a348bb1 100644 --- a/src/squeue/opts.c +++ b/src/squeue/opts.c @@ -221,7 +221,7 @@ parse_command_line( int argc, char* argv[] ) params.node = xmalloc(128); getnodename(params.node, 128); } - name1 = get_conf_node_name(params.node); + name1 = slurm_conf_get_nodename(params.node); xfree(params.node); params.node = xstrdup(name1); } diff --git a/src/srun/srun_job.c b/src/srun/srun_job.c index 63e4e44039b..f634790964e 100644 --- a/src/srun/srun_job.c +++ b/src/srun/srun_job.c @@ -149,9 +149,9 @@ job_create_noalloc(void) for (i = 0; i < job->nhosts; i++) { - char *nd = get_conf_node_hostname(job->step_layout->host[i]); - slurm_set_addr ( &job->slurmd_addr[i], - slurm_get_slurmd_port(), nd ); + char *nd = slurm_conf_get_hostname(job->step_layout->host[i]); + uint16_t port = slurm_conf_get_port(job->step_layout->host[i]); + slurm_set_addr ( &job->slurmd_addr[i], port, nd ); xfree(nd); } -- GitLab