From 0cf28d0bd46322bacec7eea912e4c6b06c0ac6b1 Mon Sep 17 00:00:00 2001
From: "Christopher J. Morrone" <morrone2@llnl.gov>
Date: Wed, 25 Jan 2006 00:30:17 +0000
Subject: [PATCH] New --enable-multiple-slurmd configure parameter to allow
 running more than one copy of slurmd on a node at the same time.  Only really
 useful for developers.

---
 NEWS                             |  6 +++++
 configure.ac                     | 21 +++++++++++++++++
 src/api/config_info.c            |  2 ++
 src/common/read_config.c         | 11 +++++++--
 src/common/slurm_protocol_pack.c |  4 ++++
 src/slurmctld/node_mgr.c         |  5 +++-
 src/slurmctld/proc_req.c         |  2 ++
 src/slurmctld/read_config.c      | 13 +++++++++++
 src/slurmctld/slurmctld.h        |  1 +
 src/slurmd/slurmd/slurmd.c       | 39 +++++++++++++++++++++++---------
 src/srun/launch.c                |  1 -
 11 files changed, 90 insertions(+), 15 deletions(-)

diff --git a/NEWS b/NEWS
index d8040998ae1..b557e0438c4 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,12 @@
 This file describes changes in recent versions of SLURM. It primarily
 documents those changes that are of interest to users and admins. 
 
+* Changes in SLURM 1.1.0-pre1
+=============================
+ -- New --enable-multiple-slurmd configure parameter to allow running
+    more than one copy of slurmd on a node at the same time.  Only
+    really useful for developers.
+
 * Changes in SLURM 1.0.2
 ========================
  -- Correctly report DRAINED node state as type OTHER for "sinfo --summarize".
diff --git a/configure.ac b/configure.ac
index 1b613fd196c..3c93e15da54 100644
--- a/configure.ac
+++ b/configure.ac
@@ -159,6 +159,27 @@ dnl Check for compilation of SLURM auth modules:
 dnl
 X_AC_MUNGE
 
+dnl
+dnl Check if multiple-slurmd support is requested and define MULTIPLE_SLURMD
+dnl if it is.
+dnl
+AC_MSG_CHECKING(whether to enable multiple-slurmd support)
+AC_ARG_ENABLE([multiple-slurmd],
+  AC_HELP_STRING([--enable-multiple-slurmd], [enable multiple-slurmd support]),
+    [ case "$enableval" in
+      yes) multiple_slurmd=yes ;;
+      no)  multiple_slurmd=no ;;
+      *)   AC_MSG_ERROR([bad value "$enableval" for --enable-multiple-slurmd]);;
+    esac ]
+)
+if test "x$multiple_slurmd" = "xyes"; then
+  AC_DEFINE([MULTIPLE_SLURMD], [1], [Enable multiple slurmd on one node])
+  AC_MSG_RESULT([yes])
+else
+  AC_MSG_RESULT([no])
+fi
+
+
 AUTHD_LIBS="-lauth -le"
 savedLIBS="$LIBS"
 savedCFLAGS="$CFLAGS"
diff --git a/src/api/config_info.c b/src/api/config_info.c
index 4b71ff462ed..9cd4434ec09 100644
--- a/src/api/config_info.c
+++ b/src/api/config_info.c
@@ -152,8 +152,10 @@ void slurm_print_ctl_conf ( FILE* out,
 		slurm_ctl_conf_ptr->slurmd_logfile);
 	fprintf(out, "SlurmdPidFile     = %s\n", 
 		slurm_ctl_conf_ptr->slurmd_pidfile);
+#ifndef MULTIPLE_SLURMD
 	fprintf(out, "SlurmdPort        = %u\n", 
 		slurm_ctl_conf_ptr->slurmd_port);
+#endif
 	fprintf(out, "SlurmdSpoolDir    = %s\n", 
 		slurm_ctl_conf_ptr->slurmd_spooldir);
 	fprintf(out, "SlurmdTimeout     = %u\n", 
diff --git a/src/common/read_config.c b/src/common/read_config.c
index d1288feee30..6dc003b71ba 100644
--- a/src/common/read_config.c
+++ b/src/common/read_config.c
@@ -267,6 +267,7 @@ extern char *get_conf_node_name(char *node_hostname)
 /* getnodename - equivalent to gethostname, but return only the first 
  * component of the fully qualified name 
  * (e.g. "linux123.foo.bar" becomes "linux123") 
+ * OUT name - buffer that receives the short (first-component) node name
  */
 int
 getnodename (char *name, size_t len)
@@ -397,7 +398,7 @@ init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr)
 	ctl_conf_ptr->slurmd_debug		= (uint16_t) NO_VAL; 
 	xfree (ctl_conf_ptr->slurmd_logfile);
 	xfree (ctl_conf_ptr->slurmd_pidfile);
-	ctl_conf_ptr->slurmd_port		= (uint32_t) NO_VAL;
+ 	ctl_conf_ptr->slurmd_port		= (uint32_t) NO_VAL;
 	xfree (ctl_conf_ptr->slurmd_spooldir);
 	ctl_conf_ptr->slurmd_timeout		= (uint16_t) NO_VAL;
 	xfree (ctl_conf_ptr->state_save_location);
@@ -933,6 +934,7 @@ parse_config_spec (char *in_line, slurm_ctl_conf_t *ctl_conf_ptr)
 		ctl_conf_ptr->slurmd_logfile = slurmd_logfile;
 	}
 
+#ifndef MULTIPLE_SLURMD
 	if ( slurmd_port != -1) {
 		if ( ctl_conf_ptr->slurmd_port != (uint32_t) NO_VAL)
 			error (MULTIPLE_VALUE_MSG, "SlurmdPort");
@@ -941,6 +943,7 @@ parse_config_spec (char *in_line, slurm_ctl_conf_t *ctl_conf_ptr)
 		else
 			ctl_conf_ptr->slurmd_port = slurmd_port;
 	}
+#endif
 
 	if ( slurmd_spooldir ) {
 		if ( ctl_conf_ptr->slurmd_spooldir ) {
@@ -1058,12 +1061,14 @@ _parse_node_spec (char *in_line, bool slurmd_hosts)
 	char *state = NULL, *reason=NULL;
 	char *node_hostname = NULL;
 	int cpus_val, real_memory_val, tmp_disk_val, weight_val;
+	int port;
 
 	error_code = slurm_parser (in_line,
 		"Feature=", 's', &feature, 
 		"NodeAddr=", 's', &node_addr, 
 		"NodeName=", 's', &node_name, 
 		"NodeHostname=", 's', &node_hostname, 
+		"Port=", 'd', &port,
 		"Procs=", 'd', &cpus_val, 
 		"RealMemory=", 'd', &real_memory_val, 
 		"Reason=", 's', &reason, 
@@ -1076,7 +1081,7 @@ _parse_node_spec (char *in_line, bool slurmd_hosts)
 		return error_code;
 
 	if (node_name
-	&&  (node_hostname || slurmd_hosts)) {
+	    && (node_hostname || slurmd_hosts)) {
 		all_slurmd_hosts = true;
 		_register_conf_node_aliases(node_name, node_hostname);
 	}
@@ -1433,8 +1438,10 @@ validate_config (slurm_ctl_conf_t *ctl_conf_ptr)
 	if (ctl_conf_ptr->slurmd_pidfile == NULL)
 		ctl_conf_ptr->slurmd_pidfile = xstrdup(DEFAULT_SLURMD_PIDFILE);
 
+#ifndef MULTIPLE_SLURMD
 	if (ctl_conf_ptr->slurmd_port == (uint32_t) NO_VAL) 
 		ctl_conf_ptr->slurmd_port = SLURMD_PORT;
+#endif
 
 	if (ctl_conf_ptr->slurmd_spooldir == NULL)
 		ctl_conf_ptr->slurmd_spooldir = xstrdup(DEFAULT_SPOOLDIR);
diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c
index 68c14332408..fae0b90c32e 100644
--- a/src/common/slurm_protocol_pack.c
+++ b/src/common/slurm_protocol_pack.c
@@ -1920,7 +1920,9 @@ _pack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t * build_ptr, Buf buffer)
 	pack16(build_ptr->slurmd_debug, buffer);
 	packstr(build_ptr->slurmd_logfile, buffer);
 	packstr(build_ptr->slurmd_pidfile, buffer);
+#ifndef MULTIPLE_SLURMD
 	pack32(build_ptr->slurmd_port, buffer);
+#endif
 	packstr(build_ptr->slurmd_spooldir, buffer);
 	debug2("Packing string %s", build_ptr->slurmd_spooldir);
 	pack16(build_ptr->slurmd_timeout, buffer);
@@ -2006,7 +2008,9 @@ _unpack_slurm_ctl_conf_msg(slurm_ctl_conf_info_msg_t **
 			       buffer);
 	safe_unpackstr_xmalloc(&build_ptr->slurmd_pidfile, &uint16_tmp,
 			       buffer);
+#ifndef MULTIPLE_SLURMD
 	safe_unpack32(&build_ptr->slurmd_port, buffer);
+#endif
 	safe_unpackstr_xmalloc(&build_ptr->slurmd_spooldir, &uint16_tmp,
 			       buffer);
 	safe_unpack16(&build_ptr->slurmd_timeout, buffer);
diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c
index e8336ba221f..38cc10c1047 100644
--- a/src/slurmctld/node_mgr.c
+++ b/src/slurmctld/node_mgr.c
@@ -197,6 +197,7 @@ create_node_record (struct config_record *config_ptr, char *node_name)
 	strcpy (node_ptr->name, node_name);
 	node_ptr->node_state = default_node_record.node_state;
 	node_ptr->last_response = default_node_record.last_response;
+	node_ptr->port = default_node_record.port;
 	node_ptr->config_ptr = config_ptr;
 	node_ptr->part_cnt = 0;
 	node_ptr->part_pptr = NULL;
@@ -803,8 +804,10 @@ void set_slurmd_addr (void)
 	for (i = 0; i < node_record_count; i++, node_ptr++) {
 		if (node_ptr->name[0] == '\0')
 			continue;
+		if (node_ptr->port == 0)
+			node_ptr->port = slurmctld_conf.slurmd_port;
 		slurm_set_addr (&node_ptr->slurm_addr, 
-				slurmctld_conf.slurmd_port, 
+				node_ptr->port,
 				node_ptr->comm_name);
 		if (node_ptr->slurm_addr.sin_port)
 			continue;
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index f3001888415..0d5ed7b8849 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -337,7 +337,9 @@ void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr)
 					slurmd_logfile);
 	conf_ptr->slurmd_pidfile      = xstrdup(slurmctld_conf.
 					slurmd_pidfile);
+#ifndef MULTIPLE_SLURMD
 	conf_ptr->slurmd_port         = slurmctld_conf.slurmd_port;
+#endif
 	conf_ptr->slurmd_spooldir     = xstrdup(slurmctld_conf.
 					slurmd_spooldir);
 	conf_ptr->slurmd_timeout      = slurmctld_conf.slurmd_timeout;
diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c
index 14eddbc0177..bb6a7a6fad5 100644
--- a/src/slurmctld/read_config.c
+++ b/src/slurmctld/read_config.c
@@ -289,10 +289,12 @@ static int _parse_node_spec(char *in_line)
 #ifndef HAVE_FRONT_END	/* Fake node addresses for front-end */
 	char *this_node_addr;
 #endif
+	int port;
 
 	node_addr = node_name = state = feature = (char *) NULL;
 	cpus_val = real_memory_val = state_val = NO_VAL;
 	tmp_disk_val = weight_val = NO_VAL;
+	port = NO_VAL;
 	if ((error_code = load_string(&node_name, "NodeName=", in_line)))
 		return error_code;
 	if (node_name == NULL)
@@ -307,6 +309,9 @@ static int _parse_node_spec(char *in_line)
 				  "Feature=", 's', &feature,
 				  "NodeAddr=", 's', &node_addr,
 				  "NodeHostname=", 's', &node_hostname,
+#ifdef MULTIPLE_SLURMD
+				  "Port=", 'd', &port,
+#endif
 				  "Procs=", 'd', &cpus_val,
 				  "RealMemory=", 'd', &real_memory_val,
 				  "Reason=", 's', &reason,
@@ -396,6 +401,10 @@ static int _parse_node_spec(char *in_line)
 				default_config_record.feature = feature;
 				feature = NULL;
 			}
+#ifdef MULTIPLE_SLURMD
+			if (port != NO_VAL)
+				default_node_record.port = port;
+#endif
 			free(this_node_name);
 			break;
 		}
@@ -457,6 +466,10 @@ static int _parse_node_spec(char *in_line)
 			} else
 				strncpy(node_ptr->comm_name, 
 				        node_ptr->name, MAX_NAME_LEN);
+#endif
+#ifdef MULTIPLE_SLURMD
+			if (port != NO_VAL)
+				node_ptr->port = port;
 #endif
 			node_ptr->reason = xstrdup(reason);
 		} else {
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index 98a009ffc8f..a860c5dbf3d 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -171,6 +171,7 @@ struct node_record {
 	struct part_record **part_pptr;	/* array of pointers to partitions 
 					 * associated with this node*/
 	char comm_name[MAX_NAME_LEN];	/* communications path name to node */
+	uint16_t port;			/* TCP port number of the slurmd */
 	slurm_addr slurm_addr;		/* network address */
 	uint16_t comp_job_cnt;		/* count of jobs completing on node */
 	uint16_t run_job_cnt;		/* count of jobs running on node */
diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c
index b977908d784..587d1046cd4 100644
--- a/src/slurmd/slurmd/slurmd.c
+++ b/src/slurmd/slurmd/slurmd.c
@@ -67,7 +67,11 @@
 #include "src/slurmd/common/proctrack.h"
 #include "src/slurmd/common/task_plugin.h"
 
+#ifdef MULTIPLE_SLURMD
+#define GETOPT_ARGS	"L:Dvhcf:MN:P:"
+#else
 #define GETOPT_ARGS	"L:Dvhcf:M"
+#endif
 
 #ifndef MAXHOSTNAMELEN
 #  define MAXHOSTNAMELEN	64
@@ -105,7 +109,6 @@ static void      _create_msg_socket();
 static void      _msg_engine();
 static int       _slurmd_init();
 static int       _slurmd_fini();
-static void      _create_conf();
 static void      _init_conf();
 static void      _print_conf();
 static void      _read_config();
@@ -141,7 +144,7 @@ main (int argc, char *argv[])
 	 * Create and set default values for the slurmd global
 	 * config variable "conf"
 	 */
-	_create_conf();
+	conf = xmalloc(sizeof(slurmd_conf_t));
 	_init_conf();
 	conf->argv = &argv;
 	conf->argc = &argc;
@@ -473,7 +476,6 @@ _free_and_set(char **confvar, char *newval)
 /*
  * Read the slurm configuration file (slurm.conf) and substitute some
  * values into the slurmd configuration in preference of the defaults.
- *
  */
 static void
 _read_config()
@@ -489,7 +491,9 @@ _read_config()
 	if (conf->conffile == NULL)
 		conf->conffile = xstrdup(conf->cf.slurm_conf);
 
+#ifndef MULTIPLE_SLURMD
 	conf->port          =  conf->cf.slurmd_port;
+#endif
 	conf->slurm_user_id =  conf->cf.slurm_user_id;
 
 	path_pubkey = xstrdup(conf->cf.job_credential_public_certificate);
@@ -497,12 +501,23 @@ _read_config()
 	if (!conf->logfile)
 		conf->logfile = xstrdup(conf->cf.slurmd_logfile);
 
-	_free_and_set(&conf->node_name, get_conf_node_name(conf->hostname));
+	/* node_name may already be set from a command line parameter */
+	if (conf->node_name == NULL)
+		_free_and_set(&conf->node_name,
+			      get_conf_node_name(conf->hostname));
 	_free_and_set(&conf->epilog,   xstrdup(conf->cf.epilog));
 	_free_and_set(&conf->prolog,   xstrdup(conf->cf.prolog));
 	_free_and_set(&conf->tmpfs,    xstrdup(conf->cf.tmp_fs));
 	_free_and_set(&conf->spooldir, xstrdup(conf->cf.slurmd_spooldir));
+#ifdef MULTIPLE_SLURMD
+	/* append the NodeName to the spooldir to make it unique */
+	xstrfmtcat(conf->spooldir, ".%s", conf->node_name);
+#endif
 	_free_and_set(&conf->pidfile,  xstrdup(conf->cf.slurmd_pidfile));
+#ifdef MULTIPLE_SLURMD
+	/* append the NodeName to the pidfile name to make it unique */
+	xstrfmtcat(conf->pidfile, ".%s", conf->node_name);
+#endif
 	_free_and_set(&conf->task_prolog, xstrdup(conf->cf.task_prolog));
 	_free_and_set(&conf->task_epilog, xstrdup(conf->cf.task_epilog));
 	_free_and_set(&conf->pubkey,   path_pubkey);     
@@ -562,12 +577,6 @@ _print_conf()
 	debug3("TaskEpilog  = `%s'",     conf->task_epilog);
 }
 
-static void 
-_create_conf()
-{
-	conf = xmalloc(sizeof(slurmd_conf_t));
-}
-
 static void
 _init_conf()
 {
@@ -632,6 +641,14 @@ _process_cmdline(int ac, char **av)
 		case 'M':
 			conf->mlock_pages = 1;
 			break;
+#ifdef MULTIPLE_SLURMD
+		case 'N':
+			conf->node_name = xstrdup(optarg);
+			break;
+		case 'P':
+			conf->port = (uint16_t)atoi(optarg);
+			break;
+#endif
 		default:
 			_usage(c);
 			exit(1);
@@ -721,7 +738,7 @@ _slurmd_init()
 		/* 
 		 * Need to kill any running slurmd's here
 		 */
-		_kill_old_slurmd(); 
+		_kill_old_slurmd();
 
 		stepd_cleanup_sockets(conf->spooldir, conf->node_name);
 	}
diff --git a/src/srun/launch.c b/src/srun/launch.c
index f045455e4fd..d516cf176ea 100644
--- a/src/srun/launch.c
+++ b/src/srun/launch.c
@@ -115,7 +115,6 @@ launch(void *arg)
 	
 	debug("going to launch %d tasks on %d hosts", 
 	      opt.nprocs, job->step_layout->num_hosts);
-	debug("sending to slurmd port %d", slurm_get_slurmd_port());
 
 	msg_array_ptr = xmalloc(sizeof(launch_tasks_request_msg_t) 
 				* job->step_layout->num_hosts);
-- 
GitLab