From 62a781fbd24b492027610ab5f0f76f460a04e4e2 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Mon, 23 Feb 2009 20:10:38 +0000
Subject: [PATCH] get logic in place to allocate and deallocate ports for job
 steps

---
 src/slurmctld/controller.c  |   2 +
 src/slurmctld/port_mgr.c    | 105 +++++++++++++++++++++++++++++++++---
 src/slurmctld/port_mgr.h    |  10 +++-
 src/slurmctld/read_config.c |   2 +
 src/slurmctld/slurmctld.h   |   1 +
 src/slurmctld/step_mgr.c    |   4 +-
 6 files changed, 114 insertions(+), 10 deletions(-)

diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index b329153f2d7..4ffeb161dcc 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -87,6 +87,7 @@
 #include "src/slurmctld/licenses.h"
 #include "src/slurmctld/locks.h"
 #include "src/slurmctld/ping_nodes.h"
+#include "src/slurmctld/port_mgr.h"
 #include "src/slurmctld/proc_req.h"
 #include "src/slurmctld/read_config.h"
 #include "src/slurmctld/reservation.h"
@@ -574,6 +575,7 @@ int main(int argc, char *argv[])
 	resv_fini();
 	trigger_fini();
 	assoc_mgr_fini(slurmctld_conf.state_save_location);
+	reserve_port_config(NULL);
 
 	/* Some plugins are needed to purge job/node data structures,
 	 * unplug after other data structures are purged */
diff --git a/src/slurmctld/port_mgr.c b/src/slurmctld/port_mgr.c
index a87d3ece2fe..4a7f50a5aea 100644
--- a/src/slurmctld/port_mgr.c
+++ b/src/slurmctld/port_mgr.c
@@ -41,6 +41,9 @@
 #  include "config.h"
 #endif
 
+#include <string.h>
+#include <stdlib.h>
+
 #include "src/common/bitstring.h"
 #include "src/common/hostlist.h"
 #include "src/common/xmalloc.h"
@@ -53,13 +56,63 @@ int        port_resv_cnt   = 0;
 int        port_resv_min   = 0;
 int        port_resv_max   = 0;
 
+/* Configure the range of ports reservable by job steps from MpiParams.
+ * IN mpi_params - needs "ports:<min>-<max>"; NULL or no "ports:" frees memory
+ * RET SLURM_SUCCESS, or SLURM_ERROR (config unchanged) if range is invalid */
+extern int reserve_port_config(char *mpi_params)
+{
+	char *tmp_e=NULL, *tmp_p=NULL;
+	int i, p_min, p_max;
+
+	if (mpi_params)
+		tmp_p = strstr(mpi_params, "ports:");
+	if (tmp_p == NULL) {
+		if (port_resv_table) {
+			info("Clearing port reservations");
+			for (i=0; i<port_resv_cnt; i++)	/* free each bitmap too */
+				FREE_NULL_BITMAP(port_resv_table[i]);
+			xfree(port_resv_table);
+			port_resv_cnt = port_resv_min = port_resv_max = 0;
+		}
+		return SLURM_SUCCESS;
+	}
+
+	tmp_p += 6;	/* skip over "ports:" */
+	p_min = strtol(tmp_p, &tmp_e, 10);
+	if ((p_min < 1) || (tmp_e[0] != '-')) {
+		info("invalid MpiParams: %s", mpi_params);
+		return SLURM_ERROR;
+	}
+	tmp_e++;
+	p_max = strtol(tmp_e, NULL, 10);
+	if ((p_max < p_min) || (p_max > 65535)) {	/* ports are 16-bit */
+		info("invalid MpiParams: %s", mpi_params);
+		return SLURM_ERROR;
+	}
+	if ((p_min == port_resv_min) && (p_max == port_resv_max))
+		return SLURM_SUCCESS;	/* No change */
+
+	for (i=0; i<port_resv_cnt; i++)	/* old count: free old bitmaps first */
+		FREE_NULL_BITMAP(port_resv_table[i]);
+	xfree(port_resv_table);
+	port_resv_min = p_min;
+	port_resv_max = p_max;
+	port_resv_cnt = p_max - p_min + 1;
+	debug("Ports available for reservation %d-%d", p_min, p_max);
+	port_resv_table = xmalloc(sizeof(bitstr_t *) * port_resv_cnt);
+	for (i=0; i<port_resv_cnt; i++)
+		port_resv_table[i] = bit_alloc(node_record_count);
+/* FIXME: Rebuild record of reserved ports */
+	return SLURM_SUCCESS;
+}
+
 /* Reserve ports for a job step
  * RET SLURM_SUCCESS or an error code */
-extern int reserve_ports(struct step_record *step_ptr)
+extern int resv_port_alloc(struct step_record *step_ptr)
 {
 	int i, port_inx;
 	int *port_array = NULL;
-	char port_str[16];
+	char port_str[16], *tmp_str;
 	hostlist_t hl;
 
 	if (step_ptr->resv_port_cnt > port_resv_cnt) {
@@ -95,8 +148,8 @@ extern int reserve_ports(struct step_record *step_ptr)
 	for (i=0; i<port_inx; i++) {
 		bit_or(port_resv_table[port_array[i]], 
 		       step_ptr->step_node_bitmap);
-		snprintf(port_str, sizeof(port_str), 
-			 "%d", (port_array[i] + port_resv_min));
+		port_array[i] += port_resv_min;
+		snprintf(port_str, sizeof(port_str), "%d", port_array[i]);
 		hostlist_push(hl, port_str);
 	}
 	hostlist_sort(hl);
@@ -107,11 +160,47 @@ extern int reserve_ports(struct step_record *step_ptr)
 		xfree(step_ptr->resv_ports);
 	}
 	hostlist_destroy(hl);
-	xfree(port_array);
-	info("reserved ports %s for step %u.%u",
-	     step_ptr->resv_ports,
-	     step_ptr->job_ptr->job_id, step_ptr->step_id);
+	step_ptr->resv_port_array = port_array;
+
+	if (step_ptr->resv_ports[0] == '[') {
+		/* Remove brackets from hostlist */
+		i = strlen(step_ptr->resv_ports);
+		step_ptr->resv_ports[i-1] = '\0';
+		tmp_str = xmalloc(i);
+		strcpy(tmp_str, step_ptr->resv_ports + 1);
+		xfree(step_ptr->resv_ports);
+		step_ptr->resv_ports = tmp_str;
+	}
+
+	debug("reserved ports %s for step %u.%u",
+	      step_ptr->resv_ports,
+	      step_ptr->job_ptr->job_id, step_ptr->step_id);
 
 	return SLURM_SUCCESS;
 }
 
+/* Release reserved ports for a job step: clear the step's nodes from each
+ * reserved port's bitmap, then free resv_port_array. No return value. */
+extern void resv_port_free(struct step_record *step_ptr)
+{
+	int i, j;
+
+	if (step_ptr->resv_port_array == NULL)
+		return;		/* step holds no reserved ports */
+
+	bit_not(step_ptr->step_node_bitmap);	/* invert: use as clear mask */
+	for (i=0; i<step_ptr->resv_port_cnt; i++) {
+		if ((step_ptr->resv_port_array[i] < port_resv_min) ||
+		    (step_ptr->resv_port_array[i] > port_resv_max)) 
+			continue;	/* port not in the current table */
+		j = step_ptr->resv_port_array[i] - port_resv_min;
+		bit_and(port_resv_table[j], step_ptr->step_node_bitmap);
+		
+	}
+	bit_not(step_ptr->step_node_bitmap);	/* restore original bitmap */
+	xfree(step_ptr->resv_port_array);
+
+	debug("freed ports %s for step %u.%u",
+	      step_ptr->resv_ports,
+	      step_ptr->job_ptr->job_id, step_ptr->step_id);
+}
diff --git a/src/slurmctld/port_mgr.h b/src/slurmctld/port_mgr.h
index f9ebb209c79..af4a87a4185 100644
--- a/src/slurmctld/port_mgr.h
+++ b/src/slurmctld/port_mgr.h
@@ -42,8 +42,16 @@
 
 #include "src/slurmctld/slurmctld.h"
 
+/* Configure reserved ports.
+ * Call with mpi_params==NULL to free memory */
+extern int reserve_port_config(char *mpi_params);
+
 /* Reserve ports for a job step
  * RET SLURM_SUCCESS or an error code */
-extern int reserve_ports(struct step_record *step_ptr);
+extern int resv_port_alloc(struct step_record *step_ptr);
+
+/* Release reserved ports for a job step.
+ * No return value; does nothing if the step holds no reserved port array */
+extern void resv_port_free(struct step_record *step_ptr);
 
 #endif	/* !_HAVE_PORT_MGR_H */
diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c
index 226dca999ee..db9e746b26c 100644
--- a/src/slurmctld/read_config.c
+++ b/src/slurmctld/read_config.c
@@ -75,6 +75,7 @@
 #include "src/slurmctld/licenses.h"
 #include "src/slurmctld/locks.h"
 #include "src/slurmctld/node_scheduler.h"
+#include "src/slurmctld/port_mgr.h"
 #include "src/slurmctld/proc_req.h"
 #include "src/slurmctld/read_config.h"
 #include "src/slurmctld/reservation.h"
@@ -851,6 +852,7 @@ int read_slurm_conf(int recover)
 
 	if ((rc = _build_bitmaps()))
 		fatal("_build_bitmaps failure");
+	reserve_port_config(conf->mpi_params);
 
 	license_free();
 	if (license_init(slurmctld_conf.licenses) != SLURM_SUCCESS)
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index f0992f8979f..28243c66ba8 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -555,6 +555,7 @@ struct 	step_record {
 	uint8_t no_kill;		/* 1 if no kill on node failure */
 	uint16_t port;			/* port for srun communications */
 	time_t pre_sus_time;		/* time step ran prior to last suspend */
+	int *resv_port_array;		/* reserved port numbers */
 	uint16_t resv_port_cnt;		/* count of ports reserved per node */
 	char *resv_ports;		/* ports reserved for job */
 	time_t start_time;      	/* step allocation time */
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index cde3faa1a51..d36d5fb8750 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -164,6 +164,7 @@ static void _free_step_rec(struct step_record *step_ptr)
 	FREE_NULL_BITMAP(step_ptr->core_bitmap_job);
 	FREE_NULL_BITMAP(step_ptr->exit_node_bitmap);
 	FREE_NULL_BITMAP(step_ptr->step_node_bitmap);
+	xfree(step_ptr->resv_port_array);
 	xfree(step_ptr->resv_ports);
 	xfree(step_ptr->network);
 	xfree(step_ptr->ckpt_path);
@@ -201,6 +202,7 @@ delete_step_record (struct job_record *job_ptr, uint32_t step_id)
 					step_ptr->step_layout->node_list);
 				switch_free_jobinfo (step_ptr->switch_job);
 			}
+			resv_port_free(step_ptr);
 			checkpoint_free_jobinfo (step_ptr->check_job);
 			_free_step_rec(step_ptr);
 			error_code = 0;
@@ -1347,7 +1349,7 @@ step_create(job_step_create_request_msg_t *step_specs,
 		}
 		if (step_specs->resv_port_cnt != (uint16_t) NO_VAL) {
 			step_ptr->resv_port_cnt = step_specs->resv_port_cnt;
-			i = reserve_ports(step_ptr);
+			i = resv_port_alloc(step_ptr);
 			if (i != SLURM_SUCCESS) {
 				delete_step_record (job_ptr, step_ptr->step_id);
 				return i;
-- 
GitLab