From 995aa09ff1f76e513c71707feb0606348e6ab3a0 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Tue, 18 May 2010 22:24:09 +0000
Subject: [PATCH] Add logic to save/restore job gres details (the allocated
 bitmaps) and use them to sync node state info on slurmctld restart.
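
Each gres plugin's pack_job_state()/unpack_job_state() now serializes,
for every node in a job's allocation, the bitmap of specific devices
allocated to the job. On slurmctld restart or reconfiguration, a new
_gres_reconfig() in read_config.c clears all node gres allocations and
replays the saved bitmaps for every running or suspended job, so node
gres state again matches the recovered job state. Node gres lists are
also preserved across reconfiguration in _restore_node_state() and
restored from saved state in load_all_node_state().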

---
 src/common/gres.c               |  3 ++-
 src/common/node_conf.c          |  2 +-
 src/plugins/gres/gpu/gres_gpu.c | 29 ++++++++++---
 src/plugins/gres/nic/gres_nic.c | 29 ++++++++++---
 src/slurmctld/node_mgr.c        |  3 +++
 src/slurmctld/read_config.c     | 82 +++++++++++++++++++++++++++++----
 6 files changed, 133 insertions(+), 15 deletions(-)

diff --git a/src/common/gres.c b/src/common/gres.c
index b7be1450141..d5933fdff32 100644
--- a/src/common/gres.c
+++ b/src/common/gres.c
@@ -1173,7 +1173,8 @@ extern int gres_plugin_pack_job_state(List gres_list, Buf buffer,
 			if (rc2 != SLURM_SUCCESS) {
 				rc = rc2;
 				set_buf_offset(buffer, header_offset);
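+				/* Skip this gres record, pack the rest */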
-				break;
+				continue;
 			}
 			tail_offset = get_buf_offset(buffer);
 			set_buf_offset(buffer, size_offset);
diff --git a/src/common/node_conf.c b/src/common/node_conf.c
index 4b30ad8d93a..42a00f14466 100644
--- a/src/common/node_conf.c
+++ b/src/common/node_conf.c
@@ -462,7 +462,7 @@ static int _list_find_feature (void *feature_entry, void *key)
 }
 
 /*
- * _build_all_nodeline_info - get a array of slurm_conf_node_t structures
+ * build_all_nodeline_info - get an array of slurm_conf_node_t structures
  *	from the slurm.conf reader, build table, and set values
  * IN set_bitmap - if true, set node_bitmap in config record (used by slurmd)
  * RET 0 if no error, error code otherwise
diff --git a/src/plugins/gres/gpu/gres_gpu.c b/src/plugins/gres/gpu/gres_gpu.c
index 9caa5f8f9b7..de61ea95208 100644
--- a/src/plugins/gres/gpu/gres_gpu.c
+++ b/src/plugins/gres/gpu/gres_gpu.c
@@ -645,16 +645,23 @@ extern int job_gres_validate(char *config, void **gres_data)
 
 extern int pack_job_state(void *gres_data, Buf buffer)
 {
+	int i;
 	gpu_job_state_t *gres_ptr = (gpu_job_state_t *) gres_data;
 
-	pack32(gres_ptr->gpu_cnt_alloc,  buffer);
+	pack32(gres_ptr->gpu_cnt_alloc, buffer);
 	pack8 (gres_ptr->gpu_cnt_mult,  buffer);
 
+	pack32(gres_ptr->node_cnt,      buffer);
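+	/* One bitmap of allocated devices per node in the job */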
+	for (i=0; i<gres_ptr->node_cnt; i++)
+		pack_bit_str(gres_ptr->gpu_bit_alloc[i], buffer);
+
 	return SLURM_SUCCESS;
 }
 
 extern int unpack_job_state(void **gres_data, Buf buffer)
 {
+	int i;
 	gpu_job_state_t *gres_ptr;
 
 	gres_ptr = xmalloc(sizeof(gpu_job_state_t));
@@ -662,12 +669,28 @@ extern int unpack_job_state(void **gres_data, Buf buffer)
 	if (buffer) {
 		safe_unpack32(&gres_ptr->gpu_cnt_alloc,  buffer);
 		safe_unpack8 (&gres_ptr->gpu_cnt_mult,   buffer);
+
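+		/* Recover the per-node allocated device bitmaps */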
+		safe_unpack32(&gres_ptr->node_cnt,       buffer);
+		gres_ptr->gpu_bit_alloc = xmalloc(sizeof(bitstr_t *) *
+						  (gres_ptr->node_cnt + 1));
+		for (i=0; i<gres_ptr->node_cnt; i++)
+			unpack_bit_str(&gres_ptr->gpu_bit_alloc[i], buffer);
 	}
 
 	*gres_data = gres_ptr;
 	return SLURM_SUCCESS;
 
 unpack_error:
+	error("Unpacking %s job state info", plugin_name);
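+	/* Free any bitmaps recovered before the failure */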
+	if (gres_ptr->gpu_bit_alloc) {
+		for (i=0; i<gres_ptr->node_cnt; i++) {
+			if (gres_ptr->gpu_bit_alloc[i])
+				bit_free(gres_ptr->gpu_bit_alloc[i]);
+		}
+		xfree(gres_ptr->gpu_bit_alloc);
+	}
 	xfree(gres_ptr);
 	*gres_data = NULL;
 	return SLURM_ERROR;
@@ -719,8 +742,8 @@ extern int job_alloc(void *job_gres_data, void *node_gres_data,
 			      plugin_name);
 			xfree(job_gres_ptr->gpu_bit_alloc);
 		}
-		job_gres_ptr->gpu_bit_alloc = 
-					xmalloc(sizeof(bitstr_t *) * node_cnt);
+		job_gres_ptr->gpu_bit_alloc = xmalloc(sizeof(bitstr_t *) *
+						      (node_cnt + 1));
 	} else if (job_gres_ptr->node_cnt < node_cnt) {
 		error("%s: node_cnt increase from %u to %d",
 		      plugin_name, job_gres_ptr->node_cnt, node_cnt);
diff --git a/src/plugins/gres/nic/gres_nic.c b/src/plugins/gres/nic/gres_nic.c
index 631f3a67643..e15fc7c47f8 100644
--- a/src/plugins/gres/nic/gres_nic.c
+++ b/src/plugins/gres/nic/gres_nic.c
@@ -645,16 +645,23 @@ extern int job_gres_validate(char *config, void **gres_data)
 
 extern int pack_job_state(void *gres_data, Buf buffer)
 {
+	int i;
 	nic_job_state_t *gres_ptr = (nic_job_state_t *) gres_data;
 
-	pack32(gres_ptr->nic_cnt_alloc,  buffer);
+	pack32(gres_ptr->nic_cnt_alloc, buffer);
 	pack8 (gres_ptr->nic_cnt_mult,  buffer);
 
+	pack32(gres_ptr->node_cnt,      buffer);
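+	/* One bitmap of allocated devices per node in the job */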
+	for (i=0; i<gres_ptr->node_cnt; i++)
+		pack_bit_str(gres_ptr->nic_bit_alloc[i], buffer);
+
 	return SLURM_SUCCESS;
 }
 
 extern int unpack_job_state(void **gres_data, Buf buffer)
 {
+	int i;
 	nic_job_state_t *gres_ptr;
 
 	gres_ptr = xmalloc(sizeof(nic_job_state_t));
@@ -662,12 +669,28 @@ extern int unpack_job_state(void **gres_data, Buf buffer)
 	if (buffer) {
 		safe_unpack32(&gres_ptr->nic_cnt_alloc,  buffer);
 		safe_unpack8 (&gres_ptr->nic_cnt_mult,   buffer);
+
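+		/* Recover the per-node allocated device bitmaps */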
+		safe_unpack32(&gres_ptr->node_cnt,       buffer);
+		gres_ptr->nic_bit_alloc = xmalloc(sizeof(bitstr_t *) *
+						  (gres_ptr->node_cnt + 1));
+		for (i=0; i<gres_ptr->node_cnt; i++)
+			unpack_bit_str(&gres_ptr->nic_bit_alloc[i], buffer);
 	}
 
 	*gres_data = gres_ptr;
 	return SLURM_SUCCESS;
 
 unpack_error:
+	error("Unpacking %s job state info", plugin_name);
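+	/* Free any bitmaps recovered before the failure */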
+	if (gres_ptr->nic_bit_alloc) {
+		for (i=0; i<gres_ptr->node_cnt; i++) {
+			if (gres_ptr->nic_bit_alloc[i])
+				bit_free(gres_ptr->nic_bit_alloc[i]);
+		}
+		xfree(gres_ptr->nic_bit_alloc);
+	}
 	xfree(gres_ptr);
 	*gres_data = NULL;
 	return SLURM_ERROR;
@@ -719,8 +742,8 @@ extern int job_alloc(void *job_gres_data, void *node_gres_data,
 			      plugin_name);
 			xfree(job_gres_ptr->nic_bit_alloc);
 		}
-		job_gres_ptr->nic_bit_alloc = 
-					xmalloc(sizeof(bitstr_t *) * node_cnt);
+		job_gres_ptr->nic_bit_alloc = xmalloc(sizeof(bitstr_t *) *
+						      (node_cnt + 1));
 	} else if (job_gres_ptr->node_cnt < node_cnt) {
 		error("%s: node_cnt increase from %u to %d",
 		      plugin_name, job_gres_ptr->node_cnt, node_cnt);
diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c
index c99b21bbd3d..8753bf89bfc 100644
--- a/src/slurmctld/node_mgr.c
+++ b/src/slurmctld/node_mgr.c
@@ -435,6 +435,9 @@ extern int load_all_node_state ( bool state_only )
 				node_ptr->reason_time = reason_time;
 				node_ptr->reason_uid = reason_uid;
 			}
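+			/* Attach the recovered gres state to the node */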
+			node_ptr->gres_list	= gres_list;
+			gres_list		= NULL;	/* Nothing to free */
 		} else {
 			node_cnt++;
 			if ((!power_save_mode) &&
diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c
index 3201d5faab0..16a01dbc88f 100644
--- a/src/slurmctld/read_config.c
+++ b/src/slurmctld/read_config.c
@@ -92,6 +92,7 @@
 static void _acct_restore_active_jobs(void);
 static int  _build_bitmaps(void);
 static void _build_bitmaps_pre_select(void);
+static void _gres_reconfig(bool reconfig);
 static int  _init_all_slurm_conf(void);
 static int  _preserve_select_type_param(slurm_ctl_conf_t * ctl_conf_ptr,
 					uint16_t old_select_type_p);
@@ -627,7 +628,6 @@ int read_slurm_conf(int recover, bool reconfig)
 	char *state_save_dir      = xstrdup(slurmctld_conf.state_save_location);
 	char *mpi_params;
 	uint16_t old_select_type_p = slurmctld_conf.select_type_param;
-	bool gres_changed = false;
 
 	/* initialization */
 	START_TIMER;
@@ -670,7 +670,7 @@ int read_slurm_conf(int recover, bool reconfig)
 	if (slurm_topo_init() != SLURM_SUCCESS)
 		fatal("Failed to initialize topology plugin");
 
-	/* Build node and partittion information based upon slurm.conf file */
+	/* Build node and partition information based upon slurm.conf file */
 	_build_all_nodeline_info();
 	_handle_all_downnodes();
 	_build_all_partitionline_info();
@@ -809,8 +809,7 @@ int read_slurm_conf(int recover, bool reconfig)
 #endif
 
 	/* Sync select plugin with synchronized job/node/part data */
-	if (reconfig)
-		gres_plugin_reconfig(&gres_changed);
+	_gres_reconfig(reconfig);
 	select_g_reconfigure();
 
 	slurmctld_conf.last_update = time(NULL);
@@ -818,6 +817,68 @@
 	return error_code;
 }
 
+static void _gres_reconfig(bool reconfig)
+{
+	struct node_record *node_ptr;
+	struct job_record *job_ptr;
+	ListIterator job_iterator;
+	int i, i_first, i_last, node_offset;
+	bool gres_active, gres_changed = false;
+	char *plugin_names;
+
+	if (reconfig)
+		gres_plugin_reconfig(&gres_changed);
+
+	plugin_names = slurm_get_gres_plugins();
+	if (plugin_names && plugin_names[0])
+		gres_active = true;
+	else
+		gres_active = false;
+	xfree(plugin_names);
+	if (!gres_active)
+		return;
+
+	/* Clear existing node Gres allocations */
+	for (i = 0, node_ptr = node_record_table_ptr; i < node_record_count;
+	     i++, node_ptr++) {
+		gres_plugin_node_state_dealloc(node_ptr->gres_list);
+	}
+
+	/* Reallocate job gres to the nodes */
+	job_iterator = list_iterator_create(job_list);
+	while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
+		if (!IS_JOB_RUNNING(job_ptr) && !IS_JOB_SUSPENDED(job_ptr))
+			continue;
+		if (job_ptr->job_resrcs == NULL)
+			continue;
+
+		i_first = bit_ffs(job_ptr->job_resrcs->node_bitmap);
+		i_last  = bit_fls(job_ptr->job_resrcs->node_bitmap);
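+		/* If the allocation is empty, force an empty loop range */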
+		if (i_first == -1)
+			i_last = -2;
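+		/* node_offset is this node's index within the job's
+		 * per-node gres bitmaps (job_resrcs node ordering) */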
+		node_offset = -1;
+		for (i = i_first; i <= i_last; i++) {
+			if (!bit_test(job_ptr->job_resrcs->node_bitmap, i))
+				continue;
+			node_offset++;
+			if (!bit_test(job_ptr->node_bitmap, i))
+				continue;
+			node_ptr = node_record_table_ptr + i;
+			gres_plugin_node_state_realloc(job_ptr->gres_list,
+						       node_offset,
+						       node_ptr->gres_list);
+		}
+	}
+	list_iterator_destroy(job_iterator);
+
+	for (i = 0, node_ptr = node_record_table_ptr; i < node_record_count;
+	     i++, node_ptr++) {
+		gres_plugin_node_state_log(node_ptr->gres_list, node_ptr->name);
+	}
+}
 
 /* Restore node state and size information from saved records which match
  * the node registration message. If a node was re-configured to be down or
@@ -874,12 +935,19 @@ static int _restore_node_state(int recover,
 			      node_ptr->config_ptr->cpus);
 		}
 		node_ptr->cpus          = old_node_ptr->cpus;
-		node_ptr->sockets       = old_node_ptr->sockets;
 		node_ptr->cores         = old_node_ptr->cores;
+		node_ptr->sockets       = old_node_ptr->sockets;
 		node_ptr->threads       = old_node_ptr->threads;
 		node_ptr->real_memory   = old_node_ptr->real_memory;
 		node_ptr->tmp_disk      = old_node_ptr->tmp_disk;
 		node_ptr->weight        = old_node_ptr->weight;
+
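+		/* Carry the node's gres state across reconfiguration */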
+		if (node_ptr->gres_list)
+			list_destroy(node_ptr->gres_list);
+		node_ptr->gres_list = old_node_ptr->gres_list;
+		old_node_ptr->gres_list = NULL;
+
 		if (node_ptr->reason == NULL) {
 			/* Recover only if not explicitly set in slurm.conf */
 			node_ptr->reason = old_node_ptr->reason;
@@ -1335,8 +1403,8 @@ static int _sync_nodes_to_active_job(struct job_record *job_ptr)
 			cnt++;
 		} else if (IS_NODE_IDLE(node_ptr)) {
 			cnt++;
-			node_ptr->node_state =
-				NODE_STATE_ALLOCATED | node_flags;
+			node_ptr->node_state = NODE_STATE_ALLOCATED |
+					       node_flags;
 		}
 	}
 	return cnt;
-- 
GitLab