From 62631b3fa3f172b716f9860291bb0da075003b3d Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Fri, 8 Jan 2016 15:51:47 -0800
Subject: [PATCH] Add active feature list infrastructure

---
 src/common/node_conf.c      | 152 ++++++++++++++++++++++++++++++++++--
 src/common/node_conf.h      |  13 ++-
 src/slurmctld/node_mgr.c    |  76 +++++++++++++-----
 src/slurmctld/read_config.c |  12 ++-
 4 files changed, 221 insertions(+), 32 deletions(-)

diff --git a/src/common/node_conf.c b/src/common/node_conf.c
index 4340c21a866..1544686b7e1 100644
--- a/src/common/node_conf.c
+++ b/src/common/node_conf.c
@@ -89,13 +89,17 @@ time_t last_node_update = (time_t) 0;	/* time of last update */
 struct node_record *node_record_table_ptr = NULL;	/* node records */
 xhash_t* node_hash_table = NULL;
 int node_record_count = 0;		/* count in node_record_table_ptr */
-
 uint16_t *cr_node_num_cores = NULL;
 uint32_t *cr_node_cores_offset = NULL;
 
-static void	_add_config_feature(char *feature, bitstr_t *node_bitmap);
+/* Local function definitions */
+static void	_add_config_feature(List feature_list, char *feature,
+				    bitstr_t *node_bitmap);
+static void	_add_config_feature_inx(List feature_list, char *feature,
+					int node_inx);
 static int	_build_single_nodeline_info(slurm_conf_node_t *node_ptr,
 					    struct config_record *config_ptr);
+static void	_copy_feature_list(void);
 static int	_delete_config_record (void);
 #if _DEBUG
 static void	_dump_hash (void);
@@ -110,14 +114,15 @@ static int	_list_find_config (void *config_entry, void *key);
 static int	_list_find_feature (void *feature_entry, void *key);
 
 
-static void _add_config_feature(char *feature, bitstr_t *node_bitmap)
+static void	_add_config_feature(List feature_list, char *feature,
+				    bitstr_t *node_bitmap)
 {
 	node_feature_t *feature_ptr;
 	ListIterator feature_iter;
 	bool match = false;
 
 	/* If feature already in avail_feature_list, just update the bitmap */
-	feature_iter = list_iterator_create(avail_feature_list);
+	feature_iter = list_iterator_create(feature_list);
 	while ((feature_ptr = (node_feature_t *) list_next(feature_iter))) {
 		if (strcmp(feature, feature_ptr->name))
 			continue;
@@ -132,10 +137,37 @@ static void _add_config_feature(char *feature, bitstr_t *node_bitmap)
 		feature_ptr->magic = FEATURE_MAGIC;
 		feature_ptr->name = xstrdup(feature);
 		feature_ptr->node_bitmap = bit_copy(node_bitmap);
-		list_append(avail_feature_list, feature_ptr);
+		list_append(feature_list, feature_ptr);
 	}
 }
 
+static void	_add_config_feature_inx(List feature_list, char *feature,
+					int node_inx)
+{
+	node_feature_t *feature_ptr;
+	ListIterator feature_iter;
+	bool match = false;
+
+	/* If feature already in feature_list, just set this node's bit */
+	feature_iter = list_iterator_create(feature_list);
+	while ((feature_ptr = (node_feature_t *) list_next(feature_iter))) {
+		if (strcmp(feature, feature_ptr->name))
+			continue;
+		bit_set(feature_ptr->node_bitmap, node_inx);
+		match = true;
+		break;
+	}
+	list_iterator_destroy(feature_iter);
+
+	if (!match) {	/* Need to create new feature_list record */
+		feature_ptr = xmalloc(sizeof(node_feature_t));
+		feature_ptr->magic = FEATURE_MAGIC;
+		feature_ptr->name = xstrdup(feature);
+		feature_ptr->node_bitmap = bit_alloc(node_record_count);
+		bit_set(feature_ptr->node_bitmap, node_inx);
+		list_append(feature_list, feature_ptr);
+	}
+}
 
 /*
  * _build_single_nodeline_info - From the slurm.conf reader, build table,
@@ -692,8 +724,111 @@ extern int build_all_nodeline_info (bool set_bitmap)
 	return max_rc;
 }
 
-/* Given a config_record with it's bitmap already set,
- * build avail_feature_list */
+/* Rebuild active_feature_list for given node bitmap */
+extern void  build_active_feature_list(bitstr_t *node_bitmap,
+				       char *active_features)
+{
+	node_feature_t *feature_ptr;
+	ListIterator feature_iter;
+	char *tmp_str, *token, *last = NULL;
+
+	/* Clear these nodes from every feature record (node_bitmap is
+	 * temporarily inverted to act as the mask), then restore as needed */
+	feature_iter = list_iterator_create(active_feature_list);
+	bit_not(node_bitmap);
+	while ((feature_ptr = (node_feature_t *) list_next(feature_iter))) {
+		bit_and(feature_ptr->node_bitmap, node_bitmap);
+	}
+	list_iterator_destroy(feature_iter);
+	bit_not(node_bitmap);
+
+	if (active_features) {
+		tmp_str = xstrdup(active_features);
+		token = strtok_r(tmp_str, ",", &last);
+		while (token) {
+			_add_config_feature(active_feature_list, token,
+					    node_bitmap);
+			token = strtok_r(NULL, ",", &last);
+		}
+		xfree(tmp_str);
+	}
+}
+
+/* Clear active_feature_list,
+ * then copy avail_feature_list into active_feature_list */
+static void _copy_feature_list(void)
+{
+	node_feature_t *active_feature_ptr, *avail_feature_ptr;
+	ListIterator feature_iter;
+
+	(void) list_delete_all(active_feature_list, &_list_find_feature, NULL);
+
+	feature_iter = list_iterator_create(avail_feature_list);
+	while ((avail_feature_ptr = (node_feature_t *)list_next(feature_iter))){
+		active_feature_ptr = xmalloc(sizeof(node_feature_t));
+		active_feature_ptr->magic = FEATURE_MAGIC;
+		active_feature_ptr->name = xstrdup(avail_feature_ptr->name);
+		active_feature_ptr->node_bitmap =
+			bit_copy(avail_feature_ptr->node_bitmap);
+		list_append(active_feature_list, active_feature_ptr);
+	}
+	list_iterator_destroy(feature_iter);
+}
+
+/* Rebuild active_feature_list for given node index,
+ * IN node_inx - Node index, if -1 then copy avail_feature_list into
+ *		 active_feature_list, if -2 then log state (_DEBUG builds only)
+ */
+extern void  build_active_feature_list2(int node_inx, char *active_features)
+{
+	node_feature_t *feature_ptr;
+	ListIterator feature_iter;
+	char *tmp_str, *token, *last = NULL;
+
+	if (node_inx == -1) {
+		_copy_feature_list();
+		return;
+	}
+	if (node_inx == -2) {
+#if _DEBUG
+		feature_iter = list_iterator_create(active_feature_list);
+		while ((feature_ptr = (node_feature_t *)
+		        list_next(feature_iter))) {
+			info("ACTIVE FEATURE: NAME:%s CNT:%d",
+			     feature_ptr->name,
+			     bit_set_count(feature_ptr->node_bitmap));
+		}
+		list_iterator_destroy(feature_iter);
+#endif
+		return;
+	}
+
+	if ((node_inx < 0) || (node_inx >= node_record_count)) {
+		error("%s: Invalid node_inx:%d", __func__, node_inx);
+		return;
+	}
+
+	/* Clear this node from the feature_list record,
+	 * then restore as needed */
+	feature_iter = list_iterator_create(active_feature_list);
+	while ((feature_ptr = (node_feature_t *) list_next(feature_iter))) {
+		bit_clear(feature_ptr->node_bitmap, node_inx);
+	}
+	list_iterator_destroy(feature_iter);
+
+	if (active_features) {
+		tmp_str = xstrdup(active_features);
+		token = strtok_r(tmp_str, ",", &last);
+		while (token) {
+			_add_config_feature_inx(active_feature_list, token,
+						node_inx);
+			token = strtok_r(NULL, ",", &last);
+		}
+		xfree(tmp_str);
+	}
+}
+
+/* Rebuild avail_feature_list for given node configuration structure */
 extern void  build_avail_feature_list(struct config_record *config_ptr)
 {
 	node_feature_t *feature_ptr;
@@ -714,7 +849,8 @@ extern void  build_avail_feature_list(struct config_record *config_ptr)
 		tmp_str = xstrdup(config_ptr->feature);
 		token = strtok_r(tmp_str, ",", &last);
 		while (token) {
-			_add_config_feature(token, config_ptr->node_bitmap);
+			_add_config_feature(avail_feature_list, token,
+					    config_ptr->node_bitmap);
 			token = strtok_r(NULL, ",", &last);
 		}
 		xfree(tmp_str);
diff --git a/src/common/node_conf.h b/src/common/node_conf.h
index bef76371e69..78efa433b68 100644
--- a/src/common/node_conf.h
+++ b/src/common/node_conf.h
@@ -240,8 +240,17 @@ extern int build_all_nodeline_info (bool set_bitmap);
  */
 extern int build_all_frontend_info (bool is_slurmd_context);
 
-/* Given a config_record with it's bitmap already set,
- * build avail_feature_list */
+/* Rebuild active_feature_list for given node bitmap */
+extern void  build_active_feature_list(bitstr_t *node_bitmap,
+				       char *active_features);
+
+/* Rebuild active_feature_list for given node index,
+ * IN node_inx - Node index, if -1 then copy avail_feature_list into
+ *		 active_feature_list, if -2 then log state
+ */
+extern void  build_active_feature_list2(int node_inx, char *active_features);
+
+/* Rebuild avail_feature_list for given node configuration structure */
 extern void  build_avail_feature_list(struct config_record *config_ptr);
 
 /*
diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c
index d76bda53087..780a9489705 100644
--- a/src/slurmctld/node_mgr.c
+++ b/src/slurmctld/node_mgr.c
@@ -111,7 +111,10 @@ static void 	_pack_node(struct node_record *dump_node_ptr, Buf buffer,
 static void	_sync_bitmaps(struct node_record *node_ptr, int job_count);
 static void	_update_config_ptr(bitstr_t *bitmap,
 				struct config_record *config_ptr);
-static int	_update_node_features(char *node_names, char *features);
+static int	_update_node_active_features(char *node_names,
+				char *active_features);
+static int	_update_node_avail_features(char *node_names,
+				char *avail_features);
 static int	_update_node_gres(char *node_names, char *gres);
 static int	_update_node_weight(char *node_names, uint32_t weight);
 static bool 	_valid_node_state_change(uint32_t old, uint32_t new);
@@ -1347,7 +1350,8 @@ int update_node ( update_node_msg_t * update_node_msg )
 				node_ptr->features =
 					xstrdup(update_node_msg->features);
 			}
-			/* _update_node_features() logs and updates config */
+			/* _update_node_avail_features() logs and updates
+			 * avail_feature_list */
 		}
 
 		if (update_node_msg->features_act &&
@@ -1363,7 +1367,8 @@ int update_node ( update_node_msg_t * update_node_msg )
 				node_ptr->features_act =
 					xstrdup(update_node_msg->features_act);
 			}
-			/* _update_node_features() logs and updates config */
+			/* _update_node_active_features() logs and updates
+			 * active_feature_list */
 		}
 
 		if (update_node_msg->gres) {
@@ -1614,9 +1619,15 @@ int update_node ( update_node_msg_t * update_node_msg )
 	FREE_NULL_HOSTLIST(hostname_list);
 	last_node_update = now;
 
+	if ((error_code == 0) && (update_node_msg->features_act)) {
+		error_code = _update_node_active_features(
+					update_node_msg->node_names,
+					update_node_msg->features_act);
+	}
 	if ((error_code == 0) && (update_node_msg->features)) {
-		error_code = _update_node_features(update_node_msg->node_names,
-						   update_node_msg->features);
+		error_code = _update_node_avail_features(
+					update_node_msg->node_names,
+					update_node_msg->features);
 	}
 	if ((error_code == 0) && (update_node_msg->gres)) {
 		error_code = _update_node_gres(update_node_msg->node_names,
@@ -1668,8 +1679,8 @@ extern void restore_node_features(int recover)
 			error("Node %s Features(%s) differ from slurm.conf",
 			      node_ptr->name, node_ptr->features);
 			if (recover == 2) {
-				_update_node_features(node_ptr->name,
-						      node_ptr->features);
+				_update_node_avail_features(node_ptr->name,
+							    node_ptr->features);
 			} else {
 				xfree(node_ptr->features);
 				node_ptr->features = xstrdup(node_ptr->
@@ -1677,7 +1688,7 @@ extern void restore_node_features(int recover)
 							     feature);
 			}
 		}
-
+//FIXME ??
 		/* We lose the gres information updated manually and always
 		 * use the information from slurm.conf */
 		(void) gres_plugin_node_reconfig(node_ptr->name,
@@ -1762,7 +1773,6 @@ static int _update_node_weight(char *node_names, uint32_t weight)
 			new_config_ptr->node_bitmap = bit_copy(tmp_bitmap);
 			new_config_ptr->nodes = bitmap2node_name(tmp_bitmap);
 
-			build_avail_feature_list(new_config_ptr);
 			_update_config_ptr(tmp_bitmap, new_config_ptr);
 
 			/* Update remaining records */
@@ -1783,13 +1793,37 @@ static int _update_node_weight(char *node_names, uint32_t weight)
 }
 
 /*
- * _update_node_features - Update features associated with nodes
- *	build new config list records as needed
+ * _update_node_active_features - Update active features associated with nodes
  * IN node_names - List of nodes to update
- * IN features - New features value
+ * IN active_features - New active features value
  * RET: SLURM_SUCCESS or error code
  */
-static int _update_node_features(char *node_names, char *features)
+static int _update_node_active_features(char *node_names, char *active_features)
+{
+	bitstr_t *node_bitmap = NULL;
+	int rc;
+
+	rc = node_name2bitmap(node_names, false, &node_bitmap);
+	if (rc) {
+		info("%s: invalid node_name (%s)", __func__, node_names);
+		return rc;
+	}
+	build_active_feature_list(node_bitmap, active_features);
+	FREE_NULL_BITMAP(node_bitmap);
+
+	info("%s: nodes %s active features set to: %s",
+	     __func__, node_names, active_features);
+	return SLURM_SUCCESS;
+}
+
+/*
+ * _update_node_avail_features - Update available features associated with
+ *	nodes, build new config list records as needed
+ * IN node_names - List of nodes to update
+ * IN avail_features - New available features value
+ * RET: SLURM_SUCCESS or error code
+ */
+static int _update_node_avail_features(char *node_names, char *avail_features)
 {
 	bitstr_t *node_bitmap = NULL, *tmp_bitmap;
 	ListIterator config_iterator;
@@ -1799,7 +1833,7 @@ static int _update_node_features(char *node_names, char *features)
 
 	rc = node_name2bitmap(node_names, false, &node_bitmap);
 	if (rc) {
-		info("_update_node_features: invalid node_name");
+		info("%s: invalid node_name (%s)", __func__, node_names);
 		return rc;
 	}
 
@@ -1821,8 +1855,8 @@ static int _update_node_features(char *node_names, char *features)
 		} else if (tmp_cnt == config_cnt) {
 			/* all nodes changed, update in situ */
 			xfree(config_ptr->feature);
-			if (features && features[0])
-				config_ptr->feature = xstrdup(features);
+			if (avail_features && avail_features[0])
+				config_ptr->feature = xstrdup(avail_features);
 			build_avail_feature_list(config_ptr);
 		} else {
 			/* partial update, split config_record */
@@ -1830,8 +1864,10 @@ static int _update_node_features(char *node_names, char *features)
 			if (first_new == NULL)
 				first_new = new_config_ptr;
 			xfree(new_config_ptr->feature);
-			if (features && features[0])
-				new_config_ptr->feature = xstrdup(features);
+			if (avail_features && avail_features[0]) {
+				new_config_ptr->feature =
+					xstrdup(avail_features);
+			}
 			new_config_ptr->node_bitmap = bit_copy(tmp_bitmap);
 			new_config_ptr->nodes = bitmap2node_name(tmp_bitmap);
 
@@ -1850,8 +1886,8 @@ static int _update_node_features(char *node_names, char *features)
 	list_iterator_destroy(config_iterator);
 	FREE_NULL_BITMAP(node_bitmap);
 
-	info("_update_node_features: nodes %s features set to: %s",
-		node_names, features);
+	info("%s: nodes %s available features set to: %s",
+	     __func__, node_names, avail_features);
 	return SLURM_SUCCESS;
 }
 
diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c
index 196eff948f0..b2d9d03fad0 100644
--- a/src/slurmctld/read_config.c
+++ b/src/slurmctld/read_config.c
@@ -339,8 +339,8 @@ static int _build_bitmaps(void)
 
 	/* scan all nodes and identify which are up, idle and
 	 * their configuration, resync DRAINED vs. DRAINING state */
-	for (i=0, node_ptr=node_record_table_ptr;
-	     i<node_record_count; i++, node_ptr++) {
+	for (i = 0, node_ptr = node_record_table_ptr;
+	     i < node_record_count; i++, node_ptr++) {
 		uint32_t drain_flag, job_cnt;
 
 		if (node_ptr->name[0] == '\0')
@@ -366,12 +366,20 @@ static int _build_bitmaps(void)
 			bit_set(node_ptr->config_ptr->node_bitmap, i);
 	}
 
+	/* Build active and available feature lists used for scheduling */
 	config_iterator = list_iterator_create(config_list);
 	while ((config_ptr = (struct config_record *)
 				      list_next(config_iterator))) {
 		build_avail_feature_list(config_ptr);
 	}
 	list_iterator_destroy(config_iterator);
+	build_active_feature_list2(-1, NULL);	/* Copy avail list to active */
+	for (i = 0, node_ptr = node_record_table_ptr;
+	     i < node_record_count; i++, node_ptr++) {
+		if (node_ptr->features_act)
+			build_active_feature_list2(i, node_ptr->features_act);
+	}
+	build_active_feature_list2(-2, NULL);	/* Log active list */
 
 	return error_code;
 }
-- 
GitLab