From 9c2f11d2d247570d6cf4e1052a192896c1e6e18a Mon Sep 17 00:00:00 2001
From: Danny Auble <da@llnl.gov>
Date: Thu, 3 Sep 2009 23:29:33 +0000
Subject: [PATCH] added checks for preemption loops and modify qos

---
 slurm/slurm_errno.h                           |   1 +
 src/common/assoc_mgr.c                        |  28 ++++-
 src/common/slurm_errno.c                      |   2 +
 src/common/slurmdbd_defs.c                    |  27 ++++-
 .../mysql/accounting_storage_mysql.c          | 100 +++++++++++++++---
 src/sacctmgr/qos_functions.c                  |   6 +-
 src/sacctmgr/sacctmgr.c                       |   5 +-
 src/sacctmgr/sacctmgr.h                       |   1 +
 src/slurmdbd/proc_req.c                       |   3 +
 src/slurmdbd/slurmdbd.c                       |   6 +-
 10 files changed, 155 insertions(+), 24 deletions(-)

diff --git a/slurm/slurm_errno.h b/slurm/slurm_errno.h
index 93605fe16ff..b1e6eafc773 100644
--- a/slurm/slurm_errno.h
+++ b/slurm/slurm_errno.h
@@ -176,6 +176,7 @@ enum {
 	ESLURM_INVALID_BLOCK_LAYOUT,
 	ESLURM_INVALID_BLOCK_NAME,
 	ESLURM_INVALID_QOS,
+	ESLURM_QOS_PREEMPTION_LOOP,
 
 	/* switch specific error codes, specific values defined in plugin module */
 	ESLURM_SWITCH_MIN = 3000,
diff --git a/src/common/assoc_mgr.c b/src/common/assoc_mgr.c
index 95b367abaac..68f1c5f8f11 100644
--- a/src/common/assoc_mgr.c
+++ b/src/common/assoc_mgr.c
@@ -1193,7 +1193,10 @@ extern int assoc_mgr_fill_in_qos(void *db_conn, acct_qos_rec_t *qos,
 
 	qos->norm_priority = found_qos->norm_priority;
 
-	if(!qos->preempt_bitstr)
+	if(qos->preempt_bitstr) {
+		FREE_NULL_BITMAP(qos->preempt_bitstr);
+		qos->preempt_bitstr = bit_copy(found_qos->preempt_bitstr);
+	} else
 		qos->preempt_bitstr = found_qos->preempt_bitstr;
 
 	qos->priority = found_qos->priority;
@@ -2093,7 +2096,14 @@ extern int assoc_mgr_update_qos(acct_update_object_t *update)
 				break;
 			}
 			list_append(assoc_mgr_qos_list, object);
-
+/* 			char *tmp = get_qos_complete_str_bitstr( */
+/* 				assoc_mgr_qos_list, */
+/* 				object->preempt_bitstr); */
+			
+/* 			info("new qos %s(%d) now preempts %s",  */
+/* 			     object->name, object->id, tmp); */
+/* 			xfree(tmp); */
+			
 			/* Since in the database id's don't start at 1
 			   instead of 0 we need to ignore the 0 bit and start
 			   with 1 so increase the count by 1.
@@ -2143,6 +2153,13 @@ extern int assoc_mgr_update_qos(acct_update_object_t *update)
 				
 				rec->preempt_bitstr = object->preempt_bitstr;
 				object->preempt_bitstr = NULL;
+	/* 			char *tmp = get_qos_complete_str_bitstr( */
+/* 					assoc_mgr_qos_list, */
+/* 					rec->preempt_bitstr); */
+				
+/* 				info("qos %s(%d) now preempts %s",  */
+/* 				     rec->name, rec->id, tmp); */
+/* 				xfree(tmp); */
 			}
 
 			if(object->priority != NO_VAL) 
@@ -2153,6 +2170,11 @@ extern int assoc_mgr_update_qos(acct_update_object_t *update)
 			
 			break;
 		case ACCT_REMOVE_QOS:
+			if(rec) 
+				list_delete_item(itr);
+
+			if(!assoc_mgr_association_list)
+				break;
 			/* Remove this qos from all the associations
 			   on this cluster.
 			*/
@@ -2169,8 +2191,6 @@ extern int assoc_mgr_update_qos(acct_update_object_t *update)
 			list_iterator_destroy(assoc_itr);
 			slurm_mutex_unlock(&assoc_mgr_association_lock);
 			
-			if(rec) 
-				list_delete_item(itr);
 			break;
 		default:
 			break;
diff --git a/src/common/slurm_errno.c b/src/common/slurm_errno.c
index 55171b69415..8cb77ddc222 100644
--- a/src/common/slurm_errno.c
+++ b/src/common/slurm_errno.c
@@ -254,6 +254,8 @@ static slurm_errtab_t slurm_errtab[] = {
 	  "Functionality not available with current block layout mode"},
 	{ ESLURM_INVALID_BLOCK_NAME, 
 	  "Invalid block name specified"			},
+	{ ESLURM_QOS_PREEMPTION_LOOP,
+	  "QOS Preemption loop detected"                	},
 
 	/* slurmd error codes */
 
diff --git a/src/common/slurmdbd_defs.c b/src/common/slurmdbd_defs.c
index 5662afa032c..7a4fbe660b0 100644
--- a/src/common/slurmdbd_defs.c
+++ b/src/common/slurmdbd_defs.c
@@ -252,7 +252,10 @@ extern int slurm_send_recv_slurmdbd_msg(uint16_t rpc_version,
 		}
 	}
 
-	buffer = pack_slurmdbd_msg(rpc_version, req);
+	if(!(buffer = pack_slurmdbd_msg(rpc_version, req))) {
+		rc = SLURM_ERROR;
+		goto end_it;
+	}
 
 	rc = _send_msg(buffer);
 	free_buf(buffer);
@@ -501,6 +504,7 @@ extern Buf pack_slurmdbd_msg(uint16_t rpc_version, slurmdbd_msg_t *req)
 	case DBD_MODIFY_ACCOUNTS:
 	case DBD_MODIFY_ASSOCS:
 	case DBD_MODIFY_CLUSTERS:
+	case DBD_MODIFY_QOS:
 	case DBD_MODIFY_USERS:
 		slurmdbd_pack_modify_msg(
 			rpc_version, req->msg_type,
@@ -672,6 +676,7 @@ extern int unpack_slurmdbd_msg(uint16_t rpc_version,
 	case DBD_MODIFY_ACCOUNTS:
 	case DBD_MODIFY_ASSOCS:
 	case DBD_MODIFY_CLUSTERS:
+	case DBD_MODIFY_QOS:
 	case DBD_MODIFY_USERS:
 		rc = slurmdbd_unpack_modify_msg(
 			rpc_version,
@@ -801,6 +806,8 @@ extern slurmdbd_msg_type_t str_2_slurmdbd_msg_type(char *msg_type)
 		return DBD_MODIFY_ASSOCS;
 	} else if(!strcasecmp(msg_type, "Modify Clusters")) {
 		return DBD_MODIFY_CLUSTERS;
+	} else if(!strcasecmp(msg_type, "Modify QOS")) {
+		return DBD_MODIFY_QOS;
 	} else if(!strcasecmp(msg_type, "Modify Users")) {
 		return DBD_MODIFY_USERS;
 	} else if(!strcasecmp(msg_type, "Node State")) {
@@ -1077,6 +1084,12 @@ extern char *slurmdbd_msg_type_2_str(slurmdbd_msg_type_t msg_type, int get_enum)
 		} else
 			return "Modify Clusters";
 		break;
+	case DBD_MODIFY_QOS:
+		if(get_enum) {
+			return "DBD_MODIFY_QOS";
+		} else
+			return "Modify QOS";
+		break;
 	case DBD_MODIFY_USERS:
 		if(get_enum) {
 			return "DBD_MODIFY_USERS";
@@ -2213,6 +2226,10 @@ void inline slurmdbd_free_modify_msg(uint16_t rpc_version,
 			destroy_cond = destroy_acct_cluster_cond;
 			destroy_rec = destroy_acct_cluster_rec;
 			break;
+		case DBD_MODIFY_QOS:
+			destroy_cond = destroy_acct_qos_cond;
+			destroy_rec = destroy_acct_qos_rec;
+			break;
 		case DBD_MODIFY_USERS:
 			destroy_cond = destroy_acct_user_cond;
 			destroy_rec = destroy_acct_user_rec;
@@ -3249,6 +3266,10 @@ void inline slurmdbd_pack_modify_msg(uint16_t rpc_version,
 		my_cond = pack_acct_cluster_cond;
 		my_rec = pack_acct_cluster_rec;
 		break;
+	case DBD_MODIFY_QOS:
+		my_cond = pack_acct_qos_cond;
+		my_rec = pack_acct_qos_rec;
+		break;
 	case DBD_MODIFY_USERS:
 		my_cond = pack_acct_user_cond;
 		my_rec = pack_acct_user_rec;
@@ -3285,6 +3306,10 @@ int inline slurmdbd_unpack_modify_msg(uint16_t rpc_version,
 		my_cond = unpack_acct_cluster_cond;
 		my_rec = unpack_acct_cluster_rec;
 		break;
+	case DBD_MODIFY_QOS:
+		my_cond = unpack_acct_qos_cond;
+		my_rec = unpack_acct_qos_rec;
+		break;
 	case DBD_MODIFY_USERS:
 		my_cond = unpack_acct_user_cond;
 		my_rec = unpack_acct_user_rec;
diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
index b2cdecee040..eb55c61446f 100644
--- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
+++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
@@ -348,6 +348,43 @@ no_wckeyid:
 	return wckeyid;
 }
 
+static int _preemption_loop(mysql_conn_t *mysql_conn, int begin_qosid, 
+			    bitstr_t *preempt_bitstr)
+{
+	acct_qos_rec_t qos_rec;
+	int rc = 0, i=0;
+
+	xassert(preempt_bitstr);
+	
+	/* check in the preempt list for all qos's preempted */
+	for(i=0; i<bit_size(preempt_bitstr); i++) {
+		if(!bit_test(preempt_bitstr, i))
+			continue;
+
+		memset(&qos_rec, 0, sizeof(qos_rec));
+		qos_rec.id = i;
+		assoc_mgr_fill_in_qos(mysql_conn, &qos_rec,
+				      ACCOUNTING_ENFORCE_QOS,
+				      NULL);
+		/* check if the begin_qosid is preempted by this qos
+		 * if so we have a loop */
+		if(qos_rec.preempt_bitstr 
+		   && bit_test(qos_rec.preempt_bitstr, begin_qosid)) {
+			error("QOS id %d has a loop at QOS %s",
+			      begin_qosid, qos_rec.name);
+			rc = 1;
+			break;
+		} else if(qos_rec.preempt_bitstr) {
+			/* check this qos' preempt list and make sure
+			   no loops exist there either */
+			if((rc = _preemption_loop(mysql_conn, begin_qosid,
+						  qos_rec.preempt_bitstr)))
+				break;
+		}
+	}
+	return rc;
+}
+
 static int _set_usage_information(char **usage_table, slurmdbd_msg_type_t type,
 				  time_t *usage_start, time_t *usage_end)
 {
@@ -705,7 +742,7 @@ end_qos:
 
 static int _setup_qos_limits(acct_qos_rec_t *qos,
 			     char **cols, char **vals,
-			     char **extra)
+			     char **extra, char **added_preempt)
 {	
 	if(!qos)
 		return SLURM_ERROR;
@@ -880,11 +917,17 @@ static int _setup_qos_limits(acct_qos_rec_t *qos,
 					   "replace(%s, ',%s', ''), ',%s')",
 					   begin_preempt, 
 					   tmp_char+1, tmp_char+1);
+				if(added_preempt)
+					xstrfmtcat(*added_preempt, ",%s",
+						   tmp_char+1);
 				xfree(begin_preempt);
 				begin_preempt = preempt_val;
-			} else if(tmp_char[0]) 
+			} else if(tmp_char[0]) {
 				xstrfmtcat(preempt_val, ",%s", tmp_char);
-			else
+				if(added_preempt)
+					xstrfmtcat(*added_preempt, ",%s",
+						   tmp_char);
+			} else
 				xstrcat(preempt_val, "");
 		}
 		list_iterator_destroy(preempt_itr);
@@ -4702,6 +4745,7 @@ extern int acct_storage_p_add_qos(mysql_conn_t *mysql_conn, uint32_t uid,
 	char *user_name = NULL;
 	int affect_rows = 0;
 	int added = 0;
+	char *added_preempt = NULL;
 
 	if(_check_connection(mysql_conn) != SLURM_SUCCESS)
 		return SLURM_ERROR;
@@ -4719,7 +4763,13 @@ extern int acct_storage_p_add_qos(mysql_conn_t *mysql_conn, uint32_t uid,
 			   now, now, object->name); 
 		xstrfmtcat(extra, ", mod_time=%d", now);
 
-		_setup_qos_limits(object, &cols, &vals, &extra);
+		_setup_qos_limits(object, &cols, &vals, &extra, &added_preempt);
+		if(added_preempt) {
+			object->preempt_bitstr = bit_alloc(g_qos_count);
+			bit_unfmt(object->preempt_bitstr, added_preempt+1);
+			xfree(added_preempt);
+		}
+
 		xstrfmtcat(query, 
 			   "insert into %s (%s) values (%s) "
 			   "on duplicate key update deleted=0, "
@@ -4749,9 +4799,7 @@ extern int acct_storage_p_add_qos(mysql_conn_t *mysql_conn, uint32_t uid,
 			xfree(vals);
 			continue;
 		}
-		/* FIX ME: we have to edit all the other qos's to set
-		   there preemptee or preemptor based on what is here.
-		*/
+		
 		/* we always have a ', ' as the first 2 chars */
 		tmp_extra = _fix_double_quotes(extra+2);
 
@@ -5899,6 +5947,8 @@ extern List acct_storage_p_modify_qos(mysql_conn_t *mysql_conn, uint32_t uid,
 	MYSQL_RES *result = NULL;
 	MYSQL_ROW row;
 	char *tmp_char1=NULL, *tmp_char2=NULL;
+	bitstr_t *preempt_bitstr = NULL;
+	char *added_preempt = NULL;
 
 	if(!qos_cond || !qos) {
 		error("we need something to change");
@@ -5955,20 +6005,27 @@ extern List acct_storage_p_modify_qos(mysql_conn_t *mysql_conn, uint32_t uid,
 		xstrcat(extra, ")");
 	}
 	
-	_setup_qos_limits(qos, &tmp_char1, &tmp_char2, &vals);
+	_setup_qos_limits(qos, &tmp_char1, &tmp_char2, &vals, &added_preempt);
+	if(added_preempt) {
+		preempt_bitstr = bit_alloc(g_qos_count);
+		bit_unfmt(preempt_bitstr, added_preempt+1);
+		xfree(added_preempt);
+	}
 	xfree(tmp_char1);
 	xfree(tmp_char2);
 
 	if(!extra || !vals) {
 		errno = SLURM_NO_CHANGE_IN_DATA;
+		FREE_NULL_BITMAP(preempt_bitstr);
 		error("Nothing to change");
 		return NULL;
 	}
-	query = xstrdup_printf("select name, preempt from %s %s;",
+	query = xstrdup_printf("select name, preempt, id from %s %s;",
 			       qos_table, extra);
 	xfree(extra);
 	if(!(result = mysql_db_query_ret(mysql_conn->db_conn, query, 0))) {
 		xfree(query);
+		FREE_NULL_BITMAP(preempt_bitstr);
 		return NULL;
 	}
 
@@ -5976,7 +6033,11 @@ extern List acct_storage_p_modify_qos(mysql_conn_t *mysql_conn, uint32_t uid,
 	ret_list = list_create(slurm_destroy_char);
 	while((row = mysql_fetch_row(result))) {
 		acct_qos_rec_t *qos_rec = NULL;
-		
+		if(preempt_bitstr) {
+			if(_preemption_loop(mysql_conn,
+					    atoi(row[2]), preempt_bitstr))
+				break;
+		}
 		object = xstrdup(row[0]);
 		list_append(ret_list, object);
 		if(!rc) {
@@ -5985,6 +6046,7 @@ extern List acct_storage_p_modify_qos(mysql_conn_t *mysql_conn, uint32_t uid,
 		} else  {
 			xstrfmtcat(name_char, " || name='%s'", object);
 		}
+		
 		qos_rec = xmalloc(sizeof(acct_qos_rec_t));
 		qos_rec->name = xstrdup(object);
 
@@ -6041,6 +6103,18 @@ extern List acct_storage_p_modify_qos(mysql_conn_t *mysql_conn, uint32_t uid,
 	}
 	mysql_free_result(result);
 
+	FREE_NULL_BITMAP(preempt_bitstr);
+	
+	if(row) {
+		xfree(vals);
+		xfree(name_char);
+		xfree(query);
+		list_destroy(ret_list);
+		ret_list = NULL;
+		errno = ESLURM_QOS_PREEMPTION_LOOP;
+		return ret_list;
+	}
+
 	if(!list_count(ret_list)) {
 		errno = SLURM_NO_CHANGE_IN_DATA;
 		debug3("didn't effect anything\n%s", query);
@@ -8860,9 +8934,11 @@ empty:
 		else
 			qos->max_wall_pu = INFINITE;
 
-		if(row[QOS_REQ_PREE] && row[QOS_REQ_PREE][0]) 
+		if(row[QOS_REQ_PREE] && row[QOS_REQ_PREE][0]) {
+			if(!qos->preempt_bitstr)
+				qos->preempt_bitstr = bit_alloc(g_qos_count);
 			bit_unfmt(qos->preempt_bitstr, row[QOS_REQ_PREE]+1);
-		
+		}
 		if(row[QOS_REQ_PRIO])
 			qos->priority = atoi(row[QOS_REQ_PRIO]);
 
diff --git a/src/sacctmgr/qos_functions.c b/src/sacctmgr/qos_functions.c
index 56053935038..8c5aeb4ee71 100644
--- a/src/sacctmgr/qos_functions.c
+++ b/src/sacctmgr/qos_functions.c
@@ -68,6 +68,7 @@ static int _set_cond(int *start, int argc, char *argv[],
 
 		if (!strncasecmp (argv[i], "Set", MAX(command_len, 3))) {
 			i--;
+			break;
 		} else if (!strncasecmp (argv[i], "WithDeleted", 
 					 MAX(command_len, 5))) {
 			qos_cond->with_deleted = 1;
@@ -131,7 +132,6 @@ static int _set_cond(int *start, int argc, char *argv[],
 	}
 
 	(*start) = i;
-
 	return set;
 }
 
@@ -282,14 +282,14 @@ static int _set_rec(int *start, int argc, char *argv[],
 					argv[i]);
 			}
 		} else if (!strncasecmp (argv[i], "Preempt", 
-					 MAX(command_len, 9))) {
+					 MAX(command_len, 3))) {
 			if(!qos)
 				continue;
 
 			if(!qos->preempt_list) 
 				qos->preempt_list = 
 					list_create(slurm_destroy_char);
-						
+
 			if(!g_qos_list) 
 				g_qos_list = acct_storage_g_get_qos(
 					db_conn, my_uid, NULL);
diff --git a/src/sacctmgr/sacctmgr.c b/src/sacctmgr/sacctmgr.c
index c28bb65c59b..58a0f1b5f2d 100644
--- a/src/sacctmgr/sacctmgr.c
+++ b/src/sacctmgr/sacctmgr.c
@@ -670,6 +670,8 @@ static void _modify_it (int argc, char *argv[])
 	} else if (strncasecmp (argv[0], "Clusters", 
 				MAX(command_len, 1)) == 0) {
 		error_code = sacctmgr_modify_cluster((argc - 1), &argv[1]);
+	} else if (strncasecmp (argv[0], "QOSs", MAX(command_len, 1)) == 0) {
+		error_code = sacctmgr_modify_qos((argc - 1), &argv[1]);
 	} else if (strncasecmp (argv[0], "Users", MAX(command_len, 1)) == 0) {
 		error_code = sacctmgr_modify_user((argc - 1), &argv[1]);
 	} else {
@@ -677,7 +679,8 @@ static void _modify_it (int argc, char *argv[])
 		exit_code = 1;
 		fprintf(stderr, "No valid entity in modify command\n");
 		fprintf(stderr, "Input line must include ");
-		fprintf(stderr, "\"Account\", \"Cluster\", or \"User\"\n");
+		fprintf(stderr, "\"Account\", \"Cluster\", \"QOS\", "
+			"or \"User\"\n");
 	}
 
 	if (error_code == SLURM_ERROR) {
diff --git a/src/sacctmgr/sacctmgr.h b/src/sacctmgr/sacctmgr.h
index f54b3ddfbf2..55a72c7ff51 100644
--- a/src/sacctmgr/sacctmgr.h
+++ b/src/sacctmgr/sacctmgr.h
@@ -120,6 +120,7 @@ extern int sacctmgr_modify_association(int argc, char *argv[]);
 extern int sacctmgr_modify_user(int argc, char *argv[]);
 extern int sacctmgr_modify_account(int argc, char *argv[]);
 extern int sacctmgr_modify_cluster(int argc, char *argv[]);
+extern int sacctmgr_modify_qos(int argc, char *argv[]);
 
 extern int sacctmgr_delete_association(int argc, char *argv[]);
 extern int sacctmgr_delete_user(int argc, char *argv[]);
diff --git a/src/slurmdbd/proc_req.c b/src/slurmdbd/proc_req.c
index 115dbd88f5f..a34eacaffc0 100644
--- a/src/slurmdbd/proc_req.c
+++ b/src/slurmdbd/proc_req.c
@@ -2092,6 +2092,9 @@ static int   _modify_qos(slurmdbd_conn_t *slurmdbd_conn,
 		} else if(errno == SLURM_NO_CHANGE_IN_DATA) {
 			comment = "Request didn't affect anything";
 			rc = SLURM_SUCCESS;
+		} else if(errno == ESLURM_QOS_PREEMPTION_LOOP) {
+			comment = "QOS Preemption loop detected";
+			rc = ESLURM_QOS_PREEMPTION_LOOP;
 		} else {
 			comment = "Unknown issue";
 			rc = SLURM_ERROR;
diff --git a/src/slurmdbd/slurmdbd.c b/src/slurmdbd/slurmdbd.c
index 1beabaabf55..f5d8f53eaaa 100644
--- a/src/slurmdbd/slurmdbd.c
+++ b/src/slurmdbd/slurmdbd.c
@@ -140,9 +140,9 @@ int main(int argc, char *argv[])
 
 	memset(&assoc_init_arg, 0, sizeof(assoc_init_args_t));
 
-	/* If we are tacking wckey we need to cache associations and
-	   wckeys if we aren't only cache the users */
-	assoc_init_arg.cache_level = ASSOC_MGR_CACHE_USER;
+	/* If we are tacking wckey we need to cache 
+	   wckeys, if we aren't only cache the users and qos */
+	assoc_init_arg.cache_level = ASSOC_MGR_CACHE_USER | ASSOC_MGR_CACHE_QOS;
 	if(slurmdbd_conf->track_wckey)
 		assoc_init_arg.cache_level |= ASSOC_MGR_CACHE_WCKEY;
 	
-- 
GitLab