From bf5dafe03f795ceeb144d5c6442906da8d043b33 Mon Sep 17 00:00:00 2001
From: Danny Auble <da@llnl.gov>
Date: Fri, 3 Jun 2005 17:36:40 +0000
Subject: [PATCH] parallelize running of jobs and destroying partitions on a
 reconfiguration

---
 src/plugins/select/bluegene/bgl_job_run.c |  36 ++----
 src/plugins/select/bluegene/bluegene.c    | 141 ++++++++++++++++------
 src/plugins/select/bluegene/bluegene.h    |   6 +
 3 files changed, 119 insertions(+), 64 deletions(-)

diff --git a/src/plugins/select/bluegene/bgl_job_run.c b/src/plugins/select/bluegene/bgl_job_run.c
index 4c665f2c124..ba3d9d8caa4 100644
--- a/src/plugins/select/bluegene/bgl_job_run.c
+++ b/src/plugins/select/bluegene/bgl_job_run.c
@@ -55,9 +55,8 @@
 #ifdef HAVE_BGL_FILES
 
 #define MAX_POLL_RETRIES    110
-#define MAX_PTHREAD_RETRIES  1
 #define POLL_INTERVAL        3
-
+#define MAX_AGENT_COUNT      32	/* agent_cnt limit tested in _part_op */
 enum update_op {START_OP, TERM_OP, SYNC_OP};
 
 typedef struct bgl_update {
@@ -71,10 +70,7 @@ typedef struct bgl_update {
 static List bgl_update_list = NULL;
 
 static pthread_mutex_t agent_cnt_mutex = PTHREAD_MUTEX_INITIALIZER;
-static pthread_mutex_t freed_cnt_mutex = PTHREAD_MUTEX_INITIALIZER;
 static int agent_cnt = 0;
-static int num_part_to_free = 0;
-static int num_part_freed = 0;
 
 static void	_bgl_list_del(void *x);
 static int	_excise_block(List block_list, 
@@ -82,7 +78,6 @@ static int	_excise_block(List block_list,
 			      char *nodes);
 static List	_get_all_blocks(void);
 static void *	_part_agent(void *args);
-static void *	_mult_free_part(void *args);
 static void	_part_op(bgl_update_t *bgl_update_ptr);
 static int	_remove_job(db_job_id_t job_id);
 static void	_start_agent(bgl_update_t *bgl_update_ptr);
@@ -260,7 +255,7 @@ static void _start_agent(bgl_update_t *bgl_update_ptr)
 					retries = 0;
 					while (pthread_create(&thread_agent, 
 							      &attr_agent, 
-							      _mult_free_part, 
+							      mult_free_part, 
 							      (void *)
 							      found_record)) {
 						error("pthread_create "
@@ -460,26 +455,11 @@ static void *_part_agent(void *args)
 		_bgl_list_del(bgl_update_ptr);
 	}
 	slurm_mutex_lock(&agent_cnt_mutex);
-	agent_cnt = 0;
+	agent_cnt--;
 	slurm_mutex_unlock(&agent_cnt_mutex);
 	return NULL;
 }
 
-/* Free multiple partitions in parallel */
-static void *_mult_free_part(void *args)
-{
-	bgl_record_t *bgl_record = (bgl_record_t*) args;
-
-	debug("destroying the partition %s.", bgl_record->bgl_part_id);
-	bgl_free_partition(bgl_record);	
-	
-	slurm_mutex_lock(&freed_cnt_mutex);
-	num_part_freed++;
-	slurm_mutex_unlock(&freed_cnt_mutex);
-	
-	return NULL;
-}
-
 /* Perform an operation upon a BGL partition (block) for starting or 
  * terminating a job */
 static void _part_op(bgl_update_t *bgl_update_ptr)
@@ -502,11 +482,11 @@ static void _part_op(bgl_update_t *bgl_update_ptr)
 		if (list_enqueue(bgl_update_list, bgl_update_ptr) == NULL)
 			fatal("malloc failure in _part_op/list_enqueue");
 	}
-	if (agent_cnt > 0) {	/* already running an agent */
-		slurm_mutex_unlock(&agent_cnt_mutex);
-		return;
-	}
-	agent_cnt = 1;
+	if (agent_cnt >= MAX_AGENT_COUNT) {	/* agent limit reached */
+		slurm_mutex_unlock(&agent_cnt_mutex);
+		return;	/* request was queued above, not dropped */
+	}
+	agent_cnt++;
 	slurm_mutex_unlock(&agent_cnt_mutex);
 	/* spawn an agent */
 	slurm_attr_init(&attr_agent);
diff --git a/src/plugins/select/bluegene/bluegene.c b/src/plugins/select/bluegene/bluegene.c
index 1a29eeae956..479ea66bd72 100644
--- a/src/plugins/select/bluegene/bluegene.c
+++ b/src/plugins/select/bluegene/bluegene.c
@@ -51,6 +51,9 @@ bool agent_fini = false;
 int bridge_api_verb = 0;
 time_t last_bgl_update;
 pthread_mutex_t part_state_mutex = PTHREAD_MUTEX_INITIALIZER;
+int num_part_to_free = 0;	/* threads started by _delete_old_partitions */
+int num_part_freed = 0;	/* bumped by mult_free_part/mult_destroy_part */
+static pthread_mutex_t freed_cnt_mutex = PTHREAD_MUTEX_INITIALIZER;	/* held while bumping num_part_freed */
 
 /* some local functions */
 #ifdef HAVE_BGL
@@ -842,6 +845,53 @@ extern int bgl_free_partition(bgl_record_t *bgl_record)
 	return SLURM_SUCCESS;
 }
 
+/* Thread entry point: free the partition in args, then count it as freed */
+extern void *mult_free_part(void *args)
+{
+#ifdef HAVE_BGL_FILES
+	bgl_record_t *part = args;
+
+	debug("destroying the partition %s.", part->bgl_part_id);
+	bgl_free_partition(part);
+	debug("done\n");
+	slurm_mutex_lock(&freed_cnt_mutex);
+	num_part_freed++;
+	slurm_mutex_unlock(&freed_cnt_mutex);
+#endif
+	return NULL;
+}
+
+/* Thread entry point that destroys one partition: terminate its jobs,
+ * free it, remove it through rm_remove_partition(), and finally bump
+ * num_part_freed under freed_cnt_mutex */
+extern void *mult_destroy_part(void *args)
+{
+#ifdef HAVE_BGL_FILES
+	bgl_record_t *part = args;
+	int status;
+
+	debug("removing the jobs on partition %s\n", part->bgl_part_id);
+	term_jobs_on_part(part->bgl_part_id);
+
+	debug("destroying %s\n", (char *)part->bgl_part_id);
+	bgl_free_partition(part);
+
+	status = rm_remove_partition(part->bgl_part_id);
+	if (status == STATUS_OK)
+		debug("done\n");
+	else
+		error("rm_remove_partition(%s): %s",
+		      part->bgl_part_id, bgl_err_str(status));
+
+	/* report completion to whoever is polling num_part_freed */
+	slurm_mutex_lock(&freed_cnt_mutex);
+	num_part_freed++;
+	slurm_mutex_unlock(&freed_cnt_mutex);
+
+#endif
+	return NULL;
+}
+
 #ifdef HAVE_BGL
 static int _addto_node_list(bgl_record_t *bgl_record, int *start, int *end)
 {
@@ -1078,31 +1128,43 @@ static int _bgl_record_cmpf_inc(bgl_record_t* rec_a, bgl_record_t* rec_b)
 static int _delete_old_partitions(void)
 {
 #ifdef HAVE_BGL_FILES
-	int rc;
 	ListIterator itr_curr, itr_found;
 	bgl_record_t *found_record = NULL, *init_record = NULL;
-        
+	pthread_attr_t attr_agent;
+	pthread_t thread_agent;
+	int retries;
+	
+	num_part_to_free = 0;
+	num_part_freed = 0;
+		
 	if(!bgl_recover) {
 		if(bgl_curr_part_list) {
 			itr_curr = list_iterator_create(bgl_curr_part_list);
 			while ((init_record = (bgl_record_t*) 
 				list_next(itr_curr))) {
-				debug("removing the jobs on partition %s\n",
-				      init_record->bgl_part_id);
-				term_jobs_on_part(init_record->bgl_part_id);
-				
-				debug("destroying %s\n",
-				      (char *)init_record->bgl_part_id);
-				bgl_free_partition(init_record);
+				/* never joined; completion is counted instead */
+				slurm_attr_init(&attr_agent);
+				if (pthread_attr_setdetachstate(&attr_agent,
+						PTHREAD_CREATE_DETACHED))
+					error("pthread_attr_setdetach"
+					      "state error %m");
 				
-				rc = rm_remove_partition(
-					init_record->bgl_part_id);
-				if (rc != STATUS_OK) {
-					error("rm_remove_partition(%s): %s",
-					      init_record->bgl_part_id,
-					      bgl_err_str(rc));
-				} else
-					debug("done\n");
+				/* pass init_record: found_record is NULL here */
+				retries = 0;
+				while (pthread_create(&thread_agent,
+						      &attr_agent,
+						      mult_destroy_part,
+						      (void *)init_record)) {
+					error("pthread_create "
+					      "error %m");
+					if (++retries
+					    > MAX_PTHREAD_RETRIES)
+						fatal("Can't create "
+						      "pthread");
+					/* sleep and retry */
+					usleep(1000);
+				}
+				num_part_to_free++;
 			}
 			list_iterator_destroy(itr_curr);
 		} else {
@@ -1138,25 +1200,29 @@ static int _delete_old_partitions(void)
 					return SLURM_ERROR;
 				}
 				if(found_record == NULL) {
-					debug("removing the jobs on "
-					      "partition %s\n",
-					      init_record->bgl_part_id);
-					term_jobs_on_part(
-						init_record->bgl_part_id);
-			
-					debug("destroying %s\n",(char *)
-					      init_record->bgl_part_id);
-					bgl_free_partition(init_record);
-			
-					rc = rm_remove_partition(init_record->
-								 bgl_part_id);
-					if (rc != STATUS_OK) {
-						error("rm_remove_partition"
-						      "(%s): %s",
-						      init_record->bgl_part_id,
-						      bgl_err_str(rc));
-					} else
-						debug("done\n");
+					/* found_record is NULL here, so the
+					 * thread must be given init_record */
+					slurm_attr_init(&attr_agent);
+					if (pthread_attr_setdetachstate(
+						    &attr_agent,
+						    PTHREAD_CREATE_DETACHED))
+						error("pthread_attr_setdetach"
+						      "state error %m");
+					retries = 0;
+					while (pthread_create(&thread_agent,
+							&attr_agent,
+							mult_destroy_part,
+							(void *)init_record)) {
+						error("pthread_create "
+						      "error %m");
+						if (++retries
+						    > MAX_PTHREAD_RETRIES)
+							fatal("Can't create "
+							      "pthread");
+						/* sleep and retry */
+						usleep(1000);
+					}
+					num_part_to_free++;
 				}
 			}		
 			list_iterator_destroy(itr_curr);
@@ -1166,6 +1232,9 @@ static int _delete_old_partitions(void)
 			return SLURM_ERROR;
 		}
 	}
+	/* poll until every thread started above has bumped num_part_freed */
+	while (num_part_to_free != num_part_freed)
+		usleep(1000);
 #endif	
 	return SLURM_SUCCESS;
 }
diff --git a/src/plugins/select/bluegene/bluegene.h b/src/plugins/select/bluegene/bluegene.h
index adaccf81105..389f15c88e0 100644
--- a/src/plugins/select/bluegene/bluegene.h
+++ b/src/plugins/select/bluegene/bluegene.h
@@ -59,6 +59,8 @@ extern List bgl_curr_part_list; 	/* Initial bgl partition state */
 extern List bgl_list;			/* List of configured BGL blocks */
 extern bool agent_fini;
 extern pthread_mutex_t part_state_mutex;
+extern int num_part_to_free;	/* partition free/destroy threads started */
+extern int num_part_freed;	/* of those, how many have finished */
 
 typedef int lifecycle_type_t;
 enum part_lifecycle {DYNAMIC, STATIC};
@@ -107,6 +109,8 @@ typedef struct {
 	List switch_list;
 } bgl_bp_t;
 
+#define MAX_PTHREAD_RETRIES  1	/* failed pthread_create calls retried before fatal() */
+
 #include "bgl_part_info.h"
 #include "bgl_job_place.h"
 #include "bgl_job_run.h"
@@ -195,6 +199,8 @@ extern int read_bgl_partitions(void);
 /*****************************************************/
 extern int configure_partition_switches(bgl_record_t * bgl_conf_record);
 extern int bgl_free_partition(bgl_record_t *bgl_record);
+extern void *mult_free_part(void *args);	/* thread entry; args is a bgl_record_t * */
+extern void *mult_destroy_part(void *args);	/* thread entry; args is a bgl_record_t * */
 
 
 #endif /* _BLUEGENE_H_ */
-- 
GitLab