From ee97c2d60416d2a92299a08d35afe12557913914 Mon Sep 17 00:00:00 2001
From: Danny Auble <da@llnl.gov>
Date: Fri, 27 Jan 2006 22:27:44 +0000
Subject: [PATCH] moved things around for cleaner code.

---
 src/slurmctld/agent.c | 145 ++++++++++++++++++++++--------------------
 1 file changed, 76 insertions(+), 69 deletions(-)

diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c
index 472c17ed1af..7fcb4dc86a2 100644
--- a/src/slurmctld/agent.c
+++ b/src/slurmctld/agent.c
@@ -92,6 +92,15 @@ typedef enum {
 	DSH_FAILED      /* Request resulted in error */
 } state_t;
 
+typedef struct thd_complete {	/* watchdog tallies updated by _update_wdog_state() */
+	bool work_done; 	/* cleared when a DSH_NEW/DSH_ACTIVE state is seen; _wdog assumes true (all threads complete) at the start of each pass */
+	int fail_cnt;   	/* number of DSH_FAILED states seen; _wdog resets to 0 each pass */
+	int no_resp_cnt;	/* number of DSH_NO_RESP states seen; _wdog resets to 0 each pass */
+	int retry_cnt;  	/* incremented together with no_resp_cnt; _wdog resets to 0 each pass */
+	int max_delay;		/* largest end_time (cast to int) among DSH_DONE states; set to 0 once before the _wdog loop */
+	time_t now;		/* time stamp that a DSH_ACTIVE thread's end_time is compared against */
+} thd_complete_t;
+
 typedef struct thd {
 	pthread_t thread;		/* thread ID */
 	pthread_attr_t attr;		/* thread attributes */
@@ -363,7 +372,7 @@ static agent_info_t *_make_agent_info(agent_arg_t *agent_arg_ptr)
 	for (i = 0; i < agent_info_ptr->thread_count; i++) {
 		thread_ptr[thr_count].state      = DSH_NEW;
 		thread_ptr[thr_count].slurm_addr = 
-		   agent_arg_ptr->slurm_addr[i];
+			agent_arg_ptr->slurm_addr[i];
 		strncpy(thread_ptr[thr_count].node_name,
 			&agent_arg_ptr->node_names[i * MAX_NAME_LEN],
 			MAX_NAME_LEN);
@@ -396,6 +405,39 @@ static task_info_t *_make_task_data(agent_info_t *agent_info_ptr, int inx)
 	return task_info_ptr;
 }
 
+static void _update_wdog_state(thd_t *thread_ptr, 
+			       state_t *state, 
+			       thd_complete_t *thd_comp)
+{	/* Fold one state value (*state) for thread_ptr into the tallies in *thd_comp; may rewrite *state */
+	switch(*state) {
+	case DSH_ACTIVE:	/* work still outstanding; signal the thread if its end_time has passed */
+		thd_comp->work_done = false;
+		if (thread_ptr->end_time <= thd_comp->now) {
+			debug3("agent thread %lu timed out\n", 
+			       (unsigned long) 
+			       thread_ptr->thread);
+			if (pthread_kill(thread_ptr->thread, SIGALRM) == ESRCH)
+				*state = DSH_NO_RESP;	/* ESRCH: no such thread to signal, so record it as not responding */
+		}
+		break;
+	case DSH_NEW:		/* counts as unfinished work, nothing else to do */
+		thd_comp->work_done = false;
+		break;
+	case DSH_DONE:		/* keep the largest end_time seen; NOTE(review): presumably end_time holds elapsed seconds once done - confirm */
+		if (thd_comp->max_delay < 
+		    (int)thread_ptr->end_time)
+			thd_comp->max_delay = (int)thread_ptr->end_time;
+		break;
+	case DSH_NO_RESP:	/* every non-response is also counted as a retry */
+		thd_comp->no_resp_cnt++;
+		thd_comp->retry_cnt++;
+		break;
+	case DSH_FAILED:	/* request resulted in error */
+		thd_comp->fail_cnt++;
+		break;
+	}	/* no default case: any other value leaves *thd_comp untouched */
+}
+
 /* 
  * _wdog - Watchdog thread. Send SIGALRM to threads which have been active 
  *	for too long. 
@@ -404,9 +446,8 @@ static task_info_t *_make_task_data(agent_info_t *agent_info_ptr, int inx)
  */
 static void *_wdog(void *args)
 {
-	int fail_cnt, no_resp_cnt, retry_cnt;
-	bool work_done, srun_agent = false;
-	int i, max_delay = 0;
+	bool srun_agent = false;
+	int i;
 	agent_info_t *agent_ptr = (agent_info_t *) args;
 	thd_t *thread_ptr = agent_ptr->thread_struct;
 	unsigned long usec = 1250000;
@@ -415,6 +456,8 @@ static void *_wdog(void *args)
 	ret_types_t *ret_type = NULL;
 	state_t state;
 	int is_ret_list = 1;
+	thd_complete_t thd_comp;
+
 
 	if ( (agent_ptr->msg_type == SRUN_PING) ||
 	     (agent_ptr->msg_type == SRUN_TIMEOUT) ||
@@ -422,87 +465,51 @@ static void *_wdog(void *args)
 	     (agent_ptr->msg_type == SRUN_NODE_FAIL) )
 		srun_agent = true;
 
+	thd_comp.max_delay = 0;
+		
 	while (1) {
-		work_done   = true;	/* assume all threads complete */
-		fail_cnt    = 0;	/* assume no threads failures */
-		no_resp_cnt = 0;	/* assume all threads respond */
-		retry_cnt   = 0;	/* assume no required retries */
-
+		thd_comp.work_done   = true;/* assume all threads complete */
+		thd_comp.fail_cnt    = 0;   /* assume no threads failures */
+		thd_comp.no_resp_cnt = 0;   /* assume all threads respond */
+		thd_comp.retry_cnt   = 0;   /* assume no required retries */
+		thd_comp.now         = time(NULL);
+		
 		usleep(usec);
-		usec = MIN((usec * 2), 1000000);
-		now = time(NULL);
+		usec = MIN((usec * 2), 1000000);		
 
 		slurm_mutex_lock(&agent_ptr->thread_mutex);
 		for (i = 0; i < agent_ptr->thread_count; i++) {
 			if(!thread_ptr[i].ret_list) {
-				state = thread_ptr[i].state;
-				is_ret_list = 0;
-				goto switch_on_state;
-			}
-			is_ret_list = 1;
-			itr = list_iterator_create(thread_ptr[i].ret_list);
-			while((ret_type = list_next(itr)) != NULL) {
-				state = ret_type->msg_rc;
-			switch_on_state:
-				switch(state) {
-				case DSH_ACTIVE:
-					work_done = false;
-					if (thread_ptr[i].end_time <= now) {
-						debug3("agent thread %lu "
-						       "timed out\n", 
-						       (unsigned long) 
-						       thread_ptr[i].thread);
-						if (pthread_kill(thread_ptr[i].
-								 thread,
-								 SIGALRM) 
-						    == ESRCH) {
-							if(is_ret_list)
-								ret_type->
-								  msg_rc = 
-								DSH_NO_RESP;
-							else
-								thread_ptr[i].
-									state =
-								DSH_NO_RESP;
-						}
-					}
-					break;
-				case DSH_NEW:
-					work_done = false;
-					break;
-				case DSH_DONE:
-					if (max_delay < 
-					    (int)thread_ptr[i].end_time)
-						max_delay = 
-							(int)thread_ptr[i].
-							end_time;
-					break;
-				case DSH_NO_RESP:
-					no_resp_cnt++;
-					retry_cnt++;
-					break;
-				case DSH_FAILED:
-					fail_cnt++;
-					break;
+				_update_wdog_state(&thread_ptr[i],
+						   &thread_ptr[i].state,
+						   &thd_comp);
+			} else {
+				itr = list_iterator_create(
+					thread_ptr[i].ret_list);
+				while((ret_type = list_next(itr)) != NULL) {
+					_update_wdog_state(
+						&thread_ptr[i],
+						(state_t *)&ret_type->msg_rc,
+						&thd_comp);
 				}
-				if(!is_ret_list)
-					goto is_work_done;
+				list_iterator_destroy(itr);
 			}
-			list_iterator_destroy(itr);
 		}
-	is_work_done:
-		if (work_done)
+		if (thd_comp.work_done)
 			break;
+		
 		slurm_mutex_unlock(&agent_ptr->thread_mutex);
 	}
 
 	if (srun_agent) {
 	        _notify_slurmctld_jobs(agent_ptr);
 	} else {
-	        _notify_slurmctld_nodes(agent_ptr, no_resp_cnt, retry_cnt);
+	        _notify_slurmctld_nodes(agent_ptr, 
+					thd_comp.no_resp_cnt, 
+					thd_comp.retry_cnt);
 	}
-	if (max_delay)
-		debug2("agent maximum delay %d seconds", max_delay);
+	if (thd_comp.max_delay)
+		debug2("agent maximum delay %d seconds", thd_comp.max_delay);
 	
 	slurm_mutex_unlock(&agent_ptr->thread_mutex);
 	return (void *) NULL;
@@ -550,7 +557,7 @@ static void _notify_slurmctld_jobs(agent_info_t *agent_ptr)
 }
 
 static void _notify_slurmctld_nodes(agent_info_t *agent_ptr, 
-		int no_resp_cnt, int retry_cnt)
+				    int no_resp_cnt, int retry_cnt)
 {
 	ListIterator itr;
 	ListIterator data_itr;
-- 
GitLab