From 36d3104c5e533f319577990726cbce418f3e8721 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Thu, 19 Sep 2002 20:35:46 +0000
Subject: [PATCH] State non-responding state mapped to DOWN. enhanced debugging
 added for agent, now passing node name along with address.

---
 src/slurmctld/agent.c          | 93 +++++++++++++++++++++-------------
 src/slurmctld/agent.h          |  1 +
 src/slurmctld/node_mgr.c       | 23 +++++++++
 src/slurmctld/node_scheduler.c | 14 +++--
 src/slurmctld/slurmctld.h      |  3 ++
 5 files changed, 96 insertions(+), 38 deletions(-)

diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c
index 43c468fe821..04c093832f7 100644
--- a/src/slurmctld/agent.c
+++ b/src/slurmctld/agent.c
@@ -54,6 +54,7 @@
 #include <errno.h>
 #include <pthread.h>
 #include <signal.h>
+#include <string.h>
 #include <unistd.h>
 
 #include <src/common/log.h>
@@ -61,6 +62,7 @@
 #include <src/common/xmalloc.h>
 #include <src/common/xstring.h>
 #include <src/slurmctld/agent.h>
+#include <src/slurmctld/locks.h>
 
 #if COMMAND_TIMEOUT == 1
 #define WDOG_POLL 		1	/* secs */
@@ -76,6 +78,7 @@ typedef struct thd {
         state_t		state;      		/* thread state */
         time_t 		time;   		/* time stamp for start or delta time */
 	struct sockaddr_in slurm_addr;		/* network address */
+	char		node_name[MAX_NAME_LEN];/* node's name */
 } thd_t;
 
 typedef struct agent_info {
@@ -89,12 +92,12 @@ typedef struct agent_info {
 } agent_info_t;
 
 typedef struct task_info {
-	pthread_mutex_t	*thread_mutex;		/* agent specific mutex */
-	pthread_cond_t	*thread_cond;		/* agent specific condition */
-	uint32_t	*threads_active;	/* count of currently active threads */
-	thd_t 		*thread_struct;		/* thread structures */
+	pthread_mutex_t	*thread_mutex_ptr;	/* pointer to agent specific mutex */
+	pthread_cond_t	*thread_cond_ptr;	/* pointer to agent specific condition */
+	uint32_t	*threads_active_ptr;	/* pointer to count of currently active threads */
+	thd_t 		*thread_struct_ptr;	/* pointer to thread structures */
 	slurm_msg_type_t msg_type;		/* RPC to be issued */
-	void		*msg_args;		/* RPC data to be used */
+	void		*msg_args_ptr;		/* pointer to RPC data to be used */
 } task_info_t;
 
 static void *thread_per_node_rpc (void *args);
@@ -103,8 +106,8 @@ static void *wdog (void *args);
 /*
  * agent - party responsible for transmitting an common RPC in parallel across a set 
  *	of nodes
- * input: pointer to agent_arg_t, which is xfree'd (including slurm_addr and msg_args)
- *	upon completion if AGENT_IS_THREAD is set
+ * input: pointer to agent_arg_t, which is xfree'd (including slurm_addr, node_names, 
+ *	and msg_args) upon completion if AGENT_IS_THREAD is set
  */
 void *
 agent (void *args)
@@ -124,6 +127,8 @@ agent (void *args)
 		goto cleanup;	/* no messages to be sent */
 	if (agent_arg_ptr->slurm_addr == NULL)
 		fatal ("agent passed NULL address list");
+	if (agent_arg_ptr->node_names == NULL)
+		fatal ("agent passed NULL node name list");
 	if (agent_arg_ptr->msg_type != REQUEST_REVOKE_JOB_CREDENTIAL)
 		fatal ("agent passed invalid message type %d", agent_arg_ptr->msg_type);
 
@@ -142,6 +147,8 @@ agent (void *args)
 	for (i = 0; i < agent_info_ptr->thread_count; i++) {
 		thread_ptr[i].state = DSH_NEW;
 		thread_ptr[i].slurm_addr = agent_arg_ptr->slurm_addr[i];
+		strncpy (thread_ptr[i].node_name,
+		         &agent_arg_ptr->node_names[i*MAX_NAME_LEN], MAX_NAME_LEN);
 	}
 
 	/* start the watchdog thread */
@@ -173,14 +180,14 @@ agent (void *args)
 			                   &agent_info_ptr->thread_mutex);
 		}
  
-		/* create thread */
+		/* create thread, note this is freed from  thread_per_node_rpc() */
 		task_specific_ptr 			= xmalloc (sizeof (task_info_t));
-		task_specific_ptr->thread_mutex		= &agent_info_ptr->thread_mutex;
-		task_specific_ptr->thread_cond		= &agent_info_ptr->thread_cond;
-		task_specific_ptr->threads_active	= &agent_info_ptr->threads_active;
-		task_specific_ptr->thread_struct	= &thread_ptr[i];
+		task_specific_ptr->thread_mutex_ptr	= &agent_info_ptr->thread_mutex;
+		task_specific_ptr->thread_cond_ptr	= &agent_info_ptr->thread_cond;
+		task_specific_ptr->threads_active_ptr	= &agent_info_ptr->threads_active;
+		task_specific_ptr->thread_struct_ptr	= &thread_ptr[i];
 		task_specific_ptr->msg_type		= agent_info_ptr->msg_type;
-		task_specific_ptr->msg_args		= &agent_info_ptr->msg_args;
+		task_specific_ptr->msg_args_ptr		= &agent_info_ptr->msg_args;
 
 		if (pthread_attr_init (&thread_ptr[i].attr))
 			fatal ("pthread_attr_init error %m");
@@ -217,6 +224,8 @@ cleanup:
 	if (agent_arg_ptr) {
 		if (agent_arg_ptr->slurm_addr)
 			xfree (agent_arg_ptr->slurm_addr);
+		if (agent_arg_ptr->node_names)
+			xfree (agent_arg_ptr->node_names);
 		if (agent_arg_ptr->msg_args)
 			xfree (agent_arg_ptr->msg_args);
 		xfree (agent_arg_ptr);
@@ -240,6 +249,12 @@ wdog (void *args)
 	int i, fail_cnt, work_done, delay, max_delay = 0;
 	agent_info_t *agent_ptr = (agent_info_t *) args;
 	thd_t *thread_ptr = agent_ptr->thread_struct;
+#if AGENT_IS_THREAD
+	/* Locks: Write node */
+	slurmctld_lock_t node_write_lock = { NO_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK };
+#else
+	struct sockaddr_in *slurm_addr;
+#endif
 
 	while (1) {
 		work_done = 1;	/* assume all threads complete for now */
@@ -274,30 +289,37 @@ wdog (void *args)
 
 	/* Notify slurmctld of non-responding nodes */
 	if (fail_cnt) {
-		struct sockaddr_in *slurm_addr;
-
+#if AGENT_IS_THREAD
+		/* Update node table data for non-responding nodes */
+		lock_slurmctld (node_write_lock);
+		for (i = 0; i < agent_ptr->thread_count; i++) {
+			if (thread_ptr[i].state == DSH_FAILED)
+				node_not_resp (thread_ptr[i].node_name);
+		}
+		unlock_slurmctld (node_write_lock);
+#else
+		/* Build a list of all non-responding nodes and send it to slurmctld */
+		error ("agent/wdog: %d nodes failed to respond", fail_cnt);
 		slurm_addr = xmalloc (fail_cnt * sizeof (struct sockaddr_in));
-		fail_cnt = 0;
 		for (i = 0; i < agent_ptr->thread_count; i++) {
-			if (thread_ptr[i].state != DSH_FAILED)
-				continue;
-			/* build a list of slurm_addr's */
-			slurm_addr[fail_cnt++] = thread_ptr[i].slurm_addr;
-			xfree (thread_ptr);
+			if (thread_ptr[i].state == DSH_FAILED)
+				slurm_addr[fail_cnt++] = thread_ptr[i].slurm_addr;
 		}
 
-		info ("agent/wdog: %d nodes failed to respond", fail_cnt);
 		/* send RPC */
+		fatal ("Code development needed here if agent is not thread");
 
 		xfree (slurm_addr);
+#endif
 	}
-	info ("agent maximum delay %d seconds", max_delay);
+	debug ("agent maximum delay %d seconds", max_delay);
 
 	pthread_mutex_unlock (&agent_ptr->thread_mutex);
 	return (void *) NULL;
 }
 
-/* thread_per_node_rpc - thread to revoke a credential on a collection of nodes */
+/* thread_per_node_rpc - thread to revoke a credential on a collection of nodes
+ *	This xfrees the argument passed to it */
 static void *
 thread_per_node_rpc (void *args)
 {
@@ -309,7 +331,7 @@ thread_per_node_rpc (void *args)
 	slurm_msg_t response_msg ;
 	return_code_msg_t * slurm_rc_msg ;
 	task_info_t *task_ptr = (task_info_t *) args;
-	thd_t *thread_ptr = task_ptr->thread_struct;
+	thd_t *thread_ptr = task_ptr->thread_struct_ptr;
 	state_t thread_state  = DSH_FAILED;
 
 	/* accept SIGALRM */
@@ -322,10 +344,10 @@ thread_per_node_rpc (void *args)
 
 	if (args == NULL)
 		fatal ("thread_per_node_rpc has NULL argument");
-	pthread_mutex_lock (task_ptr->thread_mutex);
+	pthread_mutex_lock (task_ptr->thread_mutex_ptr);
 	thread_ptr->state = DSH_ACTIVE;
 	thread_ptr->time = time (NULL);
-	pthread_mutex_unlock (task_ptr->thread_mutex);
+	pthread_mutex_unlock (task_ptr->thread_mutex_ptr);
 
 	/* init message connection for message communication */
 	if ( ( sockfd = slurm_open_msg_conn (& thread_ptr->slurm_addr) ) == SLURM_SOCKET_ERROR ) {
@@ -335,7 +357,7 @@ thread_per_node_rpc (void *args)
 
 	/* send request message */
 	request_msg . msg_type = task_ptr->msg_type ;
-	request_msg . data = task_ptr->msg_args ; 
+	request_msg . data = task_ptr->msg_args_ptr ; 
 	if ( ( rc = slurm_send_node_msg ( sockfd , & request_msg ) ) == SLURM_SOCKET_ERROR ) {
 		error ("thread_per_node_rpc/slurm_send_node_msg error %m");
 		goto cleanup;
@@ -365,8 +387,11 @@ thread_per_node_rpc (void *args)
 			slurm_free_return_code_msg ( slurm_rc_msg );	
 			if (rc)
 				error ("thread_per_node_rpc/rc error %d", rc);
-			else
+			else {
+				debug3 ("agent sucessfully processed RPC to node %s", 
+				        thread_ptr->node_name);
 				thread_state = DSH_DONE;
+			}
 
 			break ;
 		default:
@@ -375,16 +400,16 @@ thread_per_node_rpc (void *args)
 	}
 
 cleanup:
-	pthread_mutex_lock (task_ptr->thread_mutex);
+	pthread_mutex_lock (task_ptr->thread_mutex_ptr);
 	thread_ptr->state = thread_state;
 	thread_ptr->time = (time_t) difftime (time (NULL), thread_ptr->time);
 
 	/* Signal completion so another thread can replace us */
-	(*task_ptr->threads_active)--;
-	pthread_cond_signal(task_ptr->thread_cond);
-	pthread_mutex_unlock (task_ptr->thread_mutex);
+	(*task_ptr->threads_active_ptr)--;
+	pthread_cond_signal(task_ptr->thread_cond_ptr);
+	pthread_mutex_unlock (task_ptr->thread_mutex_ptr);
 
-	xfree (task_ptr);
+	xfree (args);
 	return (void *) NULL;
 }
 
diff --git a/src/slurmctld/agent.h b/src/slurmctld/agent.h
index 059b2695933..020fec592db 100644
--- a/src/slurmctld/agent.h
+++ b/src/slurmctld/agent.h
@@ -39,6 +39,7 @@
 typedef struct agent_arg {
 	uint32_t	addr_count;		/* number of network addresses to communicate with */
 	struct sockaddr_in *slurm_addr;		/* array of network addresses */
+	char		*node_names;		/* array with MAX_NAME_LEN bytes per node */
 	slurm_msg_type_t msg_type;		/* RPC to be issued */
 	void		*msg_args;		/* RPC data to be transmitted */
 } agent_arg_t;
diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c
index 28252e38526..35428b6c7d1 100644
--- a/src/slurmctld/node_mgr.c
+++ b/src/slurmctld/node_mgr.c
@@ -1072,3 +1072,26 @@ validate_node_specs (char *node_name, uint32_t cpus,
 
 	return error_code;
 }
+
+/* node_not_resp - record that the specified node is not responding */
+void 
+node_not_resp (char *name)
+{
+	struct node_record *node_ptr;
+	int i;
+
+	node_ptr = find_node_record (name);
+	if (node_ptr == NULL) {
+		error ("node_not_resp unable to find node %s", name);
+		return;
+	}
+
+	i = node_ptr - node_record_table_ptr;
+	last_node_update = time (NULL);
+	error ("Node %s not responding", name);
+	bit_clear (up_node_bitmap, i);
+	bit_clear (idle_node_bitmap, i);
+	node_record_table_ptr[i].node_state |= NODE_STATE_NO_RESPOND;
+	return;
+}
+
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index 8125e9b290e..0342d9bdb55 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -119,6 +119,7 @@ deallocate_nodes (struct job_record  * job_ptr)
 	agent_arg_t *agent_args;
 	pthread_attr_t attr_agent;
 	pthread_t thread_agent;
+	int buf_rec_size = 0;
 
 	agent_args = xmalloc (sizeof (agent_arg_t));
 	agent_args->msg_type = REQUEST_REVOKE_JOB_CREDENTIAL;
@@ -133,10 +134,15 @@ deallocate_nodes (struct job_record  * job_ptr)
 		if (bit_test (job_ptr->node_bitmap, i) == 0)
 			continue;
 #if AGENT_TEST
-		xrealloc ((agent_args->slurm_addr), 
-		          (sizeof (struct sockaddr_in) * (agent_args->addr_count+1)));
-		agent_args->slurm_addr[(agent_args->addr_count++)] = 
-			node_record_table_ptr[i].slurm_addr;
+		if ((agent_args->addr_count+1) > buf_rec_size) {
+			buf_rec_size += 32;
+			xrealloc ((agent_args->slurm_addr), (sizeof (struct sockaddr_in) * buf_rec_size));
+			xrealloc ((agent_args->node_names), (MAX_NAME_LEN * buf_rec_size));
+		}
+		agent_args->slurm_addr[agent_args->addr_count] = node_record_table_ptr[i].slurm_addr;
+		strncpy (&agent_args->node_names[MAX_NAME_LEN*agent_args->addr_count],
+		         node_record_table_ptr[i].name, MAX_NAME_LEN);
+		agent_args->addr_count++;
 #else
 		slurm_revoke_job_cred (&node_record_table_ptr[i], revoke_job_cred);
 #endif
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index f1cd7592595..6ca85fe8d42 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -396,6 +396,9 @@ int mkdir2 (char * path, int modes);
 /* node_name2bitmap - given a node name regular expression, build a bitmap representation */
 extern int node_name2bitmap (char *node_names, bitstr_t **bitmap);
 
+/* node_not_resp - record that the specified node is not responding */
+extern void node_not_resp (char *name);
+
 /* 
  * pack_all_jobs - dump all job information for all jobs in 
  *	machine independent form (for network transmission)
-- 
GitLab