From 6b371f524d4d5a2e9d9d02240bbfab82c5e4ed59 Mon Sep 17 00:00:00 2001
From: Danny Auble <da@llnl.gov>
Date: Tue, 17 Oct 2006 18:45:30 +0000
Subject: [PATCH] fixes after the merge

---
 src/common/forward.c            | 18 +++++++++-------
 src/common/slurm_protocol_api.c | 38 ++++++++++++++++-----------------
 src/common/slurm_protocol_api.h |  5 ++---
 src/slurmctld/agent.c           |  1 -
 src/slurmd/slurmd/slurmd.c      |  3 +--
 src/srun/allocate.c             |  4 ++--
 src/srun/opt.c                  |  2 +-
 7 files changed, 35 insertions(+), 36 deletions(-)

diff --git a/src/common/forward.c b/src/common/forward.c
index ec62d72a708..428ff914945 100644
--- a/src/common/forward.c
+++ b/src/common/forward.c
@@ -107,13 +107,7 @@ void *_forward_thread(void *arg)
 		xfree(fwd_msg->header.forward.nodelist);
 		fwd_msg->header.forward.nodelist = xstrdup(buf);
 		fwd_msg->header.forward.cnt = hostlist_count(hl);
-		if(fwd_msg->header.forward.cnt>0) {
-			steps = (fwd_msg->header.forward.cnt+1) /
-				slurm_get_tree_width();
-			fwd_msg->timeout = (1000*steps);
-			steps++;
-			fwd_msg->timeout += (start_timeout*steps);
-		}	
+		
 		debug3("forward: along with %s",
 		       fwd_msg->header.forward.nodelist);
 		
@@ -167,7 +161,15 @@ void *_forward_thread(void *arg)
 			}
 			goto cleanup;
 		}
-	
+
+		if(fwd_msg->header.forward.cnt>0) {
+			steps = (fwd_msg->header.forward.cnt+1) /
+				slurm_get_tree_width();
+			fwd_msg->timeout = (1000*steps);
+			steps++;
+			fwd_msg->timeout += (start_timeout*steps);
+		}	
+		
 		ret_list = slurm_receive_msgs(fd, steps, fwd_msg->timeout);
 
 		if(!ret_list || (fwd_msg->header.forward.cnt != 0 
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 97a24bd2ab5..b67819f9de8 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -839,7 +839,7 @@ int slurm_receive_msg(slurm_fd fd, slurm_msg_t *msg, int timeout)
 	/* Forward message to other nodes */
 	if(header.forward.cnt > 0) {
 		error("We need to forward this to other nodes use "
-		      "slurm_receive_and_forward_msgs instead");
+		      "slurm_receive_msg_and_forward instead");
 	}
 	
 	if ((auth_cred = g_slurm_auth_unpack(buffer)) == NULL) {
@@ -925,8 +925,9 @@ List slurm_receive_msgs(slurm_fd fd, int steps, int timeout)
 		/* convert secs to msec */
                 timeout  = slurm_get_msg_timeout() * 1000; 
 	if(steps) {
-		steps++;
 		orig_timeout = timeout/steps;
+		steps--;
+		orig_timeout -= (1000*steps);
 	}
 	debug4("orig_timeout was %d we have %d steps and a timeout of %d",
 	       orig_timeout, steps, timeout);
@@ -1055,12 +1056,11 @@ total_return:
  * IN open_fd	- file descriptor to receive msg on
  * IN/OUT msg	- a slurm_msg struct to be filled in by the function
  *                we use the orig_addr from this var for forwarding. 
- * IN steps	- how many steps down the tree we have to wait for
  * IN timeout	- how long to wait in milliseconds
  * RET int	- returns 0 on success, -1 on failure and sets errno
  */
-int slurm_receive_and_forward_msgs(slurm_fd fd, slurm_addr *orig_addr, 
-				   slurm_msg_t *msg, int steps, int timeout)
+int slurm_receive_msg_and_forward(slurm_fd fd, slurm_addr *orig_addr, 
+				  slurm_msg_t *msg, int timeout)
 {
 	char *buf = NULL;
 	size_t buflen = 0;
@@ -1068,8 +1068,6 @@ int slurm_receive_and_forward_msgs(slurm_fd fd, slurm_addr *orig_addr,
 	int rc;
 	void *auth_cred = NULL;
 	Buf buffer;
-	ret_data_info_t *ret_data_info = NULL;
-	int orig_timeout = timeout;
 
 	xassert(fd >= 0);
 
@@ -1091,21 +1089,16 @@ int slurm_receive_and_forward_msgs(slurm_fd fd, slurm_addr *orig_addr,
 	if (timeout <= 0)
 		/* convert secs to msec */
                 timeout  = slurm_get_msg_timeout() * 1000; 
-	if(steps) {
-		steps++;
-		orig_timeout = timeout/steps;
-	}
-	
-	if(orig_timeout >= (slurm_get_msg_timeout() * 10000)) {
+		
+	if(timeout >= (slurm_get_msg_timeout() * 10000)) {
 		error("You are sending a message with timeout's greater "
 		      "than %d seconds, your's is %d seconds", 
 		      (slurm_get_msg_timeout() * 10), 
 		      (timeout/1000));
-	} else if(orig_timeout < 1000) {
+	} else if(timeout < 1000) {
 		debug("You are sending a message with a very short timeout of "
 		      "%d milliseconds", timeout);
-	} 
-	
+	} 	
 
 	/*
 	 * Receive a msg. slurm_msg_recvfrom() will read the message
@@ -1134,14 +1127,21 @@ int slurm_receive_and_forward_msgs(slurm_fd fd, slurm_addr *orig_addr,
 		rc = SLURM_PROTOCOL_VERSION_ERROR;
 		goto total_return;
 	}
-	//info("ret_cnt = %d",header.ret_cnt);
 	if(header.ret_cnt > 0) {
-		while((ret_data_info = list_pop(header.ret_list)))
-			list_push(msg->ret_list, ret_data_info);
+		error("we recieved more than one message back use "
+		      "slurm_receive_msgs instead");
 		header.ret_cnt = 0;
 		list_destroy(header.ret_list);
 		header.ret_list = NULL;
 	}
+	//info("ret_cnt = %d",header.ret_cnt);
+	/* if(header.ret_cnt > 0) { */
+/* 		while((ret_data_info = list_pop(header.ret_list))) */
+/* 			list_push(msg->ret_list, ret_data_info); */
+/* 		header.ret_cnt = 0; */
+/* 		list_destroy(header.ret_list); */
+/* 		header.ret_list = NULL; */
+/* 	} */
 	/* 
 	 * header.orig_addr will be set to where the first message
 	 * came from if this is a forward else we set the
diff --git a/src/common/slurm_protocol_api.h b/src/common/slurm_protocol_api.h
index 0e343c525cc..2259188da8e 100644
--- a/src/common/slurm_protocol_api.h
+++ b/src/common/slurm_protocol_api.h
@@ -351,12 +351,11 @@ List slurm_receive_msgs(slurm_fd fd, int steps, int timeout);
  *
  * IN open_fd	- file descriptor to receive msg on
  * OUT resp	- a slurm_msg struct to be filled in by the function
- * IN steps	- how many steps down the tree we have to wait for
  * IN timeout	- how long to wait in milliseconds
  * RET int	- returns 0 on success, -1 on failure and sets errno
  */
-int slurm_receive_and_forward_msgs(slurm_fd fd, slurm_addr *orig_addr, 
-				   slurm_msg_t *resp, int steps, int timeout);
+int slurm_receive_msg_and_forward(slurm_fd fd, slurm_addr *orig_addr, 
+				  slurm_msg_t *resp, int timeout);
 
 /**********************************************************************\
  * send message functions
diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c
index f3ef27f7e2c..554cafa55cb 100644
--- a/src/slurmctld/agent.c
+++ b/src/slurmctld/agent.c
@@ -1029,7 +1029,6 @@ static void _queue_agent_retry(agent_info_t * agent_info_ptr, int count)
 	j = 0;
 	for (i = 0; i < agent_info_ptr->thread_count; i++) {
 		if(!thread_ptr[i].ret_list) {
-			char ip_buf[32];
 			if (thread_ptr[i].state != DSH_NO_RESP)
 				continue;
 
diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c
index fea862f71cb..395ca18322b 100644
--- a/src/slurmd/slurmd/slurmd.c
+++ b/src/slurmd/slurmd/slurmd.c
@@ -364,8 +364,7 @@ _service_connection(void *arg)
 	
 	debug3("in the service_connection");
 	slurm_msg_t_init(msg);
-	if((rc = slurm_receive_and_forward_msgs(
-		    con->fd, con->cli_addr, msg, 0, 0))
+	if((rc = slurm_receive_msg_and_forward(con->fd, con->cli_addr, msg, 0))
 	   != SLURM_SUCCESS) {
 		error("service_connection: slurm_receive_msg: %m");
 		/* if this fails we need to make sure the nodes we forward
diff --git a/src/srun/allocate.c b/src/srun/allocate.c
index 714195fb29e..788296e7a31 100644
--- a/src/srun/allocate.c
+++ b/src/srun/allocate.c
@@ -667,10 +667,10 @@ create_job_step(srun_job_t *job)
 		
 
 	/*  Number of hosts in job may not have been initialized yet if 
-	 *    --jobid was used or only SLURM_JOBID was set in user env.		 
+	 *    --jobid was used or only SLURM_JOBID was set in user env.
 	 *    Reset the value here just in case.
 	 */
-	job->nhosts = job->step_layout->num_hosts;
+	job->nhosts = job->step_layout->node_cnt;
 
 	if(!job->step_layout) {
 		error("step_layout not returned");
diff --git a/src/srun/opt.c b/src/srun/opt.c
index abab6989bc3..ddc346f0407 100644
--- a/src/srun/opt.c
+++ b/src/srun/opt.c
@@ -923,7 +923,7 @@ static void _opt_default()
 	opt.max_launch_time = 120;/* 120 seconds to launch job             */
 	opt.max_exit_timeout= 60; /* Warn user 60 seconds after task exit */
 	/* Default launch msg timeout           */
-	opt.msg_timeout     = SLURM_MESSAGE_TIMEOUT_SEC_STATIC;  
+	opt.msg_timeout     = slurm_get_msg_timeout();  
 
 	for (i=0; i<SYSTEM_DIMENSIONS; i++)
 		opt.geometry[i]	    = (uint16_t) NO_VAL;
-- 
GitLab