From 45b29fea9586dc5caf8a3cb8bd2c7390a94e13b6 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Tue, 5 Jun 2007 23:03:51 +0000
Subject: [PATCH] Slurmctld maintains the IP address (rather than hostname) for
 srun communications. This fixes some possible network routing issues.

---
 NEWS                             |  2 ++
 slurm/slurm.h.in                 | 13 +++++-----
 src/api/allocate.c               | 10 ++++----
 src/api/init_msg.c               |  4 +--
 src/common/slurm_protocol_defs.c |  3 +--
 src/common/slurm_protocol_pack.c |  8 ------
 src/salloc/salloc.c              |  1 -
 src/slurmctld/job_mgr.c          | 40 ++++++++++++-----------------
 src/slurmctld/proc_req.c         | 44 +++++++++++++++++++++++++-------
 src/slurmctld/slurmctld.h        |  3 +--
 src/slurmctld/srun_comm.c        | 43 ++++++++++++++-----------------
 src/srun/allocate.c              |  9 -------
 12 files changed, 88 insertions(+), 92 deletions(-)

diff --git a/NEWS b/NEWS
index ff6eb6455df..85de0fa08dd 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,8 @@ documents those changes that are of interest to users and admins.
     prevent a user from seeing jobs or job steps belonging to other users.
  -- Added configuration parameters for node power save mode: ResumeProgram
     ResumeRate, SuspendExcNodes, SuspendExcParts, SuspendProgram and SuspendRate.
+ -- Slurmctld maintains the IP address (rather than hostname) for srun 
+    communications. This fixes some possible network routing issues.
 
 * Changes in SLURM 1.2.10
 =========================
diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in
index 171320a3587..f25a82e374c 100644
--- a/slurm/slurm.h.in
+++ b/slurm/slurm.h.in
@@ -505,14 +505,15 @@ typedef struct job_descriptor {	/* For submit, allocate, and update requests */
 				 * slurm_allocate* function */
 
 	/* If the requested allocation is not immediately available,
-	 * The controller sends the RESPONSE_RESOURCE_ALLOCATION message to
-	 * the address designated by the alloc_hostname and alloc_port.
-	 * All other messages (SRUN_PING, SRUN_TIMEOUT, etc.) are sent to
-	 * the address designated by other_hostname/other_port.
+	 * the controller sends the RESPONSE_RESOURCE_ALLOCATION message
+	 * to resp_host/alloc_resp_port.
+	 * All other messages (SRUN_PING, SRUN_TIMEOUT, etc.) are sent
+	 * to resp_host/other_port.
+	 * The value of resp_host is the host address to which messages
+	 * are sent. Its value is set internally by SLURM.
 	 */
-	char    *alloc_resp_hostname;
+	char *resp_host;	/* NOTE: Set by slurmctld */
 	uint16_t alloc_resp_port;
-	char    *other_hostname;
 	uint16_t other_port;
 
 	uint32_t dependency;	/* defer until specified job completes */
diff --git a/src/api/allocate.c b/src/api/allocate.c
index 94944123443..906d0c38dad 100644
--- a/src/api/allocate.c
+++ b/src/api/allocate.c
@@ -212,7 +212,6 @@ slurm_allocate_resources_blocking (const job_desc_msg_t *user_req,
 			xfree(req);
 			return NULL;
 		}
-		req->alloc_resp_hostname = listen->hostname;
 		req->alloc_resp_port = listen->port;
 	}
 
@@ -289,9 +288,13 @@ slurm_allocate_resources_blocking (const job_desc_msg_t *user_req,
 int slurm_job_will_run (job_desc_msg_t *req)
 {
 	slurm_msg_t req_msg;
+	char host[64];
 	int rc;
 
 	/* req.immediate = true;    implicit */
+	if ((req->alloc_node == NULL)
+	&&  (gethostname_short(host, sizeof(host)) == 0))
+		req->alloc_node = host;
 	slurm_msg_t_init(&req_msg);
 	req_msg.msg_type = REQUEST_JOB_WILL_RUN;
 	req_msg.data     = req; 
@@ -556,12 +559,9 @@ static listen_t *_create_allocation_response_socket(char *interface_hostname)
 	listen_t *listen = NULL;
 
 	listen = xmalloc(sizeof(listen_t));
-	if (listen == NULL)
-		return NULL;
 
 	/* port "0" lets the operating system pick any port */
-	slurm_set_addr(&listen->address, 0, interface_hostname);
-	if ((listen->fd = slurm_init_msg_engine(&listen->address)) < 0) {
+	if ((listen->fd = slurm_init_msg_engine_port(0)) < 0) {
 		error("slurm_init_msg_engine_port error %m");
 		return NULL;
 	}
diff --git a/src/api/init_msg.c b/src/api/init_msg.c
index 08a03077931..f06d15829cd 100644
--- a/src/api/init_msg.c
+++ b/src/api/init_msg.c
@@ -111,9 +111,7 @@ void slurm_init_job_desc_msg(job_desc_msg_t * job_desc_msg)
 	job_desc_msg->user_id     = NO_VAL;
 	job_desc_msg->group_id    = NO_VAL;
 	job_desc_msg->work_dir    = NULL;
-	job_desc_msg->alloc_resp_hostname = NULL;
-	job_desc_msg->alloc_resp_port        = 0;
-	job_desc_msg->other_hostname = NULL;
+	job_desc_msg->alloc_resp_port = 0;
 	job_desc_msg->other_port  = 0;
 	job_desc_msg->mail_type   = 0;
 	job_desc_msg->mail_user   = NULL;
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c
index 3a3ea8be30a..0e16d428f9e 100644
--- a/src/common/slurm_protocol_defs.c
+++ b/src/common/slurm_protocol_defs.c
@@ -197,11 +197,10 @@ void slurm_free_job_desc_msg(job_desc_msg_t * msg)
 		xfree(msg->in);
 		xfree(msg->out);
 		xfree(msg->work_dir);
-		xfree(msg->alloc_resp_hostname);
-		xfree(msg->other_hostname);
 		xfree(msg->account);
 		xfree(msg->network);
 		xfree(msg->comment);
+		xfree(msg->resp_host);
 		xfree(msg->blrtsimage);
 		xfree(msg->linuximage);
 		xfree(msg->mloaderimage);
diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c
index f1950b5c82a..03fddd96525 100644
--- a/src/common/slurm_protocol_pack.c
+++ b/src/common/slurm_protocol_pack.c
@@ -2389,9 +2389,7 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer)
 	pack32(job_desc_ptr->group_id, buffer);
 
 	pack16(job_desc_ptr->alloc_resp_port, buffer);
-	packstr(job_desc_ptr->alloc_resp_hostname, buffer);
 	pack16(job_desc_ptr->other_port, buffer);
-	packstr(job_desc_ptr->other_hostname, buffer);
 	packstr(job_desc_ptr->network, buffer);
 	pack_time(job_desc_ptr->begin_time, buffer);
 
@@ -2520,11 +2518,7 @@ _unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer)
 	safe_unpack32(&job_desc_ptr->group_id, buffer);
 
 	safe_unpack16(&job_desc_ptr->alloc_resp_port, buffer);
-	safe_unpackstr_xmalloc(&job_desc_ptr->alloc_resp_hostname,
-			       &uint16_tmp, buffer);
 	safe_unpack16(&job_desc_ptr->other_port, buffer);
-	safe_unpackstr_xmalloc(&job_desc_ptr->other_hostname,
-			       &uint16_tmp, buffer);
 	safe_unpackstr_xmalloc(&job_desc_ptr->network, &uint16_tmp, buffer);
 	safe_unpack_time(&job_desc_ptr->begin_time, buffer);
 
@@ -2559,8 +2553,6 @@ unpack_error:
 	xfree(job_desc_ptr->in);
 	xfree(job_desc_ptr->out);
 	xfree(job_desc_ptr->work_dir);
-	xfree(job_desc_ptr->alloc_resp_hostname);
-	xfree(job_desc_ptr->other_hostname);
 	xfree(job_desc_ptr->network);
 	xfree(job_desc_ptr->mail_user);
 	xfree(job_desc_ptr);
diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c
index f91e5f74062..398ab1bd78a 100644
--- a/src/salloc/salloc.c
+++ b/src/salloc/salloc.c
@@ -105,7 +105,6 @@ int main(int argc, char *argv[])
 
 	/* create message thread to handle pings and such from slurmctld */
 	msg_thr = msg_thr_create(&desc.other_port);
-	desc.other_hostname = xshort_hostname();
 
 	xsignal(SIGHUP, _signal_while_allocating);
 	xsignal(SIGINT, _signal_while_allocating);
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index c46cf6d50fb..f16e2114626 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -494,13 +494,13 @@ static void _dump_job_state(struct job_record *dump_job_ptr, Buf buffer)
 	pack16(dump_job_ptr->kill_on_node_fail, buffer);
 	pack16(dump_job_ptr->kill_on_step_done, buffer);
 	pack16(dump_job_ptr->batch_flag, buffer);
-	pack16(dump_job_ptr->alloc_resp_port, buffer);
-	pack16(dump_job_ptr->other_port, buffer);
 	pack16(dump_job_ptr->mail_type, buffer);
 	pack16(dump_job_ptr->state_reason, buffer);
 
-	packstr(dump_job_ptr->alloc_resp_host, buffer);
-	packstr(dump_job_ptr->other_host, buffer);
+	packstr(dump_job_ptr->resp_host, buffer);
+	pack16(dump_job_ptr->alloc_resp_port, buffer);
+	pack16(dump_job_ptr->other_port, buffer);
+
 	if (dump_job_ptr->job_state & JOB_COMPLETING) {
 		if (dump_job_ptr->nodes_completing == NULL) {
 			dump_job_ptr->nodes_completing =
@@ -550,10 +550,9 @@ static int _load_job_state(Buf buffer)
 	uint16_t job_state, next_step_id, details, batch_flag, step_flag;
 	uint16_t kill_on_node_fail, kill_on_step_done, name_len;
 	uint16_t alloc_resp_port, other_port, mail_type, state_reason;
-	char *nodes = NULL, *partition = NULL, *name = NULL;
-	char *alloc_node = NULL, *alloc_resp_host = NULL, *other_host = NULL;
+	char *nodes = NULL, *partition = NULL, *name = NULL, *resp_host = NULL;
 	char *account = NULL, *network = NULL, *mail_user = NULL;
-	char *comment = NULL, *nodes_completing = NULL;
+	char *comment = NULL, *nodes_completing = NULL, *alloc_node = NULL;
 	struct job_record *job_ptr;
 	struct part_record *part_ptr;
 	int error_code;
@@ -579,13 +578,13 @@ static int _load_job_state(Buf buffer)
 	safe_unpack16(&kill_on_node_fail, buffer);
 	safe_unpack16(&kill_on_step_done, buffer);
 	safe_unpack16(&batch_flag, buffer);
-	safe_unpack16(&alloc_resp_port, buffer);
-	safe_unpack16(&other_port, buffer);
 	safe_unpack16(&mail_type, buffer);
 	safe_unpack16(&state_reason, buffer);
 
-	safe_unpackstr_xmalloc(&alloc_resp_host, &name_len, buffer);
-	safe_unpackstr_xmalloc(&other_host, &name_len, buffer);
+	safe_unpackstr_xmalloc(&resp_host, &name_len, buffer);
+	safe_unpack16(&alloc_resp_port, buffer);
+	safe_unpack16(&other_port, buffer);
+
 	if (job_state & JOB_COMPLETING) {
 		safe_unpackstr_xmalloc(&nodes_completing, 
 				       &name_len, buffer);
@@ -701,10 +700,10 @@ static int _load_job_state(Buf buffer)
 	job_ptr->kill_on_node_fail = kill_on_node_fail;
 	job_ptr->kill_on_step_done = kill_on_step_done;
 	job_ptr->batch_flag        = batch_flag;
+	job_ptr->resp_host         = resp_host;
+	resp_host = NULL;	/* reused, nothing left to free */
 	job_ptr->alloc_resp_port   = alloc_resp_port;
-	job_ptr->alloc_resp_host   = alloc_resp_host;
 	job_ptr->other_port        = other_port;
-	job_ptr->other_host        = other_host;
 	job_ptr->mail_type         = mail_type;
 	job_ptr->mail_user         = mail_user;
 	mail_user = NULL;	/* reused, nothing left to free */
@@ -726,8 +725,6 @@ static int _load_job_state(Buf buffer)
 
 unpack_error:
 	error("Incomplete job record");
-	xfree(alloc_resp_host);
-	xfree(other_host);
 	xfree(nodes);
 	xfree(nodes_completing);
 	xfree(partition);
@@ -735,6 +732,7 @@ unpack_error:
 	xfree(alloc_node);
 	xfree(account);
 	xfree(comment);
+	xfree(resp_host);
 	xfree(mail_user);
 	select_g_free_jobinfo(&select_jobinfo);
 	return SLURM_FAILURE;
@@ -1217,10 +1215,9 @@ void dump_job_desc(job_desc_msg_t * job_specs)
 
 	dependency = (job_specs->dependency != NO_VAL) ?
 		(long) job_specs->dependency : -1L;
-	debug3("   alloc_resp_hostname=%s alloc_resp_port=%u",
-	       job_specs->alloc_resp_hostname, job_specs->alloc_resp_port);
-	debug3("   other_hostname=%s other_port=%u",
-	       job_specs->other_hostname, job_specs->other_port);
+	debug3("   resp_host=%s alloc_resp_port=%u  other_port=%u",
+		job_specs->resp_host, 
+		job_specs->alloc_resp_port, job_specs->other_port);
 	debug3("   dependency=%ld account=%s comment=%s",
 	       dependency, job_specs->account, job_specs->comment);
 
@@ -2419,10 +2416,9 @@ _copy_job_desc_to_job_record(job_desc_msg_t * job_desc,
 	if (job_desc->kill_on_node_fail != (uint16_t) NO_VAL)
 		job_ptr->kill_on_node_fail = job_desc->kill_on_node_fail;
 
+	job_ptr->resp_host = xstrdup(job_desc->resp_host);
 	job_ptr->alloc_resp_port = job_desc->alloc_resp_port;
-	job_ptr->alloc_resp_host = xstrdup(job_desc->alloc_resp_hostname);
 	job_ptr->other_port = job_desc->other_port;
-	job_ptr->other_host = xstrdup(job_desc->other_hostname);
 	job_ptr->time_last_active = time(NULL);
 	job_ptr->num_procs = job_desc->num_procs;
         job_ptr->cr_enabled = 0;
@@ -2745,8 +2741,6 @@ static void _list_delete_job(void *job_entry)
 	xfree(job_ptr->cpus_per_node);
 	xfree(job_ptr->cpu_count_reps);
 	xfree(job_ptr->node_addr);
-	xfree(job_ptr->alloc_resp_host);
-	xfree(job_ptr->other_host);
 	xfree(job_ptr->account);
 	xfree(job_ptr->mail_user);
 	xfree(job_ptr->network);
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index da20f5836b5..c62c1debb79 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -464,19 +464,29 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg)
 	bool do_unlock = false;
 	bool job_waiting = false;
 	struct job_record *job_ptr;
+	uint16_t port;	/* dummy value */
+	slurm_addr resp_addr;
 
 	START_TIMER;
 	debug2("Processing RPC: REQUEST_RESOURCE_ALLOCATION");
 
 	/* do RPC call */
-	dump_job_desc(job_desc_msg);
 	uid = g_slurm_auth_get_uid(msg->auth_cred);
-	if ( (uid != job_desc_msg->user_id) && (!validate_super_user(uid)) ) {
+	if ((uid != job_desc_msg->user_id) && (!validate_super_user(uid))) {
 		error_code = ESLURM_USER_ID_MISSING;
 		error("Security violation, RESOURCE_ALLOCATE from uid=%u",
-		      (unsigned int) uid);
+			(unsigned int) uid);
 	}
-
+	if ((job_desc_msg->alloc_node == NULL)
+	||  (job_desc_msg->alloc_node[0] == '\0')) {
+		error_code = ESLURM_INVALID_NODE_NAME;
+		error("REQUEST_RESOURCE_ALLOCATE lacks alloc_node from uid=%u",
+			(unsigned int) uid);
+	}
+	slurm_get_peer_addr(msg->conn_fd, &resp_addr);
+	job_desc_msg->resp_host = xmalloc(16);
+	slurm_get_ip_str(&resp_addr, &port, job_desc_msg->resp_host, 16);
+	dump_job_desc(job_desc_msg);
 	if (error_code == SLURM_SUCCESS) {
 		do_unlock = true;
 		lock_slurmctld(job_write_lock);
@@ -1150,19 +1160,29 @@ static void _slurm_rpc_job_will_run(slurm_msg_t * msg)
 	slurmctld_lock_t job_write_lock = { 
 		NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK };
 	uid_t uid;
+	uint16_t port;	/* dummy value */
+	slurm_addr resp_addr;
 
 	START_TIMER;
 	debug2("Processing RPC: REQUEST_JOB_WILL_RUN");
 
 	/* do RPC call */
-	dump_job_desc(job_desc_msg);
 	uid = g_slurm_auth_get_uid(msg->auth_cred);
 	if ( (uid != job_desc_msg->user_id) && (!validate_super_user(uid)) ) {
 		error_code = ESLURM_USER_ID_MISSING;
 		error("Security violation, JOB_WILL_RUN RPC from uid=%u",
-		      (unsigned int) uid);
+			(unsigned int) uid);
 	}
-
+	if ((job_desc_msg->alloc_node == NULL)
+	||  (job_desc_msg->alloc_node[0] == '\0')) {
+		error_code = ESLURM_INVALID_NODE_NAME;
+		error("REQUEST_JOB_WILL_RUN lacks alloc_node from uid=%u",
+			(unsigned int) uid);
+	}
+	slurm_get_peer_addr(msg->conn_fd, &resp_addr);
+	job_desc_msg->resp_host = xmalloc(16);
+	slurm_get_ip_str(&resp_addr, &port, job_desc_msg->resp_host, 16);
+	dump_job_desc(job_desc_msg);
 	if (error_code == SLURM_SUCCESS) {
 		lock_slurmctld(job_write_lock);
 		error_code = job_allocate(job_desc_msg, 
@@ -1712,13 +1732,19 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg)
 
 	slurm_msg_t_init(&response_msg);
 	/* do RPC call */
-	dump_job_desc(job_desc_msg);
 	uid = g_slurm_auth_get_uid(msg->auth_cred);
 	if ( (uid != job_desc_msg->user_id) && (!validate_super_user(uid)) ) {
 		error_code = ESLURM_USER_ID_MISSING;
 		error("Security violation, SUBMIT_JOB from uid=%u",
-		      (unsigned int) uid);
+			(unsigned int) uid);
 	}
+	if ((job_desc_msg->alloc_node == NULL)
+	||  (job_desc_msg->alloc_node[0] == '\0')) {
+		error_code = ESLURM_INVALID_NODE_NAME;
+		error("REQUEST_SUBMIT_BATCH_JOB lacks alloc_node from uid=%u",
+			(unsigned int) uid);
+	}
+	dump_job_desc(job_desc_msg);
 	if (error_code == SLURM_SUCCESS) {
 		lock_slurmctld(job_write_lock);
 		if (job_desc_msg->job_id != SLURM_BATCH_SCRIPT) {
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index a168d2ffd41..fe5c31a410b 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -356,10 +356,9 @@ struct job_record {
 	slurm_addr *node_addr;		/* addresses of the nodes allocated to 
 					 * job */
 	List step_list;			/* list of job's steps */
+	char *resp_host;		/* host for srun communications */
 	uint16_t alloc_resp_port;	/* RESPONSE_RESOURCE_ALLOCATION port */
-	char *alloc_resp_host;		/* RESPONSE_RESOURCE_ALLOCATION host */
 	uint16_t other_port;		/* port for client communications */
-	char *other_host;		/* host for client communications */
 	char *account;			/* account number to charge */
 	char *comment;			/* arbitrary comment */
 	uint32_t dependency;		/* defer until this job completes */
diff --git a/src/slurmctld/srun_comm.c b/src/slurmctld/srun_comm.c
index 11b7d74c8fe..4b00204abcd 100644
--- a/src/slurmctld/srun_comm.c
+++ b/src/slurmctld/srun_comm.c
@@ -77,14 +77,14 @@ extern void srun_allocate (uint32_t job_id)
 	struct job_record *job_ptr = find_job_record (job_id);
 
 	xassert(job_ptr);
-	if (job_ptr && job_ptr->alloc_resp_port
-	    && job_ptr->alloc_resp_host && job_ptr->alloc_resp_host[0]) {
+	if (job_ptr && job_ptr->alloc_resp_port && job_ptr->alloc_node
+	&&  job_ptr->resp_host) {
 		slurm_addr * addr;
 		resource_allocation_response_msg_t *msg_arg;
 
 		addr = xmalloc(sizeof(struct sockaddr_in));
-		slurm_set_addr(addr, job_ptr->alloc_resp_port,
-			       job_ptr->alloc_resp_host);
+		slurm_set_addr(addr, job_ptr->alloc_resp_port, 
+			job_ptr->resp_host);
 		msg_arg = xmalloc(sizeof(resource_allocation_response_msg_t));
 		msg_arg->job_id 	= job_ptr->job_id;
 		msg_arg->node_list	= xstrdup(job_ptr->nodes);
@@ -101,7 +101,7 @@ extern void srun_allocate (uint32_t job_id)
 		msg_arg->select_jobinfo = select_g_copy_jobinfo(
 				job_ptr->select_jobinfo);
 		msg_arg->error_code	= SLURM_SUCCESS;
-		_srun_agent_launch(addr, job_ptr->alloc_resp_host, 
+		_srun_agent_launch(addr, job_ptr->alloc_node, 
 				   RESPONSE_RESOURCE_ALLOCATION, msg_arg);
 	}
 }
@@ -129,15 +129,14 @@ extern void srun_node_fail (uint32_t job_id, char *node_name)
 		return;
 	bit_position = node_ptr - node_record_table_ptr;
 
-	if (job_ptr->other_port
-	    && job_ptr->other_host && job_ptr->other_host[0]) {
+	if (job_ptr->other_port && job_ptr->alloc_node && job_ptr->resp_host) {
 		addr = xmalloc(sizeof(struct sockaddr_in));
-		slurm_set_addr(addr, job_ptr->other_port, job_ptr->other_host);
+		slurm_set_addr(addr, job_ptr->other_port, job_ptr->resp_host);
 		msg_arg = xmalloc(sizeof(srun_node_fail_msg_t));
 		msg_arg->job_id   = job_id;
 		msg_arg->step_id  = NO_VAL;
 		msg_arg->nodelist = xstrdup(node_name);
-		_srun_agent_launch(addr, job_ptr->other_host, SRUN_NODE_FAIL,
+		_srun_agent_launch(addr, job_ptr->alloc_node, SRUN_NODE_FAIL,
 				   msg_arg);
 	}
 
@@ -182,16 +181,15 @@ extern void srun_ping (void)
 		
 		if (job_ptr->job_state != JOB_RUNNING)
 			continue;
-		if ( (job_ptr->time_last_active <= old)
-		     && job_ptr->other_port
-		     && job_ptr->other_host && job_ptr->other_host[0] ) {
+		if ((job_ptr->time_last_active <= old) && job_ptr->other_port
+		&&  job_ptr->alloc_node && job_ptr->resp_host) {
 			addr = xmalloc(sizeof(struct sockaddr_in));
 			slurm_set_addr(addr, job_ptr->other_port,
-				       job_ptr->other_host);
+				job_ptr->resp_host);
 			msg_arg = xmalloc(sizeof(srun_ping_msg_t));
 			msg_arg->job_id  = job_ptr->job_id;
 			msg_arg->step_id = NO_VAL;
-			_srun_agent_launch(addr, job_ptr->other_host,
+			_srun_agent_launch(addr, job_ptr->alloc_node,
 					   SRUN_PING, msg_arg);
 		}
 	}
@@ -214,15 +212,14 @@ extern void srun_timeout (struct job_record *job_ptr)
 	if (job_ptr->job_state != JOB_RUNNING)
 		return;
 
-	if (job_ptr->other_port
-	    && job_ptr->other_host && job_ptr->other_host[0]) {
+	if (job_ptr->other_port && job_ptr->alloc_node && job_ptr->resp_host) {
 		addr = xmalloc(sizeof(struct sockaddr_in));
-		slurm_set_addr(addr, job_ptr->other_port, job_ptr->other_host);
+		slurm_set_addr(addr, job_ptr->other_port, job_ptr->resp_host);
 		msg_arg = xmalloc(sizeof(srun_timeout_msg_t));
 		msg_arg->job_id   = job_ptr->job_id;
 		msg_arg->step_id  = NO_VAL;
 		msg_arg->timeout  = job_ptr->end_time;
-		_srun_agent_launch(addr, job_ptr->other_host, SRUN_TIMEOUT,
+		_srun_agent_launch(addr, job_ptr->alloc_node, SRUN_TIMEOUT,
 				   msg_arg);
 	}
 
@@ -258,16 +255,14 @@ extern void srun_complete (struct job_record *job_ptr)
 	struct step_record *step_ptr;
 
 	xassert(job_ptr);
-	if (job_ptr->other_port
-	    && job_ptr->other_host && job_ptr->other_host[0]) {
+	if (job_ptr->other_port && job_ptr->alloc_node && job_ptr->resp_host) {
 		addr = xmalloc(sizeof(struct sockaddr_in));
-		slurm_set_addr(addr, job_ptr->other_port, job_ptr->other_host);
+		slurm_set_addr(addr, job_ptr->other_port, job_ptr->resp_host);
 		msg_arg = xmalloc(sizeof(srun_timeout_msg_t));
 		msg_arg->job_id   = job_ptr->job_id;
 		msg_arg->step_id  = NO_VAL;
-		_srun_agent_launch(addr, job_ptr->other_host, 
-				   SRUN_JOB_COMPLETE,
-				   msg_arg);
+		_srun_agent_launch(addr, job_ptr->alloc_node, 
+				   SRUN_JOB_COMPLETE, msg_arg);
 	}
 
 
diff --git a/src/srun/allocate.c b/src/srun/allocate.c
index 68ac0cace24..5bcdec4a9fd 100644
--- a/src/srun/allocate.c
+++ b/src/srun/allocate.c
@@ -536,13 +536,6 @@ job_desc_msg_create_from_opts (char *script)
 	 * message as all other messages */
 	j->alloc_resp_port = slurmctld_comm_addr.port;
 	j->other_port = slurmctld_comm_addr.port;
-	if (slurmctld_comm_addr.hostname) {
-		j->alloc_resp_hostname = xstrdup(slurmctld_comm_addr.hostname);
-		j->other_hostname = xstrdup(slurmctld_comm_addr.hostname);
-	} else {
-		j->alloc_resp_hostname = NULL;
-		j->other_hostname = NULL;
-	}
 
 	if (script) {
 		/*
@@ -583,8 +576,6 @@ job_desc_msg_destroy(job_desc_msg_t *j)
 	if (j) {
 		xfree(j->account);
 		xfree(j->comment);
-		xfree(j->alloc_resp_hostname);
-		xfree(j->other_hostname);
 		xfree(j);
 	}
 }
-- 
GitLab