diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c
index 64d20e36e46dde97623f7c02c25fe3cdb39c7670..5a7c99638605067e0e0dd30cb8a9c5fcb55463b4 100644
--- a/src/slurmctld/agent.c
+++ b/src/slurmctld/agent.c
@@ -162,6 +162,7 @@ typedef struct mail_info {
 } mail_info_t;
 
 static void _sig_handler(int dummy);
+static bool _batch_launch_defer(queued_request_t *queued_req_ptr);
 static inline int _comm_err(char *node_name);
 static void _list_delete_retry(void *retry_entry);
 static agent_info_t *_make_agent_info(agent_arg_t *agent_arg_ptr);
@@ -1160,6 +1161,8 @@ extern int agent_retry (int min_wait, bool mail_too)
 		retry_iter = list_iterator_create(retry_list);
 		while ((queued_req_ptr = (queued_request_t *)
 				list_next(retry_iter))) {
+			if (_batch_launch_defer(queued_req_ptr))
+				continue;
  			if (queued_req_ptr->last_attempt == 0) {
 				list_remove(retry_iter);
 				list_size--;
@@ -1178,6 +1181,8 @@ extern int agent_retry (int min_wait, bool mail_too)
 		/* next try to find an older record to retry */
 		while ((queued_req_ptr = (queued_request_t *) 
 				list_next(retry_iter))) {
+			if (_batch_launch_defer(queued_req_ptr))
+				continue;
 			age = difftime(now, queued_req_ptr->last_attempt);
 			if (age > min_wait) {
 				list_remove(retry_iter);
@@ -1440,3 +1445,48 @@ extern void mail_job_info (struct job_record *job_ptr, uint16_t mail_type)
 	return;
 }
 
+/* Decide if a queued request must keep waiting. RET true only for a batch
+ * job launch whose destination node is in POWER_SAVE or NO_RESPOND state and
+ * has been deferred under BATCH_START_TIME secs. For such requests
+ * last_attempt is set when deferral starts, zeroed when request is released */
+static bool _batch_launch_defer(queued_request_t *queued_req_ptr)
+{
+	char hostname[512];
+	agent_arg_t *agent_arg_ptr;
+	struct node_record *node_ptr;
+	time_t now = time(NULL);
+
+	agent_arg_ptr = queued_req_ptr->agent_arg_ptr;
+	if (agent_arg_ptr->msg_type != REQUEST_BATCH_JOB_LAUNCH)
+		return false;
+
+	hostlist_deranged_string(agent_arg_ptr->hostlist,
+				 sizeof(hostname), hostname);
+	node_ptr = find_node_record(hostname);
+	if (node_ptr == NULL) {
+		error("agent(batch_launch) could not locate node %s",
+		      hostname);
+		queued_req_ptr->last_attempt = (time_t) 0;
+		return false;	/* no benefit to defer */
+	}
+
+	if (((node_ptr->node_state & NODE_STATE_POWER_SAVE) == 0) &&
+	    ((node_ptr->node_state & NODE_STATE_NO_RESPOND) == 0)) {
+		info("agent ready to send batch request to %s", hostname);
+		queued_req_ptr->last_attempt = (time_t) 0;
+		return false;
+	}
+
+	if (queued_req_ptr->last_attempt == 0)
+		queued_req_ptr->last_attempt = now;
+	else if (difftime(now, queued_req_ptr->last_attempt) >=
+				BATCH_START_TIME) {
+		error("agent waited too long for node %s to come up, "
+		      "sending batch request anyway...", hostname);
+		queued_req_ptr->last_attempt = (time_t) 0;
+		return false;
+	}
+
+	info("agent waiting to send batch request to %s", hostname);
+	return true;
+}
diff --git a/src/slurmctld/agent.h b/src/slurmctld/agent.h
index 34cc778282f1c61f652a6997f487db0fdf646d9d..b7b98da1e45f86187c456c15a57ecbb067acd09f 100644
--- a/src/slurmctld/agent.h
+++ b/src/slurmctld/agent.h
@@ -1,10 +1,9 @@
 /*****************************************************************************\
  *  agent.h - data structures and function definitions for parallel 
  *	background communications
- *
- *  $Id$
  *****************************************************************************
- *  Copyright (C) 2002-2006 The Regents of the University of California.
+ *  Copyright (C) 2002-2007 The Regents of the University of California.
+ *  Copyright (C) 2008 Lawrence Livermore National Security.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  *  Written by Morris Jette <jette@llnl.gov>, et. al.
  *  Derived from dsh written by Jim Garlick <garlick1@llnl.gov>
@@ -47,6 +46,7 @@
 #define AGENT_IS_THREAD  	 1	/* set if agent itself a thread of 
 					 * slurmctld, 0 for function call */
 #define AGENT_THREAD_COUNT	10	/* maximum active threads per agent */
+#define BATCH_START_TIME	300	/* allow batch jobs 300 secs to start */
 #define COMMAND_TIMEOUT 	10	/* command requeue or error, seconds */
 #define MAX_AGENT_CNT		(MAX_SERVER_THREADS / (AGENT_THREAD_COUNT + 2))
 					/* maximum simultaneous agents, note 
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 9a8027a64044aaa119a89e55370a1929d9431341..beb5e126def12136e3ec0a29a59f425e1e00a0da 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -4604,7 +4604,8 @@ extern void validate_jobs_on_node(slurm_node_registration_status_msg_t *reg_msg)
 			error("Orphan job %u.%u reported on node %s",
 				reg_msg->job_id[i], reg_msg->step_id[i], 
 				reg_msg->node_name);
-			abort_job_on_node(reg_msg->job_id[i], job_ptr, node_ptr);
+			abort_job_on_node(reg_msg->job_id[i], 
+					  job_ptr, node_ptr);
 		}
 
 		else if ((job_ptr->job_state == JOB_RUNNING) ||
@@ -4645,7 +4646,8 @@ extern void validate_jobs_on_node(slurm_node_registration_status_msg_t *reg_msg)
 			error("Registered PENDING job %u.%u on node %s ",
 				reg_msg->job_id[i], reg_msg->step_id[i], 
 				reg_msg->node_name);
-			abort_job_on_node(reg_msg->job_id[i], job_ptr, node_ptr);
+			abort_job_on_node(reg_msg->job_id[i], 
+					  job_ptr, node_ptr);
 		}
 
 		else {		/* else job is supposed to be done */
@@ -4675,7 +4677,7 @@ extern void validate_jobs_on_node(slurm_node_registration_status_msg_t *reg_msg)
 }
 
 /* Purge any batch job that should have its script running on node 
- * node_inx, but is not (i.e. its time_last_active != now) */
+ * node_inx, but is not. Allow BATCH_START_TIME secs for startup. */
 static void _purge_lost_batch_jobs(int node_inx, time_t now)
 {
 	ListIterator job_iterator;
@@ -4685,9 +4687,9 @@ static void _purge_lost_batch_jobs(int node_inx, time_t now)
 	while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
 		bool job_active = ((job_ptr->job_state == JOB_RUNNING) ||
 				   (job_ptr->job_state == JOB_SUSPENDED));
-		if ((!job_active)                       ||
-		    (job_ptr->batch_flag == 0)          ||
-		    (job_ptr->time_last_active == now)  ||
+		if ((!job_active)                           ||
+		    (job_ptr->batch_flag == 0)              ||
+		    ((job_ptr->time_last_active + BATCH_START_TIME) > now) ||
 		    (node_inx != bit_ffs(job_ptr->node_bitmap)))
 			continue;