From e5cef28bfa4b646a91c8cd1a465e6d6c7e949f2f Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Wed, 16 Oct 2002 23:02:41 +0000
Subject: [PATCH] Added new error code ESLURM_JOB_PENDING, returned when the
 allocation check RPC finds the job is still not running. srun now polls as
 needed, waiting for job initiation.

---
 src/srun/srun.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/src/srun/srun.c b/src/srun/srun.c
index ea8f7140fd8..d9c60194b9c 100644
--- a/src/srun/srun.c
+++ b/src/srun/srun.c
@@ -274,6 +274,7 @@ allocate_nodes(void)
 	int rc, retries;
 	job_desc_msg_t job;
 	resource_allocation_response_msg_t *resp;
+	old_job_alloc_msg_t old_job;
 
 	slurm_init_job_desc_msg(&job);
 
@@ -325,9 +326,23 @@ allocate_nodes(void)
 		}			
 	}
 
-	if (resp->node_list == NULL) {
-		info("No nodes allocated. exiting");
-		return NULL;
+	if ((rc == 0) && (resp->node_list == NULL)) {
+		if (_verbose || _debug)
+			info ("Job %u queued and waiting for resources", resp->job_id);
+		old_job.job_id = resp->job_id;
+		old_job.uid = (uint32_t) getuid();
+		slurm_free_resource_allocation_response_msg (resp);
+		sleep (2);
+		/* Keep polling until the job is allocated resources */
+		while (slurm_confirm_allocation(&old_job, &resp) == SLURM_FAILURE) {
+			if (slurm_get_errno() == ESLURM_JOB_PENDING)
+				sleep (10);
+			else {
+				error("Unable to confirm resource allocation for job %u: %s", 
+					old_job.job_id, slurm_strerror(errno));
+				exit (1);
+			}
+		}
 	}
 
 	return resp;
-- 
GitLab