From 4535783ddd91bbe7c4944a55c549b6f1947a87c2 Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Mon, 28 Oct 2013 14:53:27 -0700
Subject: [PATCH] Add job array dependency support

Add support for dependencies on job array elements (e.g.
"sbatch --depend=afterok:123_4 ..."). This does not support
dependencies on ALL elements of a job array, only on individual
job array elements.
---
 NEWS                          |  2 ++
 doc/html/job_array.shtml      |  9 ++++++++-
 src/slurmctld/job_scheduler.c | 33 +++++++++++++++++++++++++++------
 3 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/NEWS b/NEWS
index 0bd64ffb4e0..5d9f577cc1d 100644
--- a/NEWS
+++ b/NEWS
@@ -51,6 +51,8 @@ documents those changes that are of interest to users and admins.
  -- Fix sacctmgr modify qos set preempt+/-=.
  -- BLUEGENE - fix issue where node count wasn't set up correctly when srun
     preforms the allocation, regression in 2.6.3.
+ -- Add support for dependencies of job array elements (e.g.
+    "sbatch --depend=afterok:123_4 ...").
 
 * Changes in Slurm 2.6.3
 ========================
diff --git a/doc/html/job_array.shtml b/doc/html/job_array.shtml
index 326b9b81cf7..05d26e6b650 100644
--- a/doc/html/job_array.shtml
+++ b/doc/html/job_array.shtml
@@ -202,6 +202,13 @@ $ scontrol release 13_2
 
 <h2>Other Command Use</h2>
 
+<p>Job dependencies for individual job array elements are supported in Slurm
+version 2.6.4 and later. Examples of use follow:</p>
+<pre>
+sbatch --depend=after:123_4 my.job
+sbatch --depend=afterok:123_4:123_8 my.job2
+</pre>
+
 <p>The following Slurm commands do not currently recognize job arrays and their
 use requires the use of Slurm job IDs, which are unique for each array element:
 sacct, sbcast, smap, sreport, sshare, sstat, strigger, and sview.
@@ -245,6 +252,6 @@ array data structure is added rather than the current logic that only
 adds a new field to the existing job data structure.
 It is not certain when that work will occur.</p>
 
-<p style="text-align:center;">Last modified 17 September 2013</p>
+<p style="text-align:center;">Last modified 28 October 2013</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index d4db7b071d6..66dfc07d1e3 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -1527,6 +1527,7 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend)
 	int rc = SLURM_SUCCESS;
 	uint16_t depend_type = 0;
 	uint32_t job_id = 0;
+	uint16_t array_task_id;
 	char *tok = new_depend, *sep_ptr, *sep_ptr2 = NULL;
 	List new_depend_list = NULL;
 	struct depend_spec *dep_ptr;
@@ -1574,21 +1575,33 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend)
 		sep_ptr = strchr(tok, ':');
 		if ((sep_ptr == NULL) && (job_id == 0)) {
 			job_id = strtol(tok, &sep_ptr, 10);
+			if ((sep_ptr != NULL) && (sep_ptr[0] == '_'))
+				array_task_id = strtol(sep_ptr+1, &sep_ptr, 10);
+			else
+				array_task_id = (uint16_t) NO_VAL;
 			if ((sep_ptr == NULL) || (sep_ptr[0] != '\0') ||
 			    (job_id == 0) || (job_id == job_ptr->job_id)) {
 				rc = ESLURM_DEPENDENCY;
 				break;
 			}
 			/* old format, just a single job_id */
-			dep_job_ptr = find_job_record(job_id);
+			if (array_task_id == (uint16_t) NO_VAL) {
+				dep_job_ptr = find_job_record(job_id);
+				snprintf(dep_buf, sizeof(dep_buf),
+					 "afterany:%u", job_id);
+			} else {
+				dep_job_ptr = find_job_array_rec(job_id,
+								 array_task_id);
+				snprintf(dep_buf, sizeof(dep_buf),
+					 "afterany:%u_%u", job_id,
+					 array_task_id);
+			}
 			if (!dep_job_ptr)	/* assume already done */
 				break;
-			snprintf(dep_buf, sizeof(dep_buf),
-				 "afterany:%u", job_id);
 			new_depend = dep_buf;
 			dep_ptr = xmalloc(sizeof(struct depend_spec));
 			dep_ptr->depend_type = SLURM_DEPEND_AFTER_ANY;
-			dep_ptr->job_id = job_id;
+			dep_ptr->job_id  = dep_job_ptr->job_id;
 			dep_ptr->job_ptr = dep_job_ptr;
 			(void) list_append(new_depend_list, dep_ptr);
 			break;
@@ -1618,6 +1631,10 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend)
 		sep_ptr++;	/* skip over ":" */
 		while (rc == SLURM_SUCCESS) {
 			job_id = strtol(sep_ptr, &sep_ptr2, 10);
+			if ((sep_ptr2 != NULL) && (sep_ptr2[0] == '_'))
+				array_task_id = strtol(sep_ptr2+1,&sep_ptr2,10);
+			else
+				array_task_id = (uint16_t) NO_VAL;
 			if ((sep_ptr2 == NULL) ||
 			    (job_id == 0) || (job_id == job_ptr->job_id) ||
 			    ((sep_ptr2[0] != '\0') && (sep_ptr2[0] != ',') &&
@@ -1625,7 +1642,11 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend)
 				rc = ESLURM_DEPENDENCY;
 				break;
 			}
-			dep_job_ptr = find_job_record(job_id);
+			if (array_task_id == (uint16_t) NO_VAL)
+				dep_job_ptr = find_job_record(job_id);
+			else
+				dep_job_ptr = find_job_array_rec(job_id,
+								 array_task_id);
 			if ((depend_type == SLURM_DEPEND_EXPAND) &&
 			    ((expand_cnt++ > 0) || (dep_job_ptr == NULL) ||
 			     (!IS_JOB_RUNNING(dep_job_ptr))              ||
@@ -1652,7 +1673,7 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend)
 			if (dep_job_ptr) {	/* job still active */
 				dep_ptr = xmalloc(sizeof(struct depend_spec));
 				dep_ptr->depend_type = depend_type;
-				dep_ptr->job_id = job_id;
+				dep_ptr->job_id  = dep_job_ptr->job_id;
 				dep_ptr->job_ptr = dep_job_ptr;
 				(void) list_append(new_depend_list, dep_ptr);
 			}
-- 
GitLab