From 4535783ddd91bbe7c4944a55c549b6f1947a87c2 Mon Sep 17 00:00:00 2001 From: Morris Jette <jette@schedmd.com> Date: Mon, 28 Oct 2013 14:53:27 -0700 Subject: [PATCH] Add job array dependency support Add support for dependencies of job array elements (e.g. "sbatch --depend=afterok:123_4 ..."). This does not support dependencies of ALL job array elements, only individual job array elements. --- NEWS | 2 ++ doc/html/job_array.shtml | 9 ++++++++- src/slurmctld/job_scheduler.c | 33 +++++++++++++++++++++++++++------ 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/NEWS b/NEWS index 0bd64ffb4e0..5d9f577cc1d 100644 --- a/NEWS +++ b/NEWS @@ -51,6 +51,8 @@ documents those changes that are of interest to users and admins. -- Fix sacctmgr modify qos set preempt+/-=. -- BLUEGENE - fix issue where node count wasn't set up correctly when srun preforms the allocation, regression in 2.6.3. + -- Add support for dependencies of job array elements (e.g. + "sbatch --depend=afterok:123_4 ...") * Changes in Slurm 2.6.3 ======================== diff --git a/doc/html/job_array.shtml b/doc/html/job_array.shtml index 326b9b81cf7..05d26e6b650 100644 --- a/doc/html/job_array.shtml +++ b/doc/html/job_array.shtml @@ -202,6 +202,13 @@ $ scontrol release 13_2 <h2>Other Command Use</h2> +<p>Job dependencies for individual job array elements are supported in Slurm +version 2.6.4 and later. Examples of use follow:</p> +<pre> +sbatch --depend=after:123_4 my.job +sbatch --depend=afterok:123_4:123_8 my.job2 +</pre> + <p>The following Slurm commands do not currently recognize job arrays and their use requires the use of Slurm job IDs, which are unique for each array element: sacct, sbcast, smap, sreport, sshare, sstat, strigger, and sview. @@ -245,6 +252,6 @@ array data structure is added rather than the current logic that only adds a new field to the existing job data structure. 
It is not certain when that work will occur.</p> -<p style="text-align:center;">Last modified 17 September 2013</p> +<p style="text-align:center;">Last modified 28 October 2013</p> <!--#include virtual="footer.txt"--> diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index d4db7b071d6..66dfc07d1e3 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -1527,6 +1527,7 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) int rc = SLURM_SUCCESS; uint16_t depend_type = 0; uint32_t job_id = 0; + uint16_t array_task_id; char *tok = new_depend, *sep_ptr, *sep_ptr2 = NULL; List new_depend_list = NULL; struct depend_spec *dep_ptr; @@ -1574,21 +1575,33 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) sep_ptr = strchr(tok, ':'); if ((sep_ptr == NULL) && (job_id == 0)) { job_id = strtol(tok, &sep_ptr, 10); + if ((sep_ptr != NULL) && (sep_ptr[0] == '_')) + array_task_id = strtol(sep_ptr+1, &sep_ptr, 10); + else + array_task_id = (uint16_t) NO_VAL; if ((sep_ptr == NULL) || (sep_ptr[0] != '\0') || (job_id == 0) || (job_id == job_ptr->job_id)) { rc = ESLURM_DEPENDENCY; break; } /* old format, just a single job_id */ - dep_job_ptr = find_job_record(job_id); + if (array_task_id == (uint16_t) NO_VAL) { + dep_job_ptr = find_job_record(job_id); + snprintf(dep_buf, sizeof(dep_buf), + "afterany:%u", job_id); + } else { + dep_job_ptr = find_job_array_rec(job_id, + array_task_id); + snprintf(dep_buf, sizeof(dep_buf), + "afterany:%u_%u", job_id, + array_task_id); + } if (!dep_job_ptr) /* assume already done */ break; - snprintf(dep_buf, sizeof(dep_buf), - "afterany:%u", job_id); new_depend = dep_buf; dep_ptr = xmalloc(sizeof(struct depend_spec)); dep_ptr->depend_type = SLURM_DEPEND_AFTER_ANY; - dep_ptr->job_id = job_id; + dep_ptr->job_id = dep_job_ptr->job_id; dep_ptr->job_ptr = dep_job_ptr; (void) list_append(new_depend_list, dep_ptr); break; @@ -1618,6 +1631,10 @@ extern int 
update_job_dependency(struct job_record *job_ptr, char *new_depend) sep_ptr++; /* skip over ":" */ while (rc == SLURM_SUCCESS) { job_id = strtol(sep_ptr, &sep_ptr2, 10); + if ((sep_ptr2 != NULL) && (sep_ptr2[0] == '_')) + array_task_id = strtol(sep_ptr2+1,&sep_ptr2,10); + else + array_task_id = (uint16_t) NO_VAL; if ((sep_ptr2 == NULL) || (job_id == 0) || (job_id == job_ptr->job_id) || ((sep_ptr2[0] != '\0') && (sep_ptr2[0] != ',') && @@ -1625,7 +1642,11 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) rc = ESLURM_DEPENDENCY; break; } - dep_job_ptr = find_job_record(job_id); + if (array_task_id == (uint16_t) NO_VAL) + dep_job_ptr = find_job_record(job_id); + else + dep_job_ptr = find_job_array_rec(job_id, + array_task_id); if ((depend_type == SLURM_DEPEND_EXPAND) && ((expand_cnt++ > 0) || (dep_job_ptr == NULL) || (!IS_JOB_RUNNING(dep_job_ptr)) || @@ -1652,7 +1673,7 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) if (dep_job_ptr) { /* job still active */ dep_ptr = xmalloc(sizeof(struct depend_spec)); dep_ptr->depend_type = depend_type; - dep_ptr->job_id = job_id; + dep_ptr->job_id = dep_job_ptr->job_id; dep_ptr->job_ptr = dep_job_ptr; (void) list_append(new_depend_list, dep_ptr); } -- GitLab