diff --git a/NEWS b/NEWS index 0bd64ffb4e0f76147da688eda4b96aa73995edcc..5d9f577cc1d0b27ff2bbb201caa59add912d60ef 100644 --- a/NEWS +++ b/NEWS @@ -51,6 +51,8 @@ documents those changes that are of interest to users and admins. -- Fix sacctmgr modify qos set preempt+/-=. -- BLUEGENE - fix issue where node count wasn't set up correctly when srun preforms the allocation, regression in 2.6.3. + -- Add support for dependencies of job array elements (e.g. + "sbatch --depend=afterok:123_4 ...") * Changes in Slurm 2.6.3 ======================== diff --git a/doc/html/job_array.shtml b/doc/html/job_array.shtml index 326b9b81cf74a16beb2541ae0c483723193017aa..05d26e6b650e30b19f878a4c3454ce49a2eaf17e 100644 --- a/doc/html/job_array.shtml +++ b/doc/html/job_array.shtml @@ -202,6 +202,13 @@ $ scontrol release 13_2 <h2>Other Command Use</h2> +<p>Job dependencies for individual job array elements are supported in Slurm +version 2.6.4 and later. Examples of use follow:</p> +<pre> +sbatch --depend=after:123_4 my.job +sbatch --depend=afterok:123_4:123_8 my.job2 +</pre> + <p>The following Slurm commands do not currently recognize job arrays and their use requires the use of Slurm job IDs, which are unique for each array element: sacct, sbcast, smap, sreport, sshare, sstat, strigger, and sview. @@ -245,6 +252,6 @@ array data structure is added rather than the current logic that only adds a new field to the existing job data structure. It is not certain when that work will occur.</p> -<p style="text-align:center;">Last modified 17 September 2013</p> +<p style="text-align:center;">Last modified 28 October 2013</p> <!--#include virtual="footer.txt"--> diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index d4db7b071d698dc0a3ea9f7691fc2cb14c22df63..66dfc07d1e36e183c4cbfaa6dec3836f2bace601 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -1527,6 +1527,7 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) int rc = SLURM_SUCCESS; uint16_t depend_type = 0; uint32_t job_id = 0; + uint16_t array_task_id; char *tok = new_depend, *sep_ptr, *sep_ptr2 = NULL; List new_depend_list = NULL; struct depend_spec *dep_ptr; @@ -1574,21 +1575,33 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) sep_ptr = strchr(tok, ':'); if ((sep_ptr == NULL) && (job_id == 0)) { job_id = strtol(tok, &sep_ptr, 10); + if ((sep_ptr != NULL) && (sep_ptr[0] == '_')) + array_task_id = strtol(sep_ptr+1, &sep_ptr, 10); + else + array_task_id = (uint16_t) NO_VAL; if ((sep_ptr == NULL) || (sep_ptr[0] != '\0') || (job_id == 0) || (job_id == job_ptr->job_id)) { rc = ESLURM_DEPENDENCY; break; } /* old format, just a single job_id */ - dep_job_ptr = find_job_record(job_id); + if (array_task_id == (uint16_t) NO_VAL) { + dep_job_ptr = find_job_record(job_id); + snprintf(dep_buf, sizeof(dep_buf), + "afterany:%u", job_id); + } else { + dep_job_ptr = find_job_array_rec(job_id, + array_task_id); + snprintf(dep_buf, sizeof(dep_buf), + "afterany:%u_%u", job_id, + array_task_id); + } if (!dep_job_ptr) /* assume already done */ break; - snprintf(dep_buf, sizeof(dep_buf), - "afterany:%u", job_id); new_depend = dep_buf; dep_ptr = xmalloc(sizeof(struct depend_spec)); dep_ptr->depend_type = SLURM_DEPEND_AFTER_ANY; - dep_ptr->job_id = job_id; + dep_ptr->job_id = dep_job_ptr->job_id; dep_ptr->job_ptr = dep_job_ptr; (void) list_append(new_depend_list, dep_ptr); break; @@ -1618,6 +1631,10 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) sep_ptr++; /* skip over ":" */ while (rc == SLURM_SUCCESS) { job_id = strtol(sep_ptr, &sep_ptr2, 10); + if ((sep_ptr2 != NULL) && (sep_ptr2[0] == '_')) + array_task_id = strtol(sep_ptr2+1,&sep_ptr2,10); + else + array_task_id = (uint16_t) NO_VAL; if ((sep_ptr2 == NULL) || (job_id == 0) || (job_id == job_ptr->job_id) || ((sep_ptr2[0] != '\0') && (sep_ptr2[0] != ',') && @@ -1625,7 +1642,11 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) rc = ESLURM_DEPENDENCY; break; } - dep_job_ptr = find_job_record(job_id); + if (array_task_id == (uint16_t) NO_VAL) + dep_job_ptr = find_job_record(job_id); + else + dep_job_ptr = find_job_array_rec(job_id, + array_task_id); if ((depend_type == SLURM_DEPEND_EXPAND) && ((expand_cnt++ > 0) || (dep_job_ptr == NULL) || (!IS_JOB_RUNNING(dep_job_ptr)) || @@ -1652,7 +1673,7 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) if (dep_job_ptr) { /* job still active */ dep_ptr = xmalloc(sizeof(struct depend_spec)); dep_ptr->depend_type = depend_type; - dep_ptr->job_id = job_id; + dep_ptr->job_id = dep_job_ptr->job_id; dep_ptr->job_ptr = dep_job_ptr; (void) list_append(new_depend_list, dep_ptr); }