diff --git a/NEWS b/NEWS index 01a5ab33abf512b91b9a3f11c8c17e7a6ffa83f0..2ebe8a07edc6cf73c577adddef72fb5da39e61db 100644 --- a/NEWS +++ b/NEWS @@ -161,6 +161,7 @@ documents those changes that are of interest to users and administrators. -- Enable CUDA v7.0+ use with a Slurm configuration of TaskPlugin=task/cgroup ConstrainDevices=yes (in cgroup.conf). With that configuration CUDA_VISIBLE_DEVICES will start at 0 rather than the device number. + -- Fix job array logic that can cause slurmctld to abort. * Changes in Slurm 14.11.3 ========================== diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 311aff7c2d275c5391cfe82a02238e407abf8507..99b390865f7e722284ecc76574c10ea6b6e33eba 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -1077,7 +1077,11 @@ static void _dump_job_state(struct job_record *dump_job_ptr, Buf buffer) pack32(dump_job_ptr->array_task_id, buffer); if (dump_job_ptr->array_recs) { build_array_str(dump_job_ptr); - tmp_32 = bit_size(dump_job_ptr->array_recs->task_id_bitmap); + if (dump_job_ptr->array_recs->task_id_bitmap) { + tmp_32 = bit_size(dump_job_ptr->array_recs-> + task_id_bitmap); + } else + tmp_32 = 0; pack32(tmp_32, buffer); if (tmp_32) packstr(dump_job_ptr->array_recs->task_id_str, buffer); @@ -2454,7 +2458,8 @@ extern void build_array_str(struct job_record *job_ptr) { job_array_struct_t *array_recs = job_ptr->array_recs; - if (!array_recs || array_recs->task_id_str || !array_recs->task_cnt) + if (!array_recs || array_recs->task_id_str || !array_recs->task_cnt || + !array_recs->task_id_bitmap) return; array_recs->task_id_str = bit_fmt_hexmask(array_recs->task_id_bitmap); @@ -3517,8 +3522,11 @@ extern struct job_record *job_array_split(struct job_record *job_ptr) job_ptr_pend->array_recs = job_ptr->array_recs; job_ptr->array_recs = NULL; - bit_clear(job_ptr_pend->array_recs->task_id_bitmap, - job_ptr_pend->array_task_id); + if (job_ptr_pend->array_recs && + job_ptr_pend->array_recs->task_id_bitmap) { + bit_clear(job_ptr_pend->array_recs->task_id_bitmap, + job_ptr_pend->array_task_id); + } xfree(job_ptr_pend->array_recs->task_id_str); job_ptr_pend->array_recs->task_cnt--; job_ptr_pend->array_task_id = NO_VAL;