diff --git a/NEWS b/NEWS index 9c3c793ec2806530effc812fe0a488e4ece021bd..4ca6e95835ef5304d7a73ffe8aae3ac81edf0c13 100644 --- a/NEWS +++ b/NEWS @@ -582,6 +582,7 @@ documents those changes that are of interest to users and administrators. -- Fix job array dependency with "aftercorr" option and some task arrays in the first job fail. This fix lets all task array elements that can run proceed rather than stopping all subsequent task array elements. + -- Fix whole node allocation cpu counts when --hint=nomultithread. * Changes in Slurm 17.02.9 ========================== diff --git a/src/plugins/burst_buffer/cray/burst_buffer_cray.c b/src/plugins/burst_buffer/cray/burst_buffer_cray.c index 593d23f9e9ca6095580bc21981dbe38e4756008b..0b1a5c68b3ab9a46154fbb0e75655c3d4909ef0a 100644 --- a/src/plugins/burst_buffer/cray/burst_buffer_cray.c +++ b/src/plugins/burst_buffer/cray/burst_buffer_cray.c @@ -3673,7 +3673,7 @@ extern int bb_p_job_test_stage_in(struct job_record *job_ptr, bool test_only) } else if (bb_job->state == BB_STATE_STAGED_IN) { rc = 1; } else { - rc = -1; /* Requeued job still staging out */ + rc = -1; /* Requeued job still staging in */ } slurm_mutex_unlock(&bb_state.bb_mutex); @@ -4195,12 +4195,14 @@ static void _free_create_args(create_buf_data_t *create_args) } } -/* Create/destroy persistent burst buffers +/* + * Create/destroy persistent burst buffers * job_ptr IN - job to operate upon * bb_job IN - job's burst buffer data * job_ready IN - if true, job is ready to run now, if false then do not * delete persistent buffers - * Returns count of buffer create/destroy requests which are pending */ + * Returns count of buffer create/destroy requests which are pending + */ static int _create_bufs(struct job_record *job_ptr, bb_job_t *bb_job, bool job_ready) { diff --git a/src/plugins/select/cons_res/dist_tasks.c b/src/plugins/select/cons_res/dist_tasks.c index a255570fa46d73873b252811f8569349d77b11e9..ee2faab8affac8a65d089916e167489386ad67bd 100644 --- 
a/src/plugins/select/cons_res/dist_tasks.c +++ b/src/plugins/select/cons_res/dist_tasks.c @@ -940,12 +940,17 @@ fini: xfree(sock_avoid); } /* Remove any specialized cores from those allocated to the job */ -static void _clear_spec_cores(job_resources_t *job_res, +static void _clear_spec_cores(struct job_record *job_ptr, bitstr_t *avail_core_bitmap) { int first_node, last_node, i_node; int first_core, last_core, i_core; int alloc_node = -1, alloc_core = -1, size; + job_resources_t *job_res = job_ptr->job_resrcs; + multi_core_data_t *mc_ptr = NULL; + + if (job_ptr->details && job_ptr->details->mc_ptr) + mc_ptr = job_ptr->details->mc_ptr; size = bit_size(job_res->core_bitmap); bit_nset(job_res->core_bitmap, 0, size - 1); @@ -965,8 +970,13 @@ static void _clear_spec_cores(job_resources_t *job_res, for (i_core = first_core; i_core <= last_core; i_core++) { alloc_core++; if (bit_test(avail_core_bitmap, i_core)) { - job_res->cpus[alloc_node] += - select_node_record[i_node].vpus; + uint16_t tpc = select_node_record[i_node].vpus; + if (mc_ptr && + (mc_ptr->threads_per_core != NO_VAL16) && + (mc_ptr->threads_per_core < tpc)) + tpc = mc_ptr->threads_per_core; + + job_res->cpus[alloc_node] += tpc; } else { bit_clear(job_res->core_bitmap, alloc_core); } @@ -1026,7 +1036,7 @@ extern int cr_dist(struct job_record *job_ptr, const uint16_t cr_type, * the available CPUs in the cpus array. Up to this point * we might not have the correct CPU count, but a core count * and ignoring specialized cores. Fix that too. */ - _clear_spec_cores(job_ptr->job_resrcs, avail_core_bitmap); + _clear_spec_cores(job_ptr, avail_core_bitmap); return SLURM_SUCCESS; }