diff --git a/NEWS b/NEWS index 2ff7e341292630216d1e028839fbdd3523f7cb79..f65f94ed294d79b6cfbc4abb38a7dbc9626e7266 100644 --- a/NEWS +++ b/NEWS @@ -81,6 +81,10 @@ documents those changes that are of interest to users and administrators. time. -- MYSQL - Better locking around g_qos_count which was previously unprotected. -- Correct size of buffer used for jobid2str to avoid truncation. + -- Fix allocation/distribution of tasks across multiple nodes when + --hint=nomultithread is requested. + -- If a reservation's nodes value is "all" then track the current nodes in the + system, even if those nodes change. * Changes in Slurm 15.08.6 ========================== diff --git a/doc/html/meetings.shtml b/doc/html/meetings.shtml index a783ae2021551812533e02c46657428e5a682d5d..eeee5e87d4b39bfd0e870d9022731d149a08a727 100644 --- a/doc/html/meetings.shtml +++ b/doc/html/meetings.shtml @@ -5,7 +5,7 @@ <p><b>Slurm User Group Meeting 2016</b><br> 26-27 September 2016<br> Athens, Greece<br> -Host: <a href="http://www.grnet.gre/">Greek Research and Technology Network (GRNET)</a></p> +Host: <a href="http://www.grnet.gr/">Greek Research and Technology Network (GRNET)</a></p> <p>More information coming soon.</p> <!-- <a href="slurm_ug_cfp.html">Call for Abstracts: Due 1 June 2015</a><br> --> <!-- <a href="slurm_ug_agenda.html">Meeting agenda, registration information, etc.</a><br> --> @@ -44,6 +44,6 @@ Host: Bull</p> Paris, France<br> Host: CEA</p> -<p style="text-align:center;">Last modified 23 December 2015</p> +<p style="text-align:center;">Last modified 31 December 2015</p> <!--#include virtual="footer.txt"--> diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 2b105cf647bd037f7b3b6ccb8d83a4955b1b855a..a4896811300abc1c2a4c259dd2e95f67a9096b6f 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -2369,6 +2369,8 @@ void slurm_init_update_block_msg PARAMS((update_block_msg_t *update_block_msg)); * relative */ #define RESERVE_FLAG_REPLACE 0x00040000 /* Replace resources * as assigned to jobs */ +#define RESERVE_FLAG_ALL_NODES 0x00080000 /* Use all compute + * nodes */ typedef struct reserve_info { char *accounts; /* names of accounts permitted to use */ diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index 5b5628bfc4095900be012a2c2a4b3df511a63ffd..c878fa4b6885edba9a8650806dccb03a6aaa990d 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -2063,6 +2063,11 @@ extern char *reservation_flags_string(uint32_t flags) xstrcat(flag_str, ","); xstrcat(flag_str, "SPEC_NODES"); } + if (flags & RESERVE_FLAG_ALL_NODES) { + if (flag_str[0]) + xstrcat(flag_str, ","); + xstrcat(flag_str, "ALL_NODES"); + } if (flags & RESERVE_FLAG_ANY_NODES) { if (flag_str[0]) xstrcat(flag_str, ","); diff --git a/src/plugins/select/cons_res/dist_tasks.c b/src/plugins/select/cons_res/dist_tasks.c index c42b3b7af1d5633cee0d5626e62766ddc9c2d966..6a42b46addf9b0096c7437e7b3d04495f2f9f84c 100644 --- a/src/plugins/select/cons_res/dist_tasks.c +++ b/src/plugins/select/cons_res/dist_tasks.c @@ -329,7 +329,7 @@ static void _block_sync_core_bitmap(struct job_record *job_ptr, uint16_t cpus_per_task = job_ptr->details->cpus_per_task; job_resources_t *job_res = job_ptr->job_resrcs; bool alloc_cores = false, alloc_sockets = false; - uint16_t ntasks_per_core = 0xffff; + uint16_t ncpus_per_core = 0xffff; /* Usable CPUs per core */ int count, cpu_min, b_min, elig, s_min, comb_idx, sock_idx; int elig_idx, comb_brd_idx, sock_list_idx, comb_min, board_num; int* boards_cpu_cnt; @@ -361,12 +361,14 @@ static void _block_sync_core_bitmap(struct job_record *job_ptr, if (job_ptr->details && job_ptr->details->mc_ptr) { multi_core_data_t *mc_ptr = job_ptr->details->mc_ptr; - if (mc_ptr->ntasks_per_core) { - ntasks_per_core = mc_ptr->ntasks_per_core; + if ((mc_ptr->ntasks_per_core != (uint16_t) INFINITE) && + (mc_ptr->ntasks_per_core)) { + ncpus_per_core = mc_ptr->ntasks_per_core; + ncpus_per_core *= cpus_per_task; } if ((mc_ptr->threads_per_core != (uint16_t) NO_VAL) && - (mc_ptr->threads_per_core < ntasks_per_core)) { - ntasks_per_core = mc_ptr->threads_per_core; + (mc_ptr->threads_per_core < ncpus_per_core)) { + ncpus_per_core = mc_ptr->threads_per_core; } } @@ -395,12 +397,7 @@ static void _block_sync_core_bitmap(struct job_record *job_ptr, fatal("cons_res: _block_sync_core_bitmap index error"); cpus = job_res->cpus[i]; - if (ntasks_per_core == 0xffff) { - vpus = select_node_record[n].vpus; - } else { - vpus = MIN(select_node_record[n].vpus, - (ntasks_per_core * cpus_per_task)); - } + vpus = MIN(select_node_record[n].vpus, ncpus_per_core); /* compute still required cores on the node */ req_cpus = cpus / vpus; @@ -682,7 +679,8 @@ static int _cyclic_sync_core_bitmap(struct job_record *job_ptr, bitstr_t *core_map; bool *sock_used, *sock_avoid; bool alloc_cores = false, alloc_sockets = false; - uint16_t ntasks_per_core = 0xffff, ntasks_per_socket = 0xffff; + uint16_t ncpus_per_core = 0xffff; /* Usable CPUs per core */ + uint16_t ntasks_per_socket = 0xffff; int error_code = SLURM_SUCCESS; if ((job_res == NULL) || (job_res->core_bitmap == NULL) || @@ -697,13 +695,15 @@ static int _cyclic_sync_core_bitmap(struct job_record *job_ptr, core_map = job_res->core_bitmap; if (job_ptr->details->mc_ptr) { multi_core_data_t *mc_ptr = job_ptr->details->mc_ptr; - if (mc_ptr->ntasks_per_core) { - ntasks_per_core = mc_ptr->ntasks_per_core; + if ((mc_ptr->ntasks_per_core != (uint16_t) INFINITE) && + (mc_ptr->ntasks_per_core)) { + ncpus_per_core = mc_ptr->ntasks_per_core; + ncpus_per_core *= cpus_per_task; } if ((mc_ptr->threads_per_core != (uint16_t) NO_VAL) && - (mc_ptr->threads_per_core < ntasks_per_core)) { - ntasks_per_core = mc_ptr->threads_per_core; + (mc_ptr->threads_per_core < ncpus_per_core)) { + ncpus_per_core = mc_ptr->threads_per_core; } if (mc_ptr->ntasks_per_socket) @@ -723,12 +723,7 @@ static int _cyclic_sync_core_bitmap(struct job_record *job_ptr, continue; sockets = select_node_record[n].sockets; cps = select_node_record[n].cores; - if (ntasks_per_core == 0xffff) { - vpus = select_node_record[n].vpus; - } else { - vpus = MIN(select_node_record[n].vpus, - (ntasks_per_core * cpus_per_task)); - } + vpus = MIN(select_node_record[n].vpus, ncpus_per_core); if (select_debug_flags & DEBUG_FLAG_SELECT_TYPE) { info("DEBUG: job %u node %s vpus %u cpus %u", diff --git a/src/plugins/select/cons_res/job_test.c b/src/plugins/select/cons_res/job_test.c index 4115c7a93b8e63be60072f7b445d41ffbbf2a1f0..918397f52aa02c2213a62a86cfad9235249ff7e2 100644 --- a/src/plugins/select/cons_res/job_test.c +++ b/src/plugins/select/cons_res/job_test.c @@ -214,7 +214,7 @@ static uint16_t _allocate_sc(struct job_record *job_ptr, bitstr_t *core_map, uint16_t cores_per_socket = select_node_record[node_i].cores; uint16_t threads_per_core = select_node_record[node_i].vpus; uint16_t min_cores = 1, min_sockets = 1, ntasks_per_socket = 0; - uint16_t ntasks_per_core = 0xffff; + uint16_t ncpus_per_core = 0xffff; /* Usable CPUs per core */ uint32_t free_cpu_count = 0, used_cpu_count = 0, *used_cpu_array = NULL; if (job_ptr->details && job_ptr->details->mc_ptr) { @@ -226,18 +226,20 @@ static uint16_t _allocate_sc(struct job_record *job_ptr, bitstr_t *core_map, if (mc_ptr->sockets_per_node != (uint16_t) NO_VAL) { min_sockets = mc_ptr->sockets_per_node; } - if (mc_ptr->ntasks_per_core) { - ntasks_per_core = mc_ptr->ntasks_per_core; + if ((mc_ptr->ntasks_per_core != (uint16_t) INFINITE) && + (mc_ptr->ntasks_per_core)) { + ncpus_per_core = mc_ptr->ntasks_per_core; + ncpus_per_core *= cpus_per_task; } if ((mc_ptr->threads_per_core != (uint16_t) NO_VAL) && - (mc_ptr->threads_per_core < ntasks_per_core)) { - ntasks_per_core = mc_ptr->threads_per_core; + (mc_ptr->threads_per_core < ncpus_per_core)) { + ncpus_per_core = mc_ptr->threads_per_core; } ntasks_per_socket = mc_ptr->ntasks_per_socket; - if ((ntasks_per_core != (uint16_t) NO_VAL) && - (ntasks_per_core != (uint16_t) INFINITE) && - (ntasks_per_core > threads_per_core)) { + if ((ncpus_per_core != (uint16_t) NO_VAL) && + (ncpus_per_core != (uint16_t) INFINITE) && + (ncpus_per_core > threads_per_core)) { goto fini; } threads_per_socket = threads_per_core * cores_per_socket; @@ -282,7 +284,7 @@ static uint16_t _allocate_sc(struct job_record *job_ptr, bitstr_t *core_map, * Step 2: For core-level and socket-level: apply sockets_per_node * and cores_per_socket to the "free" cores. * - * Step 3: Compute task-related data: ntasks_per_core, + * Step 3: Compute task-related data: ncpus_per_core, * ntasks_per_socket, ntasks_per_node and cpus_per_task * and determine the number of tasks to run on this node * @@ -381,15 +383,12 @@ static uint16_t _allocate_sc(struct job_record *job_ptr, bitstr_t *core_map, * ntasks_per_socket, ntasks_per_node and cpus_per_task * to determine the number of tasks to run on this node * - * Note: cpus_per_task and ntasks_per_core need to play nice + * Note: cpus_per_task and ncpus_per_core need to play nice * 2 tasks_per_core vs. 2 cpus_per_task */ avail_cpus = 0; num_tasks = 0; - if (ntasks_per_core != 0xffff) { - threads_per_core = MIN(threads_per_core, - (ntasks_per_core * cpus_per_task)); - } + threads_per_core = MIN(threads_per_core, ncpus_per_core); for (i = 0; i < sockets; i++) { uint16_t tmp = free_cores[i] * threads_per_core; diff --git a/src/slurmctld/reservation.c b/src/slurmctld/reservation.c index 743ee70ef64a22930915069ee7ef277a9bd651a0..1be9c3681b027ef74c5b71198d8faba4acd781f6 100644 --- a/src/slurmctld/reservation.c +++ b/src/slurmctld/reservation.c @@ -2253,6 +2253,7 @@ extern int create_resv(resv_desc_msg_t *resv_desc_ptr) resv_desc_ptr->partition); node_bitmap = bit_copy(part_ptr->node_bitmap); } else { + resv_desc_ptr->flags |= RESERVE_FLAG_ALL_NODES; node_bitmap = bit_alloc(node_record_count); bit_nset(node_bitmap, 0,(node_record_count-1)); } @@ -2710,6 +2711,7 @@ extern int update_resv(resv_desc_msg_t *resv_desc_ptr) if (resv_desc_ptr->node_list && (resv_desc_ptr->node_list[0] == '\0')) { /* Clear bitmap */ resv_ptr->flags &= (~RESERVE_FLAG_SPEC_NODES); + resv_ptr->flags &= (~RESERVE_FLAG_ALL_NODES); xfree(resv_desc_ptr->node_list); xfree(resv_ptr->node_list); FREE_NULL_BITMAP(resv_ptr->node_bitmap); @@ -2738,14 +2740,18 @@ extern int update_resv(resv_desc_msg_t *resv_desc_ptr) xfree(resv_desc_ptr->node_list); resv_ptr->node_list = xstrdup(part_ptr->nodes); } else { + resv_ptr->flags |= RESERVE_FLAG_ALL_NODES; node_bitmap = bit_alloc(node_record_count); bit_nset(node_bitmap, 0,(node_record_count-1)); resv_ptr->flags &= (~RESERVE_FLAG_PART_NODES); xfree(resv_ptr->node_list); - resv_ptr->node_list = resv_desc_ptr->node_list; + xfree(resv_desc_ptr->node_list); + resv_ptr->node_list = + bitmap2node_name(node_bitmap); } } else { resv_ptr->flags &= (~RESERVE_FLAG_PART_NODES); + resv_ptr->flags &= (~RESERVE_FLAG_ALL_NODES); if (node_name2bitmap(resv_desc_ptr->node_list, false, &node_bitmap)) { info("Reservation %s request has invalid node name (%s)", @@ -2767,6 +2773,7 @@ extern int update_resv(resv_desc_msg_t *resv_desc_ptr) if (resv_desc_ptr->node_cnt) { uint32_t total_node_cnt = 0; resv_ptr->flags &= (~RESERVE_FLAG_PART_NODES); + resv_ptr->flags &= (~RESERVE_FLAG_ALL_NODES); #ifdef HAVE_BG if (!cnodes_per_mp) { @@ -3158,6 +3165,8 @@ extern int dump_all_resv_state(void) static bool _validate_one_reservation(slurmctld_resv_t *resv_ptr) { bool account_not = false, user_not = false; + slurmctld_resv_t old_resv_ptr; + bitstr_t *node_bitmap; if ((resv_ptr->name == NULL) || (resv_ptr->name[0] == '\0')) { error("Read reservation without name"); @@ -3221,9 +3230,7 @@ static bool _validate_one_reservation(slurmctld_resv_t *resv_ptr) } if ((resv_ptr->flags & RESERVE_FLAG_PART_NODES) && resv_ptr->part_ptr && resv_ptr->part_ptr->node_bitmap) { - slurmctld_resv_t old_resv_ptr; memset(&old_resv_ptr, 0, sizeof(slurmctld_resv_t)); - xfree(resv_ptr->node_list); resv_ptr->node_list = xstrdup(resv_ptr->part_ptr->nodes); FREE_NULL_BITMAP(resv_ptr->node_bitmap); @@ -3235,8 +3242,20 @@ static bool _validate_one_reservation(slurmctld_resv_t *resv_ptr) _set_tres_cnt(resv_ptr, &old_resv_ptr); xfree(old_resv_ptr.tres_str); last_resv_update = time(NULL); + } else if (resv_ptr->flags & RESERVE_FLAG_ALL_NODES) { + memset(&old_resv_ptr, 0, sizeof(slurmctld_resv_t)); + FREE_NULL_BITMAP(resv_ptr->node_bitmap); + resv_ptr->node_bitmap = bit_alloc(node_record_count); + bit_nset(resv_ptr->node_bitmap, 0, (node_record_count - 1)); + xfree(resv_ptr->node_list); + resv_ptr->node_list = bitmap2node_name(resv_ptr->node_bitmap); + resv_ptr->node_cnt = bit_set_count(resv_ptr->node_bitmap); + old_resv_ptr.tres_str = resv_ptr->tres_str; + resv_ptr->tres_str = NULL; + _set_tres_cnt(resv_ptr, &old_resv_ptr); + xfree(old_resv_ptr.tres_str); + last_resv_update = time(NULL); } else if (resv_ptr->node_list) { /* Change bitmap last */ - bitstr_t *node_bitmap; #ifdef HAVE_BG int inx; char save = '\0';