diff --git a/NEWS b/NEWS
index a4ac5fe60e28a948c046465cc53d5b5f30666836..afcee9e1599fa4adc772069f2a88d43216f5a8cd 100644
--- a/NEWS
+++ b/NEWS
@@ -171,6 +171,10 @@ documents those changes that are of interest to users and admins.
  -- Fix slurmstepd core dump.
  -- Modify the description of -E and -S option of sacct command as point in
     time 'before' or 'after' the database records are returned.
+ -- Correct support for partitions with Shared=YES configuration.
+ -- If a job requests --exclusive, do not use nodes which have any cores in an
+    advanced reservation. This also prevents such nodes from being shared by
+    other jobs.
 
 * Changes in Slurm 14.03.3-2
 ============================
diff --git a/doc/html/faq.shtml b/doc/html/faq.shtml
index 1b854f08e9209a90a254d15f9c0749de7b5f6a72..6bd15524a1d3b77d81afc8ea405db7e12fe5196a 100644
--- a/doc/html/faq.shtml
+++ b/doc/html/faq.shtml
@@ -403,8 +403,8 @@ as part of the process launch.</p>
 
 <p><a name="prompt"><b>12. How can I get shell prompts in interactive
 mode?</b></a><br>
-<i>srun -u bash -i</i><br>
-Srun's <i>-u</i> option turns off buffering of stdout.
+<i>srun --pty bash -i</i><br>
+Srun's <i>--pty</i> option runs task zero in pseudo-terminal mode.
 Bash's <i>-i</i> option tells it to run in interactive mode (with prompts).
 
 <p><a name="batch_out"><b>13. How can I get the task ID in the output
@@ -1931,6 +1931,6 @@ sacctmgr delete user name=adam cluster=tux account=chemistry
 
 <p class="footer"><a href="#top">top</a></p>
 
-<p style="text-align:center;">Last modified 30 April 2014</p>
+<p style="text-align:center;">Last modified 12 June 2014</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c
index d9a96a8c86b99992cc35e51501bff5fb50cd5c76..68dfedf021899f1188e84c17eb34fbefb948affe 100644
--- a/src/plugins/sched/backfill/backfill.c
+++ b/src/plugins/sched/backfill/backfill.c
@@ -1189,8 +1189,14 @@ static int _attempt_backfill(void)
 			job_ptr->time_limit = orig_time_limit;
 		}
 
-		if (later_start &&
-		    (job_ptr->start_time > (later_start+backfill_resolution))) {
+		start_time = job_ptr->start_time;
+		end_reserve = job_ptr->start_time + (time_limit * 60);
+		start_time = (start_time / backfill_resolution) *
+			     backfill_resolution;
+		end_reserve = (end_reserve / backfill_resolution) *
+			      backfill_resolution;
+
+		if (later_start && (start_time > later_start)) {
 			/* Try later when some nodes currently reserved for
 			 * pending jobs are free */
 			job_ptr->start_time = 0;
@@ -1210,12 +1216,6 @@ static int _attempt_backfill(void)
 			break;
 		}
 
-		start_time = job_ptr->start_time;
-		end_reserve = job_ptr->start_time + (time_limit * 60);
-		start_time = (start_time / backfill_resolution) *
-			     backfill_resolution;
-		end_reserve = (end_reserve / backfill_resolution) *
-			      backfill_resolution;
 		if ((job_ptr->start_time > now) &&
 		    _test_resv_overlap(node_space, avail_bitmap,
 				       start_time, end_reserve)) {
diff --git a/src/plugins/select/cons_res/job_test.c b/src/plugins/select/cons_res/job_test.c
index c5d829bc51f18aa64284cea88737a3671c7ccd40..7eddf74dc680f6a7c5454864a9a15b7621cfcf23 100644
--- a/src/plugins/select/cons_res/job_test.c
+++ b/src/plugins/select/cons_res/job_test.c
@@ -634,10 +634,11 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr,
 			      bitstr_t *node_bitmap,
 			      uint16_t cr_type,
 			      struct node_use_record *node_usage,
-			      enum node_cr_state job_node_req)
+			      enum node_cr_state job_node_req,
+			      bitstr_t *exc_core_bitmap)
 {
 	struct node_record *node_ptr;
-	uint32_t i, free_mem, gres_cpus, gres_cores, min_mem;
+	uint32_t i, j, free_mem, gres_cpus, gres_cores, min_mem;
 	int core_start_bit, core_end_bit, cpus_per_core;
 	List gres_list;
 	int i_first, i_last;
@@ -683,6 +684,17 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr,
 		}
 	}
 
+	/* Exclude nodes with reserved cores */
+	if (job_ptr->details->whole_node && exc_core_bitmap) {
+		for (j = core_start_bit; j <= core_end_bit; j++) {
+			if (!bit_test(exc_core_bitmap, j))
+				continue;
+			debug3("cons_res: _vns: node %s exc",
+			       select_node_record[i].node_ptr->name);
+			goto clear_bit;
+		}
+	}
+
 	/* node-level gres check */
 	if (node_usage[i].gres_list)
 		gres_list = node_usage[i].gres_list;
@@ -2024,6 +2036,35 @@ static uint16_t *_select_nodes(struct job_record *job_ptr, uint32_t min_nodes,
 	return cpus;
 }
 
+/* When any cores on a node are removed from being available for a job,
+ * then remove the entire node from being available. */
+static void _block_whole_nodes(bitstr_t *node_bitmap,
+			       bitstr_t *orig_core_bitmap,
+			       bitstr_t *new_core_bitmap)
+{
+	int first_node, last_node, i_node;
+	int first_core, last_core, i_core;
+
+	first_node = bit_ffs(node_bitmap);
+	if (first_node >= 0)
+		last_node = bit_fls(node_bitmap);
+	else
+		last_node = first_node - 1;	/* empty bitmap, skip loop */
+
+	for (i_node = first_node; i_node <= last_node; i_node++) {
+		if (!bit_test(node_bitmap, i_node))
+			continue;
+		first_core = cr_get_coremap_offset(i_node);
+		last_core  = cr_get_coremap_offset(i_node + 1) - 1;
+		for (i_core = first_core; i_core <= last_core; i_core++) {
+			if ( bit_test(orig_core_bitmap, i_core) &&
+			    !bit_test(new_core_bitmap,  i_core)) {
+				bit_clear(node_bitmap, i_node);
+				break;
+			}
+		}
+	}
+}
 
 /* cr_job_test - does most of the real work for select_p_job_test(), which
  * includes contiguous selection, load-leveling and max_share logic
@@ -2084,7 +2125,8 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *node_bitmap,
 	if (!test_only) {
 		error_code = _verify_node_state(cr_part_ptr, job_ptr,
 						node_bitmap, cr_type,
-						node_usage, job_node_req);
+						node_usage, job_node_req,
+						exc_core_bitmap);
 		if (error_code != SLURM_SUCCESS) {
 			return error_code;
 		}
@@ -2214,10 +2256,11 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *node_bitmap,
 		}
 	}
 	if (exc_core_bitmap) {
+#if _DEBUG
 		char str[100];
 		bit_fmt(str, (sizeof(str) - 1), exc_core_bitmap);
 		debug2("excluding cores reserved: %s", str);
-
+#endif
 		bit_not(exc_core_bitmap);
 		bit_and(free_cores, exc_core_bitmap);
 		bit_not(exc_core_bitmap);
@@ -2234,7 +2277,6 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *node_bitmap,
 			bit_copybits(tmpcore, p_ptr->row[i].row_bitmap);
 			bit_not(tmpcore); /* set bits now "free" resources */
 			bit_and(free_cores, tmpcore);
-
 			if (p_ptr->part_ptr != job_ptr->part_ptr)
 				continue;
 			if (part_core_map) {
@@ -2245,6 +2287,9 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *node_bitmap,
 			}
 		}
 	}
+	if (job_ptr->details->whole_node)
+		_block_whole_nodes(node_bitmap, avail_cores, free_cores);
+
 	cpu_count = _select_nodes(job_ptr, min_nodes, max_nodes, req_nodes,
 				  node_bitmap, cr_node_cnt, free_cores,
 				  node_usage, cr_type, test_only,
@@ -2307,7 +2352,7 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *node_bitmap,
 		    (p_ptr->part_ptr->preempt_mode != PREEMPT_MODE_OFF)) {
 			if (select_debug_flags & DEBUG_FLAG_SELECT_TYPE) {
 				info("cons_res: cr_job_test: continuing on "
-				     "part: %s ", p_ptr->part_ptr->name);
+				     "part: %s", p_ptr->part_ptr->name);
 			}
 			continue;
 		}
@@ -2321,6 +2366,8 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *node_bitmap,
 				bit_and(free_cores, tmpcore);
 			}
 		}
+		if (job_ptr->details->whole_node)
+			_block_whole_nodes(node_bitmap, avail_cores, free_cores);
 		/* make these changes permanent */
 		bit_copybits(avail_cores, free_cores);
 		cpu_count = _select_nodes(job_ptr, min_nodes, max_nodes, req_nodes,
diff --git a/src/plugins/select/cons_res/select_cons_res.h b/src/plugins/select/cons_res/select_cons_res.h
index 27ad9088eb98b2e35abb857102b78b06d88b33f8..caf75ce3bbd727b7b9e4115cf57fe029a70d0dab 100644
--- a/src/plugins/select/cons_res/select_cons_res.h
+++ b/src/plugins/select/cons_res/select_cons_res.h
@@ -67,7 +67,8 @@
 
 /* a partition's per-row CPU allocation data */
 struct part_row_data {
-	bitstr_t *row_bitmap;		/* contains all jobs for this row */
+	bitstr_t *row_bitmap;		/* contains core bitmap for all jobs in
+					 * this row */
 	uint32_t num_jobs;		/* Number of jobs in this row */
 	struct job_resources **job_list;/* List of jobs in this row */
 	uint32_t job_list_size;		/* Size of job_list array */
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index e46deb4338ee31d3c7404efb0ca4a45b8d7aad1c..462e9de7b12267df49da52a66f08ed50d1e666db 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -700,14 +700,16 @@ _resolve_shared_status(struct job_record *job_ptr, uint16_t part_max_share,
 		return 1;
 
 	if (cons_res_flag) {
+		if (part_max_share == 1)  /* partition configured Shared=NO */
+			return 0;
 		if ((job_ptr->details->share_res == 0) ||
+		    (job_ptr->details->share_res == (uint8_t) NO_VAL) ||
 		    (job_ptr->details->whole_node == 1))
 			return 0;
 		return 1;
 	} else {
 		job_ptr->details->whole_node = 1;
-		/* no sharing if partition Shared=NO */
-		if (part_max_share == 1)
+		if (part_max_share == 1)  /* partition configured Shared=NO */
 			return 0;
 		/* share if the user requested it */
 		if (job_ptr->details->share_res == 1)
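
The new _block_whole_nodes() pass clears a node from node_bitmap whenever any
core that was available in orig_core_bitmap has been dropped from
new_core_bitmap, so an --exclusive job never lands on a node that lost cores
to a reservation or to another job's row. The standalone sketch below models
the same pass with plain bool arrays; NODE_CNT, CORES_PER_NODE, and
block_whole_nodes() are illustrative assumptions, not Slurm's bitstr_t or
cr_get_coremap_offset() API.

	/* Standalone illustration of the whole-node blocking pass.  Plain
	 * bool arrays stand in for Slurm's bitstr_t; a fixed CORES_PER_NODE
	 * layout stands in for cr_get_coremap_offset(). */
	#include <stdbool.h>
	#include <stdio.h>

	#define NODE_CNT	3
	#define CORES_PER_NODE	4

	/* Clear a node from node_avail if any core that was available in
	 * orig_cores is no longer available in new_cores. */
	static void block_whole_nodes(bool node_avail[NODE_CNT],
			const bool orig_cores[NODE_CNT * CORES_PER_NODE],
			const bool new_cores[NODE_CNT * CORES_PER_NODE])
	{
		for (int n = 0; n < NODE_CNT; n++) {
			if (!node_avail[n])
				continue;
			int first = n * CORES_PER_NODE;
			for (int c = first; c < first + CORES_PER_NODE; c++) {
				if (orig_cores[c] && !new_cores[c]) {
					node_avail[n] = false; /* lost a core */
					break;
				}
			}
		}
	}

	int main(void)
	{
		bool node_avail[NODE_CNT] = { true, true, true };
		bool orig_cores[NODE_CNT * CORES_PER_NODE];
		bool new_cores[NODE_CNT * CORES_PER_NODE];

		for (int c = 0; c < NODE_CNT * CORES_PER_NODE; c++)
			orig_cores[c] = new_cores[c] = true;
		new_cores[5] = false;	/* core 1 of node 1 is now reserved */

		block_whole_nodes(node_avail, orig_cores, new_cores);
		for (int n = 0; n < NODE_CNT; n++)
			printf("node %d: %s\n", n,
			       node_avail[n] ? "usable" : "blocked");
		return 0;	/* node 1 is blocked; nodes 0 and 2 usable */
	}

Losing a single core (bit 5) is enough to block all of node 1, which is
exactly the behavior the NEWS entry describes for --exclusive jobs versus
advanced reservations.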