diff --git a/src/plugins/select/cons_res/job_test.c b/src/plugins/select/cons_res/job_test.c index 5094cf509ed1e455048e10961b5e3ce2c88f2791..14203c658f9c45a2ca76d3d4b818e54f82c6e949 100644 --- a/src/plugins/select/cons_res/job_test.c +++ b/src/plugins/select/cons_res/job_test.c @@ -731,7 +731,7 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr, * then we cannot rule out nodes just because Shared=NO * (NODE_CR_ONE_ROW) or Shared=EXCLUSIVE(NODE_CR_RESERVED) */ - if (cr_priority_selection_enabled()) + if (cr_preemption_enabled()) continue; /* exclusive node check */ @@ -1806,7 +1806,7 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, /* remove all existing allocations from free_cores */ tmpcore = bit_copy(free_cores); - for(p_ptr = cr_part_ptr; p_ptr; p_ptr = p_ptr->next) { + for (p_ptr = cr_part_ptr; p_ptr; p_ptr = p_ptr->next) { if (!p_ptr->row) continue; for (i = 0; i < p_ptr->num_rows; i++) { @@ -1842,7 +1842,7 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, job_ptr->job_id); /* remove hi-pri existing allocations from avail_cores */ - for(p_ptr = cr_part_ptr; p_ptr; p_ptr = p_ptr->next) { + for (p_ptr = cr_part_ptr; p_ptr; p_ptr = p_ptr->next) { if (p_ptr->part_ptr->priority <= jp_ptr->part_ptr->priority) continue; if (!p_ptr->row) @@ -1876,7 +1876,7 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, bit_copybits(free_cores, avail_cores); /* remove same-priority existing allocations from free_cores */ - for(p_ptr = cr_part_ptr; p_ptr; p_ptr = p_ptr->next) { + for (p_ptr = cr_part_ptr; p_ptr; p_ptr = p_ptr->next) { if (p_ptr->part_ptr->priority != jp_ptr->part_ptr->priority) continue; if (!p_ptr->row) diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c index 8c2dbdd84987b8eb429e8b2b3413415aef14a555..5fe5b5012a498a4f25755e04b15427401fe07a54 100644 --- a/src/plugins/select/cons_res/select_cons_res.c +++ b/src/plugins/select/cons_res/select_cons_res.c @@ -170,8 +170,9 @@ struct part_res_record *select_part_record = NULL; struct node_res_record *select_node_record = NULL; struct node_use_record *select_node_usage = NULL; static int select_node_cnt = 0; -static bool cr_priority_test = false; -static bool cr_priority_selection = false; +static bool job_preemption_enabled = false; +static bool job_preemption_killing = false; +static bool job_preemption_tested = false; struct select_nodeinfo { uint16_t magic; /* magic number */ @@ -248,14 +249,24 @@ static void _dump_state(struct part_res_record *p_ptr) #endif /* */ -extern bool cr_priority_selection_enabled() -{ - if (!cr_priority_test) { - if (slurm_get_preempt_mode() != PREEMPT_MODE_OFF) - cr_priority_selection = true; - cr_priority_test = true; +extern bool cr_preemption_enabled(void) +{ + if (!job_preemption_tested) { + uint16_t mode = slurm_get_preempt_mode(); + if (mode == PREEMPT_MODE_SUSPEND) + job_preemption_enabled = true; + else if (mode == PREEMPT_MODE_KILL) { + job_preemption_enabled = true; + job_preemption_killing = true; + } + job_preemption_tested = true; } - return cr_priority_selection; + return job_preemption_enabled; +} +extern bool cr_preemption_killing(void) +{ + (void) cr_preemption_enabled(); + return job_preemption_killing; } @@ -840,8 +851,9 @@ static int _add_job_to_res(struct job_record *job_ptr, int action) continue; select_node_usage[i].alloc_memory += job->memory_allocated[n]; - if (select_node_usage[i].alloc_memory > - select_node_record[i].real_memory) { + if ((select_node_usage[i].alloc_memory > + select_node_record[i].real_memory) && + !cr_preemption_killing()) { error("error: node %s mem is overallocated " "(%u) for job %u", select_node_record[i].node_ptr->name, @@ -1164,9 +1176,9 @@ static uint16_t _is_node_avail(struct part_res_record *p_ptr, uint32_t node_i) cpu_end = cr_get_coremap_offset(node_i+1); if (select_node_usage[node_i].node_state >= NODE_CR_RESERVED) { - if (!cr_priority_selection_enabled()) + if (!cr_preemption_enabled()) return (uint16_t) 0; - /* cr_priority_selection has been enabled: + /* job_preemption has been enabled: * check to see if the existing job that reserved * this node is in a partition with a priority that * is equal-to or greater-than this partition. If it @@ -1838,8 +1850,9 @@ extern int select_p_reconfigure(void) info("cons_res: select_p_reconfigure"); /* Rebuild the global data structures */ - cr_priority_selection = false; - cr_priority_test = false; + job_preemption_enabled = false; + job_preemption_killing = false; + job_preemption_tested = false; rc = select_p_node_init(node_record_table_ptr, node_record_count); if (rc != SLURM_SUCCESS) return rc; diff --git a/src/plugins/select/cons_res/select_cons_res.h b/src/plugins/select/cons_res/select_cons_res.h index 217c96aec8e711e1692eec2c6d36682eddef2972..6a847416bfca78c1e156811430725a16cc47aca4 100644 --- a/src/plugins/select/cons_res/select_cons_res.h +++ b/src/plugins/select/cons_res/select_cons_res.h @@ -123,6 +123,8 @@ extern struct node_use_record *select_node_usage; extern void cr_sort_part_rows(struct part_res_record *p_ptr); extern uint32_t cr_get_coremap_offset(uint32_t node_index); extern uint32_t cr_get_node_num_cores(uint32_t node_index); -extern bool cr_priority_selection_enabled(); + +extern bool cr_preemption_enabled(void); +extern bool cr_preemption_killing(void); #endif /* !_CONS_RES_H */