Skip to content
Snippets Groups Projects
Commit fcef06b4 authored by Magnus Jonsson's avatar Magnus Jonsson Committed by Morris Jette
Browse files

Fix scheduling if node in more than one partition

I found a bug in cons_res/select_p_select_nodeinfo_set_all.

If a node is part of two (or more) partitions, the code will only count the number of cores/cpus in the partition that has the most running jobs on that node.

Patch attached to fix the problem.

I also added a new function to bitstring to count the number of set bits in a range (bit_set_count_range) and made a minor improvement to bit_set_count while reviewing the range version.

Best regards,
Magnus
parent 845a7925
No related branches found
No related tags found
No related merge requests found
......@@ -69,6 +69,7 @@ strong_alias(bit_and, slurm_bit_and);
strong_alias(bit_not, slurm_bit_not);
strong_alias(bit_or, slurm_bit_or);
strong_alias(bit_set_count, slurm_bit_set_count);
strong_alias(bit_set_count_range, slurm_bit_set_count_range);
strong_alias(bit_clear_count, slurm_bit_clear_count);
strong_alias(bit_nset_max_count,slurm_bit_nset_max_count);
strong_alias(int_and_set_count, slurm_int_and_set_count);
......@@ -662,15 +663,45 @@ bit_set_count(bitstr_t *b)
_assert_bitstr_valid(b);
bit_cnt = _bitstr_bits(b);
for (bit = 0; bit < bit_cnt; bit += word_size) {
if ((bit + word_size - 1) >= bit_cnt)
break;
for (bit = 0; (bit + word_size) <= bit_cnt; bit += word_size) {
count += hweight(b[_bit_word(bit)]);
}
for ( ; bit < bit_cnt; bit++) {
if (bit_test(b, bit))
count++;
}
return count;
}
/*
 * Count the number of bits set in a range of a bitstring.
 *   b (IN)		bitstring to check
 *   start (IN)		first bit to check
 *   end (IN)		one past the last bit to check; a value larger
 *			than _bitstr_bits(b) is clamped to _bitstr_bits(b)
 *   RETURN		count of set bits in the half-open range [start, end)
 */
int
bit_set_count_range(bitstr_t *b, int start, int end)
{
	int count = 0, eow;
	bitoff_t bit;
	const int word_size = sizeof(bitstr_t) * 8;

	_assert_bitstr_valid(b);
	_assert_bit_valid(b, start);

	end = MIN(end, _bitstr_bits(b));

	/* First multiple of word_size at or after start ("end of word");
	 * loop-invariant, so compute it once instead of per iteration. */
	eow = ((start + word_size - 1) / word_size) * word_size;

	/* Leading bits: test one at a time until eow or end is reached. */
	for (bit = start; bit < end && bit < eow; bit++) {
		if (bit_test(b, bit))
			count++;
	}

	/* Whole words that fit entirely below end: count via hweight(). */
	for ( ; (bit + word_size) <= end; bit += word_size) {
		count += hweight(b[_bit_word(bit)]);
	}

	/* Trailing bits of the last, partially covered word. */
	for ( ; bit < end; bit++) {
		if (bit_test(b, bit))
			count++;
	}

	return count;
}
......
......@@ -172,6 +172,7 @@ void bit_and(bitstr_t *b1, bitstr_t *b2);
void bit_not(bitstr_t *b);
void bit_or(bitstr_t *b1, bitstr_t *b2);
int bit_set_count(bitstr_t *b);
int bit_set_count_range(bitstr_t *b, int start, int end);
int bit_clear_count(bitstr_t *b);
int bit_nset_max_count(bitstr_t *b);
int int_and_set_count(int *i1, int ilen, bitstr_t *b2);
......
......@@ -93,6 +93,7 @@
#define bit_not slurm_bit_not
#define bit_or slurm_bit_or
#define bit_set_count slurm_bit_set_count
#define bit_set_count_range slurm_bit_set_count_range
#define bit_clear_count slurm_bit_clear_count
#define bit_nset_max_count slurm_bit_nset_max_count
#define bit_and_set_count slurm_bit_and_set_count
......
......@@ -2200,7 +2200,7 @@ extern int select_p_select_nodeinfo_set_all(void)
struct part_res_record *p_ptr;
struct node_record *node_ptr = NULL;
int i=0, n=0, c, start, end;
uint16_t tmp, tmp_16 = 0;
uint16_t tmp, tmp_16 = 0, tmp_part;
static time_t last_set_all = 0;
uint32_t node_threads, node_cpus;
select_nodeinfo_t *nodeinfo = NULL;
......@@ -2245,20 +2245,17 @@ extern int select_p_select_nodeinfo_set_all(void)
for (p_ptr = select_part_record; p_ptr; p_ptr = p_ptr->next) {
if (!p_ptr->row)
continue;
tmp_part = 0;
for (i = 0; i < p_ptr->num_rows; i++) {
if (!p_ptr->row[i].row_bitmap)
continue;
tmp = 0;
for (c = start; c < end; c++) {
if (bit_test(p_ptr->row[i].row_bitmap,
c))
tmp++;
}
tmp = bit_set_count_range(p_ptr->row[i].row_bitmap,
start,end);
/* get the row with the largest cpu
count on it. */
if (tmp > tmp_16)
tmp_16 = tmp;
tmp_part = MAX(tmp,tmp_part);
}
tmp_16 += tmp_part;
}
/* The minimum allocatable unit may be a core, so scale
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment