Skip to content
Snippets Groups Projects
Commit 1995f306 authored by Morris Jette
Browse files

Cray - Fix avail_node_bitmap handling for non-responsive nodes

On Cray systems only, the value of avail_node_bitmap was not being
properly set for non-responsive nodes.
parent 493aa97a
No related branches found
No related tags found
No related merge requests found
...@@ -182,6 +182,7 @@ extern int basil_inventory(void) ...@@ -182,6 +182,7 @@ extern int basil_inventory(void)
rc = ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE; rc = ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE;
for (node = inv->f->node_head; node; node = node->next) { for (node = inv->f->node_head; node; node = node->next) {
int node_inx;
struct node_record *node_ptr; struct node_record *node_ptr;
char *reason = NULL; char *reason = NULL;
...@@ -192,6 +193,7 @@ extern int basil_inventory(void) ...@@ -192,6 +193,7 @@ extern int basil_inventory(void)
nam_nodestate[node->state]); nam_nodestate[node->state]);
continue; continue;
} }
node_inx = node_ptr - node_record_table_ptr;
if (node_is_allocated(node) && !IS_NODE_ALLOCATED(node_ptr)) { if (node_is_allocated(node) && !IS_NODE_ALLOCATED(node_ptr)) {
/* /*
...@@ -236,6 +238,7 @@ extern int basil_inventory(void) ...@@ -236,6 +238,7 @@ extern int basil_inventory(void)
((now - node_ptr->down_time) < ((now - node_ptr->down_time) <
slurmctld_conf.slurmd_timeout)) { slurmctld_conf.slurmd_timeout)) {
node_ptr->node_state |= NODE_STATE_NO_RESPOND; node_ptr->node_state |= NODE_STATE_NO_RESPOND;
bit_clear(avail_node_bitmap, node_inx);
} else { } else {
xfree(node_ptr->reason); xfree(node_ptr->reason);
info("MARKING %s DOWN (%s)", info("MARKING %s DOWN (%s)",
...@@ -256,15 +259,18 @@ extern int basil_inventory(void) ...@@ -256,15 +259,18 @@ extern int basil_inventory(void)
make_node_idle(node_ptr, NULL); make_node_idle(node_ptr, NULL);
if (!IS_NODE_DRAIN(node_ptr) && if (!IS_NODE_DRAIN(node_ptr) &&
!IS_NODE_FAIL(node_ptr)) { !IS_NODE_FAIL(node_ptr)) {
int node_inx = node_ptr - node_record_table_ptr;
bit_set(avail_node_bitmap, node_inx);
bit_set(up_node_bitmap, node_inx);
xfree(node_ptr->reason); xfree(node_ptr->reason);
node_ptr->reason_time = 0; node_ptr->reason_time = 0;
node_ptr->reason_uid = NO_VAL; node_ptr->reason_uid = NO_VAL;
clusteracct_storage_g_node_up( clusteracct_storage_g_node_up(
acct_db_conn, node_ptr, now); acct_db_conn, node_ptr, now);
} }
} else if (IS_NODE_NO_RESPOND(node_ptr)) {
node_ptr->node_state &= (~NODE_STATE_NO_RESPOND);
if (!IS_NODE_DRAIN(node_ptr) &&
!IS_NODE_FAIL(node_ptr)) {
bit_set(avail_node_bitmap, node_inx);
}
} }
} }
......
...@@ -74,7 +74,6 @@ int switch_record_cnt __attribute__((weak_import)); ...@@ -74,7 +74,6 @@ int switch_record_cnt __attribute__((weak_import));
slurmdb_cluster_rec_t *working_cluster_rec __attribute__((weak_import)) = NULL; slurmdb_cluster_rec_t *working_cluster_rec __attribute__((weak_import)) = NULL;
void *acct_db_conn __attribute__((weak_import)) = NULL; void *acct_db_conn __attribute__((weak_import)) = NULL;
bitstr_t *avail_node_bitmap __attribute__((weak_import)) = NULL; bitstr_t *avail_node_bitmap __attribute__((weak_import)) = NULL;
bitstr_t *up_node_bitmap __attribute__((weak_import)) = NULL;
#else #else
slurm_ctl_conf_t slurmctld_conf; slurm_ctl_conf_t slurmctld_conf;
struct node_record *node_record_table_ptr; struct node_record *node_record_table_ptr;
...@@ -87,7 +86,6 @@ int switch_record_cnt; ...@@ -87,7 +86,6 @@ int switch_record_cnt;
slurmdb_cluster_rec_t *working_cluster_rec = NULL; slurmdb_cluster_rec_t *working_cluster_rec = NULL;
void *acct_db_conn = NULL; void *acct_db_conn = NULL;
bitstr_t *avail_node_bitmap = NULL; bitstr_t *avail_node_bitmap = NULL;
bitstr_t *up_node_bitmap = NULL;
#endif #endif
/* /*
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment