From cc5dfa208e48a666e77c43f93bc9cacb668e0c21 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Tue, 24 Sep 2002 00:05:51 +0000 Subject: [PATCH] Added support for node states DRAINING/DRAINED. --- src/slurmctld/node_mgr.c | 52 +++++++++++++++++++++++----------- src/slurmctld/node_scheduler.c | 24 ++++++++++++---- src/slurmctld/read_config.c | 10 +++++-- 3 files changed, 62 insertions(+), 24 deletions(-) diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index ee29608f10a..577510f586b 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -956,7 +956,7 @@ split_node_name (char *name, char *prefix, char *suffix, int *index, int update_node ( update_node_msg_t * update_node_msg ) { - int error_code = 0, state_val; + int error_code = 0, state_val, node_inx; char *this_node_name ; struct node_record *node_record_point; hostlist_t host_list; @@ -976,6 +976,7 @@ update_node ( update_node_msg_t * update_node_msg ) last_node_update = time (NULL); while ( (this_node_name = hostlist_shift (host_list)) ) { node_record_point = find_node_record (this_node_name); + node_inx = node_record_point - node_record_table_ptr; if (node_record_point == NULL) { error ("update_node: node name %s does not exist, can not be updated", this_node_name); @@ -986,17 +987,36 @@ update_node ( update_node_msg_t * update_node_msg ) if (state_val != NO_VAL) { if (state_val == NODE_STATE_DOWN) { - bit_clear (up_node_bitmap, - (int) (node_record_point - node_record_table_ptr)); - bit_clear (idle_node_bitmap, - (int) (node_record_point - node_record_table_ptr)); + bit_clear (up_node_bitmap, node_inx); + bit_clear (idle_node_bitmap, node_inx); + } + else if (state_val == NODE_STATE_UNKNOWN) { + bit_clear (up_node_bitmap, node_inx); + bit_clear (idle_node_bitmap, node_inx); + } + else if (state_val == NODE_STATE_IDLE) { + bit_set (up_node_bitmap, node_inx); + bit_set (idle_node_bitmap, node_inx); + } + else if (state_val == NODE_STATE_ALLOCATED) { + bit_set 
(up_node_bitmap, node_inx); + bit_clear (idle_node_bitmap, node_inx); + } + else if (state_val == NODE_STATE_DRAINED) { + if (!bit_test (idle_node_bitmap, node_inx)) /* not idle yet, so the drain is still in progress */ + state_val = NODE_STATE_DRAINING; + bit_clear (up_node_bitmap, node_inx); + } + else if (state_val == NODE_STATE_DRAINING) { + if (bit_test (idle_node_bitmap, node_inx)) { + state_val = NODE_STATE_DRAINED; + bit_clear (idle_node_bitmap, node_inx); + } + bit_clear (up_node_bitmap, node_inx); + } + else { + error ("Invalid node state specified %d", state_val); } - else if (state_val != NODE_STATE_IDLE) - bit_clear (idle_node_bitmap, - (int) (node_record_point - node_record_table_ptr)); - else /* (state_val == NODE_STATE_IDLE) */ - bit_set (idle_node_bitmap, - (int) (node_record_point - node_record_table_ptr)); node_record_point->node_state = state_val; info ("update_node: node %s state set to %s", @@ -1081,7 +1101,7 @@ void node_did_resp (char *name) { struct node_record *node_ptr; - int i; + int node_inx; node_ptr = find_node_record (name); if (node_ptr == NULL) { @@ -1089,16 +1109,16 @@ node_did_resp (char *name) return; } - i = node_ptr - node_record_table_ptr; + node_inx = node_ptr - node_record_table_ptr; last_node_update = time (NULL); - node_record_table_ptr[i].last_response = time (NULL); + node_record_table_ptr[node_inx].last_response = time (NULL); node_ptr->node_state &= (uint16_t) (~NODE_STATE_NO_RESPOND); if (node_ptr->node_state == NODE_STATE_UNKNOWN) node_ptr->node_state = NODE_STATE_IDLE; if (node_ptr->node_state == NODE_STATE_IDLE) - bit_set (idle_node_bitmap, (node_ptr - node_record_table_ptr)); + bit_set (idle_node_bitmap, node_inx); if (node_ptr->node_state != NODE_STATE_DOWN) - bit_set (up_node_bitmap, (node_ptr - node_record_table_ptr)); + bit_set (up_node_bitmap, node_inx); return; } diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 294bd41cbfa..c7336589aee 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -118,6
+118,7 @@ deallocate_nodes (struct job_record * job_ptr) pthread_attr_t attr_agent; pthread_t thread_agent; int buf_rec_size = 0; + uint16_t no_resp_flag, base_state; agent_args = xmalloc (sizeof (agent_arg_t)); agent_args->msg_type = REQUEST_REVOKE_JOB_CREDENTIAL; @@ -132,15 +133,28 @@ deallocate_nodes (struct job_record * job_ptr) continue; if ((agent_args->addr_count+1) > buf_rec_size) { buf_rec_size += 32; - xrealloc ((agent_args->slurm_addr), (sizeof (struct sockaddr_in) * buf_rec_size)); - xrealloc ((agent_args->node_names), (MAX_NAME_LEN * buf_rec_size)); + xrealloc ((agent_args->slurm_addr), + (sizeof (struct sockaddr_in) * buf_rec_size)); + xrealloc ((agent_args->node_names), + (MAX_NAME_LEN * buf_rec_size)); } - agent_args->slurm_addr[agent_args->addr_count] = node_record_table_ptr[i].slurm_addr; + agent_args->slurm_addr[agent_args->addr_count] = + node_record_table_ptr[i].slurm_addr; strncpy (&agent_args->node_names[MAX_NAME_LEN*agent_args->addr_count], node_record_table_ptr[i].name, MAX_NAME_LEN); agent_args->addr_count++; - node_record_table_ptr[i].node_state = NODE_STATE_IDLE; - bit_set (idle_node_bitmap, i); + base_state = node_record_table_ptr[i].node_state & (~NODE_STATE_NO_RESPOND); + no_resp_flag = node_record_table_ptr[i].node_state & NODE_STATE_NO_RESPOND; + if (base_state == NODE_STATE_DRAINING) { + node_record_table_ptr[i].node_state = NODE_STATE_DRAINED; + bit_clear (idle_node_bitmap, i); + bit_clear (up_node_bitmap, i); + } + else { + node_record_table_ptr[i].node_state = NODE_STATE_IDLE | no_resp_flag; + if (no_resp_flag == 0) + bit_set (idle_node_bitmap, i); + } } agent_args->msg_args = revoke_job_cred; diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index 6cafc7a8ff5..d81f0e4d622 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -139,12 +139,16 @@ build_bitmaps () /* scan all nodes and identify which are up and idle and their configuration */ for (i = 0; i < node_record_count; i++) { + 
uint16_t base_state; + if (node_record_table_ptr[i].name[0] == '\0') continue; /* defunct */ - if (node_record_table_ptr[i].node_state == NODE_STATE_IDLE) + base_state = node_record_table_ptr[i].node_state & (~NODE_STATE_NO_RESPOND); + if (base_state == NODE_STATE_IDLE) bit_set (idle_node_bitmap, i); - if ((node_record_table_ptr[i].node_state != NODE_STATE_DOWN) && - (node_record_table_ptr[i].node_state != NODE_STATE_UNKNOWN) && + if ((base_state != NODE_STATE_DOWN) && + (base_state != NODE_STATE_UNKNOWN) && + (base_state != NODE_STATE_DRAINED) && ((node_record_table_ptr[i].node_state & NODE_STATE_NO_RESPOND) == 0)) bit_set (up_node_bitmap, i); if (node_record_table_ptr[i].config_ptr) -- GitLab