Skip to content
Snippets Groups Projects
Commit 142539c5 authored by Moe Jette
Browse files

Add a drain_nodes function that is a no-op for nodes that are already drained or draining.

parent 9db3f9f5
No related branches found
No related tags found
No related merge requests found
......@@ -955,6 +955,72 @@ int update_node ( update_node_msg_t * update_node_msg )
return error_code;
}
/*
* drain_nodes - drain one or more nodes,
* no-op for nodes already drained or draining
* IN nodes - nodes to drain
* IN reason - reason to drain the nodes
* RET SLURM_SUCCESS or error code
* global: node_record_table_ptr - pointer to global node table
*/
extern int drain_nodes ( char *nodes, char *reason )
{
int error_code = 0, node_inx;
struct node_record *node_ptr;
char *this_node_name ;
hostlist_t host_list;
uint16_t base_state, no_resp_flag, state_val;
if ((nodes == NULL) || (nodes[0] == '\0')) {
error ("drain_nodes: invalid node name %s", nodes);
return ESLURM_INVALID_NODE_NAME;
}
if ( (host_list = hostlist_create (nodes)) == NULL) {
error ("hostlist_create error on %s: %m", nodes);
return ESLURM_INVALID_NODE_NAME;
}
last_node_update = time (NULL);
while ( (this_node_name = hostlist_shift (host_list)) ) {
int err_code = 0;
node_ptr = find_node_record (this_node_name);
node_inx = node_ptr - node_record_table_ptr;
if (node_ptr == NULL) {
error ("drain_nodes: node %s does not exist",
this_node_name);
error_code = ESLURM_INVALID_NODE_NAME;
free (this_node_name);
break;
}
base_state = node_ptr->node_state & (~NODE_STATE_NO_RESPOND);
no_resp_flag = node_ptr->node_state & NODE_STATE_NO_RESPOND;
if ((base_state == NODE_STATE_DRAINED)
|| (base_state == NODE_STATE_DRAINING)) {
/* state already changed, nothing to do */
free (this_node_name);
continue;
}
if ((node_ptr->run_job_cnt + node_ptr->comp_job_cnt) == 0)
state_val = NODE_STATE_DRAINED;
else
state_val = NODE_STATE_DRAINING;
node_ptr->node_state = state_val | no_resp_flag;
bit_clear (avail_node_bitmap, node_inx);
info ("drain_nodes: node %s state set to %s",
this_node_name, node_state_string(state_val));
xfree(node_ptr->reason);
node_ptr->reason = xstrdup(reason);
free (this_node_name);
}
hostlist_destroy (host_list);
return error_code;
}
/* Return true if admin request to change node state from old to new is valid */
static bool _valid_node_state_change(enum node_states old, enum node_states new)
{
......
......@@ -1503,25 +1503,22 @@ static void _slurm_rpc_update_job(slurm_msg_t * msg)
}
/*
* slurm_drain_nodes - process a request to drain a list of nodes
* slurm_drain_nodes - process a request to drain a list of nodes,
* no-op for nodes already drained or draining
* node_list IN - list of nodes to drain
* reason IN - reason to drain the nodes
* RET SLURM_SUCCESS or error code
* NOTE: This is utilized by plugins and not via RPC
*/
extern int slurm_drain_nodes(char *node_list, char *reason)
{
int error_code;
update_node_msg_t update_node_msg;
/* Locks: Write node */
slurmctld_lock_t node_write_lock = {
NO_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK };
update_node_msg.node_names = node_list;
update_node_msg.node_state = NODE_STATE_DRAINED;
update_node_msg.reason = reason;
lock_slurmctld(node_write_lock);
error_code = update_node(&update_node_msg);
error_code = drain_nodes(node_list, reason);
unlock_slurmctld(node_write_lock);
return error_code;
......
......@@ -431,6 +431,16 @@ extern int delete_partition(delete_part_msg_t *part_desc_ptr);
*/
extern int delete_step_record (struct job_record *job_ptr, uint32_t step_id);
/*
 * drain_nodes - drain one or more nodes,
 *	no-op for nodes already drained or draining
 * IN nodes - nodes to drain; NULL or an empty string is rejected
 * IN reason - reason to drain the nodes; copied into each node record
 *	whose state is changed
 * RET SLURM_SUCCESS or error code (ESLURM_INVALID_NODE_NAME for a
 *	NULL/empty/unparsable node list or an unknown node name;
 *	processing stops at the first unknown node)
 * global: node_record_table_ptr - pointer to global node table
 * NOTE(review): slurm_drain_nodes calls this with the node write lock
 *	held; confirm other callers do the same.
 */
extern int drain_nodes ( char *nodes, char *reason );
/* dump_all_job_state - save the state of all jobs to file
* RET 0 or error code */
extern int dump_all_job_state ( void );
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment