Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Slurm
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
tud-zih-energy
Slurm
Commits
142539c5
Commit
142539c5
authored
20 years ago
by
Moe Jette
Browse files
Options
Downloads
Patches
Plain Diff
Make drain node function that is a no-op if a node is already drained.
parent
9db3f9f5
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/slurmctld/node_mgr.c
+66
-0
66 additions, 0 deletions
src/slurmctld/node_mgr.c
src/slurmctld/proc_req.c
+4
-7
4 additions, 7 deletions
src/slurmctld/proc_req.c
src/slurmctld/slurmctld.h
+10
-0
10 additions, 0 deletions
src/slurmctld/slurmctld.h
with
80 additions
and
7 deletions
src/slurmctld/node_mgr.c
+
66
−
0
View file @
142539c5
...
...
@@ -955,6 +955,72 @@ int update_node ( update_node_msg_t * update_node_msg )
return
error_code
;
}
/*
* drain_nodes - drain one or more nodes,
* no-op for nodes already drained or draining
* IN nodes - nodes to drain
* IN reason - reason to drain the nodes
* RET SLURM_SUCCESS or error code
* global: node_record_table_ptr - pointer to global node table
*/
extern
int
drain_nodes
(
char
*
nodes
,
char
*
reason
)
{
int
error_code
=
0
,
node_inx
;
struct
node_record
*
node_ptr
;
char
*
this_node_name
;
hostlist_t
host_list
;
uint16_t
base_state
,
no_resp_flag
,
state_val
;
if
((
nodes
==
NULL
)
||
(
nodes
[
0
]
==
'\0'
))
{
error
(
"drain_nodes: invalid node name %s"
,
nodes
);
return
ESLURM_INVALID_NODE_NAME
;
}
if
(
(
host_list
=
hostlist_create
(
nodes
))
==
NULL
)
{
error
(
"hostlist_create error on %s: %m"
,
nodes
);
return
ESLURM_INVALID_NODE_NAME
;
}
last_node_update
=
time
(
NULL
);
while
(
(
this_node_name
=
hostlist_shift
(
host_list
))
)
{
int
err_code
=
0
;
node_ptr
=
find_node_record
(
this_node_name
);
node_inx
=
node_ptr
-
node_record_table_ptr
;
if
(
node_ptr
==
NULL
)
{
error
(
"drain_nodes: node %s does not exist"
,
this_node_name
);
error_code
=
ESLURM_INVALID_NODE_NAME
;
free
(
this_node_name
);
break
;
}
base_state
=
node_ptr
->
node_state
&
(
~
NODE_STATE_NO_RESPOND
);
no_resp_flag
=
node_ptr
->
node_state
&
NODE_STATE_NO_RESPOND
;
if
((
base_state
==
NODE_STATE_DRAINED
)
||
(
base_state
==
NODE_STATE_DRAINING
))
{
/* state already changed, nothing to do */
free
(
this_node_name
);
continue
;
}
if
((
node_ptr
->
run_job_cnt
+
node_ptr
->
comp_job_cnt
)
==
0
)
state_val
=
NODE_STATE_DRAINED
;
else
state_val
=
NODE_STATE_DRAINING
;
node_ptr
->
node_state
=
state_val
|
no_resp_flag
;
bit_clear
(
avail_node_bitmap
,
node_inx
);
info
(
"drain_nodes: node %s state set to %s"
,
this_node_name
,
node_state_string
(
state_val
));
xfree
(
node_ptr
->
reason
);
node_ptr
->
reason
=
xstrdup
(
reason
);
free
(
this_node_name
);
}
hostlist_destroy
(
host_list
);
return
error_code
;
}
/* Return true if admin request to change node state from old to new is valid */
static
bool
_valid_node_state_change
(
enum
node_states
old
,
enum
node_states
new
)
{
...
...
This diff is collapsed.
Click to expand it.
src/slurmctld/proc_req.c
+
4
−
7
View file @
142539c5
...
...
@@ -1503,25 +1503,22 @@ static void _slurm_rpc_update_job(slurm_msg_t * msg)
}
/*
* slurm_drain_nodes - process a request to drain a list of nodes
* slurm_drain_nodes - process a request to drain a list of nodes,
* no-op for nodes already drained or draining
* node_list IN - list of nodes to drain
* reason IN - reason to drain the nodes
* RET SLURM_SUCCESS or error code
* NOTE: This is utilized by plugins and not via RPC
*/
extern
int
slurm_drain_nodes
(
char
*
node_list
,
char
*
reason
)
{
int
error_code
;
update_node_msg_t
update_node_msg
;
/* Locks: Write node */
slurmctld_lock_t
node_write_lock
=
{
NO_LOCK
,
NO_LOCK
,
WRITE_LOCK
,
NO_LOCK
};
update_node_msg
.
node_names
=
node_list
;
update_node_msg
.
node_state
=
NODE_STATE_DRAINED
;
update_node_msg
.
reason
=
reason
;
lock_slurmctld
(
node_write_lock
);
error_code
=
update_node
(
&
update_node_msg
);
error_code
=
drain_nodes
(
node_list
,
reason
);
unlock_slurmctld
(
node_write_lock
);
return
error_code
;
...
...
This diff is collapsed.
Click to expand it.
src/slurmctld/slurmctld.h
+
10
−
0
View file @
142539c5
...
...
@@ -431,6 +431,16 @@ extern int delete_partition(delete_part_msg_t *part_desc_ptr);
*/
extern
int
delete_step_record
(
struct
job_record
*
job_ptr
,
uint32_t
step_id
);
/*
 * drain_nodes - drain one or more nodes,
 *	no-op for nodes already drained or draining
 * IN nodes - nodes to drain
 * IN reason - reason to drain the nodes
 * RET SLURM_SUCCESS or error code (ESLURM_INVALID_NODE_NAME when the
 *	node list is empty, unparsable or names an unknown node)
 * global: node_record_table_ptr - pointer to global node table
 * NOTE(review): the one caller shown in this commit, slurm_drain_nodes(),
 *	holds the node write lock around this call - confirm that every
 *	other caller does the same
 */
extern
int
drain_nodes
(
char
*
nodes
,
char
*
reason
);
/* dump_all_job_state - save the state of all jobs to file
* RET 0 or error code */
extern
int
dump_all_job_state
(
void
);
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment