Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Slurm
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
tud-zih-energy
Slurm
Commits
da29f2dc
Commit
da29f2dc
authored
15 years ago
by
Moe Jette
Browse files
Options
Downloads
Patches
Plain Diff
Fix logic so that we clear the POWER_SAVE flag on nodes on reconfig, as appropriate
parent
ee0214d6
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/slurmctld/power_save.c
+21
-16
21 additions, 16 deletions
src/slurmctld/power_save.c
src/slurmctld/read_config.c
+31
-8
31 additions, 8 deletions
src/slurmctld/read_config.c
with
52 additions
and
24 deletions
src/slurmctld/power_save.c
+
21
−
16
View file @
da29f2dc
...
@@ -345,36 +345,38 @@ static int _init_power_config(void)
...
@@ -345,36 +345,38 @@ static int _init_power_config(void)
slurm_conf_unlock
();
slurm_conf_unlock
();
if
(
idle_time
<
0
)
{
/* not an error */
if
(
idle_time
<
0
)
{
/* not an error */
debug
(
"power_save module disabled,
idle_t
ime < 0"
);
debug
(
"power_save module disabled,
SuspendT
ime < 0"
);
return
-
1
;
return
-
1
;
}
}
if
(
suspend_rate
<
1
)
{
if
(
suspend_rate
<
1
)
{
error
(
"power_save module disabled,
s
uspend
_r
ate < 1"
);
error
(
"power_save module disabled,
S
uspend
R
ate < 1"
);
return
-
1
;
return
-
1
;
}
}
if
(
resume_rate
<
1
)
{
if
(
resume_rate
<
1
)
{
error
(
"power_save module disabled,
r
esume
_r
ate < 1"
);
error
(
"power_save module disabled,
R
esume
R
ate < 1"
);
return
-
1
;
return
-
1
;
}
}
if
(
suspend_prog
==
NULL
)
if
(
suspend_prog
==
NULL
)
{
info
(
"WARNING: power_save module has NULL suspend program"
);
error
(
"power_save module disabled, NULL SuspendProgram"
);
else
if
(
!
_valid_prog
(
suspend_prog
))
{
return
-
1
;
error
(
"power_save module disabled, invalid suspend program %s"
,
}
else
if
(
!
_valid_prog
(
suspend_prog
))
{
error
(
"power_save module disabled, invalid SuspendProgram %s"
,
suspend_prog
);
suspend_prog
);
return
-
1
;
return
-
1
;
}
}
if
(
resume_prog
==
NULL
)
if
(
resume_prog
==
NULL
)
{
info
(
"WARNING: power_save module has NULL resume program"
);
error
(
"power_save module disabled, NULL ResumeProgram"
);
else
if
(
!
_valid_prog
(
resume_prog
))
{
return
-
1
;
error
(
"power_save module disabled, invalid resume program %s"
,
}
else
if
(
!
_valid_prog
(
resume_prog
))
{
error
(
"power_save module disabled, invalid ResumeProgram %s"
,
resume_prog
);
resume_prog
);
return
-
1
;
return
-
1
;
}
}
if
(
exc_nodes
if
(
exc_nodes
&&
&&
(
node_name2bitmap
(
exc_nodes
,
false
,
&
exc_node_bitmap
)))
{
(
node_name2bitmap
(
exc_nodes
,
false
,
&
exc_node_bitmap
)))
{
error
(
"power_save module disabled, "
error
(
"power_save module disabled, "
"invalid
excluded n
odes %s"
,
exc_nodes
);
"invalid
SuspendExcN
odes %s"
,
exc_nodes
);
return
-
1
;
return
-
1
;
}
}
...
@@ -389,7 +391,7 @@ static int _init_power_config(void)
...
@@ -389,7 +391,7 @@ static int _init_power_config(void)
part_ptr
=
find_part_record
(
one_part
);
part_ptr
=
find_part_record
(
one_part
);
if
(
!
part_ptr
)
{
if
(
!
part_ptr
)
{
error
(
"power_save module disabled, "
error
(
"power_save module disabled, "
"invalid
excluded partition
%s"
,
"invalid
SuspendExcPart
%s"
,
one_part
);
one_part
);
rc
=
-
1
;
rc
=
-
1
;
break
;
break
;
...
@@ -468,8 +470,11 @@ extern void *init_power_save(void *arg)
...
@@ -468,8 +470,11 @@ extern void *init_power_save(void *arg)
}
}
if
((
last_config
!=
slurmctld_conf
.
last_update
)
&&
if
((
last_config
!=
slurmctld_conf
.
last_update
)
&&
(
_init_power_config
()))
(
_init_power_config
()))
{
info
(
"power_save mode has been disabled due to "
"configuration changes"
);
goto
fini
;
goto
fini
;
}
/* Only run every 60 seconds or after
/* Only run every 60 seconds or after
* a node state change, whichever
* a node state change, whichever
...
...
This diff is collapsed.
Click to expand it.
src/slurmctld/read_config.c
+
31
−
8
View file @
da29f2dc
...
@@ -17,7 +17,7 @@
...
@@ -17,7 +17,7 @@
* any later version.
* any later version.
*
*
* In addition, as a special exception, the copyright holders give permission
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* General Public License in all respects for all of the code used other than
...
@@ -840,10 +840,10 @@ int read_slurm_conf(int recover)
...
@@ -840,10 +840,10 @@ int read_slurm_conf(int recover)
_build_bitmaps_pre_select
();
_build_bitmaps_pre_select
();
if
((
select_g_node_init
(
node_record_table_ptr
,
node_record_count
)
if
((
select_g_node_init
(
node_record_table_ptr
,
node_record_count
)
!=
SLURM_SUCCESS
)
!=
SLURM_SUCCESS
)
||
||
(
select_g_block_init
(
part_list
)
!=
SLURM_SUCCESS
)
(
select_g_block_init
(
part_list
)
!=
SLURM_SUCCESS
)
||
||
(
select_g_state_restore
(
state_save_dir
)
!=
SLURM_SUCCESS
)
(
select_g_state_restore
(
state_save_dir
)
!=
SLURM_SUCCESS
)
||
||
(
select_g_job_init
(
job_list
)
!=
SLURM_SUCCESS
))
{
(
select_g_job_init
(
job_list
)
!=
SLURM_SUCCESS
))
{
fatal
(
"failed to initialize node selection plugin state, "
fatal
(
"failed to initialize node selection plugin state, "
"Clean start required."
);
"Clean start required."
);
}
}
...
@@ -909,10 +909,17 @@ int read_slurm_conf(int recover)
...
@@ -909,10 +909,17 @@ int read_slurm_conf(int recover)
/* Restore node state and size information from saved records.
/* Restore node state and size information from saved records.
* If a node was re-configured to be down or drained, we set those states */
* If a node was re-configured to be down or drained, we set those states */
static
int
_restore_node_state
(
struct
node_record
*
old_node_table_ptr
,
static
int
_restore_node_state
(
struct
node_record
*
old_node_table_ptr
,
int
old_node_record_count
)
int
old_node_record_count
)
{
{
struct
node_record
*
node_ptr
;
struct
node_record
*
node_ptr
;
int
i
,
rc
=
SLURM_SUCCESS
;
int
i
,
rc
=
SLURM_SUCCESS
;
hostset_t
hs
=
NULL
;
slurm_ctl_conf_t
*
conf
=
slurm_conf_lock
();
bool
power_save_mode
=
false
;
if
(
conf
->
suspend_program
&&
conf
->
resume_program
)
power_save_mode
=
true
;
slurm_conf_unlock
();
for
(
i
=
0
;
i
<
old_node_record_count
;
i
++
)
{
for
(
i
=
0
;
i
<
old_node_record_count
;
i
++
)
{
uint16_t
drain_flag
=
false
,
down_flag
=
false
;
uint16_t
drain_flag
=
false
,
down_flag
=
false
;
...
@@ -920,7 +927,8 @@ static int _restore_node_state(struct node_record *old_node_table_ptr,
...
@@ -920,7 +927,8 @@ static int _restore_node_state(struct node_record *old_node_table_ptr,
if
(
node_ptr
==
NULL
)
if
(
node_ptr
==
NULL
)
continue
;
continue
;
if
((
node_ptr
->
node_state
&
NODE_STATE_BASE
)
==
NODE_STATE_DOWN
)
if
((
node_ptr
->
node_state
&
NODE_STATE_BASE
)
==
NODE_STATE_DOWN
)
down_flag
=
true
;
down_flag
=
true
;
if
(
node_ptr
->
node_state
&
NODE_STATE_DRAIN
)
if
(
node_ptr
->
node_state
&
NODE_STATE_DRAIN
)
drain_flag
=
true
;
drain_flag
=
true
;
...
@@ -931,7 +939,15 @@ static int _restore_node_state(struct node_record *old_node_table_ptr,
...
@@ -931,7 +939,15 @@ static int _restore_node_state(struct node_record *old_node_table_ptr,
}
}
if
(
drain_flag
)
if
(
drain_flag
)
node_ptr
->
node_state
|=
NODE_STATE_DRAIN
;
node_ptr
->
node_state
|=
NODE_STATE_DRAIN
;
if
((
node_ptr
->
node_state
&
NODE_STATE_POWER_SAVE
)
&&
(
!
power_save_mode
))
{
node_ptr
->
node_state
&=
(
~
NODE_STATE_POWER_SAVE
);
if
(
hs
)
hostset_insert
(
hs
,
node_ptr
->
name
);
else
hs
=
hostset_create
(
node_ptr
->
name
);
}
node_ptr
->
last_response
=
old_node_table_ptr
[
i
].
last_response
;
node_ptr
->
last_response
=
old_node_table_ptr
[
i
].
last_response
;
if
(
old_node_table_ptr
[
i
].
port
!=
node_ptr
->
config_ptr
->
cpus
)
{
if
(
old_node_table_ptr
[
i
].
port
!=
node_ptr
->
config_ptr
->
cpus
)
{
rc
=
ESLURM_NEED_RESTART
;
rc
=
ESLURM_NEED_RESTART
;
...
@@ -966,6 +982,13 @@ static int _restore_node_state(struct node_record *old_node_table_ptr,
...
@@ -966,6 +982,13 @@ static int _restore_node_state(struct node_record *old_node_table_ptr,
old_node_table_ptr
[
i
].
os
=
NULL
;
old_node_table_ptr
[
i
].
os
=
NULL
;
}
}
}
}
if
(
hs
)
{
char
node_names
[
128
];
hostset_ranged_string
(
hs
,
sizeof
(
node_names
),
node_names
);
info
(
"Cleared POWER_SAVE flag from nodes %s"
,
node_names
);
hostset_destroy
(
hs
);
}
return
rc
;
return
rc
;
}
}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment