Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Slurm
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
tud-zih-energy
Slurm
Commits
2f0edf01
Commit
2f0edf01
authored
14 years ago
by
Moe Jette
Browse files
Options
Downloads
Patches
Plain Diff
fix some topology related problems for gres job_test logic
parent
71e043c9
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/common/gres.c
+56
-9
56 additions, 9 deletions
src/common/gres.c
src/common/gres.h
+0
-1
0 additions, 1 deletion
src/common/gres.h
with
56 additions
and
10 deletions
src/common/gres.c
+
56
−
9
View file @
2f0edf01
...
...
@@ -136,9 +136,9 @@ static int _job_state_unpack(void **gres_data, Buf buffer,
char
*
gres_name
);
static
int
_job_state_validate
(
char
*
config
,
void
**
gres_data
,
slurm_gres_context_t
*
gres_name
);
static
uint32_t
_job_test
(
void
*
job_gres_data
,
void
*
node_gres_data
,
extern
uint32_t
_job_test
(
void
*
job_gres_data
,
void
*
node_gres_data
,
bool
use_total_gres
,
bitstr_t
*
cpu_bitmap
,
int
cpu_start_bit
,
int
cpu_end_bit
);
int
cpu_start_bit
,
int
cpu_end_bit
,
bool
*
topo_set
);
static
int
_load_gres_plugin
(
char
*
plugin_name
,
slurm_gres_context_t
*
plugin_context
);
static
int
_log_gres_slurmd_conf
(
void
*
x
,
void
*
arg
);
...
...
@@ -741,8 +741,8 @@ extern int gres_plugin_node_config_unpack(Buf buffer, char* node_name)
p
->
count
=
count
;
p
->
cpu_cnt
=
cpu_cnt
;
p
->
cpus
=
tmp_cpus
;
p
->
plugin_id
=
plugin_id
;
tmp_cpus
=
NULL
;
/* Nothing left to xfree */
p
->
plugin_id
=
plugin_id
;
list_append
(
gres_conf_list
,
p
);
}
for
(
j
=
0
;
j
<
gres_context_cnt
;
j
++
)
{
...
...
@@ -1086,6 +1086,18 @@ extern int _node_config_validate(char *node_name, char *orig_config,
context_ptr
->
gres_type
);
}
rc
=
EINVAL
;
}
else
if
((
fast_schedule
==
2
)
&&
gres_data
->
topo_cnt
&&
(
gres_data
->
gres_cnt_found
!=
gres_data
->
gres_cnt_config
))
{
error
(
"%s on node %s configured for %u resources but %u found,"
" ignoring topology support"
,
context_ptr
->
gres_type
,
node_name
,
gres_data
->
gres_cnt_config
,
gres_data
->
gres_cnt_found
);
if
(
gres_data
->
cpus_bitmap
)
{
for
(
i
=
0
;
i
<
gres_data
->
topo_cnt
;
i
++
)
FREE_NULL_BITMAP
(
gres_data
->
cpus_bitmap
[
i
]);
xfree
(
gres_data
->
cpus_bitmap
);
}
gres_data
->
topo_cnt
=
0
;
}
else
if
((
fast_schedule
==
0
)
&&
(
gres_data
->
gres_cnt_found
>
gres_data
->
gres_cnt_config
))
{
/* need to rebuild new_config */
...
...
@@ -1276,7 +1288,7 @@ static int _node_state_unpack(void **gres_data, Buf buffer)
unpack_bit_str
(
&
gres_ptr
->
gres_bit_alloc
,
buffer
);
if
(
gres_ptr
->
gres_bit_alloc
==
NULL
)
goto
unpack_error
;
if
(
gres_ptr
->
gres_cnt_avail
!=
if
(
gres_ptr
->
gres_cnt_avail
!=
bit_size
(
gres_ptr
->
gres_bit_alloc
))
{
gres_ptr
->
gres_bit_alloc
=
bit_realloc
(
gres_ptr
->
gres_bit_alloc
,
...
...
@@ -2240,17 +2252,49 @@ static void _validate_gres_node_cpus(gres_node_state_t *node_gres_ptr,
}
}
static
uint32_t
_job_test
(
void
*
job_gres_data
,
void
*
node_gres_data
,
extern
uint32_t
_job_test
(
void
*
job_gres_data
,
void
*
node_gres_data
,
bool
use_total_gres
,
bitstr_t
*
cpu_bitmap
,
int
cpu_start_bit
,
int
cpu_end_bit
)
int
cpu_start_bit
,
int
cpu_end_bit
,
bool
*
topo_set
)
{
int
i
,
j
,
cpus_ctld
,
gres_avail
,
top_inx
;
int
i
,
j
,
cpus_ctld
,
gres_avail
=
0
,
top_inx
;
gres_job_state_t
*
job_gres_ptr
=
(
gres_job_state_t
*
)
job_gres_data
;
gres_node_state_t
*
node_gres_ptr
=
(
gres_node_state_t
*
)
node_gres_data
;
uint32_t
*
cpus_avail
=
NULL
,
cpu_cnt
=
0
;
bitstr_t
*
alloc_cpu_bitmap
=
NULL
;
if
(
job_gres_ptr
->
gres_cnt_alloc
&&
node_gres_ptr
->
topo_cnt
)
{
if
(
job_gres_ptr
->
gres_cnt_alloc
&&
node_gres_ptr
->
topo_cnt
&&
*
topo_set
)
{
/* Need to determine how many gres available for these
* specific CPUs */
if
(
cpu_bitmap
)
{
cpus_ctld
=
cpu_end_bit
-
cpu_start_bit
+
1
;
if
(
cpus_ctld
<
1
)
{
error
(
"gres_plugin_job_test: cpus on node < 1"
);
return
(
uint32_t
)
0
;
}
_validate_gres_node_cpus
(
node_gres_ptr
,
cpus_ctld
);
}
else
{
cpus_ctld
=
bit_size
(
node_gres_ptr
->
cpus_bitmap
[
0
]);
}
for
(
i
=
0
;
i
<
node_gres_ptr
->
topo_cnt
;
i
++
)
{
if
(
!
use_total_gres
&&
bit_test
(
node_gres_ptr
->
gres_bit_alloc
,
i
))
{
continue
;
/* gres already allocated */
}
for
(
j
=
0
;
j
<
cpus_ctld
;
j
++
)
{
if
(
cpu_bitmap
&&
!
bit_test
(
cpu_bitmap
,
cpu_start_bit
+
j
))
continue
;
if
(
!
bit_test
(
node_gres_ptr
->
cpus_bitmap
[
i
],
j
))
continue
;
/* not avail for this gres */
gres_avail
++
;
break
;
}
}
if
(
job_gres_ptr
->
gres_cnt_alloc
>
gres_avail
)
return
(
uint32_t
)
0
;
/* insufficient, gres to use */
return
NO_VAL
;
}
else
if
(
job_gres_ptr
->
gres_cnt_alloc
&&
node_gres_ptr
->
topo_cnt
)
{
/* Need to determine which specific CPUs can be used */
if
(
cpu_bitmap
)
{
cpus_ctld
=
cpu_end_bit
-
cpu_start_bit
+
1
;
...
...
@@ -2304,6 +2348,7 @@ static uint32_t _job_test(void *job_gres_data, void *node_gres_data,
node_gres_ptr
->
cpus_bitmap
[
top_inx
]);
}
if
(
cpu_bitmap
&&
(
cpu_cnt
>
0
))
{
*
topo_set
=
true
;
for
(
i
=
0
;
i
<
cpus_ctld
;
i
++
)
{
if
(
!
bit_test
(
alloc_cpu_bitmap
,
i
))
bit_clear
(
cpu_bitmap
,
cpu_start_bit
+
i
);
...
...
@@ -2342,6 +2387,7 @@ extern uint32_t gres_plugin_job_test(List job_gres_list, List node_gres_list,
uint32_t
cpu_cnt
,
tmp_cnt
;
ListIterator
job_gres_iter
,
node_gres_iter
;
gres_state_t
*
job_gres_ptr
,
*
node_gres_ptr
;
bool
topo_set
=
false
;
if
(
job_gres_list
==
NULL
)
return
NO_VAL
;
...
...
@@ -2374,7 +2420,8 @@ extern uint32_t gres_plugin_job_test(List job_gres_list, List node_gres_list,
tmp_cnt
=
_job_test
(
job_gres_ptr
->
gres_data
,
node_gres_ptr
->
gres_data
,
use_total_gres
,
cpu_bitmap
,
cpu_start_bit
,
cpu_end_bit
);
cpu_start_bit
,
cpu_end_bit
,
&
topo_set
);
cpu_cnt
=
MIN
(
tmp_cnt
,
cpu_cnt
);
break
;
}
...
...
This diff is collapsed.
Click to expand it.
src/common/gres.h
+
0
−
1
View file @
2f0edf01
...
...
@@ -83,7 +83,6 @@ typedef struct gres_node_state {
/* Topology specific information (if gres.conf contains CPUs spec) */
uint16_t
topo_cnt
;
bitstr_t
**
cpus_bitmap
;
bitstr_t
**
gres_block_bitmap
;
}
gres_node_state_t
;
/* Gres job state as used by slurmctld daemon */
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment