Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Slurm
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
tud-zih-energy
Slurm
Commits
89e4d130
Commit
89e4d130
authored
10 years ago
by
Morris Jette
Browse files
Options
Downloads
Patches
Plain Diff
GRES, add "type" field
This just adds logic so slurmd can read "type" from a gres.conf and ship it to slurmctld.
parent
ea45ffd7
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
doc/man/man5/gres.conf.5
+10
-5
10 additions, 5 deletions
doc/man/man5/gres.conf.5
src/common/gres.c
+155
-65
155 additions, 65 deletions
src/common/gres.c
src/common/gres.h
+4
-1
4 additions, 1 deletion
src/common/gres.h
with
169 additions
and
71 deletions
doc/man/man5/gres.conf.5
+
10
−
5
View file @
89e4d130
...
...
@@ -95,6 +95,11 @@ line should apply to.
The NodeName specification can use a Slurm hostlist specification as shown in
the example below.
.TP
\fBType\fR
An arbitrary string identifying the type of device,
for example, a particular model of GPU.
.SH "EXAMPLES"
.LP
.br
...
...
@@ -106,13 +111,13 @@ the example below.
.br
# Configure support for our four GPUs
.br
Name=gpu File=/dev/nvidia0 CPUs=0,1
Name=gpu
Type=gtx560
File=/dev/nvidia0 CPUs=0,1
.br
Name=gpu File=/dev/nvidia1 CPUs=0,1
Name=gpu
Type=gtx560
File=/dev/nvidia1 CPUs=0,1
.br
Name=gpu File=/dev/nvidia2 CPUs=2,3
Name=gpu
Type=tesla
File=/dev/nvidia2 CPUs=2,3
.br
Name=gpu File=/dev/nvidia3 CPUs=2,3
Name=gpu
Type=tesla
File=/dev/nvidia3 CPUs=2,3
.br
Name=bandwidth Count=20M
...
...
@@ -134,7 +139,7 @@ NodeName=tux[16\-31] Name=gpu File=/dev/nvidia[0\-7]
Copyright (C) 2010 The Regents of the University of California.
Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
.br
Copyright (C) 2010\-201
3
SchedMD LLC.
Copyright (C) 2010\-201
4
SchedMD LLC.
.LP
This file is part of SLURM, a resource management program.
For details, see <http://slurm.schedmd.com/>.
...
...
This diff is collapsed.
Click to expand it.
src/common/gres.c
+
155
−
65
View file @
89e4d130
...
...
@@ -552,6 +552,7 @@ static void _destroy_gres_slurmd_conf(void *x)
xfree
(
p
->
cpus
);
xfree
(
p
->
file
);
/* Only used by slurmd */
xfree
(
p
->
name
);
xfree
(
p
->
type
);
xfree
(
p
);
}
...
...
@@ -566,20 +567,22 @@ static int _log_gres_slurmd_conf(void *x, void *arg)
xassert
(
p
);
if
(
!
gres_debug
)
{
verbose
(
"Gres Name=%s Count=%u"
,
p
->
name
,
p
->
count
);
verbose
(
"Gres Name=%s Type=%s Count=%u"
,
p
->
name
,
p
->
type
,
p
->
count
);
return
0
;
}
if
(
p
->
cpus
)
{
info
(
"Gres Name=%s Count=%u ID=%u File=%s CPUs=%s CpuCnt=%u"
,
p
->
name
,
p
->
count
,
p
->
plugin_id
,
p
->
file
,
p
->
cpus
,
info
(
"Gres Name=%s Type=%s Count=%u ID=%u File=%s CPUs=%s "
"CpuCnt=%u"
,
p
->
name
,
p
->
type
,
p
->
count
,
p
->
plugin_id
,
p
->
file
,
p
->
cpus
,
p
->
cpu_cnt
);
}
else
if
(
p
->
file
)
{
info
(
"Gres Name=%s Count=%u ID=%u File=%s"
,
p
->
name
,
p
->
count
,
p
->
plugin_id
,
p
->
file
);
info
(
"Gres Name=%s
Type=%s
Count=%u ID=%u File=%s"
,
p
->
name
,
p
->
type
,
p
->
count
,
p
->
plugin_id
,
p
->
file
);
}
else
{
info
(
"Gres Name=%s Count=%u ID=%u"
,
p
->
name
,
p
->
count
,
p
->
plugin_id
);
info
(
"Gres Name=%s
Type=%s
Count=%u ID=%u"
,
p
->
name
,
p
->
type
,
p
->
count
,
p
->
plugin_id
);
}
return
0
;
...
...
@@ -650,7 +653,8 @@ static int _parse_gres_config(void **dest, slurm_parser_enum_t type,
{
"Count"
,
S_P_STRING
},
/* Number of Gres available */
{
"CPUs"
,
S_P_STRING
},
/* CPUs to bind to Gres resource */
{
"File"
,
S_P_STRING
},
/* Path to Gres device */
{
"Name"
,
S_P_STRING
},
/* Gres type name */
{
"Name"
,
S_P_STRING
},
/* Gres name */
{
"Type"
,
S_P_STRING
},
/* Gres type (e.g. model name) */
{
NULL
}
};
int
i
;
...
...
@@ -692,6 +696,8 @@ static int _parse_gres_config(void **dest, slurm_parser_enum_t type,
p
->
has_file
=
1
;
}
(
void
)
s_p_get_string
(
&
p
->
type
,
"Type"
,
tbl
);
if
(
s_p_get_string
(
&
tmp_str
,
"Count"
,
tbl
))
{
tmp_long
=
strtol
(
tmp_str
,
&
last
,
10
);
if
((
tmp_long
==
LONG_MIN
)
||
(
tmp_long
==
LONG_MAX
))
{
...
...
@@ -744,7 +750,8 @@ static int _parse_gres_config2(void **dest, slurm_parser_enum_t type,
{
"Count"
,
S_P_STRING
},
/* Number of Gres available */
{
"CPUs"
,
S_P_STRING
},
/* CPUs to bind to Gres resource */
{
"File"
,
S_P_STRING
},
/* Path to Gres device */
{
"Name"
,
S_P_STRING
},
/* Gres type name */
{
"Name"
,
S_P_STRING
},
/* Gres name */
{
"Type"
,
S_P_STRING
},
/* Gres type (e.g. model name) */
{
NULL
}
};
s_p_hashtbl_t
*
tbl
;
...
...
@@ -968,7 +975,7 @@ extern int gres_plugin_node_config_pack(Buf buffer)
{
int
rc
;
uint32_t
magic
=
GRES_MAGIC
;
uint16_t
rec_cnt
=
0
,
version
=
SLURM_PROTOCOL_VERSION
;
uint16_t
rec_cnt
=
0
,
version
=
SLURM_PROTOCOL_VERSION
;
ListIterator
iter
;
gres_slurmd_conf_t
*
gres_slurmd_conf
;
...
...
@@ -990,6 +997,7 @@ extern int gres_plugin_node_config_pack(Buf buffer)
pack32
(
gres_slurmd_conf
->
plugin_id
,
buffer
);
packstr
(
gres_slurmd_conf
->
cpus
,
buffer
);
packstr
(
gres_slurmd_conf
->
name
,
buffer
);
packstr
(
gres_slurmd_conf
->
type
,
buffer
);
}
list_iterator_destroy
(
iter
);
}
...
...
@@ -1009,7 +1017,7 @@ extern int gres_plugin_node_config_unpack(Buf buffer, char* node_name)
uint32_t
count
,
cpu_cnt
,
magic
,
plugin_id
,
utmp32
;
uint16_t
rec_cnt
,
version
;
uint8_t
has_file
;
char
*
tmp_cpus
,
*
tmp_name
;
char
*
tmp_cpus
,
*
tmp_name
,
*
tmp_type
;
gres_slurmd_conf_t
*
p
;
rc
=
gres_plugin_init
();
...
...
@@ -1024,67 +1032,149 @@ extern int gres_plugin_node_config_unpack(Buf buffer, char* node_name)
return
SLURM_SUCCESS
;
slurm_mutex_lock
(
&
gres_context_lock
);
for
(
i
=
0
;
i
<
rec_cnt
;
i
++
)
{
safe_unpack32
(
&
magic
,
buffer
);
if
(
magic
!=
GRES_MAGIC
)
goto
unpack_error
;
if
(
version
>=
SLURM_14_11_PROTOCOL_VERSION
)
{
for
(
i
=
0
;
i
<
rec_cnt
;
i
++
)
{
safe_unpack32
(
&
magic
,
buffer
);
if
(
magic
!=
GRES_MAGIC
)
goto
unpack_error
;
safe_unpack32
(
&
count
,
buffer
);
safe_unpack32
(
&
cpu_cnt
,
buffer
);
safe_unpack8
(
&
has_file
,
buffer
);
safe_unpack32
(
&
plugin_id
,
buffer
);
safe_unpackstr_xmalloc
(
&
tmp_cpus
,
&
utmp32
,
buffer
);
safe_unpackstr_xmalloc
(
&
tmp_name
,
&
utmp32
,
buffer
);
safe_unpack32
(
&
count
,
buffer
);
safe_unpack32
(
&
cpu_cnt
,
buffer
);
safe_unpack8
(
&
has_file
,
buffer
);
safe_unpack32
(
&
plugin_id
,
buffer
);
safe_unpackstr_xmalloc
(
&
tmp_cpus
,
&
utmp32
,
buffer
);
safe_unpackstr_xmalloc
(
&
tmp_name
,
&
utmp32
,
buffer
);
safe_unpackstr_xmalloc
(
&
tmp_type
,
&
utmp32
,
buffer
);
for
(
j
=
0
;
j
<
gres_context_cnt
;
j
++
)
{
if
(
gres_context
[
j
].
plugin_id
!=
plugin_id
)
continue
;
if
(
strcmp
(
gres_context
[
j
].
gres_name
,
tmp_name
))
{
/* Should be caught in gres_plugin_init() */
error
(
"gres_plugin_node_config_unpack: gres/%s"
" duplicate plugin ID with %s, unable "
"to process"
,
tmp_name
,
gres_context
[
j
].
gres_name
);
for
(
j
=
0
;
j
<
gres_context_cnt
;
j
++
)
{
if
(
gres_context
[
j
].
plugin_id
!=
plugin_id
)
continue
;
if
(
strcmp
(
gres_context
[
j
].
gres_name
,
tmp_name
))
{
/* Should have been caught in
* gres_plugin_init() */
error
(
"gres_plugin_node_config_unpack: "
"gres/%s duplicate plugin ID with"
" %s, unable to process"
,
tmp_name
,
gres_context
[
j
].
gres_name
);
continue
;
}
if
(
gres_context
[
j
].
has_file
&&
!
has_file
&&
count
)
{
error
(
"gres_plugin_node_config_unpack: "
"gres/%s lacks File parameter "
"for node %s"
,
tmp_name
,
node_name
);
has_file
=
1
;
}
if
(
has_file
&&
(
count
>
1024
))
{
/* Avoid over-subscribing memory with
* huge bitmaps */
error
(
"gres_plugin_node_config_unpack: "
"gres/%s has File plus very "
"large Count (%u) for node %s, "
"resetting value to 1024"
,
tmp_name
,
count
,
node_name
);
count
=
1024
;
}
if
(
has_file
)
/* Don't clear if already set */
gres_context
[
j
].
has_file
=
has_file
;
break
;
}
if
(
j
>=
gres_context_cnt
)
{
/* GresPlugins is inconsistently configured.
* Not a fatal error. Skip this data. */
error
(
"gres_plugin_node_config_unpack: no "
"plugin configured to unpack data "
"type %s from node %s"
,
tmp_name
,
node_name
);
xfree
(
tmp_cpus
);
xfree
(
tmp_name
);
continue
;
}
if
(
gres_context
[
j
].
has_file
&&
!
has_file
&&
count
)
{
error
(
"gres_plugin_node_config_unpack: gres/%s"
" lacks File parameter for node %s"
,
p
=
xmalloc
(
sizeof
(
gres_slurmd_conf_t
));
p
->
count
=
count
;
p
->
cpu_cnt
=
cpu_cnt
;
p
->
has_file
=
has_file
;
p
->
cpus
=
tmp_cpus
;
tmp_cpus
=
NULL
;
/* Nothing left to xfree */
p
->
name
=
tmp_name
;
/* Preserve for accounting! */
p
->
type
=
tmp_type
;
tmp_type
=
NULL
;
/* Nothing left to xfree */
p
->
plugin_id
=
plugin_id
;
list_append
(
gres_conf_list
,
p
);
}
}
else
{
for
(
i
=
0
;
i
<
rec_cnt
;
i
++
)
{
safe_unpack32
(
&
magic
,
buffer
);
if
(
magic
!=
GRES_MAGIC
)
goto
unpack_error
;
safe_unpack32
(
&
count
,
buffer
);
safe_unpack32
(
&
cpu_cnt
,
buffer
);
safe_unpack8
(
&
has_file
,
buffer
);
safe_unpack32
(
&
plugin_id
,
buffer
);
safe_unpackstr_xmalloc
(
&
tmp_cpus
,
&
utmp32
,
buffer
);
safe_unpackstr_xmalloc
(
&
tmp_name
,
&
utmp32
,
buffer
);
for
(
j
=
0
;
j
<
gres_context_cnt
;
j
++
)
{
if
(
gres_context
[
j
].
plugin_id
!=
plugin_id
)
continue
;
if
(
strcmp
(
gres_context
[
j
].
gres_name
,
tmp_name
))
{
/* Should have been caught in
* gres_plugin_init() */
error
(
"gres_plugin_node_config_unpack: "
"gres/%s duplicate plugin ID with"
" %s, unable to process"
,
tmp_name
,
gres_context
[
j
].
gres_name
);
continue
;
}
if
(
gres_context
[
j
].
has_file
&&
!
has_file
&&
count
)
{
error
(
"gres_plugin_node_config_unpack: "
"gres/%s lacks File parameter "
"for node %s"
,
tmp_name
,
node_name
);
has_file
=
1
;
}
if
(
has_file
&&
(
count
>
1024
))
{
/* Avoid over-subscribing memory with
* huge bitmaps */
error
(
"gres_plugin_node_config_unpack: "
"gres/%s has File plus very "
"large Count (%u) for node %s, "
"resetting value to 1024"
,
tmp_name
,
count
,
node_name
);
count
=
1024
;
}
if
(
has_file
)
/* Don't clear if already set */
gres_context
[
j
].
has_file
=
has_file
;
break
;
}
if
(
j
>=
gres_context_cnt
)
{
/* GresPlugins is inconsistently configured.
* Not a fatal error. Skip this data. */
error
(
"gres_plugin_node_config_unpack: no "
"plugin configured to unpack data "
"type %s from node %s"
,
tmp_name
,
node_name
);
has_file
=
1
;
}
if
(
has_file
&&
(
count
>
1024
))
{
/* Avoid over-subscribing memory with huge
* bitmaps */
error
(
"gres_plugin_node_config_unpack: gres/%s"
" has File plus very large Count (%u) "
"for node %s, resetting value to 1024"
,
tmp_name
,
count
,
node_name
);
count
=
1024
;
xfree
(
tmp_cpus
);
xfree
(
tmp_name
);
continue
;
}
if
(
has_file
)
/* Don't clear if already set */
gres_context
[
j
].
has_file
=
has_file
;
break
;
}
if
(
j
>=
gres_context_cnt
)
{
/* A sign that GresPlugins is inconsistently
* configured. Not a fatal error. Skip this data. */
error
(
"gres_plugin_node_config_unpack: no plugin "
"configured to unpack data type %s from node %s"
,
tmp_name
,
node_name
);
xfree
(
tmp_cpus
);
xfree
(
tmp_name
);
continue
;
p
=
xmalloc
(
sizeof
(
gres_slurmd_conf_t
));
p
->
count
=
count
;
p
->
cpu_cnt
=
cpu_cnt
;
p
->
has_file
=
has_file
;
p
->
cpus
=
tmp_cpus
;
tmp_cpus
=
NULL
;
/* Nothing left to xfree */
p
->
name
=
tmp_name
;
/* Preserve for accounting! */
p
->
plugin_id
=
plugin_id
;
list_append
(
gres_conf_list
,
p
);
}
p
=
xmalloc
(
sizeof
(
gres_slurmd_conf_t
));
p
->
count
=
count
;
p
->
cpu_cnt
=
cpu_cnt
;
p
->
has_file
=
has_file
;
p
->
cpus
=
tmp_cpus
;
tmp_cpus
=
NULL
;
/* Nothing left to xfree */
p
->
name
=
tmp_name
;
/* We need to preserve for accounting! */
p
->
plugin_id
=
plugin_id
;
list_append
(
gres_conf_list
,
p
);
}
slurm_mutex_unlock
(
&
gres_context_lock
);
return
rc
;
...
...
This diff is collapsed.
Click to expand it.
src/common/gres.h
+
4
−
1
View file @
89e4d130
...
...
@@ -66,9 +66,12 @@ typedef struct gres_slurmd_conf {
char
*
file
;
uint8_t
has_file
;
/* non-zero if file is set, flag for RPC */
/* Name of this gres
type
*/
/* Name of this gres */
char
*
name
;
/* Type of this gres (e.g. model name) */
char
*
type
;
/* Gres ID number */
uint32_t
plugin_id
;
}
gres_slurmd_conf_t
;
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment