Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Slurm
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
tud-zih-energy
Slurm
Commits
bbffae1b
Commit
bbffae1b
authored
22 years ago
by
tewk
Browse files
Options
Downloads
Patches
Plain Diff
Implemented partition info with new comm layer
fixed a few bugs with node info and reconfigure
parent
63c47fa4
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/api/partition_info.c
+74
-189
74 additions, 189 deletions
src/api/partition_info.c
src/api/slurm.h
+11
-17
11 additions, 17 deletions
src/api/slurm.h
with
85 additions
and
206 deletions
src/api/partition_info.c
+
74
−
189
View file @
bbffae1b
...
...
@@ -12,16 +12,9 @@
#include
<errno.h>
#include
<stdio.h>
#include
<stdlib.h>
#include
<string.h>
#include
<syslog.h>
#include
<sys/socket.h>
#include
<netinet/in.h>
#include
<arpa/inet.h>
#include
<unistd.h>
#include
<src/api/slurm.h>
#include
<src/common/nodelist.h>
#include
<src/common/pack.h>
#include
<src/common/slurm_protocol_api.h>
#if DEBUG_MODULE
/* main is used here for module testing purposes only */
...
...
@@ -29,11 +22,10 @@ int
main
(
int
argc
,
char
*
argv
[])
{
static
time_t
last_update_time
=
(
time_t
)
NULL
;
int
error_code
,
i
,
j
;
struct
part_buffer
*
part_buffer_ptr
=
NULL
;
struct
part_table
*
part_ptr
=
NULL
;
int
error_code
;
partition_info_msg_t
*
part_info_ptr
=
NULL
;
error_code
=
slurm_load_part
(
last_update_time
,
&
part_buffer_ptr
);
error_code
=
slurm_load_part
ition_info
(
last_update_time
,
&
part_buffer_ptr
);
if
(
error_code
)
{
printf
(
"slurm_load_part error %d
\n
"
,
error_code
);
exit
(
error_code
);
...
...
@@ -41,196 +33,89 @@ main (int argc, char *argv[])
printf
(
"Updated at %lx, record count %d
\n
"
,
part_buffer_ptr
->
last_update
,
part_buffer_ptr
->
part_count
);
part_ptr
=
part_buffer_ptr
->
part_table_ptr
;
for
(
i
=
0
;
i
<
part_buffer_ptr
->
part_count
;
i
++
)
{
printf
(
"PartitionName=%s MaxTime=%u "
,
part_ptr
[
i
].
name
,
part_ptr
[
i
].
max_time
);
printf
(
"MaxNodes=%u TotalNodes=%u "
,
part_ptr
[
i
].
max_nodes
,
part_ptr
[
i
].
total_nodes
);
printf
(
"TotalCPUs=%u Key=%u
\n
"
,
part_ptr
[
i
].
total_cpus
,
part_ptr
[
i
].
key
);
printf
(
" Default=%u "
,
part_ptr
[
i
].
default_part
);
printf
(
"Shared=%u StateUp=%u "
,
part_ptr
[
i
].
shared
,
part_ptr
[
i
].
state_up
);
printf
(
"Nodes=%s AllowGroups=%s
\n
"
,
part_ptr
[
i
].
nodes
,
part_ptr
[
i
].
allow_groups
);
printf
(
" NodeIndecies="
);
for
(
j
=
0
;
part_ptr
[
i
].
node_inx
;
j
++
)
{
if
(
j
>
0
)
printf
(
",%d"
,
part_ptr
[
i
].
node_inx
[
j
]);
else
printf
(
"%d"
,
part_ptr
[
i
].
node_inx
[
j
]);
if
(
part_ptr
[
i
].
node_inx
[
j
]
==
-
1
)
break
;
}
printf
(
"
\n\n
"
);
}
slurm_free_part_info
(
part_buffer_ptr
);
slurm_free_partition_info
(
part_buffer_ptr
);
exit
(
0
);
}
#endif
void
slurm_print_partition_info
(
partition_info_msg_t
*
part_info_ptr
)
{
int
i
;
partition_table_t
*
part_ptr
=
part_info_ptr
->
partition_array
;
/*
* slurm_free_part_info - free the partition information buffer (if allocated)
* NOTE: buffer is loaded by load_part.
*/
void
slurm_free_part_info
(
struct
part_buffer
*
part_buffer_ptr
)
for
(
i
=
0
;
i
<
part_info_ptr
->
record_count
;
i
++
)
{
slurm_print_partition_table
(
&
part_ptr
[
i
]
)
;
}
}
void
slurm_print_partition_table
(
partition_table_t
*
part_ptr
)
{
int
i
;
if
(
part_buffer_ptr
==
NULL
)
return
;
if
(
part_buffer_ptr
->
raw_buffer_ptr
)
free
(
part_buffer_ptr
->
raw_buffer_ptr
);
if
(
part_buffer_ptr
->
part_table_ptr
)
{
for
(
i
=
0
;
i
<
part_buffer_ptr
->
part_count
;
i
++
)
{
if
(
part_buffer_ptr
->
part_table_ptr
[
i
].
node_inx
==
NULL
)
continue
;
free
(
part_buffer_ptr
->
part_table_ptr
[
i
].
node_inx
);
}
free
(
part_buffer_ptr
->
part_table_ptr
);
int
j
;
printf
(
"PartitionName=%s MaxTime=%u "
,
part_ptr
->
name
,
part_ptr
->
max_time
);
printf
(
"MaxNodes=%u TotalNodes=%u "
,
part_ptr
->
max_nodes
,
part_ptr
->
total_nodes
);
printf
(
"TotalCPUs=%u Key=%u
\n
"
,
part_ptr
->
total_cpus
,
part_ptr
->
key
);
printf
(
" Default=%u "
,
part_ptr
->
default_part
);
printf
(
"Shared=%u StateUp=%u "
,
part_ptr
->
shared
,
part_ptr
->
state_up
);
printf
(
"Nodes=%s AllowGroups=%s
\n
"
,
part_ptr
->
nodes
,
part_ptr
->
allow_groups
);
printf
(
" NodeIndecies="
);
for
(
j
=
0
;
part_ptr
->
node_inx
;
j
++
)
{
if
(
j
>
0
)
printf
(
",%d"
,
part_ptr
->
node_inx
[
j
]);
else
printf
(
"%d"
,
part_ptr
->
node_inx
[
j
]);
if
(
part_ptr
->
node_inx
[
j
]
==
-
1
)
break
;
}
printf
(
"
\n\n
"
);
}
/*
* slurm_load_part - load the supplied partition information buffer for use by info
* gathering APIs if partition records have changed since the time specified.
* input: update_time - time of last update
* part_buffer_ptr - place to park part_buffer pointer
* output: part_buffer_ptr - pointer to allocated part_buffer
* returns -1 if no update since update_time,
* 0 if update with no error,
* EINVAL if the buffer (version or otherwise) is invalid,
* ENOMEM if malloc failure
* NOTE: the allocated memory at part_buffer_ptr freed by slurm_free_part_info.
*/
int
slurm_load_part
(
time_t
update_time
,
struct
part_buffer
**
part_buffer_
ptr
)
slurm_load_part
itions
(
time_t
update_time
,
partition_info_msg_t
**
partition_info_msg_p
ptr
)
{
int
buffer_offset
,
buffer_size
,
in_size
,
i
,
sockfd
;
char
request_msg
[
64
],
*
buffer
,
*
node_inx_str
;
void
*
buf_ptr
;
struct
sockaddr_in
serv_addr
;
uint16_t
uint16_tmp
;
uint32_t
uint32_tmp
,
uint32_time
;
struct
part_table
*
part
;
*
part_buffer_ptr
=
NULL
;
if
((
sockfd
=
socket
(
AF_INET
,
SOCK_STREAM
,
0
))
<
0
)
return
EINVAL
;
serv_addr
.
sin_family
=
PF_INET
;
serv_addr
.
sin_addr
.
s_addr
=
inet_addr
(
SLURMCTLD_HOST
);
serv_addr
.
sin_port
=
htons
(
SLURMCTLD_PORT
);
if
(
connect
(
sockfd
,
(
struct
sockaddr
*
)
&
serv_addr
,
sizeof
(
serv_addr
))
<
0
)
{
close
(
sockfd
);
return
EINVAL
;
}
sprintf
(
request_msg
,
"DumpPart LastUpdate=%lu"
,
(
long
)
(
update_time
));
if
(
send
(
sockfd
,
request_msg
,
strlen
(
request_msg
)
+
1
,
0
)
<
strlen
(
request_msg
))
{
close
(
sockfd
);
return
EINVAL
;
}
buffer
=
NULL
;
buffer_offset
=
0
;
buffer_size
=
8
*
1024
;
while
(
1
)
{
buffer
=
realloc
(
buffer
,
buffer_size
);
if
(
buffer
==
NULL
)
{
close
(
sockfd
);
return
ENOMEM
;
}
in_size
=
recv
(
sockfd
,
&
buffer
[
buffer_offset
],
(
buffer_size
-
buffer_offset
),
0
);
if
(
in_size
<=
0
)
{
/* end of input */
in_size
=
0
;
break
;
}
buffer_offset
+=
in_size
;
buffer_size
+=
in_size
;
}
close
(
sockfd
);
buffer_size
=
buffer_offset
+
in_size
;
buffer
=
realloc
(
buffer
,
buffer_size
);
if
(
buffer
==
NULL
)
return
ENOMEM
;
if
(
strcmp
(
buffer
,
"nochange"
)
==
0
)
{
free
(
buffer
);
return
-
1
;
int
msg_size
;
int
rc
;
slurm_fd
sockfd
;
slurm_msg_t
request_msg
;
slurm_msg_t
response_msg
;
last_update_msg_t
last_time_msg
;
return_code_msg_t
*
slurm_rc_msg
;
/* init message connection for message communication with controller */
if
(
(
sockfd
=
slurm_open_controller_conn
(
SLURM_PORT
)
)
==
SLURM_SOCKET_ERROR
)
return
SLURM_SOCKET_ERROR
;
/* send request message */
/* pack32 ( update_time , &buf_ptr , &buffer_size ); */
last_time_msg
.
last_update
=
update_time
;
request_msg
.
msg_type
=
REQUEST_PARTITION_INFO
;
request_msg
.
data
=
&
last_time_msg
;
if
(
(
rc
=
slurm_send_controller_msg
(
sockfd
,
&
request_msg
)
)
==
SLURM_SOCKET_ERROR
)
return
SLURM_SOCKET_ERROR
;
/* receive message */
if
(
(
msg_size
=
slurm_receive_msg
(
sockfd
,
&
response_msg
)
)
==
SLURM_SOCKET_ERROR
)
return
SLURM_SOCKET_ERROR
;
/* shutdown message connection */
if
(
(
rc
=
slurm_shutdown_msg_conn
(
sockfd
)
)
==
SLURM_SOCKET_ERROR
)
return
SLURM_SOCKET_ERROR
;
switch
(
response_msg
.
msg_type
)
{
case
RESPONSE_PARTITION_INFO
:
*
partition_info_msg_pptr
=
(
partition_info_msg_t
*
)
response_msg
.
data
;
break
;
case
RESPONSE_SLURM_RC
:
slurm_rc_msg
=
(
return_code_msg_t
*
)
response_msg
.
data
;
break
;
default:
return
SLURM_UNEXPECTED_MSG_ERROR
;
break
;
}
/* load buffer's header (data structure version and time) */
buf_ptr
=
buffer
;
unpack32
(
&
uint32_tmp
,
&
buf_ptr
,
&
buffer_size
);
if
(
uint32_tmp
!=
PART_STRUCT_VERSION
)
{
free
(
buffer
);
return
EINVAL
;
}
unpack32
(
&
uint32_time
,
&
buf_ptr
,
&
buffer_size
);
/* load individual partition info */
part
=
NULL
;
for
(
i
=
0
;
buffer_size
>
0
;
i
++
)
{
part
=
realloc
(
part
,
sizeof
(
struct
part_table
)
*
(
i
+
1
));
if
(
part
==
NULL
)
{
free
(
buffer
);
return
ENOMEM
;
}
unpackstr_ptr
(
&
part
[
i
].
name
,
&
uint16_tmp
,
&
buf_ptr
,
&
buffer_size
);
if
(
part
[
i
].
name
==
NULL
)
part
[
i
].
name
=
""
;
unpack32
(
&
part
[
i
].
max_time
,
&
buf_ptr
,
&
buffer_size
);
unpack32
(
&
part
[
i
].
max_nodes
,
&
buf_ptr
,
&
buffer_size
);
unpack32
(
&
part
[
i
].
total_nodes
,
&
buf_ptr
,
&
buffer_size
);
unpack32
(
&
part
[
i
].
total_cpus
,
&
buf_ptr
,
&
buffer_size
);
unpack16
(
&
part
[
i
].
default_part
,
&
buf_ptr
,
&
buffer_size
);
unpack16
(
&
part
[
i
].
key
,
&
buf_ptr
,
&
buffer_size
);
unpack16
(
&
part
[
i
].
shared
,
&
buf_ptr
,
&
buffer_size
);
unpack16
(
&
part
[
i
].
state_up
,
&
buf_ptr
,
&
buffer_size
);
unpackstr_ptr
(
&
part
[
i
].
allow_groups
,
&
uint16_tmp
,
&
buf_ptr
,
&
buffer_size
);
if
(
part
[
i
].
allow_groups
==
NULL
)
part
[
i
].
allow_groups
=
""
;
unpackstr_ptr
(
&
part
[
i
].
nodes
,
&
uint16_tmp
,
&
buf_ptr
,
&
buffer_size
);
if
(
part
[
i
].
nodes
==
NULL
)
part
[
i
].
nodes
=
""
;
unpackstr_ptr
(
&
node_inx_str
,
&
uint16_tmp
,
&
buf_ptr
,
&
buffer_size
);
if
(
node_inx_str
==
NULL
)
node_inx_str
=
""
;
part
[
i
].
node_inx
=
bitfmt2int
(
node_inx_str
);
}
*
part_buffer_ptr
=
malloc
(
sizeof
(
struct
part_buffer
));
if
(
*
part_buffer_ptr
==
NULL
)
{
free
(
buffer
);
if
(
part
)
{
int
j
;
for
(
j
=
0
;
j
<
i
;
j
++
)
{
if
(
part
[
j
].
node_inx
)
free
(
part
[
j
].
node_inx
);
}
free
(
part
);
}
return
ENOMEM
;
}
(
*
part_buffer_ptr
)
->
last_update
=
(
time_t
)
uint32_time
;
(
*
part_buffer_ptr
)
->
part_count
=
i
;
(
*
part_buffer_ptr
)
->
raw_buffer_ptr
=
buffer
;
(
*
part_buffer_ptr
)
->
part_table_ptr
=
part
;
return
0
;
return
SLURM_SUCCESS
;
}
This diff is collapsed.
Click to expand it.
src/api/slurm.h
+
11
−
17
View file @
bbffae1b
...
...
@@ -71,20 +71,12 @@ struct job_buffer {
struct
job_table
*
job_table_ptr
;
};
struct
part_table
{
char
*
name
;
/* name of the partition */
uint32_t
max_time
;
/* minutes or INFINITE */
uint32_t
max_nodes
;
/* per job or INFINITE */
uint32_t
total_nodes
;
/* total number of nodes in the partition */
uint32_t
total_cpus
;
/* total number of cpus in the partition */
uint16_t
default_part
;
/* 1 if this is default partition */
uint16_t
key
;
/* 1 if slurm distributed key is required for use */
uint16_t
shared
;
/* 1 if job can share nodes, 2 if job must share nodes */
uint16_t
state_up
;
/* 1 if state is up, 0 if down */
char
*
nodes
;
/* comma delimited list names of nodes in partition */
int
*
node_inx
;
/* list index pairs into node_table:
start_range_1, end_range_1, start_range_2, .., -1 */
char
*
allow_groups
;
/* comma delimited list of groups, null indicates all */
struct
node_buffer
{
time_t
last_update
;
/* time of last buffer update */
uint32_t
node_count
;
/* count of entries in node_table */
void
*
raw_buffer_ptr
;
/* raw network buffer info */
struct
node_table
*
node_table_ptr
;
};
struct
part_buffer
{
...
...
@@ -172,7 +164,9 @@ extern void slurm_print_node_table (node_table_t * node_ptr );
* slurm_free_part_info - free the partition information buffer (if allocated)
* NOTE: buffer is loaded by load_part.
*/
extern
void
slurm_free_part_info
(
struct
part_buffer
*
part_buffer_ptr
);
extern
void
slurm_free_partition_info
(
partition_info_msg_t
*
part_info_ptr
);
extern
void
slurm_print_partition_info
(
partition_info_msg_t
*
part_info_ptr
)
;
extern
void
slurm_print_partition_table
(
partition_table_t
*
part_ptr
)
;
/*
* slurm_load_build - load the slurm build information buffer for use by info
...
...
@@ -208,7 +202,7 @@ extern int slurm_load_node (time_t update_time, node_info_msg_t **node_info_msg_
* ENOMEM if malloc failure
* NOTE: the allocated memory at part_buffer_ptr freed by slurm_free_part_info.
*/
extern
int
slurm_load_part
(
time_t
update_time
,
struct
part_buffer
**
part_buffer_ptr
);
extern
int
slurm_load_part
itions
(
time_t
update_time
,
partition_info_msg_t
**
part_buffer_ptr
);
/*
* slurm_submit - submit/queue a job with supplied constraints.
...
...
@@ -263,7 +257,7 @@ extern int parse_node_name (char *node_name, char **format, int *start_inx,
* reconfigure - request that slurmctld re-read the configuration files
* output: returns 0 on success, errno otherwise
*/
extern
int
reconfigure
();
extern
int
slurm_
reconfigure
();
/*
* update_config - request that slurmctld update its configuration per request
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment