Skip to content
Snippets Groups Projects
Commit bbffae1b authored by tewk's avatar tewk
Browse files

Implemented partition info with new comm layer

fixed a few bugs with node info and reconfigure
parent 63c47fa4
No related branches found
No related tags found
No related merge requests found
......@@ -12,16 +12,9 @@
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <src/api/slurm.h>
#include <src/common/nodelist.h>
#include <src/common/pack.h>
#include <src/common/slurm_protocol_api.h>
#if DEBUG_MODULE
/* main is used here for module testing purposes only */
......@@ -29,11 +22,10 @@ int
main (int argc, char *argv[])
{
static time_t last_update_time = (time_t) NULL;
int error_code, i, j;
struct part_buffer *part_buffer_ptr = NULL;
struct part_table *part_ptr = NULL;
int error_code ;
partition_info_msg_t * part_info_ptr = NULL;
error_code = slurm_load_part (last_update_time, &part_buffer_ptr);
error_code = slurm_load_partition_info (last_update_time, &part_buffer_ptr);
if (error_code) {
printf ("slurm_load_part error %d\n", error_code);
exit (error_code);
......@@ -41,196 +33,89 @@ main (int argc, char *argv[])
printf("Updated at %lx, record count %d\n",
part_buffer_ptr->last_update, part_buffer_ptr->part_count);
part_ptr = part_buffer_ptr->part_table_ptr;
for (i = 0; i < part_buffer_ptr->part_count; i++) {
printf ("PartitionName=%s MaxTime=%u ",
part_ptr[i].name, part_ptr[i].max_time);
printf ("MaxNodes=%u TotalNodes=%u ",
part_ptr[i].max_nodes, part_ptr[i].total_nodes);
printf ("TotalCPUs=%u Key=%u\n",
part_ptr[i].total_cpus, part_ptr[i].key);
printf (" Default=%u ",
part_ptr[i].default_part);
printf ("Shared=%u StateUp=%u ",
part_ptr[i].shared, part_ptr[i].state_up);
printf ("Nodes=%s AllowGroups=%s\n",
part_ptr[i].nodes, part_ptr[i].allow_groups);
printf (" NodeIndecies=");
for (j = 0; part_ptr[i].node_inx; j++) {
if (j > 0)
printf(",%d", part_ptr[i].node_inx[j]);
else
printf("%d", part_ptr[i].node_inx[j]);
if (part_ptr[i].node_inx[j] == -1)
break;
}
printf("\n\n");
}
slurm_free_part_info (part_buffer_ptr);
slurm_free_partition_info (part_buffer_ptr);
exit (0);
}
#endif
/*
 * slurm_print_partition_info - print every partition record held in a
 *	partition information message
 * input: part_info_ptr - message whose partition_array holds record_count
 *	entries; a NULL pointer is ignored
 * output: one slurm_print_partition_table report per record, written with
 *	printf
 */
void slurm_print_partition_info ( partition_info_msg_t * part_info_ptr )
{
	int i ;
	partition_table_t * part_ptr ;

	/* the loader can succeed without filling in the caller's pointer,
	 * so tolerate a message that was never set */
	if ( part_info_ptr == NULL )
		return ;

	part_ptr = part_info_ptr->partition_array ;
	for ( i = 0 ; i < part_info_ptr->record_count ; i++ )
		slurm_print_partition_table ( & part_ptr[i] ) ;
}
/*
 * slurm_print_partition_table - write one partition record with printf
 * input: part_ptr - the partition record to report
 * NOTE: node_inx is walked up to and including its -1 terminator, so the
 *	terminator itself is printed as the last value of the index list;
 *	a NULL node_inx prints an empty list
 */
void slurm_print_partition_table ( partition_table_t * part_ptr )
{
	int pos = 0 ;

	/* scalar limits and totals */
	printf ("PartitionName=%s MaxTime=%u ",
		part_ptr->name, part_ptr->max_time);
	printf ("MaxNodes=%u TotalNodes=%u ",
		part_ptr->max_nodes, part_ptr->total_nodes);
	printf ("TotalCPUs=%u Key=%u\n",
		part_ptr->total_cpus, part_ptr->key);

	/* flags, node names and group list */
	printf (" Default=%u ",
		part_ptr->default_part);
	printf ("Shared=%u StateUp=%u ",
		part_ptr->shared, part_ptr->state_up);
	printf ("Nodes=%s AllowGroups=%s\n",
		part_ptr->nodes, part_ptr->allow_groups);

	/* comma separated index list, first entry without a leading comma */
	printf (" NodeIndecies=");
	if ( part_ptr->node_inx != NULL ) {
		do {
			if ( pos == 0 )
				printf ("%d", part_ptr->node_inx[pos]);
			else
				printf (",%d", part_ptr->node_inx[pos]);
		} while ( part_ptr->node_inx[pos++] != -1 ) ;
	}
	printf ("\n\n");
}
/*
* slurm_load_part - load the supplied partition information buffer for use by info
* gathering APIs if partition records have changed since the time specified.
* input: update_time - time of last update
* part_buffer_ptr - place to park part_buffer pointer
* output: part_buffer_ptr - pointer to allocated part_buffer
* returns -1 if no update since update_time,
* 0 if update with no error,
* EINVAL if the buffer (version or otherwise) is invalid,
* ENOMEM if malloc failure
* NOTE: the allocated memory at part_buffer_ptr freed by slurm_free_part_info.
*/
int
slurm_load_part (time_t update_time, struct part_buffer **part_buffer_ptr)
slurm_load_partitions (time_t update_time, partition_info_msg_t **partition_info_msg_pptr)
{
int buffer_offset, buffer_size, in_size, i, sockfd;
char request_msg[64], *buffer, *node_inx_str;
void *buf_ptr;
struct sockaddr_in serv_addr;
uint16_t uint16_tmp;
uint32_t uint32_tmp, uint32_time;
struct part_table *part;
*part_buffer_ptr = NULL;
if ((sockfd = socket (AF_INET, SOCK_STREAM, 0)) < 0)
return EINVAL;
serv_addr.sin_family = PF_INET;
serv_addr.sin_addr.s_addr = inet_addr (SLURMCTLD_HOST);
serv_addr.sin_port = htons (SLURMCTLD_PORT);
if (connect
(sockfd, (struct sockaddr *) &serv_addr,
sizeof (serv_addr)) < 0) {
close (sockfd);
return EINVAL;
}
sprintf (request_msg, "DumpPart LastUpdate=%lu",
(long) (update_time));
if (send (sockfd, request_msg, strlen (request_msg) + 1, 0) <
strlen (request_msg)) {
close (sockfd);
return EINVAL;
}
buffer = NULL;
buffer_offset = 0;
buffer_size = 8 * 1024;
while (1) {
buffer = realloc (buffer, buffer_size);
if (buffer == NULL) {
close (sockfd);
return ENOMEM;
}
in_size =
recv (sockfd, &buffer[buffer_offset],
(buffer_size - buffer_offset), 0);
if (in_size <= 0) { /* end of input */
in_size = 0;
break;
}
buffer_offset += in_size;
buffer_size += in_size;
}
close (sockfd);
buffer_size = buffer_offset + in_size;
buffer = realloc (buffer, buffer_size);
if (buffer == NULL)
return ENOMEM;
if (strcmp (buffer, "nochange") == 0) {
free (buffer);
return -1;
int msg_size ;
int rc ;
slurm_fd sockfd ;
slurm_msg_t request_msg ;
slurm_msg_t response_msg ;
last_update_msg_t last_time_msg ;
return_code_msg_t * slurm_rc_msg ;
/* init message connection for message communication with controller */
if ( ( sockfd = slurm_open_controller_conn ( SLURM_PORT ) ) == SLURM_SOCKET_ERROR )
return SLURM_SOCKET_ERROR ;
/* send request message */
/* pack32 ( update_time , &buf_ptr , &buffer_size ); */
last_time_msg . last_update = update_time ;
request_msg . msg_type = REQUEST_PARTITION_INFO ;
request_msg . data = &last_time_msg ;
if ( ( rc = slurm_send_controller_msg ( sockfd , & request_msg ) ) == SLURM_SOCKET_ERROR )
return SLURM_SOCKET_ERROR ;
/* receive message */
if ( ( msg_size = slurm_receive_msg ( sockfd , & response_msg ) ) == SLURM_SOCKET_ERROR )
return SLURM_SOCKET_ERROR ;
/* shutdown message connection */
if ( ( rc = slurm_shutdown_msg_conn ( sockfd ) ) == SLURM_SOCKET_ERROR )
return SLURM_SOCKET_ERROR ;
switch ( response_msg . msg_type )
{
case RESPONSE_PARTITION_INFO:
*partition_info_msg_pptr = ( partition_info_msg_t * ) response_msg . data ;
break ;
case RESPONSE_SLURM_RC:
slurm_rc_msg = ( return_code_msg_t * ) response_msg . data ;
break ;
default:
return SLURM_UNEXPECTED_MSG_ERROR ;
break ;
}
/* load buffer's header (data structure version and time) */
buf_ptr = buffer;
unpack32 (&uint32_tmp, &buf_ptr, &buffer_size);
if (uint32_tmp != PART_STRUCT_VERSION) {
free (buffer);
return EINVAL;
}
unpack32 (&uint32_time, &buf_ptr, &buffer_size);
/* load individual partition info */
part = NULL;
for (i = 0; buffer_size > 0; i++) {
part = realloc (part, sizeof(struct part_table) * (i+1));
if (part == NULL) {
free (buffer);
return ENOMEM;
}
unpackstr_ptr (&part[i].name, &uint16_tmp,
&buf_ptr, &buffer_size);
if (part[i].name == NULL)
part[i].name = "";
unpack32 (&part[i].max_time, &buf_ptr, &buffer_size);
unpack32 (&part[i].max_nodes, &buf_ptr, &buffer_size);
unpack32 (&part[i].total_nodes, &buf_ptr, &buffer_size);
unpack32 (&part[i].total_cpus, &buf_ptr, &buffer_size);
unpack16 (&part[i].default_part, &buf_ptr, &buffer_size);
unpack16 (&part[i].key, &buf_ptr, &buffer_size);
unpack16 (&part[i].shared, &buf_ptr, &buffer_size);
unpack16 (&part[i].state_up, &buf_ptr, &buffer_size);
unpackstr_ptr (&part[i].allow_groups, &uint16_tmp,
&buf_ptr, &buffer_size);
if (part[i].allow_groups == NULL)
part[i].allow_groups = "";
unpackstr_ptr (&part[i].nodes, &uint16_tmp,
&buf_ptr, &buffer_size);
if (part[i].nodes == NULL)
part[i].nodes = "";
unpackstr_ptr (&node_inx_str, &uint16_tmp,
&buf_ptr, &buffer_size);
if (node_inx_str == NULL)
node_inx_str = "";
part[i].node_inx = bitfmt2int(node_inx_str);
}
*part_buffer_ptr = malloc (sizeof (struct part_buffer));
if (*part_buffer_ptr == NULL) {
free (buffer);
if (part) {
int j;
for (j = 0; j < i; j++) {
if (part[j].node_inx)
free (part[j].node_inx);
}
free (part);
}
return ENOMEM;
}
(*part_buffer_ptr)->last_update = (time_t) uint32_time;
(*part_buffer_ptr)->part_count = i;
(*part_buffer_ptr)->raw_buffer_ptr = buffer;
(*part_buffer_ptr)->part_table_ptr = part;
return 0;
return SLURM_SUCCESS ;
}
......@@ -71,20 +71,12 @@ struct job_buffer {
struct job_table *job_table_ptr;
};
struct part_table {
char *name; /* name of the partition */
uint32_t max_time; /* minutes or INFINITE */
uint32_t max_nodes; /* per job or INFINITE */
uint32_t total_nodes; /* total number of nodes in the partition */
uint32_t total_cpus; /* total number of cpus in the partition */
uint16_t default_part; /* 1 if this is default partition */
uint16_t key; /* 1 if slurm distributed key is required for use */
uint16_t shared; /* 1 if job can share nodes, 2 if job must share nodes */
uint16_t state_up; /* 1 if state is up, 0 if down */
char *nodes; /* comma delimited list names of nodes in partition */
int *node_inx; /* list index pairs into node_table:
start_range_1, end_range_1, start_range_2, .., -1 */
char *allow_groups; /* comma delimited list of groups, null indicates all */
struct node_buffer {
time_t last_update; /* time of last buffer update */
uint32_t node_count; /* count of entries in node_table */
void *raw_buffer_ptr; /* raw network buffer info */
struct node_table *node_table_ptr;
};
struct part_buffer {
......@@ -172,7 +164,9 @@ extern void slurm_print_node_table (node_table_t * node_ptr );
* slurm_free_partition_info - free the partition information buffer (if allocated)
* NOTE: buffer is loaded by slurm_load_partitions.
*/
extern void slurm_free_part_info (struct part_buffer *part_buffer_ptr);
extern void slurm_free_partition_info ( partition_info_msg_t * part_info_ptr);
extern void slurm_print_partition_info ( partition_info_msg_t * part_info_ptr ) ;
extern void slurm_print_partition_table ( partition_table_t * part_ptr ) ;
/*
* slurm_load_build - load the slurm build information buffer for use by info
......@@ -208,7 +202,7 @@ extern int slurm_load_node (time_t update_time, node_info_msg_t **node_info_msg_
* ENOMEM if malloc failure
* NOTE: the allocated memory at part_buffer_ptr is freed by slurm_free_partition_info.
*/
extern int slurm_load_part (time_t update_time, struct part_buffer **part_buffer_ptr);
extern int slurm_load_partitions (time_t update_time, partition_info_msg_t **part_buffer_ptr);
/*
* slurm_submit - submit/queue a job with supplied constraints.
......@@ -263,7 +257,7 @@ extern int parse_node_name (char *node_name, char **format, int *start_inx,
* slurm_reconfigure - request that slurmctld re-read the configuration files
* output: returns 0 on success, errno otherwise
*/
extern int reconfigure ();
extern int slurm_reconfigure ();
/*
* update_config - request that slurmctld update its configuration per request
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment