Skip to content
Snippets Groups Projects
Commit 0f62d94e authored by Danny Auble's avatar Danny Auble
Browse files

forward documentation

parent 91f3a749
No related branches found
No related tags found
No related merge requests found
......@@ -270,14 +270,11 @@ extern int forward_msg(forward_struct_t *forward_struct,
/*
* forward_set - add to the message possible forwards to go to
* IN: forward - forward_t * - message to add forwards to
* IN: thr_count - int - number of messages already done
* IN: pos - int * - posistion in the forward_addr and names
* will change to update to set the
* correct start after forwarding
* information has been added.
* IN: forward_addr- sockaddr_in * - list of address structures to forward to
* IN: forward_names - char * - list of names in MAX_SLURM_NAME increments
* IN: forward - forward_t * - struct to store forward info
* IN: span - int - count of forwards to do
* IN: pos - int * - position in the original messages addr
* structure
* IN: from - forward_t * - information from original message
* RET: SLURM_SUCCESS - int
*/
extern int forward_set(forward_t *forward,
......@@ -331,6 +328,20 @@ extern int forward_set(forward_t *forward,
return SLURM_SUCCESS;
}
/*
* forward_set_launch - add to the message possible forwards to go to during
* a job launch
* IN: forward - forward_t * - struct to store forward info
* IN: span - int - count of forwards to do
* IN: step_layout - slurm_step_layout_t * - contains information about hosts
* from original message
* IN: slurmd_addr - slurm_addr * - addrs of hosts to send messages to
* IN: itr - hostlist_iterator_t - count into host list of hosts to
* send messages to
* IN: timeout - int32_t - timeout if any to wait for
* message responses
* RET: SLURM_SUCCESS - int
*/
extern int forward_set_launch(forward_t *forward,
int span,
int *pos,
......@@ -352,7 +363,8 @@ extern int forward_set_launch(forward_t *forward,
if(span > 0) {
forward->addr = xmalloc(sizeof(slurm_addr) * span);
forward->name = xmalloc(sizeof(char) * (MAX_SLURM_NAME * span));
forward->name =
xmalloc(sizeof(char) * (MAX_SLURM_NAME * span));
forward->node_id = xmalloc(sizeof(int32_t) * span);
forward->timeout = timeout;
forward->init = FORWARD_INIT;
......
......@@ -34,23 +34,174 @@
#include "src/common/dist_tasks.h"
/* STRUCTURES */
/*
* forward_init - initialize forward structure
* IN: forward - forward_t * - struct to store forward info
* IN: from - forward_t * - (OPTIONAL) can be NULL, can be used to
* init the forward to this state
* RET: VOID
*/
extern void forward_init(forward_t *forward, forward_t *from);
/*
* forward_msg - logic to forward and collect return codes from children
* of a parent forward
* IN: forward_struct - forward_struct_t * - holds information about message
* that needs to be forwarded to
* child processes
* IN: header - header_t - header from message that came in
* needing to be forwarded.
* RET: SLURM_SUCCESS - int
*/
/*********************************************************************
Code taken from common/slurm_protocol_api.c
//This function should only be used when a message is being received.
//set up the forward_struct off of the buffer being received right after
//header is pulled off the received buffer
forward_struct = xmalloc(sizeof(forward_struct_t));
forward_struct->buf_len = remaining_buf(buffer);
forward_struct->buf = xmalloc(sizeof(char) * forward_struct->buf_len);
memcpy(forward_struct->buf, &buffer->head[buffer->processed],
forward_struct->buf_len);
forward_struct->ret_list = ret_list;
forward_struct->timeout = timeout - header.forward.timeout;
//send the structure created off the buffer and the header from the message
if(forward_msg(forward_struct, &header) == SLURM_ERROR) {
error("problem with forward msg");
}
*********************************************************************/
extern int forward_msg(forward_struct_t *forward_struct,
header_t *header);
/*
* set_forward_addrs - add to the message possible forwards to go to
* forward_set - add to the message possible forwards to go to
* IN: forward - forward_t * - struct to store forward info
* IN: thr_count - int - number of messages already done
* IN: from - forward_t * - info to separate into new forward struct
* IN: span - int - count of forwards to do
* IN: pos - int * - position in the original messages
* structures
* IN: from - forward_t * - information from original message
* RET: SLURM_SUCCESS - int
*/
/********************************************************************
Code taken from slurmctld/agent.c
This function should be used when sending a message that could be forwarded.
//set the span with total count of hosts to send to
int *span = set_span(agent_arg_ptr->node_count);
// fill in a local forward structure with count of thread to create
// array of names and addrs of hosts and node_id (if any) to be sent to
// along with the timeout of the message
forward.cnt = agent_info_ptr->thread_count;
forward.name = agent_arg_ptr->node_names;
forward.addr = agent_arg_ptr->slurm_addr;
forward.node_id = NULL;
forward.timeout = SLURM_MESSAGE_TIMEOUT_MSEC_STATIC;
for (i = 0; i < agent_info_ptr->thread_count; i++) {
thread_ptr[thr_count].state = DSH_NEW;
thread_ptr[thr_count].slurm_addr = agent_arg_ptr->slurm_addr[i];
strncpy(thread_ptr[thr_count].node_name,
&agent_arg_ptr->node_names[i * MAX_SLURM_NAME],
MAX_SLURM_NAME);
// for each 'main' thread we want to add hosts for this one to forward to.
// send the thread_ptr's forward, span at the thr_count, the address of
// position we are in the count, and the forward we set up earlier
forward_set(&thread_ptr[thr_count].forward,
span[thr_count],
&i,
&forward);
thr_count++;
}
//free the span
xfree(span);
// set the new thread_count to the number with the forwards taken out of the
// count since we don't keep track of those on the master sender
agent_info_ptr->thread_count = thr_count;
********************************************************************/
extern int forward_set (forward_t *forward,
int span,
int *pos,
forward_t *from);
/*
* forward_set_launch - add to the message possible forwards to go to during
* a job launch
* IN: forward - forward_t * - struct to store forward info
* IN: span - int - count of forwards to do
* IN: step_layout - slurm_step_layout_t * - contains information about hosts
* from original message
* IN: slurmd_addr - slurm_addr * - addrs of hosts to send messages to
* IN: itr - hostlist_iterator_t - count into host list of hosts to
* send messages to
* IN: timeout - int32_t - timeout if any to wait for
* message responses
* RET: SLURM_SUCCESS - int
*/
/********************************************************************
Code taken from srun/launch.c
This function should be used when sending a launch message that could be forwarded.
//set the span with total count of hosts to send to
int *span = set_span(job->step_layout->num_hosts);
//set up hostlist off the nodelist of the job
hostlist = hostlist_create(job->nodelist);
itr = hostlist_iterator_create(hostlist);
job->thr_count = 0;
for (i = 0; i < job->step_layout->num_hosts; i++) {
slurm_msg_t *m = &msg_array_ptr[job->thr_count];
m->srun_node_id = (uint32_t)i;
m->msg_type = REQUEST_LAUNCH_TASKS;
m->data = &r;
m->ret_list = NULL;
// set orig_addr.sin_addr.s_addr to 0, meaning that no one
// forwarded this message to this node
m->orig_addr.sin_addr.s_addr = 0;
m->buffer = buffer;
j=0;
while(host = hostlist_next(itr)) {
if(!strcmp(host,job->step_layout->host[i])) {
free(host);
break;
}
j++;
free(host);
}
hostlist_iterator_reset(itr);
memcpy(&m->address,
&job->slurmd_addr[j],
sizeof(slurm_addr));
// send the messages forward struct to be filled in with the information from
// the other variables
forward_set_launch(&m->forward,
span[job->thr_count],
&i,
job->step_layout,
job->slurmd_addr,
itr,
opt.msg_timeout);
//increment the count of threads created
job->thr_count++;
}
//free the span and destroy the hostlist we created
xfree(span);
hostlist_iterator_destroy(itr);
hostlist_destroy(hostlist);
********************************************************************/
extern int forward_set_launch (forward_t *forward,
int span,
int *pos,
......@@ -59,6 +210,29 @@ extern int forward_set_launch (forward_t *forward,
hostlist_iterator_t itr,
int32_t timeout);
/*
* no_resp_forward - Used to respond for nodes not able to respond since
* the parent had failed in some way
* IN: forward - forward_t * -
* IN: ret_list - List * -
* IN: err - int - type of error from parent
* RET: SLURM_SUCCESS - int
*/
/*********************************************************************
Code taken from common/slurm_protocol_api.c
//This function should only be used after a message is received.
// a call to slurm_receive_msg will fill in a ret_list
ret_list = slurm_receive_msg(fd, resp, timeout);
}
// if ret_list is null or list_count is 0, there may have been an error;
// this function checks whether there were supposed to be forwards so that
// we handle the return code for those messages
if(!ret_list || list_count(ret_list) == 0) {
no_resp_forwards(&req->forward, &ret_list, errno);
}
**********************************************************************/
extern int no_resp_forwards(forward_t *forward, List *ret_list, int err);
/* destroyers */
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment