Skip to content
Snippets Groups Projects
Commit 176f0e64 authored by Moe Jette's avatar Moe Jette
Browse files

Fixed support for job steps, added dump of incoming RPC data for job step.

parent c2e5d310
No related branches found
No related tags found
No related merge requests found
/*****************************************************************************\
* controller.c - main control machine daemon for slurm
* controller.c - main control machine daemon for slurm
*****************************************************************************
* Copyright (C) 2002 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
......@@ -1184,6 +1184,8 @@ slurm_rpc_job_step_create( slurm_msg_t* msg )
start_time = clock ();
/* issue the RPC */
dump_step_desc ( req_step_msg );
error_code = step_create ( req_step_msg, &step_rec );
/* return result */
......
......@@ -300,7 +300,7 @@ dump_job_desc(job_desc_msg_t * job_specs)
return;
job_id = (job_specs->job_id != NO_VAL) ? job_specs->job_id : -1 ;
debug3("JobDesc: user_id=%u job_id=%ld partition=%s, name=%s\n",
debug3("JobDesc: user_id=%u job_id=%ld partition=%s name=%s\n",
job_specs->user_id, job_id,
job_specs->partition, job_specs->name);
......
/*****************************************************************************\
* pack.c - pack slurmctld structures into buffers understood by the
* pack.c - pack slurmctld structures into buffers understood by the
* slurm_protocol
*****************************************************************************
* Copyright (C) 2002 The Regents of the University of California.
......@@ -100,10 +100,10 @@ pack_ctld_job_step_info_reponse_msg( List steps, void** buffer_base, int* buffer
uint32_t list_size = list_count(steps);
current = *buffer_base = xmalloc( buffer_size );
pack32( current_time, &current, &current_size ); /* FIXME What am I really suppose to put as the time?*/
debug("job_step_count = %d\n");
debug("job_step_count = %u\n", list_size);
pack32( list_size , &current, &current_size );
/* Pack the Steps */
while( ( current_step = (struct step_record*)list_next( iterator ) ) != NULL )
{
......
......@@ -262,7 +262,10 @@ extern int delete_part_record (char *name);
extern int delete_step_record (struct job_record *job_ptr, uint32_t step_id);
/* dump_job_desc - dump the incoming job submit request message */
void dump_job_desc(job_desc_msg_t * job_specs);
extern void dump_job_desc(job_desc_msg_t * job_specs);
/* dump_step_desc - dump the incoming step initiate request message */
extern void dump_step_desc(step_specs *step_spec);
/* find_job_record - return a pointer to the job record with the given job_id */
extern struct job_record *find_job_record (uint32_t job_id);
......
......@@ -59,7 +59,7 @@ create_step_record (struct job_record *job_ptr)
step_record_point->job_ptr = job_ptr;
step_record_point->step_id = (job_ptr->next_step_id)++;
step_record_point->start_time = time( NULL ) ;
step_record_point->start_time = time ( NULL ) ;
if (list_append (job_ptr->step_list, step_record_point) == NULL)
fatal ("create_step_record: unable to allocate memory");
......@@ -105,6 +105,20 @@ delete_step_record (struct job_record *job_ptr, uint32_t step_id)
}
/*
 * dump_step_desc - dump the incoming step initiate request message
 * input: step_spec - pointer to the step request to be logged; a NULL
 *                    pointer is silently ignored
 * NOTE: the request is only read, never modified; all output goes
 *       through debug3()
 */
void
dump_step_desc(step_specs *step_spec)
{
	const char *node_list;

	if (step_spec == NULL)
		return;

	/* node_list is optional in a step request, and passing a NULL
	 * pointer to a %s conversion is undefined behavior, so print a
	 * placeholder instead of the raw pointer */
	node_list = step_spec->node_list ? step_spec->node_list : "(null)";

	debug3("StepDesc: user_id=%u job_id=%u node_count=%u, cpu_count=%u\n",
		step_spec->user_id, step_spec->job_id,
		step_spec->node_count, step_spec->cpu_count);
	debug3(" relative=%u node_list=%s\n",
		step_spec->relative, node_list);
}
/*
* find_step_record - return a pointer to the step record with the given job_id and step_id
* input: job_ptr - pointer to job table entry to have step record added
......@@ -266,30 +280,36 @@ pick_step_nodes (struct job_record *job_ptr, step_specs *step_spec ) {
if (job_ptr->node_bitmap == NULL)
return NULL;
nodes_avail = bit_copy(job_ptr->node_bitmap);
nodes_avail = bit_copy (job_ptr->node_bitmap);
if ( step_spec->node_count == INFINITE) /* return all available nodes */
return nodes_avail;
if (step_spec->node_list) {
if ( step_spec->relative ) {
/* FIXME need to resolve format of relative_node_list */
info ("pick_step_nodes: relative_node_list not yet supported");
}
else {
error_code = node_name2bitmap (step_spec->node_list, &nodes_picked);
if (error_code) {
info ("pick_step_nodes: invalid node list %s", step_spec->node_list);
goto cleanup;
}
if (bit_super_set (nodes_picked, job_ptr->node_bitmap) == 0) {
info ("pick_step_nodes: requested nodes %s not part of job %u",
step_spec->node_list, job_ptr->job_id);
goto cleanup;
}
error_code = node_name2bitmap (step_spec->node_list, &nodes_picked);
if (error_code) {
info ("pick_step_nodes: invalid node list %s", step_spec->node_list);
goto cleanup;
}
if (bit_super_set (nodes_picked, job_ptr->node_bitmap) == 0) {
info ("pick_step_nodes: requested nodes %s not part of job %u",
step_spec->node_list, job_ptr->job_id);
goto cleanup;
}
}
else if (step_spec->relative) {
/* Remove first (step_spec->relative) nodes from available list */
bitstr_t *relative_nodes = NULL;
relative_nodes = bit_pick_cnt (nodes_avail, step_spec->relative);
if (relative_nodes == NULL) {
info ("pick_step_nodes: Invalid relative value (%u) for job %u",
step_spec->relative, job_ptr->job_id);
goto cleanup;
}
bit_not (relative_nodes);
bit_and (nodes_avail, relative_nodes);
bit_free (relative_nodes);
}
else
nodes_picked = bit_alloc (bit_size (nodes_avail) );
......@@ -377,6 +397,12 @@ step_create ( step_specs *step_specs, struct step_record** new_step_record )
step_specs->user_id != 0)
return ESLURM_ACCESS_DENIED ;
if ((job_ptr->job_state == JOB_COMPLETE) ||
(job_ptr->job_state == JOB_FAILED) ||
(job_ptr->job_state == JOB_TIMEOUT) ||
(job_ptr->job_state == JOB_STAGE_OUT))
return ESLURM_ALREADY_DONE;
nodeset = pick_step_nodes (job_ptr, step_specs );
if (nodeset == NULL)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment