From 176f0e6494ada4627dcb8f54e896210ac9db7645 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Mon, 5 Aug 2002 18:11:57 +0000 Subject: [PATCH] Fixed support for job steps, added dump of incoming RPC data for job step. --- src/slurmctld/controller.c | 4 ++- src/slurmctld/job_mgr.c | 2 +- src/slurmctld/pack.c | 6 ++-- src/slurmctld/slurmctld.h | 5 ++- src/slurmctld/step_mgr.c | 62 +++++++++++++++++++++++++++----------- 5 files changed, 55 insertions(+), 24 deletions(-) diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index df02f485b98..155e5ed4e65 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -1,5 +1,5 @@ /*****************************************************************************\ - * controller.c - main control machine daemon for slurm + * controller.c - main control machine daemon for slurm ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -1184,6 +1184,8 @@ slurm_rpc_job_step_create( slurm_msg_t* msg ) start_time = clock (); + /* issue the RPC */ + dump_step_desc ( req_step_msg ); error_code = step_create ( req_step_msg, &step_rec ); /* return result */ diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 59edbaaac14..3312c82b2a4 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -300,7 +300,7 @@ dump_job_desc(job_desc_msg_t * job_specs) return; job_id = (job_specs->job_id != NO_VAL) ? 
job_specs->job_id : -1 ; - debug3("JobDesc: user_id=%u job_id=%ld partition=%s, name=%s\n", + debug3("JobDesc: user_id=%u job_id=%ld partition=%s name=%s\n", job_specs->user_id, job_id, job_specs->partition, job_specs->name); diff --git a/src/slurmctld/pack.c b/src/slurmctld/pack.c index 928cb13b2e3..32ae18e67fd 100644 --- a/src/slurmctld/pack.c +++ b/src/slurmctld/pack.c @@ -1,5 +1,5 @@ /*****************************************************************************\ - * pack.c - pack slurmctld structures into buffers understood by the + * pack.c - pack slurmctld structures into buffers understood by the * slurm_protocol ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. @@ -100,10 +100,10 @@ pack_ctld_job_step_info_reponse_msg( List steps, void** buffer_base, int* buffer uint32_t list_size = list_count(steps); current = *buffer_base = xmalloc( buffer_size ); - pack32( current_time, &current, &current_size ); /* FIXME What am I really suppose to put as the time?*/ - debug("job_step_count = %d\n"); + debug("job_step_count = %u\n", list_size); pack32( list_size , &current, &current_size ); + /* Pack the Steps */ while( ( current_step = (struct step_record*)list_next( iterator ) ) != NULL ) { diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index cfb4ded2deb..9ccb2cb678b 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -262,7 +262,10 @@ extern int delete_part_record (char *name); extern int delete_step_record (struct job_record *job_ptr, uint32_t step_id); /* dump_job_desc - dump the incoming job submit request message */ -void dump_job_desc(job_desc_msg_t * job_specs); +extern void dump_job_desc(job_desc_msg_t * job_specs); + +/* dump_step_desc - dump the incoming step initiate request message */ +extern void dump_step_desc(step_specs *step_spec); /* find_job_record - return a pointer to the job record with the given job_id */ extern struct job_record 
*find_job_record (uint32_t job_id); diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c index 835fa65ea98..da33410cb64 100644 --- a/src/slurmctld/step_mgr.c +++ b/src/slurmctld/step_mgr.c @@ -59,7 +59,7 @@ create_step_record (struct job_record *job_ptr) step_record_point->job_ptr = job_ptr; step_record_point->step_id = (job_ptr->next_step_id)++; - step_record_point->start_time = time( NULL ) ; + step_record_point->start_time = time ( NULL ) ; if (list_append (job_ptr->step_list, step_record_point) == NULL) fatal ("create_step_record: unable to allocate memory"); @@ -105,6 +105,20 @@ delete_step_record (struct job_record *job_ptr, uint32_t step_id) } +/* dump_step_desc - dump the incoming step initiate request message */ +void +dump_step_desc(step_specs *step_spec) +{ + if (step_spec == NULL) + return; + + debug3("StepDesc: user_id=%u job_id=%u node_count=%u, cpu_count=%u\n", + step_spec->user_id, step_spec->job_id, step_spec->node_count, step_spec->cpu_count); + debug3(" relative=%u node_list=%s\n", + step_spec->relative, step_spec->node_list); +} + + /* * find_step_record - return a pointer to the step record with the given job_id and step_id * input: job_ptr - pointer to job table entry to have step record added @@ -266,30 +280,36 @@ pick_step_nodes (struct job_record *job_ptr, step_specs *step_spec ) { if (job_ptr->node_bitmap == NULL) return NULL; - nodes_avail = bit_copy(job_ptr->node_bitmap); + nodes_avail = bit_copy (job_ptr->node_bitmap); if ( step_spec->node_count == INFINITE) /* return all available nodes */ return nodes_avail; if (step_spec->node_list) { - if ( step_spec->relative ) { - /* FIXME need to resolve format of relative_node_list */ - info ("pick_step_nodes: relative_node_list not yet supported"); - - } - else { - error_code = node_name2bitmap (step_spec->node_list, &nodes_picked); - if (error_code) { - info ("pick_step_nodes: invalid node list %s", step_spec->node_list); - goto cleanup; - } - if (bit_super_set (nodes_picked, 
job_ptr->node_bitmap) == 0) { - info ("pick_step_nodes: requested nodes %s not part of job %u", - step_spec->node_list, job_ptr->job_id); - goto cleanup; - } + error_code = node_name2bitmap (step_spec->node_list, &nodes_picked); + if (error_code) { + info ("pick_step_nodes: invalid node list %s", step_spec->node_list); + goto cleanup; + } + if (bit_super_set (nodes_picked, job_ptr->node_bitmap) == 0) { + info ("pick_step_nodes: requested nodes %s not part of job %u", + step_spec->node_list, job_ptr->job_id); + goto cleanup; } } + else if (step_spec->relative) { + /* Remove first (step_spec->relative) nodes from available list */ + bitstr_t *relative_nodes = NULL; + relative_nodes = bit_pick_cnt (nodes_avail, step_spec->relative); + if (relative_nodes == NULL) { + info ("pick_step_nodes: Invalid relative value (%u) for job %u", + step_spec->relative, job_ptr->job_id); + goto cleanup; + } + bit_not (relative_nodes); + bit_and (nodes_avail, relative_nodes); + bit_free (relative_nodes); + } else nodes_picked = bit_alloc (bit_size (nodes_avail) ); @@ -377,6 +397,12 @@ step_create ( step_specs *step_specs, struct step_record** new_step_record ) step_specs->user_id != 0) return ESLURM_ACCESS_DENIED ; + if ((job_ptr->job_state == JOB_COMPLETE) || + (job_ptr->job_state == JOB_FAILED) || + (job_ptr->job_state == JOB_TIMEOUT) || + (job_ptr->job_state == JOB_STAGE_OUT)) + return ESLURM_ALREADY_DONE; + nodeset = pick_step_nodes (job_ptr, step_specs ); if (nodeset == NULL) -- GitLab