From 176f0e6494ada4627dcb8f54e896210ac9db7645 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Mon, 5 Aug 2002 18:11:57 +0000
Subject: [PATCH] Fixed support for job steps, added dump of incoming RPC data
 for job step.

---
 src/slurmctld/controller.c |  4 ++-
 src/slurmctld/job_mgr.c    |  2 +-
 src/slurmctld/pack.c       |  6 ++--
 src/slurmctld/slurmctld.h  |  5 ++-
 src/slurmctld/step_mgr.c   | 62 +++++++++++++++++++++++++++-----------
 5 files changed, 55 insertions(+), 24 deletions(-)

diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index df02f485b98..155e5ed4e65 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -1,5 +1,5 @@
 /*****************************************************************************\
- * controller.c - main control machine daemon for slurm
+ *  controller.c - main control machine daemon for slurm
  *****************************************************************************
  *  Copyright (C) 2002 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -1184,6 +1184,8 @@ slurm_rpc_job_step_create( slurm_msg_t* msg )
 
 	start_time = clock ();
 
+	/* log the incoming request, then create the job step */
+	dump_step_desc ( req_step_msg );
 	error_code = step_create ( req_step_msg, &step_rec );
 
 	/* return result */
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 59edbaaac14..3312c82b2a4 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -300,7 +300,7 @@ dump_job_desc(job_desc_msg_t * job_specs)
 		return;
 
 	job_id = (job_specs->job_id != NO_VAL) ? job_specs->job_id : -1 ;
-	debug3("JobDesc: user_id=%u job_id=%ld partition=%s, name=%s\n", 
+	debug3("JobDesc: user_id=%u job_id=%ld partition=%s name=%s\n", 
 		job_specs->user_id, job_id, 
 		job_specs->partition, job_specs->name);
 
diff --git a/src/slurmctld/pack.c b/src/slurmctld/pack.c
index 928cb13b2e3..32ae18e67fd 100644
--- a/src/slurmctld/pack.c
+++ b/src/slurmctld/pack.c
@@ -1,5 +1,5 @@
 /*****************************************************************************\
- * pack.c - pack slurmctld structures into buffers understood by the 
+ *  pack.c - pack slurmctld structures into buffers understood by the 
  *          slurm_protocol 
  *****************************************************************************
  *  Copyright (C) 2002 The Regents of the University of California.
@@ -100,10 +100,10 @@ pack_ctld_job_step_info_reponse_msg( List steps, void** buffer_base, int* buffer
 	uint32_t list_size = list_count(steps);
 	current = *buffer_base = xmalloc( buffer_size );
 
-
 	pack32( current_time, &current, &current_size ); /* FIXME What am I really suppose to put as the time?*/
-	debug("job_step_count = %d\n");
+	debug("job_step_count = %u\n", list_size);
 	pack32( list_size , &current, &current_size );
+
 	/* Pack the Steps */
 	while( ( current_step = (struct step_record*)list_next( iterator ) ) != NULL )
 	{
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index cfb4ded2deb..9ccb2cb678b 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -262,7 +262,10 @@ extern int delete_part_record (char *name);
 extern int delete_step_record (struct job_record *job_ptr, uint32_t step_id);
 
 /* dump_job_desc - dump the incoming job submit request message */
-void dump_job_desc(job_desc_msg_t * job_specs);
+extern void dump_job_desc(job_desc_msg_t * job_specs);
+
+/* dump_step_desc - dump (via debug3) the fields of an incoming job step create request; no-op if step_spec is NULL */
+extern void dump_step_desc(step_specs *step_spec);
 
 /*  find_job_record - return a pointer to the job record with the given job_id */
 extern struct job_record *find_job_record (uint32_t job_id);
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index 835fa65ea98..da33410cb64 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -59,7 +59,7 @@ create_step_record (struct job_record *job_ptr)
 
 	step_record_point->job_ptr = job_ptr; 
 	step_record_point->step_id = (job_ptr->next_step_id)++;
-	step_record_point->start_time = time( NULL ) ;
+	step_record_point->start_time = time ( NULL ) ;
 
 	if (list_append (job_ptr->step_list, step_record_point) == NULL)
 		fatal ("create_step_record: unable to allocate memory");
@@ -105,6 +105,20 @@ delete_step_record (struct job_record *job_ptr, uint32_t step_id)
 }
 
 
+/* dump_step_desc - dump (via debug3) the fields of an incoming job step create request
+ * input: step_spec - request to dump; NULL is ignored, a NULL node_list prints as "(null)" */
+void
+dump_step_desc(step_specs *step_spec)
+{
+	if (step_spec == NULL) 
+		return;
+	debug3("StepDesc: user_id=%u job_id=%u node_count=%u, cpu_count=%u\n", 
+		step_spec->user_id, step_spec->job_id, step_spec->node_count, step_spec->cpu_count);
+	debug3("   relative=%u node_list=%s\n", step_spec->relative, 
+		step_spec->node_list ? step_spec->node_list : "(null)");	/* node_list is optional; NULL with %s is undefined */
+}
+
+
 /* 
  * find_step_record - return a pointer to the step record with the given job_id and step_id
  * input: job_ptr - pointer to job table entry to have step record added
@@ -266,30 +280,36 @@ pick_step_nodes (struct job_record  *job_ptr, step_specs *step_spec ) {
 	if (job_ptr->node_bitmap == NULL)
 		return NULL;
 	
-	nodes_avail = bit_copy(job_ptr->node_bitmap);
+	nodes_avail = bit_copy (job_ptr->node_bitmap);
 
 	if ( step_spec->node_count == INFINITE)	/* return all available nodes */
 		return nodes_avail;
 
 	if (step_spec->node_list) {
-		if ( step_spec->relative ) {
-			/* FIXME need to resolve format of relative_node_list */
-			info ("pick_step_nodes: relative_node_list not yet supported");
-
-		} 
-		else {
-			error_code = node_name2bitmap (step_spec->node_list, &nodes_picked);
-			if (error_code) {
-				info ("pick_step_nodes: invalid node list %s", step_spec->node_list);
-				goto cleanup;
-			}
-			if (bit_super_set (nodes_picked, job_ptr->node_bitmap) == 0) {
-				info ("pick_step_nodes: requested nodes %s not part of job %u",
-					step_spec->node_list, job_ptr->job_id);
-				goto cleanup;
-			}
+		error_code = node_name2bitmap (step_spec->node_list, &nodes_picked);
+		if (error_code) {
+			info ("pick_step_nodes: invalid node list %s", step_spec->node_list);
+			goto cleanup;
+		}
+		if (bit_super_set (nodes_picked, job_ptr->node_bitmap) == 0) {
+			info ("pick_step_nodes: requested nodes %s not part of job %u",
+				step_spec->node_list, job_ptr->job_id);
+			goto cleanup;
 		}
 	}
+	else if (step_spec->relative) {
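+		/* Remove the first (step_spec->relative) nodes from the available list. NOTE(review): nodes_picked is not assigned on this path (the final else allocates it) - confirm the code below tolerates that */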
+		bitstr_t *relative_nodes = NULL;
+		relative_nodes = bit_pick_cnt (nodes_avail, step_spec->relative);
+		if (relative_nodes == NULL) {
+			info ("pick_step_nodes: Invalid relative value (%u) for job %u",
+				step_spec->relative, job_ptr->job_id);
+			goto cleanup;
+		}
+		bit_not (relative_nodes);
+		bit_and (nodes_avail, relative_nodes);
+		bit_free (relative_nodes);
+	}
 	else
 		nodes_picked = bit_alloc (bit_size (nodes_avail) );
 
@@ -377,6 +397,12 @@ step_create ( step_specs *step_specs, struct step_record** new_step_record  )
 	    	step_specs->user_id != 0) 
 		return ESLURM_ACCESS_DENIED ;
 
+	if ((job_ptr->job_state == JOB_COMPLETE) || 
+	    (job_ptr->job_state == JOB_FAILED) ||
+	    (job_ptr->job_state == JOB_TIMEOUT) ||
+	    (job_ptr->job_state == JOB_STAGE_OUT))
+		return ESLURM_ALREADY_DONE;
+
 	nodeset = pick_step_nodes (job_ptr, step_specs );
 
 	if (nodeset == NULL)
-- 
GitLab